From d8f2ebaac650dc35db3bf5cf10e8ee1115b455f8 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 13 Apr 2017 11:41:38 +1000
Subject: sync_file: get rid of internal reference count.

sync_file uses the reference count of the file, the internal
kref was never getting moved past 1.

We can reintroduce this if we decide we need it later.

[airlied: fix buildbot warnings]

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Acked-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170413014144.637-2-airlied@gmail.com
---
 include/linux/sync_file.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 3e3ab84fc4cd..d37beefdfbd5 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -14,7 +14,6 @@
 #define _LINUX_SYNC_FILE_H
 
 #include <linux/types.h>
-#include <linux/kref.h>
 #include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
@@ -24,7 +23,6 @@
 /**
  * struct sync_file - sync file to export to the userspace
  * @file:		file representing this fence
- * @kref:		reference count on fence.
  * @name:		name of sync_file.  Useful for debugging
  * @sync_file_list:	membership in global file list
  * @wq:			wait queue for fence signaling
@@ -33,7 +31,6 @@
  */
 struct sync_file {
 	struct file		*file;
-	struct kref		kref;
 	char			name[32];
 #ifdef CONFIG_DEBUG_FS
 	struct list_head	sync_file_list;
-- 
cgit v1.2.3


From 9ff88edc5e7bad08bdd79a20f14533a5cf44b865 Mon Sep 17 00:00:00 2001
From: Song Hongyan <hongyan.song@intel.com>
Date: Sun, 7 May 2017 18:24:24 +0800
Subject: iio: hid-sensor-rotation: Add relative orientation sensor hid support

Relative orientation(AG) sensor is a 6dof orientation sensor,
it depends on acceleration and gyroscope sensor data. It gives
a quaternion describing the orientation of the device relative
to an initial orientation. It is a standard HID sensor.

More information can be found in:
http://www.usb.org/developers/hidpage/HUTRR59_-_Usages_for_Wearables.pdf

Relative orientation(AG) sensor and dev rotation sensor have same
channels and share channel usage id. So the most of the code for
relative orientation sensor can be reused.

Signed-off-by: Song Hongyan <hongyan.song@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Reviewed-by: Xu Even <even.xu@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/orientation/hid-sensor-rotation.c | 28 +++++++++++++++++++--------
 include/linux/hid-sensor-ids.h                |  1 +
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c
index a97e802ca523..4d953c26325f 100644
--- a/drivers/iio/orientation/hid-sensor-rotation.c
+++ b/drivers/iio/orientation/hid-sensor-rotation.c
@@ -218,7 +218,7 @@ static int dev_rot_parse_report(struct platform_device *pdev,
 static int hid_dev_rot_probe(struct platform_device *pdev)
 {
 	int ret;
-	static char *name = "dev_rotation";
+	static char *name;
 	struct iio_dev *indio_dev;
 	struct dev_rot_state *rot_state;
 	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
@@ -234,8 +234,18 @@ static int hid_dev_rot_probe(struct platform_device *pdev)
 	rot_state->common_attributes.hsdev = hsdev;
 	rot_state->common_attributes.pdev = pdev;
 
-	ret = hid_sensor_parse_common_attributes(hsdev,
-				HID_USAGE_SENSOR_DEVICE_ORIENTATION,
+	switch (hsdev->usage) {
+	case HID_USAGE_SENSOR_DEVICE_ORIENTATION:
+		name = "dev_rotation";
+		break;
+	case HID_USAGE_SENSOR_RELATIVE_ORIENTATION:
+		name = "relative_orientation";
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = hid_sensor_parse_common_attributes(hsdev, hsdev->usage,
 				&rot_state->common_attributes);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to setup common attributes\n");
@@ -252,8 +262,7 @@ static int hid_dev_rot_probe(struct platform_device *pdev)
 
 	ret = dev_rot_parse_report(pdev, hsdev,
 				   (struct iio_chan_spec *)indio_dev->channels,
-				   HID_USAGE_SENSOR_DEVICE_ORIENTATION,
-				   rot_state);
+					hsdev->usage, rot_state);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to setup attributes\n");
 		return ret;
@@ -288,8 +297,7 @@ static int hid_dev_rot_probe(struct platform_device *pdev)
 	rot_state->callbacks.send_event = dev_rot_proc_event;
 	rot_state->callbacks.capture_sample = dev_rot_capture_sample;
 	rot_state->callbacks.pdev = pdev;
-	ret = sensor_hub_register_callback(hsdev,
-					HID_USAGE_SENSOR_DEVICE_ORIENTATION,
+	ret = sensor_hub_register_callback(hsdev, hsdev->usage,
 					&rot_state->callbacks);
 	if (ret) {
 		dev_err(&pdev->dev, "callback reg failed\n");
@@ -314,7 +322,7 @@ static int hid_dev_rot_remove(struct platform_device *pdev)
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
 	struct dev_rot_state *rot_state = iio_priv(indio_dev);
 
-	sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_DEVICE_ORIENTATION);
+	sensor_hub_remove_callback(hsdev, hsdev->usage);
 	iio_device_unregister(indio_dev);
 	hid_sensor_remove_trigger(&rot_state->common_attributes);
 	iio_triggered_buffer_cleanup(indio_dev);
@@ -327,6 +335,10 @@ static const struct platform_device_id hid_dev_rot_ids[] = {
 		/* Format: HID-SENSOR-usage_id_in_hex_lowercase */
 		.name = "HID-SENSOR-20008a",
 	},
+	{
+		/* Relative orientation(AG) sensor */
+		.name = "HID-SENSOR-20008e",
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(platform, hid_dev_rot_ids);
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 761f86242473..b5469e878e99 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -90,6 +90,7 @@
 #define HID_USAGE_SENSOR_ORIENT_TILT_Z				0x200481
 
 #define HID_USAGE_SENSOR_DEVICE_ORIENTATION			0x20008A
+#define HID_USAGE_SENSOR_RELATIVE_ORIENTATION			0x20008E
 #define HID_USAGE_SENSOR_ORIENT_ROTATION_MATRIX			0x200482
 #define HID_USAGE_SENSOR_ORIENT_QUATERNION			0x200483
 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX			0x200484
-- 
cgit v1.2.3


From 00907c7a3282053dd4782e02c3101809608e7ea7 Mon Sep 17 00:00:00 2001
From: Song Hongyan <hongyan.song@intel.com>
Date: Sun, 7 May 2017 18:24:25 +0800
Subject: iio: hid-sensor-rotation: Add geomagnetic orientation sensor hid
 support.

Geomagnetic orientation(AM) sensor is one kind of orientation 6dof sensor.
It gives the device rotation in respect to the earth center and the
magnetic north. The sensor is implemented through use of an accelerometer
and magnetometer do not use gyroscope. It is a standard HID sensor.

More information can be found in:
http://www.usb.org/developers/hidpage/HUTRR59_-_Usages_for_Wearables.pdf

Geomagnetic orientation(AM) sensor and dev rotation sensor have same
channel and share channel usage id. So the most of the code for relative
orientation sensor can be reused.

Signed-off-by: Song Hongyan <hongyan.song@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/orientation/hid-sensor-rotation.c | 7 +++++++
 include/linux/hid-sensor-ids.h                | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c
index 4d953c26325f..60d3517a78af 100644
--- a/drivers/iio/orientation/hid-sensor-rotation.c
+++ b/drivers/iio/orientation/hid-sensor-rotation.c
@@ -241,6 +241,9 @@ static int hid_dev_rot_probe(struct platform_device *pdev)
 	case HID_USAGE_SENSOR_RELATIVE_ORIENTATION:
 		name = "relative_orientation";
 		break;
+	case HID_USAGE_SENSOR_GEOMAGNETIC_ORIENTATION:
+		name = "geomagnetic_orientation";
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -339,6 +342,10 @@ static const struct platform_device_id hid_dev_rot_ids[] = {
 		/* Relative orientation(AG) sensor */
 		.name = "HID-SENSOR-20008e",
 	},
+	{
+		/* Geomagnetic orientation(AM) sensor */
+		.name = "HID-SENSOR-2000c1",
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(platform, hid_dev_rot_ids);
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index b5469e878e99..5af62c7e49f3 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -91,6 +91,7 @@
 
 #define HID_USAGE_SENSOR_DEVICE_ORIENTATION			0x20008A
 #define HID_USAGE_SENSOR_RELATIVE_ORIENTATION			0x20008E
+#define HID_USAGE_SENSOR_GEOMAGNETIC_ORIENTATION		0x2000C1
 #define HID_USAGE_SENSOR_ORIENT_ROTATION_MATRIX			0x200482
 #define HID_USAGE_SENSOR_ORIENT_QUATERNION			0x200483
 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX			0x200484
-- 
cgit v1.2.3


From 6fb34812c2a2a4cdcdad4452b9634892812fa97b Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Tue, 2 May 2017 14:33:45 +0200
Subject: iio: stm32 trigger: Add support for TRGO2 triggers

Add support for TRGO2 trigger that can be found on STM32F7.
Add additional master modes supported by TRGO2.
Register additional "tim[1/8]_trgo2" triggers for timer1 & timer8.
Detect TRGO2 timer capability (master mode selection 2).

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Acked-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 .../ABI/testing/sysfs-bus-iio-timer-stm32          |  48 +++++++++
 drivers/iio/trigger/stm32-timer-trigger.c          | 113 ++++++++++++++++++---
 include/linux/iio/timer/stm32-timer-trigger.h      |   2 +
 include/linux/mfd/stm32-timers.h                   |   2 +
 4 files changed, 151 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
index 230020e06677..deb015935683 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
+++ b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
@@ -16,6 +16,54 @@ Description:
 		- "OC2REF"    : OC2REF signal is used as trigger output.
 		- "OC3REF"    : OC3REF signal is used as trigger output.
 		- "OC4REF"    : OC4REF signal is used as trigger output.
+		Additional modes (on TRGO2 only):
+		- "OC5REF"    : OC5REF signal is used as trigger output.
+		- "OC6REF"    : OC6REF signal is used as trigger output.
+		- "compare_pulse_OC4REF":
+		  OC4REF rising or falling edges generate pulses.
+		- "compare_pulse_OC6REF":
+		  OC6REF rising or falling edges generate pulses.
+		- "compare_pulse_OC4REF_r_or_OC6REF_r":
+		  OC4REF or OC6REF rising edges generate pulses.
+		- "compare_pulse_OC4REF_r_or_OC6REF_f":
+		  OC4REF rising or OC6REF falling edges generate pulses.
+		- "compare_pulse_OC5REF_r_or_OC6REF_r":
+		  OC5REF or OC6REF rising edges generate pulses.
+		- "compare_pulse_OC5REF_r_or_OC6REF_f":
+		  OC5REF rising or OC6REF falling edges generate pulses.
+
+		+-----------+   +-------------+            +---------+
+		| Prescaler +-> | Counter     |        +-> | Master  | TRGO(2)
+		+-----------+   +--+--------+-+        |-> | Control +-->
+		                   |        |          ||  +---------+
+		                +--v--------+-+ OCxREF ||  +---------+
+		                | Chx compare +----------> | Output  | ChX
+		                +-----------+-+         |  | Control +-->
+		                      .     |           |  +---------+
+		                      .     |           |    .
+		                +-----------v-+ OC6REF  |    .
+		                | Ch6 compare +---------+>
+		                +-------------+
+
+		Example with: "compare_pulse_OC4REF_r_or_OC6REF_r":
+
+		                X
+		              X   X
+		            X .   . X
+		          X   .   .   X
+		        X     .   .     X
+		count X .     .   .     . X
+		        .     .   .     .
+		        .     .   .     .
+		        +---------------+
+		OC4REF  |     .   .     |
+		      +-+     .   .     +-+
+		        .     +---+     .
+		OC6REF  .     |   |     .
+		      +-------+   +-------+
+		        +-+   +-+
+		TRGO2   | |   | |
+		      +-+ +---+ +---------+
 
 What:		/sys/bus/iio/devices/triggerX/master_mode
 KernelVersion:	4.11
diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 25248d644e7c..0797f2fe584f 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -14,19 +14,19 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
-#define MAX_TRIGGERS 6
+#define MAX_TRIGGERS 7
 #define MAX_VALIDS 5
 
 /* List the triggers created by each timer */
 static const void *triggers_table[][MAX_TRIGGERS] = {
-	{ TIM1_TRGO, TIM1_CH1, TIM1_CH2, TIM1_CH3, TIM1_CH4,},
+	{ TIM1_TRGO, TIM1_TRGO2, TIM1_CH1, TIM1_CH2, TIM1_CH3, TIM1_CH4,},
 	{ TIM2_TRGO, TIM2_CH1, TIM2_CH2, TIM2_CH3, TIM2_CH4,},
 	{ TIM3_TRGO, TIM3_CH1, TIM3_CH2, TIM3_CH3, TIM3_CH4,},
 	{ TIM4_TRGO, TIM4_CH1, TIM4_CH2, TIM4_CH3, TIM4_CH4,},
 	{ TIM5_TRGO, TIM5_CH1, TIM5_CH2, TIM5_CH3, TIM5_CH4,},
 	{ TIM6_TRGO,},
 	{ TIM7_TRGO,},
-	{ TIM8_TRGO, TIM8_CH1, TIM8_CH2, TIM8_CH3, TIM8_CH4,},
+	{ TIM8_TRGO, TIM8_TRGO2, TIM8_CH1, TIM8_CH2, TIM8_CH3, TIM8_CH4,},
 	{ TIM9_TRGO, TIM9_CH1, TIM9_CH2,},
 	{ }, /* timer 10 */
 	{ }, /* timer 11 */
@@ -56,9 +56,16 @@ struct stm32_timer_trigger {
 	u32 max_arr;
 	const void *triggers;
 	const void *valids;
+	bool has_trgo2;
 };
 
+static bool stm32_timer_is_trgo2_name(const char *name)
+{
+	return !!strstr(name, "trgo2");
+}
+
 static int stm32_timer_start(struct stm32_timer_trigger *priv,
+			     struct iio_trigger *trig,
 			     unsigned int frequency)
 {
 	unsigned long long prd, div;
@@ -102,7 +109,12 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv,
 	regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, TIM_CR1_ARPE);
 
 	/* Force master mode to update mode */
-	regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0x20);
+	if (stm32_timer_is_trgo2_name(trig->name))
+		regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2,
+				   0x2 << TIM_CR2_MMS2_SHIFT);
+	else
+		regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS,
+				   0x2 << TIM_CR2_MMS_SHIFT);
 
 	/* Make sure that registers are updated */
 	regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG);
@@ -150,7 +162,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev,
 	if (freq == 0) {
 		stm32_timer_stop(priv);
 	} else {
-		ret = stm32_timer_start(priv, freq);
+		ret = stm32_timer_start(priv, trig, freq);
 		if (ret)
 			return ret;
 	}
@@ -183,6 +195,9 @@ static IIO_DEV_ATTR_SAMP_FREQ(0660,
 			      stm32_tt_read_frequency,
 			      stm32_tt_store_frequency);
 
+#define MASTER_MODE_MAX		7
+#define MASTER_MODE2_MAX	15
+
 static char *master_mode_table[] = {
 	"reset",
 	"enable",
@@ -191,7 +206,16 @@ static char *master_mode_table[] = {
 	"OC1REF",
 	"OC2REF",
 	"OC3REF",
-	"OC4REF"
+	"OC4REF",
+	/* Master mode selection 2 only */
+	"OC5REF",
+	"OC6REF",
+	"compare_pulse_OC4REF",
+	"compare_pulse_OC6REF",
+	"compare_pulse_OC4REF_r_or_OC6REF_r",
+	"compare_pulse_OC4REF_r_or_OC6REF_f",
+	"compare_pulse_OC5REF_r_or_OC6REF_r",
+	"compare_pulse_OC5REF_r_or_OC6REF_f",
 };
 
 static ssize_t stm32_tt_show_master_mode(struct device *dev,
@@ -199,10 +223,15 @@ static ssize_t stm32_tt_show_master_mode(struct device *dev,
 					 char *buf)
 {
 	struct stm32_timer_trigger *priv = dev_get_drvdata(dev);
+	struct iio_trigger *trig = to_iio_trigger(dev);
 	u32 cr2;
 
 	regmap_read(priv->regmap, TIM_CR2, &cr2);
-	cr2 = (cr2 & TIM_CR2_MMS) >> TIM_CR2_MMS_SHIFT;
+
+	if (stm32_timer_is_trgo2_name(trig->name))
+		cr2 = (cr2 & TIM_CR2_MMS2) >> TIM_CR2_MMS2_SHIFT;
+	else
+		cr2 = (cr2 & TIM_CR2_MMS) >> TIM_CR2_MMS_SHIFT;
 
 	return snprintf(buf, PAGE_SIZE, "%s\n", master_mode_table[cr2]);
 }
@@ -212,13 +241,25 @@ static ssize_t stm32_tt_store_master_mode(struct device *dev,
 					  const char *buf, size_t len)
 {
 	struct stm32_timer_trigger *priv = dev_get_drvdata(dev);
+	struct iio_trigger *trig = to_iio_trigger(dev);
+	u32 mask, shift, master_mode_max;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(master_mode_table); i++) {
+	if (stm32_timer_is_trgo2_name(trig->name)) {
+		mask = TIM_CR2_MMS2;
+		shift = TIM_CR2_MMS2_SHIFT;
+		master_mode_max = MASTER_MODE2_MAX;
+	} else {
+		mask = TIM_CR2_MMS;
+		shift = TIM_CR2_MMS_SHIFT;
+		master_mode_max = MASTER_MODE_MAX;
+	}
+
+	for (i = 0; i <= master_mode_max; i++) {
 		if (!strncmp(master_mode_table[i], buf,
 			     strlen(master_mode_table[i]))) {
-			regmap_update_bits(priv->regmap, TIM_CR2,
-					   TIM_CR2_MMS, i << TIM_CR2_MMS_SHIFT);
+			regmap_update_bits(priv->regmap, TIM_CR2, mask,
+					   i << shift);
 			/* Make sure that registers are updated */
 			regmap_update_bits(priv->regmap, TIM_EGR,
 					   TIM_EGR_UG, TIM_EGR_UG);
@@ -229,8 +270,31 @@ static ssize_t stm32_tt_store_master_mode(struct device *dev,
 	return -EINVAL;
 }
 
-static IIO_CONST_ATTR(master_mode_available,
-	"reset enable update compare_pulse OC1REF OC2REF OC3REF OC4REF");
+static ssize_t stm32_tt_show_master_mode_avail(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buf)
+{
+	struct iio_trigger *trig = to_iio_trigger(dev);
+	unsigned int i, master_mode_max;
+	size_t len = 0;
+
+	if (stm32_timer_is_trgo2_name(trig->name))
+		master_mode_max = MASTER_MODE2_MAX;
+	else
+		master_mode_max = MASTER_MODE_MAX;
+
+	for (i = 0; i <= master_mode_max; i++)
+		len += scnprintf(buf + len, PAGE_SIZE - len,
+			"%s ", master_mode_table[i]);
+
+	/* replace trailing space by newline */
+	buf[len - 1] = '\n';
+
+	return len;
+}
+
+static IIO_DEVICE_ATTR(master_mode_available, 0444,
+		       stm32_tt_show_master_mode_avail, NULL, 0);
 
 static IIO_DEVICE_ATTR(master_mode, 0660,
 		       stm32_tt_show_master_mode,
@@ -240,7 +304,7 @@ static IIO_DEVICE_ATTR(master_mode, 0660,
 static struct attribute *stm32_trigger_attrs[] = {
 	&iio_dev_attr_sampling_frequency.dev_attr.attr,
 	&iio_dev_attr_master_mode.dev_attr.attr,
-	&iio_const_attr_master_mode_available.dev_attr.attr,
+	&iio_dev_attr_master_mode_available.dev_attr.attr,
 	NULL,
 };
 
@@ -264,6 +328,12 @@ static int stm32_setup_iio_triggers(struct stm32_timer_trigger *priv)
 
 	while (cur && *cur) {
 		struct iio_trigger *trig;
+		bool cur_is_trgo2 = stm32_timer_is_trgo2_name(*cur);
+
+		if (cur_is_trgo2 && !priv->has_trgo2) {
+			cur++;
+			continue;
+		}
 
 		trig = devm_iio_trigger_alloc(priv->dev, "%s", *cur);
 		if  (!trig)
@@ -277,7 +347,7 @@ static int stm32_setup_iio_triggers(struct stm32_timer_trigger *priv)
 		 * should only be available on trgo trigger which
 		 * is always the first in the list.
 		 */
-		if (cur == priv->triggers)
+		if (cur == priv->triggers || cur_is_trgo2)
 			trig->dev.groups = stm32_trigger_attr_groups;
 
 		iio_trigger_set_drvdata(trig, priv);
@@ -584,6 +654,20 @@ bool is_stm32_timer_trigger(struct iio_trigger *trig)
 }
 EXPORT_SYMBOL(is_stm32_timer_trigger);
 
+static void stm32_timer_detect_trgo2(struct stm32_timer_trigger *priv)
+{
+	u32 val;
+
+	/*
+	 * Master mode selection 2 bits can only be written and read back when
+	 * timer supports it.
+	 */
+	regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, TIM_CR2_MMS2);
+	regmap_read(priv->regmap, TIM_CR2, &val);
+	regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0);
+	priv->has_trgo2 = !!val;
+}
+
 static int stm32_timer_trigger_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -614,6 +698,7 @@ static int stm32_timer_trigger_probe(struct platform_device *pdev)
 	priv->max_arr = ddata->max_arr;
 	priv->triggers = triggers_table[index];
 	priv->valids = valids_table[index];
+	stm32_timer_detect_trgo2(priv);
 
 	ret = stm32_setup_iio_triggers(priv);
 	if (ret)
diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h
index 55535aef2e6c..fa7d786ed99e 100644
--- a/include/linux/iio/timer/stm32-timer-trigger.h
+++ b/include/linux/iio/timer/stm32-timer-trigger.h
@@ -10,6 +10,7 @@
 #define _STM32_TIMER_TRIGGER_H_
 
 #define TIM1_TRGO	"tim1_trgo"
+#define TIM1_TRGO2	"tim1_trgo2"
 #define TIM1_CH1	"tim1_ch1"
 #define TIM1_CH2	"tim1_ch2"
 #define TIM1_CH3	"tim1_ch3"
@@ -44,6 +45,7 @@
 #define TIM7_TRGO	"tim7_trgo"
 
 #define TIM8_TRGO	"tim8_trgo"
+#define TIM8_TRGO2	"tim8_trgo2"
 #define TIM8_CH1	"tim8_ch1"
 #define TIM8_CH2	"tim8_ch2"
 #define TIM8_CH3	"tim8_ch3"
diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
index 4a0abbc10ef6..ce7346e7f77a 100644
--- a/include/linux/mfd/stm32-timers.h
+++ b/include/linux/mfd/stm32-timers.h
@@ -34,6 +34,7 @@
 #define TIM_CR1_DIR	BIT(4)  /* Counter Direction	   */
 #define TIM_CR1_ARPE	BIT(7)	/* Auto-reload Preload Ena */
 #define TIM_CR2_MMS	(BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */
+#define TIM_CR2_MMS2	GENMASK(23, 20) /* Master mode selection 2 */
 #define TIM_SMCR_SMS	(BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */
 #define TIM_SMCR_TS	(BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */
 #define TIM_DIER_UIE	BIT(0)	/* Update interrupt	   */
@@ -60,6 +61,7 @@
 
 #define MAX_TIM_PSC		0xFFFF
 #define TIM_CR2_MMS_SHIFT	4
+#define TIM_CR2_MMS2_SHIFT	20
 #define TIM_SMCR_TS_SHIFT	4
 #define TIM_BDTR_BKF_MASK	0xF
 #define TIM_BDTR_BKF_SHIFT	16
-- 
cgit v1.2.3


From 73c73463189974ace90a05397197339071c6ecc7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 12 Apr 2017 20:17:45 -0700
Subject: video: ARM CLCD: Move registers to a separate header.

We'd like to reuse these register definitions for the DRM CLCD driver,
but there's a bunch of fbdev-specific code in the current header.

v2: Add #ifndef guard.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170413031746.12921-1-eric@anholt.net
---
 include/linux/amba/clcd-regs.h | 81 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/amba/clcd.h      | 68 +----------------------------------
 2 files changed, 82 insertions(+), 67 deletions(-)
 create mode 100644 include/linux/amba/clcd-regs.h

(limited to 'include/linux')

diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h
new file mode 100644
index 000000000000..69c0e2143003
--- /dev/null
+++ b/include/linux/amba/clcd-regs.h
@@ -0,0 +1,81 @@
+/*
+ * David A Rusling
+ *
+ * Copyright (C) 2001 ARM Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef AMBA_CLCD_REGS_H
+#define AMBA_CLCD_REGS_H
+
+/*
+ * CLCD Controller Internal Register addresses
+ */
+#define CLCD_TIM0		0x00000000
+#define CLCD_TIM1 		0x00000004
+#define CLCD_TIM2 		0x00000008
+#define CLCD_TIM3 		0x0000000c
+#define CLCD_UBAS 		0x00000010
+#define CLCD_LBAS 		0x00000014
+
+#define CLCD_PL110_IENB		0x00000018
+#define CLCD_PL110_CNTL		0x0000001c
+#define CLCD_PL110_STAT		0x00000020
+#define CLCD_PL110_INTR 	0x00000024
+#define CLCD_PL110_UCUR		0x00000028
+#define CLCD_PL110_LCUR		0x0000002C
+
+#define CLCD_PL111_CNTL		0x00000018
+#define CLCD_PL111_IENB		0x0000001c
+#define CLCD_PL111_RIS		0x00000020
+#define CLCD_PL111_MIS		0x00000024
+#define CLCD_PL111_ICR		0x00000028
+#define CLCD_PL111_UCUR		0x0000002c
+#define CLCD_PL111_LCUR		0x00000030
+
+#define CLCD_PALL 		0x00000200
+#define CLCD_PALETTE		0x00000200
+
+#define TIM2_CLKSEL		(1 << 5)
+#define TIM2_IVS		(1 << 11)
+#define TIM2_IHS		(1 << 12)
+#define TIM2_IPC		(1 << 13)
+#define TIM2_IOE		(1 << 14)
+#define TIM2_BCD		(1 << 26)
+
+#define CNTL_LCDEN		(1 << 0)
+#define CNTL_LCDBPP1		(0 << 1)
+#define CNTL_LCDBPP2		(1 << 1)
+#define CNTL_LCDBPP4		(2 << 1)
+#define CNTL_LCDBPP8		(3 << 1)
+#define CNTL_LCDBPP16		(4 << 1)
+#define CNTL_LCDBPP16_565	(6 << 1)
+#define CNTL_LCDBPP16_444	(7 << 1)
+#define CNTL_LCDBPP24		(5 << 1)
+#define CNTL_LCDBW		(1 << 4)
+#define CNTL_LCDTFT		(1 << 5)
+#define CNTL_LCDMONO8		(1 << 6)
+#define CNTL_LCDDUAL		(1 << 7)
+#define CNTL_BGR		(1 << 8)
+#define CNTL_BEBO		(1 << 9)
+#define CNTL_BEPO		(1 << 10)
+#define CNTL_LCDPWR		(1 << 11)
+#define CNTL_LCDVCOMP(x)	((x) << 12)
+#define CNTL_LDMAFIFOTIME	(1 << 15)
+#define CNTL_WATERMARK		(1 << 16)
+
+/* ST Microelectronics variant bits */
+#define CNTL_ST_1XBPP_444	0x0
+#define CNTL_ST_1XBPP_5551	(1 << 17)
+#define CNTL_ST_1XBPP_565	(1 << 18)
+#define CNTL_ST_CDWID_12	0x0
+#define CNTL_ST_CDWID_16	(1 << 19)
+#define CNTL_ST_CDWID_18	(1 << 20)
+#define CNTL_ST_CDWID_24	((1 << 19)|(1 << 20))
+#define CNTL_ST_CEAEN		(1 << 21)
+#define CNTL_ST_LCDBPP24_PACKED	(6 << 1)
+
+#endif /* AMBA_CLCD_REGS_H */
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
index 1035879b322c..d0c3be77c18e 100644
--- a/include/linux/amba/clcd.h
+++ b/include/linux/amba/clcd.h
@@ -10,73 +10,7 @@
  * for more details.
  */
 #include <linux/fb.h>
-
-/*
- * CLCD Controller Internal Register addresses
- */
-#define CLCD_TIM0		0x00000000
-#define CLCD_TIM1 		0x00000004
-#define CLCD_TIM2 		0x00000008
-#define CLCD_TIM3 		0x0000000c
-#define CLCD_UBAS 		0x00000010
-#define CLCD_LBAS 		0x00000014
-
-#define CLCD_PL110_IENB		0x00000018
-#define CLCD_PL110_CNTL		0x0000001c
-#define CLCD_PL110_STAT		0x00000020
-#define CLCD_PL110_INTR 	0x00000024
-#define CLCD_PL110_UCUR		0x00000028
-#define CLCD_PL110_LCUR		0x0000002C
-
-#define CLCD_PL111_CNTL		0x00000018
-#define CLCD_PL111_IENB		0x0000001c
-#define CLCD_PL111_RIS		0x00000020
-#define CLCD_PL111_MIS		0x00000024
-#define CLCD_PL111_ICR		0x00000028
-#define CLCD_PL111_UCUR		0x0000002c
-#define CLCD_PL111_LCUR		0x00000030
-
-#define CLCD_PALL 		0x00000200
-#define CLCD_PALETTE		0x00000200
-
-#define TIM2_CLKSEL		(1 << 5)
-#define TIM2_IVS		(1 << 11)
-#define TIM2_IHS		(1 << 12)
-#define TIM2_IPC		(1 << 13)
-#define TIM2_IOE		(1 << 14)
-#define TIM2_BCD		(1 << 26)
-
-#define CNTL_LCDEN		(1 << 0)
-#define CNTL_LCDBPP1		(0 << 1)
-#define CNTL_LCDBPP2		(1 << 1)
-#define CNTL_LCDBPP4		(2 << 1)
-#define CNTL_LCDBPP8		(3 << 1)
-#define CNTL_LCDBPP16		(4 << 1)
-#define CNTL_LCDBPP16_565	(6 << 1)
-#define CNTL_LCDBPP16_444	(7 << 1)
-#define CNTL_LCDBPP24		(5 << 1)
-#define CNTL_LCDBW		(1 << 4)
-#define CNTL_LCDTFT		(1 << 5)
-#define CNTL_LCDMONO8		(1 << 6)
-#define CNTL_LCDDUAL		(1 << 7)
-#define CNTL_BGR		(1 << 8)
-#define CNTL_BEBO		(1 << 9)
-#define CNTL_BEPO		(1 << 10)
-#define CNTL_LCDPWR		(1 << 11)
-#define CNTL_LCDVCOMP(x)	((x) << 12)
-#define CNTL_LDMAFIFOTIME	(1 << 15)
-#define CNTL_WATERMARK		(1 << 16)
-
-/* ST Microelectronics variant bits */
-#define CNTL_ST_1XBPP_444	0x0
-#define CNTL_ST_1XBPP_5551	(1 << 17)
-#define CNTL_ST_1XBPP_565	(1 << 18)
-#define CNTL_ST_CDWID_12	0x0
-#define CNTL_ST_CDWID_16	(1 << 19)
-#define CNTL_ST_CDWID_18	(1 << 20)
-#define CNTL_ST_CDWID_24	((1 << 19)|(1 << 20))
-#define CNTL_ST_CEAEN		(1 << 21)
-#define CNTL_ST_LCDBPP24_PACKED	(6 << 1)
+#include <linux/amba/clcd-regs.h>
 
 enum {
 	/* individual formats */
-- 
cgit v1.2.3


From 61df56bef97e1708bfbc006b307b00834ad61fe8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 10 May 2017 17:12:52 +0200
Subject: HID: Add mapping for Microsoft Win8 Wireless Radio Controls
 extensions

Microsoft has defined some extra HUT codes for the Generic Desktop Page
for Wireless Radio controls, see:

https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management
https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management

I've 3 2-in-1 keyboard docks: Dell Venue Pro 11 keyboard dock,
HP pavilion x2 keyboard dock and a PEAQ C1010 keyboard dock which have
a wireless radio toggle hotkey, which uses the 0x000100c6 HUT code
defined in these extensions.

This commit adds a mapping for this key, this makes the rfkill toggle
hotkey work on the Dell Venue Pro 11 and HP Pavilion X2 keyboards,
the PEAQ C1010 keyboard does generate events for the 0x000100c6 HUT
code when pressed, but the reported value is always 0.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c |  9 +++++++++
 include/linux/hid.h     | 10 ++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index a1ebdd7d4d4d..412040b11268 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -656,6 +656,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		case HID_GD_START:	map_key_clear(BTN_START);	break;
 		case HID_GD_SELECT:	map_key_clear(BTN_SELECT);	break;
 
+		case HID_GD_RFKILL_BTN:
+			/* MS wireless radio ctl extension, also check CA */
+			if (field->application == 0x0001000c) {
+				map_key_clear(KEY_RFKILL);
+				/* We need to simulate the btn release */
+				field->flags |= HID_MAIN_ITEM_RELATIVE;
+				break;
+			}
+
 		default: goto unknown;
 		}
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 5be325d890d9..0b29466bbc21 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -182,6 +182,12 @@ struct hid_item {
 #define HID_GD_KEYBOARD		0x00010006
 #define HID_GD_KEYPAD		0x00010007
 #define HID_GD_MULTIAXIS	0x00010008
+/*
+ * Microsoft Win8 Wireless Radio Controls extensions CA, see (checked 09052017):
+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management
+ * https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management
+ */
+#define HID_GD_WIRELESS_RADIO_CTLS	0x0001000c
 #define HID_GD_X		0x00010030
 #define HID_GD_Y		0x00010031
 #define HID_GD_Z		0x00010032
@@ -210,6 +216,10 @@ struct hid_item {
 #define HID_GD_DOWN		0x00010091
 #define HID_GD_RIGHT		0x00010092
 #define HID_GD_LEFT		0x00010093
+/* Microsoft Win8 Wireless Radio Controls CA usage codes */
+#define HID_GD_RFKILL_BTN	0x000100c6
+#define HID_GD_RFKILL_LED	0x000100c7
+#define HID_GD_RFKILL_SWITCH	0x000100c8
 
 #define HID_DC_BATTERYSTRENGTH	0x00060020
 
-- 
cgit v1.2.3


From 1b9a07ee25049724ab7f7c32282fbf5452530cea Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 10 May 2017 21:32:18 +0300
Subject: {net, IB}/mlx5: Replace mlx5_vzalloc with kvzalloc

Commit a7c3e901a46f ("mm: introduce kv[mz]alloc helpers") added
proper implementation of mlx5_vzalloc function to the MM core.

This made the mlx5_vzalloc function useless, so let's remove it.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cq.c                    |  6 ++--
 drivers/infiniband/hw/mlx5/mad.c                   |  4 +--
 drivers/infiniband/hw/mlx5/main.c                  |  6 ++--
 drivers/infiniband/hw/mlx5/mr.c                    |  2 +-
 drivers/infiniband/hw/mlx5/qp.c                    | 32 ++++++++++----------
 drivers/infiniband/hw/mlx5/srq.c                   |  4 +--
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  8 ++---
 .../net/ethernet/mellanox/mlx5/core/en_common.c    |  4 +--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c    | 24 ++++++---------
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 33 ++++++++++-----------
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 24 ++++++---------
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 13 ++++-----
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  6 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 10 +++----
 drivers/net/ethernet/mellanox/mlx5/core/ipoib.c    |  2 +-
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    |  4 +--
 drivers/net/ethernet/mellanox/mlx5/core/port.c     |  6 ++--
 drivers/net/ethernet/mellanox/mlx5/core/qp.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/srq.c      | 14 ++++-----
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/vport.c    | 34 +++++++++-------------
 include/linux/mlx5/driver.h                        |  5 ----
 27 files changed, 111 insertions(+), 144 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 94c049b62c2f..a384d72ea3cd 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -788,7 +788,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 
 	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
-	*cqb = mlx5_vzalloc(*inlen);
+	*cqb = kvzalloc(*inlen, GFP_KERNEL);
 	if (!*cqb) {
 		err = -ENOMEM;
 		goto err_db;
@@ -884,7 +884,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 
 	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
-	*cqb = mlx5_vzalloc(*inlen);
+	*cqb = kvzalloc(*inlen, GFP_KERNEL);
 	if (!*cqb) {
 		err = -ENOMEM;
 		goto err_buf;
@@ -1314,7 +1314,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
 		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto ex_resize;
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index f1b56de64871..95db929bdc34 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -218,7 +218,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
 			(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
 		int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
 
-		out_cnt = mlx5_vzalloc(sz);
+		out_cnt = kvzalloc(sz, GFP_KERNEL);
 		if (!out_cnt)
 			return IB_MAD_RESULT_FAILURE;
 
@@ -231,7 +231,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
 			(struct ib_pma_portcounters *)(out_mad->data + 40);
 		int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 
-		out_cnt = mlx5_vzalloc(sz);
+		out_cnt = kvzalloc(sz, GFP_KERNEL);
 		if (!out_cnt)
 			return IB_MAD_RESULT_FAILURE;
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d45772da0963..b6991204e5df 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2263,7 +2263,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	if (!is_valid_attr(dev->mdev, flow_attr))
 		return ERR_PTR(-EINVAL);
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
 	if (!handler || !spec) {
 		err = -ENOMEM;
@@ -3456,7 +3456,7 @@ static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
 	__be32 val;
 	int ret, i;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -3485,7 +3485,7 @@ static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
 	int ret, i;
 	int offset = port->cnts.num_q_counters;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 366433f71b58..763bb5b36144 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1110,7 +1110,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
 
 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
 		sizeof(*pas) * ((npages + 1) / 2) * 2;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_1;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 93959e1e43a3..d17aad0f54c0 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -823,7 +823,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
 	*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 		 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
-	*in = mlx5_vzalloc(*inlen);
+	*in = kvzalloc(*inlen, GFP_KERNEL);
 	if (!*in) {
 		err = -ENOMEM;
 		goto err_umem;
@@ -931,7 +931,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 	qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
 	*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 		 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
-	*in = mlx5_vzalloc(*inlen);
+	*in = kvzalloc(*inlen, GFP_KERNEL);
 	if (!*in) {
 		err = -ENOMEM;
 		goto err_buf;
@@ -1060,7 +1060,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 		return err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_umem;
@@ -1140,7 +1140,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 	u32 rq_pas_size = get_rq_pas_size(qpc);
 
 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1193,7 +1193,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1372,7 +1372,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	}
 
 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1633,7 +1633,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			return err;
 	} else {
-		in = mlx5_vzalloc(inlen);
+		in = kvzalloc(inlen, GFP_KERNEL);
 		if (!in)
 			return -ENOMEM;
 
@@ -2164,7 +2164,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2189,7 +2189,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2434,7 +2434,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2479,7 +2479,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -4294,7 +4294,7 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(query_sq_out);
-	out = mlx5_vzalloc(inlen);
+	out = kvzalloc(inlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -4321,7 +4321,7 @@ static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(query_rq_out);
-	out = mlx5_vzalloc(inlen);
+	out = kvzalloc(inlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -4625,7 +4625,7 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 	dev = to_mdev(pd->device);
 
 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -4855,7 +4855,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 		return ERR_PTR(-ENOMEM);
 
 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto err;
@@ -4934,7 +4934,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 		return -EOPNOTSUPP;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 7cb145f9a6db..43707b101f47 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -127,7 +127,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 		goto err_umem;
 	}
 
-	in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
+	in->pas = kvzalloc(sizeof(*in->pas) * ncont, GFP_KERNEL);
 	if (!in->pas) {
 		err = -ENOMEM;
 		goto err_umem;
@@ -189,7 +189,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	}
 
 	mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
-	in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
+	in->pas = kvzalloc(sizeof(*in->pas) * srq->buf.npages, GFP_KERNEL);
 	if (!in->pas) {
 		err = -ENOMEM;
 		goto err_buf;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index e94a9532e218..de40b6cfee95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -405,7 +405,7 @@ static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	u32 *out;
 	int err;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return param;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index c8a005326e30..f4017c06ddd2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -180,9 +180,8 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -237,7 +236,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
 
 	ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
 			sizeof(*ft->g), GFP_KERNEL);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if  (!in || !ft->g) {
 		kvfree(ft->g);
 		kvfree(in);
@@ -481,9 +480,8 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_table *ft;
 	int err = 0;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
 		err = -ENOMEM;
 		goto out;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index f1f17f7a3cd0..46e56ec4c26f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -65,7 +65,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 	u32 *in;
 	int err;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -147,7 +147,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
 
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ce7b09d72ff6..e0dd1048c966 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1045,7 +1045,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 	    (hfunc != ETH_RSS_HASH_TOP))
 		return -EINVAL;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 576d6787b484..936fc6d96c18 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -218,11 +218,9 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
-	if (!spec) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
 		return -ENOMEM;
-	}
 
 	if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID)
 		mlx5e_vport_context_update_vlans(priv);
@@ -660,11 +658,9 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
-	if (!spec) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	if (proto) {
 		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
@@ -742,7 +738,7 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc)
 			sizeof(*ft->g), GFP_KERNEL);
 	if (!ft->g)
 		return -ENOMEM;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		kfree(ft->g);
 		return -ENOMEM;
@@ -853,11 +849,9 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 	u8 *mc_dmac;
 	u8 *mv_dmac;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
-	if (!spec) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
 		return -ENOMEM;
-	}
 
 	mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 			       outer_headers.dmac_47_16);
@@ -917,7 +911,7 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
 	ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL);
 	if (!ft->g)
 		return -ENOMEM;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		kfree(ft->g);
 		return -ENOMEM;
@@ -1072,7 +1066,7 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	int err;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 85bf4a389295..bdd82c9b3992 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -296,7 +296,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_handle *rule;
 	int err = 0;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return ERR_PTR(-ENOMEM);
 	err = set_flow_attrs(spec->match_criteria, spec->match_value,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a61b71b6fff3..edc485e489cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -252,9 +252,9 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv)
 	void *out;
 	u32 *in;
 
-	in = mlx5_vzalloc(sz);
+	in = kvzalloc(sz, GFP_KERNEL);
 	if (!in)
-		goto free_out;
+		return;
 
 	MLX5_SET(ppcnt_reg, in, local_port, 1);
 
@@ -288,7 +288,6 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv)
 				     MLX5_REG_PPCNT, 0, 0);
 	}
 
-free_out:
 	kvfree(in);
 }
 
@@ -314,7 +313,7 @@ static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
 	if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group))
 		return;
 
-	in = mlx5_vzalloc(sz);
+	in = kvzalloc(sz, GFP_KERNEL);
 	if (!in)
 		return;
 
@@ -503,7 +502,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 	if (!MLX5E_VALID_NUM_MTTS(npages))
 		return -EINVAL;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -711,7 +710,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
 
 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
 		sizeof(u64) * rq->wq_ctrl.buf.npages;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -748,7 +747,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -776,7 +775,7 @@ static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -805,7 +804,7 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1134,7 +1133,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 
 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
 		sizeof(u64) * csp->wq_ctrl->buf.npages;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1182,7 +1181,7 @@ static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1496,7 +1495,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2091,7 +2090,7 @@ mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
 	int i;
 
 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2210,7 +2209,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2433,7 +2432,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 	int ix;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2850,7 +2849,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
 	int tt;
 
 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -2889,7 +2888,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
 	int ix;
 
 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 11c27e4fadf6..66a9bd635176 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1738,7 +1738,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	}
 
 	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
-	parse_attr = mlx5_vzalloc(sizeof(*parse_attr));
+	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
 	if (!parse_attr || !flow) {
 		err = -ENOMEM;
 		goto err_free;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ea5d8d37a75c..df0034d8f48c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -548,7 +548,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
 		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_buf;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 2e34d95ea776..81dfcd90b1f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -248,11 +248,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 	if (rx_rule)
 		match_header |= MLX5_MATCH_MISC_PARAMETERS;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
-	if (!spec) {
-		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
 		return NULL;
-	}
+
 	dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 			      outer_headers.dmac_47_16);
 	dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -350,10 +349,9 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 		return -EOPNOTSUPP;
 	}
 
-	flow_group_in = mlx5_vzalloc(inlen);
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
-	memset(flow_group_in, 0, inlen);
 
 	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
@@ -961,7 +959,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 		return -EOPNOTSUPP;
 	}
 
-	flow_group_in = mlx5_vzalloc(inlen);
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
 
@@ -1078,7 +1076,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 		return -EOPNOTSUPP;
 	}
 
-	flow_group_in = mlx5_vzalloc(inlen);
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
 
@@ -1241,11 +1239,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 		  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
 		  vport->vport, vport->info.vlan, vport->info.qos);
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
 		err = -ENOMEM;
-		esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n",
-			 vport->vport, err);
 		goto out;
 	}
 
@@ -1322,11 +1318,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
 		  vport->vport, vport->info.vlan, vport->info.qos);
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
 		err = -ENOMEM;
-		esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n",
-			 vport->vport, err);
 		goto out;
 	}
 
@@ -2158,7 +2152,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 	if (!LEGAL_VPORT(esw, vport))
 		return -EINVAL;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f991f669047e..3795943ef2d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -311,9 +311,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 	struct mlx5_flow_spec *spec;
 	void *misc;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
 		flow_rule = ERR_PTR(-ENOMEM);
 		goto out;
 	}
@@ -401,9 +400,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
 		err = -ENOMEM;
 		goto out;
 	}
@@ -488,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	u32 *flow_group_in;
 
 	esw_debug(esw->dev, "Create offloads FDB Tables\n");
-	flow_group_in = mlx5_vzalloc(inlen);
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
 
@@ -631,7 +629,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
 	int err = 0;
 	int nvports = priv->sriov.num_vfs + 2;
 
-	flow_group_in = mlx5_vzalloc(inlen);
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
 
@@ -675,9 +673,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 	struct mlx5_flow_spec *spec;
 	void *misc;
 
-	spec = mlx5_vzalloc(sizeof(*spec));
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		esw_warn(esw->dev, "Failed to alloc match parameters\n");
 		flow_rule = ERR_PTR(-ENOMEM);
 		goto out;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 19e3d2fc2099..b27c59af9640 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -235,11 +235,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 	u32 *in;
 	int err;
 
-	in = mlx5_vzalloc(inlen);
-	if (!in) {
-		mlx5_core_warn(dev, "failed to allocate inbox\n");
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
 		return -ENOMEM;
-	}
 
 	MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
 	MLX5_SET(set_fte_in, in, op_mod, opmod);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b8a176503d38..20a50f23fb1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -376,11 +376,9 @@ static void del_rule(struct fs_node *node)
 	int err;
 	bool update_fte = false;
 
-	match_value = mlx5_vzalloc(match_len);
-	if (!match_value) {
-		mlx5_core_warn(dev, "failed to allocate inbox\n");
+	match_value = kvzalloc(match_len, GFP_KERNEL);
+	if (!match_value)
 		return;
-	}
 
 	fs_get_obj(rule, node);
 	fs_get_obj(fte, rule->node.parent);
@@ -1159,7 +1157,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
 	if (!ft->autogroup.active)
 		return ERR_PTR(-ENOENT);
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return ERR_PTR(-ENOMEM);
 
@@ -1778,7 +1776,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
 	struct mlx5_flow_namespace *ns;
 
 	/* Create the root namespace */
-	root_ns = mlx5_vzalloc(sizeof(*root_ns));
+	root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
 	if (!root_ns)
 		return NULL;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index 019c230da498..9b397fe3f159 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -98,7 +98,7 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core
 	void *qpc;
 
 	inlen = MLX5_ST_SZ_BYTES(create_qp_in);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index a57d5a81eb05..efcded7ca27a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -279,7 +279,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	int i;
 
 	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
@@ -376,7 +376,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 		*nclaimed = 0;
 
 	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 141583daf5a2..1975d4388d4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -47,8 +47,8 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 	u32 *in = NULL;
 	void *data;
 
-	in = mlx5_vzalloc(inlen);
-	out = mlx5_vzalloc(outlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!in || !out)
 		goto out;
 
@@ -454,7 +454,7 @@ int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 	u32 *in;
 	int err;
 
-	in  = mlx5_vzalloc(sz);
+	in  = kvzalloc(sz, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index cbbcef2884be..573a6b27fed8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -527,7 +527,7 @@ int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id,
 	void *out;
 	int err;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index 3099630015d7..f774de6f5fcb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -162,7 +162,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 
 	pas_size  = get_pas_size(in);
 	inlen	  = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
-	create_in = mlx5_vzalloc(inlen);
+	create_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!create_in)
 		return -ENOMEM;
 
@@ -221,7 +221,7 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	void *srqc;
 	int err;
 
-	srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out));
+	srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
 	if (!srq_out)
 		return -ENOMEM;
 
@@ -256,7 +256,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
 
 	pas_size  = get_pas_size(in);
 	inlen	  = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
-	create_in = mlx5_vzalloc(inlen);
+	create_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!create_in)
 		return -ENOMEM;
 
@@ -320,7 +320,7 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
 	void *xrc_srqc;
 	int err;
 
-	xrcsrq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+	xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
 	if (!xrcsrq_out)
 		return -ENOMEM;
 	memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
@@ -357,7 +357,7 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 
 	pas_size = get_pas_size(in);
 	inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
-	create_in = mlx5_vzalloc(inlen);
+	create_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!create_in)
 		return -ENOMEM;
 
@@ -390,7 +390,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
 	void *bitmask;
 	int err;
 
-	in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in));
+	in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -417,7 +417,7 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	void *rmpc;
 	int err;
 
-	rmp_out =  mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out));
+	rmp_out =  kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
 	if (!rmp_out)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index a00ff49eec18..5e128d7a9ffd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -284,7 +284,7 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
 	void *bitmask;
 	int  err;
 
-	in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in));
+	in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 15c2294dd2b4..06019d00ab7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -172,7 +172,7 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 	u8 *out_addr;
 	int err;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -197,11 +197,9 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 	void *nic_vport_ctx;
 	u8 *perm_mac;
 
-	in = mlx5_vzalloc(inlen);
-	if (!in) {
-		mlx5_core_warn(mdev, "failed to allocate inbox\n");
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
 		return -ENOMEM;
-	}
 
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 field_select.permanent_address, 1);
@@ -231,7 +229,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
 	u32 *out;
 	int err;
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -251,7 +249,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
 	void *in;
 	int err;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -501,7 +499,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -521,7 +519,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -551,7 +549,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 	if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
 		return -EOPNOTSUPP;
 
-	in = mlx5_vzalloc(inlen);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -577,7 +575,7 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
 
-	out = mlx5_vzalloc(outlen);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -879,11 +877,9 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
 	int err;
 
-	in = mlx5_vzalloc(inlen);
-	if (!in) {
-		mlx5_core_err(mdev, "failed to allocate inbox\n");
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
 		return -ENOMEM;
-	}
 
 	MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1);
 	MLX5_SET(modify_nic_vport_context_in, in,
@@ -913,11 +909,9 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
 	int err;
 
-	in = mlx5_vzalloc(inlen);
-	if (!in) {
-		mlx5_core_warn(mdev, "failed to allocate inbox\n");
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
 		return -ENOMEM;
-	}
 
 	MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
@@ -952,7 +946,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
 	int	err;
 
 	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
-	in = mlx5_vzalloc(in_sz);
+	in = kvzalloc(in_sz, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		return err;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bcdf739ee41a..c2740688d679 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -890,11 +890,6 @@ static inline u16 cmdif_rev(struct mlx5_core_dev *dev)
 	return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
 }
 
-static inline void *mlx5_vzalloc(unsigned long size)
-{
-	return kvzalloc(size, GFP_KERNEL);
-}
-
 static inline u32 mlx5_base_mkey(const u32 key)
 {
 	return key & 0xffffff00u;
-- 
cgit v1.2.3


From bb29b9cccd95feeb43e11e9b1c2479777082e28a Mon Sep 17 00:00:00 2001
From: Anders Darander <anders@chargestorm.se>
Date: Thu, 27 Apr 2017 08:37:33 +0200
Subject: leds: pca963x: Add bindings to invert polarity

Add a new DT property, nxp,inverted-out, to invert the polarity
of the output.

Tested on PCA9634.

Signed-off-by: Anders Darander <anders@chargestorm.se>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/devicetree/bindings/leds/pca963x.txt |  1 +
 drivers/leds/leds-pca963x.c                        | 17 +++++++++++++++--
 include/linux/platform_data/leds-pca963x.h         |  6 ++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/leds/pca963x.txt b/Documentation/devicetree/bindings/leds/pca963x.txt
index dfbdb123a9bf..4eee41482041 100644
--- a/Documentation/devicetree/bindings/leds/pca963x.txt
+++ b/Documentation/devicetree/bindings/leds/pca963x.txt
@@ -10,6 +10,7 @@ Optional properties:
 - nxp,period-scale : In some configurations, the chip blinks faster than expected.
 		     This parameter provides a scaling ratio (fixed point, decimal divided
 		     by 1000) to compensate, e.g. 1300=1.3x and 750=0.75x.
+- nxp,inverted-out: invert the polarity of the generated PWM
 
 Each led is represented as a sub-node of the nxp,pca963x device.
 
diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c
index ded1e4dac36a..3bf9a1271819 100644
--- a/drivers/leds/leds-pca963x.c
+++ b/drivers/leds/leds-pca963x.c
@@ -342,6 +342,12 @@ pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip)
 	if (of_property_read_u32(np, "nxp,period-scale", &chip->scaling))
 		chip->scaling = 1000;
 
+	/* default to non-inverted output, unless inverted is specified */
+	if (of_property_read_bool(np, "nxp,inverted-out"))
+		pdata->dir = PCA963X_INVERTED;
+	else
+		pdata->dir = PCA963X_NORMAL;
+
 	return pdata;
 }
 
@@ -452,11 +458,18 @@ static int pca963x_probe(struct i2c_client *client,
 	i2c_smbus_write_byte_data(client, PCA963X_MODE1, BIT(4));
 
 	if (pdata) {
+		u8 mode2 = i2c_smbus_read_byte_data(pca963x->chip->client,
+						    PCA963X_MODE2);
 		/* Configure output: open-drain or totem pole (push-pull) */
 		if (pdata->outdrv == PCA963X_OPEN_DRAIN)
-			i2c_smbus_write_byte_data(client, PCA963X_MODE2, 0x01);
+			mode2 |= 0x01;
 		else
-			i2c_smbus_write_byte_data(client, PCA963X_MODE2, 0x05);
+			mode2 |= 0x05;
+		/* Configure direction: normal or inverted */
+		if (pdata->dir == PCA963X_INVERTED)
+			mode2 |= 0x10;
+		i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2,
+					  mode2);
 	}
 
 	return 0;
diff --git a/include/linux/platform_data/leds-pca963x.h b/include/linux/platform_data/leds-pca963x.h
index e731f0036329..54e845ffb5ed 100644
--- a/include/linux/platform_data/leds-pca963x.h
+++ b/include/linux/platform_data/leds-pca963x.h
@@ -33,10 +33,16 @@ enum pca963x_blink_type {
 	PCA963X_HW_BLINK,
 };
 
+enum pca963x_direction {
+	PCA963X_NORMAL,
+	PCA963X_INVERTED,
+};
+
 struct pca963x_platform_data {
 	struct led_platform_data leds;
 	enum pca963x_outdrv outdrv;
 	enum pca963x_blink_type blink_type;
+	enum pca963x_direction dir;
 };
 
 #endif /* __LINUX_PCA963X_H*/
-- 
cgit v1.2.3


From 0179720d6be2096b8d0a4d143254ff9e77747daa Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Sun, 7 May 2017 13:48:31 +0300
Subject: net/mlx5: Introduce trigger_health_work function

Introduce new function for entering bad-health state.

This function will be called from FPGA-related logic in a later patch from
asynchronous event (IRQ) context, for that we change the spin lock to an
IRQ-safe one.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 32 ++++++++++++++++--------
 include/linux/mlx5/driver.h                      |  1 +
 2 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d0515391d33b..c3cedb6cec3f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -185,6 +185,7 @@ static void health_care(struct work_struct *work)
 	struct mlx5_core_health *health;
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
+	unsigned long flags;
 
 	health = container_of(work, struct mlx5_core_health, work);
 	priv = container_of(health, struct mlx5_priv, health);
@@ -192,13 +193,13 @@ static void health_care(struct work_struct *work)
 	mlx5_core_warn(dev, "handling bad device here\n");
 	mlx5_handle_bad_state(dev);
 
-	spin_lock(&health->wq_lock);
+	spin_lock_irqsave(&health->wq_lock, flags);
 	if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
 		schedule_delayed_work(&health->recover_work, recover_delay);
 	else
 		dev_err(&dev->pdev->dev,
 			"new health works are not permitted at this stage\n");
-	spin_unlock(&health->wq_lock);
+	spin_unlock_irqrestore(&health->wq_lock, flags);
 }
 
 static const char *hsynd_str(u8 synd)
@@ -269,6 +270,20 @@ static unsigned long get_next_poll_jiffies(void)
 	return next;
 }
 
+void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
+
+	spin_lock_irqsave(&health->wq_lock, flags);
+	if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+		queue_work(health->wq, &health->work);
+	else
+		dev_err(&dev->pdev->dev,
+			"new health works are not permitted at this stage\n");
+	spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
 static void poll_health(unsigned long data)
 {
 	struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
@@ -297,13 +312,7 @@ static void poll_health(unsigned long data)
 	if (in_fatal(dev) && !health->sick) {
 		health->sick = true;
 		print_health_info(dev);
-		spin_lock(&health->wq_lock);
-		if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
-			queue_work(health->wq, &health->work);
-		else
-			dev_err(&dev->pdev->dev,
-				"new health works are not permitted at this stage\n");
-		spin_unlock(&health->wq_lock);
+		mlx5_trigger_health_work(dev);
 	}
 }
 
@@ -333,10 +342,11 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
 
-	spin_lock(&health->wq_lock);
+	spin_lock_irqsave(&health->wq_lock, flags);
 	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
-	spin_unlock(&health->wq_lock);
+	spin_unlock_irqrestore(&health->wq_lock, flags);
 	cancel_delayed_work_sync(&health->recover_work);
 	cancel_work_sync(&health->work);
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c2740688d679..a277bb36c21f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -915,6 +915,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
+void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-- 
cgit v1.2.3


From e29341fb3a5b885a4bb5b9a38f2814ca07d3382c Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Mon, 13 Mar 2017 20:05:45 +0200
Subject: net/mlx5: FPGA, Add basic support for Innova

Mellanox Innova is a NIC with ConnectX and an FPGA on the same
board. The FPGA is a bump-on-the-wire and thus affects operation of
the mlx5_core driver on the ConnectX ASIC.

Add basic support for Innova in mlx5_core.

This allows using the Innova card as a regular NIC, by detecting
the FPGA capability bit, and verifying its load state before
initializing ConnectX interfaces.

Also detect FPGA fatal runtime failures and enter error state if
they ever happen.

All new FPGA-related logic is placed in its own subdirectory 'fpga',
which may be built by selecting CONFIG_MLX5_FPGA.
This prepares for further support of various Innova features in later
patchsets.
Additional details about hardware architecture will be provided as
more features get submitted.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 MAINTAINERS                                        |  10 +
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig    |  10 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  11 ++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c |  64 +++++++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |  59 ++++++
 .../net/ethernet/mellanox/mlx5/core/fpga/core.c    | 202 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h    |  99 ++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  19 +-
 include/linux/mlx5/device.h                        |   6 +
 include/linux/mlx5/driver.h                        |   5 +
 include/linux/mlx5/mlx5_ifc.h                      |  11 +-
 include/linux/mlx5/mlx5_ifc_fpga.h                 | 144 +++++++++++++++
 13 files changed, 640 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
 create mode 100644 include/linux/mlx5/mlx5_ifc_fpga.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index f7d568b8f133..374ebf1b5d5d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8304,6 +8304,16 @@ W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlx5/core/en_*
 
+MELLANOX ETHERNET INNOVA DRIVER
+M:	Ilan Tayari <ilant@mellanox.com>
+R:	Boris Pismenny <borisp@mellanox.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+F:	drivers/net/ethernet/mellanox/mlx5/core/fpga/*
+F:	include/linux/mlx5/mlx5_ifc_fpga.h
+
 MELLANOX ETHERNET SWITCH DRIVERS
 M:	Jiri Pirko <jiri@mellanox.com>
 M:	Ido Schimmel <idosch@mellanox.com>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index fc52d742b7f7..28cf88483ca4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -11,6 +11,16 @@ config MLX5_CORE
 	  Core driver for low level functionality of the ConnectX-4 and
 	  Connect-IB cards by Mellanox Technologies.
 
+config MLX5_FPGA
+        bool "Mellanox Technologies Innova support"
+        depends on MLX5_CORE
+        ---help---
+          Build support for the Innova family of network cards by Mellanox
+          Technologies. Innova network cards are comprised of a ConnectX chip
+          and an FPGA chip on one board. If you select this option, the
+          mlx5_core driver will include the Innova FPGA core and allow building
+          sandbox-specific client drivers.
+
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9e644615f07a..12556c03eec4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -1,10 +1,13 @@
 obj-$(CONFIG_MLX5_CORE)		+= mlx5_core.o
+subdir-ccflags-y += -I$(src)
 
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
 		fs_counters.o rl.o lag.o dev.o
 
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
+
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
 		en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index df0034d8f48c..01d2cd7e4746 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "fpga/core.h"
 #ifdef CONFIG_MLX5_CORE_EN
 #include "eswitch.h"
 #endif
@@ -156,6 +157,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_PAGE_FAULT";
 	case MLX5_EVENT_TYPE_PPS_EVENT:
 		return "MLX5_EVENT_TYPE_PPS_EVENT";
+	case MLX5_EVENT_TYPE_FPGA_ERROR:
+		return "MLX5_EVENT_TYPE_FPGA_ERROR";
 	default:
 		return "Unrecognized event";
 	}
@@ -476,6 +479,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			if (dev->event)
 				dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe);
 			break;
+
+		case MLX5_EVENT_TYPE_FPGA_ERROR:
+			mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
+			break;
+
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
@@ -693,6 +701,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, pps))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
 
+	if (MLX5_CAP_GEN(dev, fpga))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
new file mode 100644
index 000000000000..99cba644b4fc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fpga/cmd.h"
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), caps,
+				    MLX5_ST_SZ_BYTES(fpga_cap),
+				    MLX5_REG_FPGA_CAP, 0, 0);
+}
+
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+	int err;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_FPGA_CTRL, 0, false);
+	if (err)
+		return err;
+
+	query->status = MLX5_GET(fpga_ctrl, out, status);
+	query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin);
+	query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
new file mode 100644
index 000000000000..a74396a61bc3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_H__
+#define __MLX5_FPGA_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_fpga_image {
+	MLX5_FPGA_IMAGE_USER = 0,
+	MLX5_FPGA_IMAGE_FACTORY,
+};
+
+enum mlx5_fpga_status {
+	MLX5_FPGA_STATUS_SUCCESS = 0,
+	MLX5_FPGA_STATUS_FAILURE = 1,
+	MLX5_FPGA_STATUS_IN_PROGRESS = 2,
+	MLX5_FPGA_STATUS_NONE = 0xFFFF,
+};
+
+struct mlx5_fpga_query {
+	enum mlx5_fpga_image admin_image;
+	enum mlx5_fpga_image oper_image;
+	enum mlx5_fpga_status status;
+};
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+
+#endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
new file mode 100644
index 000000000000..d88b332e9669
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fpga/core.h"
+
+static const char *const mlx5_fpga_error_strings[] = {
+	"Null Syndrome",
+	"Corrupted DDR",
+	"Flash Timeout",
+	"Internal Link Error",
+	"Watchdog HW Failure",
+	"I2C Failure",
+	"Image Changed",
+	"Temperature Critical",
+};
+
+static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
+{
+	struct mlx5_fpga_device *fdev = NULL;
+
+	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+	if (!fdev)
+		return NULL;
+
+	spin_lock_init(&fdev->state_lock);
+	fdev->state = MLX5_FPGA_STATUS_NONE;
+	return fdev;
+}
+
+static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
+{
+	switch (image) {
+	case MLX5_FPGA_IMAGE_USER:
+		return "user";
+	case MLX5_FPGA_IMAGE_FACTORY:
+		return "factory";
+	default:
+		return "unknown";
+	}
+}
+
+static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
+{
+	struct mlx5_fpga_query query;
+	int err;
+
+	err = mlx5_fpga_query(fdev->mdev, &query);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
+		return err;
+	}
+
+	fdev->last_admin_image = query.admin_image;
+	fdev->last_oper_image = query.oper_image;
+
+	mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
+		      query.status, query.admin_image, query.oper_image);
+
+	if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
+		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
+			      mlx5_fpga_image_name(fdev->last_oper_image),
+			      query.status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned long flags;
+	int err;
+
+	if (!fdev)
+		return 0;
+
+	err = mlx5_fpga_device_load_check(fdev);
+	if (err)
+		goto out;
+
+	err = mlx5_fpga_caps(fdev->mdev,
+			     fdev->mdev->caps.hca_cur[MLX5_CAP_FPGA]);
+	if (err)
+		goto out;
+
+	mlx5_fpga_info(fdev, "device %u; %s image, version %u\n",
+		       MLX5_CAP_FPGA(fdev->mdev, fpga_device),
+		       mlx5_fpga_image_name(fdev->last_oper_image),
+		       MLX5_CAP_FPGA(fdev->mdev, image_version));
+
+out:
+	spin_lock_irqsave(&fdev->state_lock, flags);
+	fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
+	spin_unlock_irqrestore(&fdev->state_lock, flags);
+	return err;
+}
+
+int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = NULL;
+
+	if (!MLX5_CAP_GEN(mdev, fpga)) {
+		mlx5_core_dbg(mdev, "FPGA capability not present\n");
+		return 0;
+	}
+
+	mlx5_core_dbg(mdev, "Initializing FPGA\n");
+
+	fdev = mlx5_fpga_device_alloc();
+	if (!fdev)
+		return -ENOMEM;
+
+	fdev->mdev = mdev;
+	mdev->fpga = fdev;
+
+	return 0;
+}
+
+void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+{
+	kfree(mdev->fpga);
+	mdev->fpga = NULL;
+}
+
+static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
+{
+	if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
+		return mlx5_fpga_error_strings[syndrome];
+	return "Unknown";
+}
+
+void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	const char *event_name;
+	bool teardown = false;
+	unsigned long flags;
+	u8 syndrome;
+
+	if (event != MLX5_EVENT_TYPE_FPGA_ERROR) {
+		mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
+					   event);
+		return;
+	}
+
+	syndrome = MLX5_GET(fpga_error_event, data, syndrome);
+	event_name = mlx5_fpga_syndrome_to_string(syndrome);
+
+	spin_lock_irqsave(&fdev->state_lock, flags);
+	switch (fdev->state) {
+	case MLX5_FPGA_STATUS_SUCCESS:
+		mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
+		teardown = true;
+		break;
+	default:
+		mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
+					   syndrome, event_name);
+	}
+	spin_unlock_irqrestore(&fdev->state_lock, flags);
+	/* We tear-down the card's interfaces and functionality because
+	 * the FPGA bump-on-the-wire is misbehaving and we lose ability
+	 * to communicate with the network. User may still be able to
+	 * recover by re-programming or debugging the FPGA
+	 */
+	if (teardown)
+		mlx5_trigger_health_work(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
new file mode 100644
index 000000000000..c55044d66778
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_CORE_H__
+#define __MLX5_FPGA_CORE_H__
+
+#ifdef CONFIG_MLX5_FPGA
+
+#include "fpga/cmd.h"
+
+/* Represents an Innova device */
+struct mlx5_fpga_device {
+	struct mlx5_core_dev *mdev;
+	spinlock_t state_lock; /* Protects state transitions */
+	enum mlx5_fpga_status state;
+	enum mlx5_fpga_image last_admin_image;
+	enum mlx5_fpga_image last_oper_image;
+};
+
+#define mlx5_fpga_dbg(__adev, format, ...) \
+	dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+		 __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_err(__adev, format, ...) \
+	dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+		__func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn(__adev, format, ...) \
+	dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+		__func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
+	dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \
+		format, __func__, __LINE__, ##__VA_ARGS__)
+
+#define mlx5_fpga_notice(__adev, format, ...) \
+	dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+
+#define mlx5_fpga_info(__adev, format, ...) \
+	dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+
+int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
+
+#else
+
+static inline int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
+				   void *data)
+{
+}
+
+#endif
+
+#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f933922d5cca..ad0202cef203 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -56,6 +56,7 @@
 #ifdef CONFIG_MLX5_CORE_EN
 #include "eswitch.h"
 #endif
+#include "fpga/core.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -1113,10 +1114,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_disable_msix;
 	}
 
+	err = mlx5_fpga_device_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "fpga device init failed %d\n", err);
+		goto err_put_uars;
+	}
+
 	err = mlx5_start_eqs(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-		goto err_put_uars;
+		goto err_fpga_init;
 	}
 
 	err = alloc_comp_eqs(dev);
@@ -1147,6 +1154,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_sriov;
 	}
 
+	err = mlx5_fpga_device_start(dev);
+	if (err) {
+		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+		goto err_reg_dev;
+	}
+
 	if (mlx5_device_registered(dev)) {
 		mlx5_attach_device(dev);
 	} else {
@@ -1182,6 +1195,9 @@ err_affinity_hints:
 err_stop_eqs:
 	mlx5_stop_eqs(dev);
 
+err_fpga_init:
+	mlx5_fpga_device_cleanup(dev);
+
 err_put_uars:
 	mlx5_put_uars_page(dev, priv->uar);
 
@@ -1246,6 +1262,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_irq_clear_affinity_hints(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
+	mlx5_fpga_device_cleanup(dev);
 	mlx5_put_uars_page(dev, priv->uar);
 	mlx5_disable_msix(dev);
 	if (cleanup)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dd9a263ed368..786a43843da9 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -300,6 +300,8 @@ enum mlx5_event {
 
 	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
 	MLX5_EVENT_TYPE_NIC_VPORT_CHANGE   = 0xd,
+
+	MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
 };
 
 enum {
@@ -967,6 +969,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_RESERVED,
 	MLX5_CAP_VECTOR_CALC,
 	MLX5_CAP_QOS,
+	MLX5_CAP_FPGA,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1088,6 +1091,9 @@ enum mlx5_mcam_feature_groups {
 #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \
 	MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld)
 
+#define MLX5_CAP_FPGA(mdev, cap) \
+	MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a277bb36c21f..55bb712643cb 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -108,6 +108,8 @@ enum {
 	MLX5_REG_QTCT		 = 0x400a,
 	MLX5_REG_DCBX_PARAM      = 0x4020,
 	MLX5_REG_DCBX_APP        = 0x4021,
+	MLX5_REG_FPGA_CAP	 = 0x4022,
+	MLX5_REG_FPGA_CTRL	 = 0x4023,
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
 	MLX5_REG_PTYS		 = 0x5004,
@@ -761,6 +763,9 @@ struct mlx5_core_dev {
 	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
+#ifdef CONFIG_MLX5_FPGA
+	struct mlx5_fpga_device *fpga;
+#endif
 #ifdef CONFIG_RFS_ACCEL
 	struct cpu_rmap         *rmap;
 #endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 32de0724b400..6fa1eb6766af 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -32,6 +32,8 @@
 #ifndef MLX5_IFC_H
 #define MLX5_IFC_H
 
+#include "mlx5_ifc_fpga.h"
+
 enum {
 	MLX5_EVENT_TYPE_CODING_COMPLETION_EVENTS                   = 0x0,
 	MLX5_EVENT_TYPE_CODING_PATH_MIGRATED_SUCCEEDED             = 0x1,
@@ -56,7 +58,8 @@ enum {
 	MLX5_EVENT_TYPE_CODING_STALL_VL_EVENT                      = 0x1b,
 	MLX5_EVENT_TYPE_CODING_DROPPED_PACKET_LOGGED_EVENT         = 0x1f,
 	MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION        = 0xa,
-	MLX5_EVENT_TYPE_CODING_PAGE_REQUEST                        = 0xb
+	MLX5_EVENT_TYPE_CODING_PAGE_REQUEST                        = 0xb,
+	MLX5_EVENT_TYPE_CODING_FPGA_ERROR                          = 0x20,
 };
 
 enum {
@@ -854,7 +857,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         max_tc[0x4];
 	u8         reserved_at_1d0[0x1];
 	u8         dcbx[0x1];
-	u8         reserved_at_1d2[0x4];
+	u8         reserved_at_1d2[0x3];
+	u8         fpga[0x1];
 	u8         rol_s[0x1];
 	u8         rol_g[0x1];
 	u8         reserved_at_1d8[0x1];
@@ -2186,6 +2190,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
 	struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
 	struct mlx5_ifc_qos_cap_bits qos_cap;
+	struct mlx5_ifc_fpga_cap_bits fpga_cap;
 	u8         reserved_at_0[0x8000];
 };
 
@@ -8182,6 +8187,8 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
 	struct mlx5_ifc_mtpps_reg_bits mtpps_reg;
 	struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
+	struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits;
+	struct mlx5_ifc_fpga_cap_bits fpga_cap_bits;
 	u8         reserved_at_0[0x60e0];
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
new file mode 100644
index 000000000000..0032d10ac6cf
--- /dev/null
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef MLX5_IFC_FPGA_H
+#define MLX5_IFC_FPGA_H
+
+struct mlx5_ifc_fpga_shell_caps_bits {
+	u8         max_num_qps[0x10];
+	u8         reserved_at_10[0x8];
+	u8         total_rcv_credits[0x8];
+
+	u8         reserved_at_20[0xe];
+	u8         qp_type[0x2];
+	u8         reserved_at_30[0x5];
+	u8         rae[0x1];
+	u8         rwe[0x1];
+	u8         rre[0x1];
+	u8         reserved_at_38[0x4];
+	u8         dc[0x1];
+	u8         ud[0x1];
+	u8         uc[0x1];
+	u8         rc[0x1];
+
+	u8         reserved_at_40[0x1a];
+	u8         log_ddr_size[0x6];
+
+	u8         max_fpga_qp_msg_size[0x20];
+
+	u8         reserved_at_80[0x180];
+};
+
+struct mlx5_ifc_fpga_cap_bits {
+	u8         fpga_id[0x8];
+	u8         fpga_device[0x18];
+
+	u8         register_file_ver[0x20];
+
+	u8         fpga_ctrl_modify[0x1];
+	u8         reserved_at_41[0x5];
+	u8         access_reg_query_mode[0x2];
+	u8         reserved_at_48[0x6];
+	u8         access_reg_modify_mode[0x2];
+	u8         reserved_at_50[0x10];
+
+	u8         reserved_at_60[0x20];
+
+	u8         image_version[0x20];
+
+	u8         image_date[0x20];
+
+	u8         image_time[0x20];
+
+	u8         shell_version[0x20];
+
+	u8         reserved_at_100[0x80];
+
+	struct mlx5_ifc_fpga_shell_caps_bits shell_caps;
+
+	u8         reserved_at_380[0x8];
+	u8         ieee_vendor_id[0x18];
+
+	u8         sandbox_product_version[0x10];
+	u8         sandbox_product_id[0x10];
+
+	u8         sandbox_basic_caps[0x20];
+
+	u8         reserved_at_3e0[0x10];
+	u8         sandbox_extended_caps_len[0x10];
+
+	u8         sandbox_extended_caps_addr[0x40];
+
+	u8         fpga_ddr_start_addr[0x40];
+
+	u8         fpga_cr_space_start_addr[0x40];
+
+	u8         fpga_ddr_size[0x20];
+
+	u8         fpga_cr_space_size[0x20];
+
+	u8         reserved_at_500[0x300];
+};
+
+struct mlx5_ifc_fpga_ctrl_bits {
+	u8         reserved_at_0[0x8];
+	u8         operation[0x8];
+	u8         reserved_at_10[0x8];
+	u8         status[0x8];
+
+	u8         reserved_at_20[0x8];
+	u8         flash_select_admin[0x8];
+	u8         reserved_at_30[0x8];
+	u8         flash_select_oper[0x8];
+
+	u8         reserved_at_40[0x40];
+};
+
+enum {
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_CORRUPTED_DDR        = 0x1,
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_FLASH_TIMEOUT        = 0x2,
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_INTERNAL_LINK_ERROR  = 0x3,
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_WATCHDOG_FAILURE     = 0x4,
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_I2C_FAILURE          = 0x5,
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_IMAGE_CHANGED        = 0x6,
+	MLX5_FPGA_ERROR_EVENT_SYNDROME_TEMPERATURE_CRITICAL = 0x7,
+};
+
+struct mlx5_ifc_fpga_error_event_bits {
+	u8         reserved_at_0[0x40];
+
+	u8         reserved_at_40[0x18];
+	u8         syndrome[0x8];
+
+	u8         reserved_at_60[0x80];
+};
+
+#endif /* MLX5_IFC_FPGA_H */
-- 
cgit v1.2.3


From b421b22b00b0011f6a2ce3561176c4e79e640c49 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Apr 2017 12:14:13 +0200
Subject: x86/tsc, sched/clock, clocksource: Use clocksource watchdog to
 provide stable sync points

Currently we keep sched_clock_tick() active for stable TSC in order to
keep the per-CPU state semi up-to-date. The (obvious) problem is that
by the time we detect TSC is borked, our per-CPU state is also borked.

So hook into the clocksource watchdog and call a method after we've
found it to still be stable.

There's the obvious race where the TSC goes wonky between finding it
stable and us running the callback, but closing that is too much work
and not really worth it, since we're already detecting TSC wobbles
after the fact, so we cannot, per definition, fully avoid funny clock
values.

And since the watchdog runs less often than the tick, this is also an
optimization.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/tsc.c       | 10 ++++++++++
 include/linux/clocksource.h |  1 +
 include/linux/sched/clock.h |  2 +-
 kernel/sched/clock.c        | 36 +++++++++++++++++++++++++++---------
 kernel/time/clocksource.c   |  3 +++
 5 files changed, 42 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 66015195bd18..c1b16b328abe 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1033,6 +1033,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
 
+static void tsc_cs_tick_stable(struct clocksource *cs)
+{
+	if (tsc_unstable)
+		return;
+
+	if (using_native_sched_clock())
+		sched_clock_tick_stable();
+}
+
 /*
  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
  */
@@ -1046,6 +1055,7 @@ static struct clocksource clocksource_tsc = {
 	.archdata               = { .vclock_mode = VCLOCK_TSC },
 	.resume			= tsc_resume,
 	.mark_unstable		= tsc_cs_mark_unstable,
+	.tick_stable		= tsc_cs_tick_stable,
 };
 
 void mark_tsc_unstable(char *reason)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f2b10d9ebd04..81490456c242 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -96,6 +96,7 @@ struct clocksource {
 	void (*suspend)(struct clocksource *cs);
 	void (*resume)(struct clocksource *cs);
 	void (*mark_unstable)(struct clocksource *cs);
+	void (*tick_stable)(struct clocksource *cs);
 
 	/* private: */
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 34fe92ce1ebd..978cbb0af5f3 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -63,8 +63,8 @@ extern void clear_sched_clock_stable(void);
  */
 extern u64 __sched_clock_offset;
 
-
 extern void sched_clock_tick(void);
+extern void sched_clock_tick_stable(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index dc650851935f..f861637f7fdc 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -366,20 +366,38 @@ void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd;
 
+	if (sched_clock_stable())
+		return;
+
+	if (unlikely(!sched_clock_running))
+		return;
+
 	WARN_ON_ONCE(!irqs_disabled());
 
-	/*
-	 * Update these values even if sched_clock_stable(), because it can
-	 * become unstable at any point in time at which point we need some
-	 * values to fall back on.
-	 *
-	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
-	 */
 	scd = this_scd();
 	__scd_stamp(scd);
+	sched_clock_local(scd);
+}
+
+void sched_clock_tick_stable(void)
+{
+	u64 gtod, clock;
 
-	if (!sched_clock_stable() && likely(sched_clock_running))
-		sched_clock_local(scd);
+	if (!sched_clock_stable())
+		return;
+
+	/*
+	 * Called under watchdog_lock.
+	 *
+	 * The watchdog just found this TSC to (still) be stable, so now is a
+	 * good moment to update our __gtod_offset. Because once we find the
+	 * TSC to be unstable, any computation will be computing crap.
+	 */
+	local_irq_disable();
+	gtod = ktime_get_ns();
+	clock = sched_clock();
+	__gtod_offset = (clock + __sched_clock_offset) - gtod;
+	local_irq_enable();
 }
 
 /*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 93621ae718d3..03918a19cf2d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -233,6 +233,9 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
+		if (cs == curr_clocksource && cs->tick_stable)
+			cs->tick_stable(cs);
+
 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
-- 
cgit v1.2.3


From ac1e843f0900bea92fcb47f6205e1f9ffb0d469c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Apr 2017 12:26:23 +0200
Subject: sched/clock: Remove unused argument to
 sched_clock_idle_wakeup_event()

The argument to sched_clock_idle_wakeup_event() has not been used in a
long time. Remove it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/tsc.c       | 2 +-
 include/linux/sched/clock.h | 4 ++--
 kernel/sched/clock.c        | 4 ++--
 kernel/time/tick-sched.c    | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c1b16b328abe..a3b544264360 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -182,7 +182,7 @@ static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long ts
 	c2n->data[1] = data;
 
 done:
-	sched_clock_idle_wakeup_event(0);
+	sched_clock_idle_wakeup_event();
 	local_irq_restore(flags);
 }
 
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 978cbb0af5f3..9c36f0722966 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -39,7 +39,7 @@ static inline void sched_clock_idle_sleep_event(void)
 {
 }
 
-static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+static inline void sched_clock_idle_wakeup_event(void)
 {
 }
 
@@ -66,7 +66,7 @@ extern u64 __sched_clock_offset;
 extern void sched_clock_tick(void);
 extern void sched_clock_tick_stable(void);
 extern void sched_clock_idle_sleep_event(void);
-extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+extern void sched_clock_idle_wakeup_event(void);
 
 /*
  * As outlined in clock.c, provides a fast, high resolution, nanosecond
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index f861637f7fdc..750a92c9db7e 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -410,9 +410,9 @@ void sched_clock_idle_sleep_event(void)
 EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
 
 /*
- * We just idled delta nanoseconds (called with irqs disabled):
+ * We just idled; resync with ktime. (called with irqs disabled):
  */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
+void sched_clock_idle_wakeup_event(void)
 {
 	if (timekeeping_suspended)
 		return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 64c97fc130c4..9c2dc64e31d8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -554,7 +554,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 	ts->idle_active = 0;
 
-	sched_clock_idle_wakeup_event(0);
+	sched_clock_idle_wakeup_event();
 }
 
 static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
-- 
cgit v1.2.3


From 2e44b7ddf8ab01cf98106c68388f87af15fbde73 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Apr 2017 12:46:57 +0200
Subject: sched/clock: Use late_initcall() instead of sched_init_smp()

Core2 marks its TSC unstable in ACPI Processor Idle, which is probed
after sched_init_smp(). Luckily it appears both acpi_processor and
intel_idle (which has a similar check) are mandatory built-in.

This means we can delay switching to stable until after these drivers
have ran (if they were modules, this would be impossible).

Delay the stable switch to late_initcall() to allow these drivers to
mark TSC unstable and avoid difficult stable->unstable transitions.

Reported-by: Lofstedt, Marta <marta.lofstedt@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J . Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/clock.h |  5 -----
 kernel/sched/clock.c        | 10 +++++++++-
 kernel/sched/core.c         |  2 --
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 9c36f0722966..a55600ffdf4b 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -23,10 +23,6 @@ extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_init(void);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-static inline void sched_clock_init_late(void)
-{
-}
-
 static inline void sched_clock_tick(void)
 {
 }
@@ -53,7 +49,6 @@ static inline u64 local_clock(void)
 	return sched_clock();
 }
 #else
-extern void sched_clock_init_late(void);
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index d4c2f89fac92..a2f847c6ada8 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -64,6 +64,7 @@
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
 #include <linux/tick.h>
+#include <linux/init.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -202,7 +203,11 @@ void clear_sched_clock_stable(void)
 		__clear_sched_clock_stable();
 }
 
-void sched_clock_init_late(void)
+/*
+ * We run this as late_initcall() such that it runs after all built-in drivers,
+ * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
+ */
+static int __init sched_clock_init_late(void)
 {
 	sched_clock_running = 2;
 	/*
@@ -216,7 +221,10 @@ void sched_clock_init_late(void)
 
 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
+
+	return 0;
 }
+late_initcall(sched_clock_init_late);
 
 /*
  * min, max except they take wrapping into account
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 803c3bc274c4..5794f4acad15 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5984,7 +5984,6 @@ void __init sched_init_smp(void)
 	init_sched_dl_class();
 
 	sched_init_smt();
-	sched_clock_init_late();
 
 	sched_smp_initialized = true;
 }
@@ -6000,7 +5999,6 @@ early_initcall(migration_init);
 void __init sched_init_smp(void)
 {
 	sched_init_granularity();
-	sched_clock_init_late();
 }
 #endif /* CONFIG_SMP */
 
-- 
cgit v1.2.3


From c743f0a5c50f2fcbc628526279cfa24f3dabe182 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 14 Apr 2017 14:20:05 +0200
Subject: sched/fair, cpumask: Export for_each_cpu_wrap()

More users for for_each_cpu_wrap() have appeared. Promote the construct
to generic cpumask interface.

The implementation is slightly modified to reduce arguments.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Lauro Ramos Venancio <lvenanci@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: lwang@redhat.com
Link: http://lkml.kernel.org/r/20170414122005.o35me2h5nowqkxbv@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpumask.h | 17 +++++++++++++++++
 kernel/sched/fair.c     | 45 ++++-----------------------------------------
 lib/cpumask.c           | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 2404ad238c0b..a21b1fb9a968 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node);
 		(cpu) = cpumask_next_zero((cpu), (mask)),	\
 		(cpu) < nr_cpu_ids;)
 
+extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+
+/**
+ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask poiter
+ * @start: the start location
+ *
+ * The implementation does not assume any bit in @mask is set (including @start).
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_wrap(cpu, mask, start)					\
+	for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false);	\
+	     (cpu) < nr_cpumask_bits;						\
+	     (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
+
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4f1825d60937..f80c825e2b43 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5640,43 +5640,6 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
 
-/*
- * Implement a for_each_cpu() variant that starts the scan at a given cpu
- * (@start), and wraps around.
- *
- * This is used to scan for idle CPUs; such that not all CPUs looking for an
- * idle CPU find the same CPU. The down-side is that tasks tend to cycle
- * through the LLC domain.
- *
- * Especially tbench is found sensitive to this.
- */
-
-static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
-{
-	int next;
-
-again:
-	next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
-
-	if (*wrapped) {
-		if (next >= start)
-			return nr_cpumask_bits;
-	} else {
-		if (next >= nr_cpumask_bits) {
-			*wrapped = 1;
-			n = -1;
-			goto again;
-		}
-	}
-
-	return next;
-}
-
-#define for_each_cpu_wrap(cpu, mask, start, wrap)				\
-	for ((wrap) = 0, (cpu) = (start)-1;					\
-		(cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)),	\
-		(cpu) < nr_cpumask_bits; )
-
 #ifdef CONFIG_SCHED_SMT
 
 static inline void set_idle_cores(int cpu, int val)
@@ -5736,7 +5699,7 @@ unlock:
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu, wrap;
+	int core, cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -5746,7 +5709,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
 
-	for_each_cpu_wrap(core, cpus, target, wrap) {
+	for_each_cpu_wrap(core, cpus, target) {
 		bool idle = true;
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
@@ -5812,7 +5775,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 avg_cost, avg_idle = this_rq()->avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu, wrap;
+	int cpu;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -5829,7 +5792,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	time = local_clock();
 
-	for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
+	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
 		if (idle_cpu(cpu))
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 81dedaab36cc..4731a0895760 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
 }
 EXPORT_SYMBOL(cpumask_any_but);
 
+/**
+ * cpumask_next_wrap - helper to implement for_each_cpu_wrap
+ * @n: the cpu prior to the place to search
+ * @mask: the cpumask pointer
+ * @start: the start point of the iteration
+ * @wrap: assume @n crossing @start terminates the iteration
+ *
+ * Returns >= nr_cpu_ids on completion
+ *
+ * Note: the @wrap argument is required for the start condition when
+ * we cannot assume @start is set in @mask.
+ */
+int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+{
+	int next;
+
+again:
+	next = cpumask_next(n, mask);
+
+	if (wrap && n < start && next >= start) {
+		return nr_cpumask_bits;
+
+	} else if (next >= nr_cpumask_bits) {
+		wrap = true;
+		n = -1;
+		goto again;
+	}
+
+	return next;
+}
+EXPORT_SYMBOL(cpumask_next_wrap);
+
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 /**
-- 
cgit v1.2.3


From 324318f0248c31be8a08984146e7e4dd7cdd091d Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 9 May 2017 16:17:37 -0400
Subject: netfilter: xtables: zero padding in data_to_user

When looking up an iptables rule, the iptables binary compares the
aligned match and target data (XT_ALIGN). In some cases this can
exceed the actual data size to include padding bytes.

Before commit f77bc5b23fb1 ("iptables: use match, target and data
copy_to_user helpers") the malloc()ed bytes were overwritten by the
kernel with kzalloced contents, zeroing the padding and making the
comparison succeed. After this patch, the kernel copies and clears
only data, leaving the padding bytes undefined.

Extend the clear operation from data size to aligned data size to
include the padding bytes, if any.

Padding bytes can be observed in both match and target, and the bug
triggered, by issuing a rule with match icmp and target ACCEPT:

  iptables -t mangle -A INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT
  iptables -t mangle -D INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT

Fixes: f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers")
Reported-by: Paul Moore <pmoore@redhat.com>
Reported-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/bridge/netfilter/ebtables.c    | 9 ++++++---
 net/netfilter/x_tables.c           | 9 ++++++---
 3 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index be378cf47fcc..b3044c2c62cb 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m,
 int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u);
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size);
+		    int usersize, int size, int aligned_size);
 
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9ec0c9f908fa..9c6e619f452b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
 	strlcpy(name, _name, sizeof(name));
 	if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
 	    put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
-	    xt_data_to_user(um + entrysize, data, usersize, datasize))
+	    xt_data_to_user(um + entrysize, data, usersize, datasize,
+			    XT_ALIGN(datasize)))
 		return -EFAULT;
 
 	return 0;
@@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 		if (match->compat_to_user(cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, m->data, match->usersize, msize))
+		if (xt_data_to_user(cm->data, m->data, match->usersize, msize,
+				    COMPAT_XT_ALIGN(msize)))
 			return -EFAULT;
 	}
 
@@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 		if (target->compat_to_user(cm->data, t->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize))
+		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize,
+				    COMPAT_XT_ALIGN(tsize)))
 			return -EFAULT;
 	}
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8876b7da6884..d17769599c10 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -283,12 +283,13 @@ static int xt_obj_to_user(u16 __user *psize, u16 size,
 		       &U->u.user.revision, K->u.kernel.TYPE->revision)
 
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size)
+		    int usersize, int size, int aligned_size)
 {
 	usersize = usersize ? : size;
 	if (copy_to_user(dst, src, usersize))
 		return -EFAULT;
-	if (usersize != size && clear_user(dst + usersize, size - usersize))
+	if (usersize != aligned_size &&
+	    clear_user(dst + usersize, aligned_size - usersize))
 		return -EFAULT;
 
 	return 0;
@@ -298,7 +299,9 @@ EXPORT_SYMBOL_GPL(xt_data_to_user);
 #define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
+			C_SIZE ? : K->u.kernel.TYPE->TYPE##size,	\
+			C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) :		\
+				 XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
-- 
cgit v1.2.3


From 785818fa8385fe55dab253e42a4c6728fca61333 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:43 +0200
Subject: mtd: nand: add core support for on-die ECC

A number of NAND flashes have a capability called "on-die ECC" where the
NAND chip itself is capable of detecting and correcting errors.

Linux already has support for using the ECC implementation of the NAND
controller, or a software based ECC implementation, but not for using
the ECC implementation of the NAND controller. However, such an
implementation is sometimes useful in situations where the NAND
controller provides ECC algorithms that are not strong enough for the
NAND chip used on the system. A typical case is a NAND chip that
requires a 4-bit ECC, while the NAND controller only provides a 1-bit
ECC algorithm.

This commit introduces the support for the NAND_ECC_ON_DIE ECC mode:

 - Parsing of the "on-die" value for the "nand-ecc-mode" Device Tree
   property

 - Handling NAND_ECC_ON_DIE case in nand_scan_tail(). The idea is that
   the vendor specific code for the NAND chip must implement
   ->read_page() and ->write_page(). It may optionally provide its own
   ->read_page_raw() and ->write_page_raw() as well. For OOB operation,
   we assume the standard operations are good enough, but they can be
   overridden by the vendor specific code if needed.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 13 +++++++++++++
 include/linux/mtd/nand.h     |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d474378ed810..f49aecd3f1de 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4177,6 +4177,7 @@ static const char * const nand_ecc_modes[] = {
 	[NAND_ECC_HW]		= "hw",
 	[NAND_ECC_HW_SYNDROME]	= "hw_syndrome",
 	[NAND_ECC_HW_OOB_FIRST]	= "hw_oob_first",
+	[NAND_ECC_ON_DIE]	= "on-die",
 };
 
 static int of_get_nand_ecc_mode(struct device_node *np)
@@ -4717,6 +4718,18 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 		break;
 
+	case NAND_ECC_ON_DIE:
+		if (!ecc->read_page || !ecc->write_page) {
+			WARN(1, "No ECC functions supplied; on-die ECC not possible\n");
+			ret = -EINVAL;
+			goto err_free;
+		}
+		if (!ecc->read_oob)
+			ecc->read_oob = nand_read_oob_std;
+		if (!ecc->write_oob)
+			ecc->write_oob = nand_write_oob_std;
+		break;
+
 	case NAND_ECC_NONE:
 		pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n");
 		ecc->read_page = nand_read_page_raw;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 8f67b1581683..603522097ec9 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -116,6 +116,7 @@ typedef enum {
 	NAND_ECC_HW,
 	NAND_ECC_HW_SYNDROME,
 	NAND_ECC_HW_OOB_FIRST,
+	NAND_ECC_ON_DIE,
 } nand_ecc_modes_t;
 
 enum nand_ecc_algo {
-- 
cgit v1.2.3


From cc0f51ec111266f5d255e753bf3254ad411d5c12 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:44 +0200
Subject: mtd: nand: export nand_{read,write}_page_raw()

The nand_read_page_raw() and nand_write_page_raw() functions might be
re-used by vendor-specific implementations of the read_page/write_page
functions. Instead of having vendor-specific code duplicate this code,
it is much better to export those functions and allow them to be
re-used.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 10 ++++++----
 include/linux/mtd/nand.h     |  8 ++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f49aecd3f1de..0b3b1a88091a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1524,14 +1524,15 @@ EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		       uint8_t *buf, int oob_required, int page)
 {
 	chip->read_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
 		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
 	return 0;
 }
+EXPORT_SYMBOL(nand_read_page_raw);
 
 /**
  * nand_read_page_raw_syndrome - [INTERN] read raw page data without ecc
@@ -2469,8 +2470,8 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			       const uint8_t *buf, int oob_required, int page)
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+			const uint8_t *buf, int oob_required, int page)
 {
 	chip->write_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
@@ -2478,6 +2479,7 @@ static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 
 	return 0;
 }
+EXPORT_SYMBOL(nand_write_page_raw);
 
 /**
  * nand_write_page_raw_syndrome - [INTERN] raw page write function
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 603522097ec9..7a01d2eb7443 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1259,6 +1259,14 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page);
 
+/* Default read_page_raw implementation */
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		       uint8_t *buf, int oob_required, int page);
+
+/* Default write_page_raw implementation */
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+			const uint8_t *buf, int oob_required, int page);
+
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
 
-- 
cgit v1.2.3


From 14bebd01c5f5306c804bcb78d008df3a149dd0b3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 9 May 2017 19:18:37 +0300
Subject: dmaengine: dw: Remove AVR32 bits from the driver

AVR32 is gone. Now it's time to clean up the driver by removing
leftovers that was used by AVR32 related code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/dma/dw/Kconfig |   7 +-
 drivers/dma/dw/core.c  | 332 +------------------------------------------------
 drivers/dma/dw/regs.h  |  50 ++------
 include/linux/dma/dw.h |  21 ----
 4 files changed, 14 insertions(+), 396 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
index 5a37b9fcf40d..04b9728c1d26 100644
--- a/drivers/dma/dw/Kconfig
+++ b/drivers/dma/dw/Kconfig
@@ -6,17 +6,12 @@ config DW_DMAC_CORE
 	tristate
 	select DMA_ENGINE
 
-config DW_DMAC_BIG_ENDIAN_IO
-	bool
-
 config DW_DMAC
 	tristate "Synopsys DesignWare AHB DMA platform driver"
 	select DW_DMAC_CORE
-	select DW_DMAC_BIG_ENDIAN_IO if AVR32
-	default y if CPU_AT32AP7000
 	help
 	  Support the Synopsys DesignWare AHB DMA controller. This
-	  can be integrated in chips such as the Atmel AT32ap7000.
+	  can be integrated in chips such as the Intel Cherrytrail.
 
 config DW_DMAC_PCI
 	tristate "Synopsys DesignWare AHB DMA PCI driver"
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index e500950dad82..f43e6dafe446 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -561,92 +561,14 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	dwc_descriptor_complete(dwc, bad_desc, true);
 }
 
-/* --------------------- Cyclic DMA API extensions -------------------- */
-
-dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan)
-{
-	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
-	return channel_readl(dwc, SAR);
-}
-EXPORT_SYMBOL(dw_dma_get_src_addr);
-
-dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan)
-{
-	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
-	return channel_readl(dwc, DAR);
-}
-EXPORT_SYMBOL(dw_dma_get_dst_addr);
-
-/* Called with dwc->lock held and all DMAC interrupts disabled */
-static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
-		u32 status_block, u32 status_err, u32 status_xfer)
-{
-	unsigned long flags;
-
-	if (status_block & dwc->mask) {
-		void (*callback)(void *param);
-		void *callback_param;
-
-		dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n",
-				channel_readl(dwc, LLP));
-		dma_writel(dw, CLEAR.BLOCK, dwc->mask);
-
-		callback = dwc->cdesc->period_callback;
-		callback_param = dwc->cdesc->period_callback_param;
-
-		if (callback)
-			callback(callback_param);
-	}
-
-	/*
-	 * Error and transfer complete are highly unlikely, and will most
-	 * likely be due to a configuration error by the user.
-	 */
-	if (unlikely(status_err & dwc->mask) ||
-			unlikely(status_xfer & dwc->mask)) {
-		unsigned int i;
-
-		dev_err(chan2dev(&dwc->chan),
-			"cyclic DMA unexpected %s interrupt, stopping DMA transfer\n",
-			status_xfer ? "xfer" : "error");
-
-		spin_lock_irqsave(&dwc->lock, flags);
-
-		dwc_dump_chan_regs(dwc);
-
-		dwc_chan_disable(dw, dwc);
-
-		/* Make sure DMA does not restart by loading a new list */
-		channel_writel(dwc, LLP, 0);
-		channel_writel(dwc, CTL_LO, 0);
-		channel_writel(dwc, CTL_HI, 0);
-
-		dma_writel(dw, CLEAR.BLOCK, dwc->mask);
-		dma_writel(dw, CLEAR.ERROR, dwc->mask);
-		dma_writel(dw, CLEAR.XFER, dwc->mask);
-
-		for (i = 0; i < dwc->cdesc->periods; i++)
-			dwc_dump_lli(dwc, dwc->cdesc->desc[i]);
-
-		spin_unlock_irqrestore(&dwc->lock, flags);
-	}
-
-	/* Re-enable interrupts */
-	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
-}
-
-/* ------------------------------------------------------------------------- */
-
 static void dw_dma_tasklet(unsigned long data)
 {
 	struct dw_dma *dw = (struct dw_dma *)data;
 	struct dw_dma_chan *dwc;
-	u32 status_block;
 	u32 status_xfer;
 	u32 status_err;
 	unsigned int i;
 
-	status_block = dma_readl(dw, RAW.BLOCK);
 	status_xfer = dma_readl(dw, RAW.XFER);
 	status_err = dma_readl(dw, RAW.ERROR);
 
@@ -655,8 +577,7 @@ static void dw_dma_tasklet(unsigned long data)
 	for (i = 0; i < dw->dma.chancnt; i++) {
 		dwc = &dw->chan[i];
 		if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
-			dwc_handle_cyclic(dw, dwc, status_block, status_err,
-					status_xfer);
+			dev_vdbg(dw->dma.dev, "Cyclic xfer is not implemented\n");
 		else if (status_err & (1 << i))
 			dwc_handle_error(dw, dwc);
 		else if (status_xfer & (1 << i))
@@ -1264,255 +1185,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-/* --------------------- Cyclic DMA API extensions -------------------- */
-
-/**
- * dw_dma_cyclic_start - start the cyclic DMA transfer
- * @chan: the DMA channel to start
- *
- * Must be called with soft interrupts disabled. Returns zero on success or
- * -errno on failure.
- */
-int dw_dma_cyclic_start(struct dma_chan *chan)
-{
-	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
-	struct dw_dma		*dw = to_dw_dma(chan->device);
-	unsigned long		flags;
-
-	if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
-		dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n");
-		return -ENODEV;
-	}
-
-	spin_lock_irqsave(&dwc->lock, flags);
-
-	/* Enable interrupts to perform cyclic transfer */
-	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
-
-	dwc_dostart(dwc, dwc->cdesc->desc[0]);
-
-	spin_unlock_irqrestore(&dwc->lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(dw_dma_cyclic_start);
-
-/**
- * dw_dma_cyclic_stop - stop the cyclic DMA transfer
- * @chan: the DMA channel to stop
- *
- * Must be called with soft interrupts disabled.
- */
-void dw_dma_cyclic_stop(struct dma_chan *chan)
-{
-	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
-	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
-	unsigned long		flags;
-
-	spin_lock_irqsave(&dwc->lock, flags);
-
-	dwc_chan_disable(dw, dwc);
-
-	spin_unlock_irqrestore(&dwc->lock, flags);
-}
-EXPORT_SYMBOL(dw_dma_cyclic_stop);
-
-/**
- * dw_dma_cyclic_prep - prepare the cyclic DMA transfer
- * @chan: the DMA channel to prepare
- * @buf_addr: physical DMA address where the buffer starts
- * @buf_len: total number of bytes for the entire buffer
- * @period_len: number of bytes for each period
- * @direction: transfer direction, to or from device
- *
- * Must be called before trying to start the transfer. Returns a valid struct
- * dw_cyclic_desc if successful or an ERR_PTR(-errno) if not successful.
- */
-struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
-		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_transfer_direction direction)
-{
-	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
-	struct dma_slave_config		*sconfig = &dwc->dma_sconfig;
-	struct dw_cyclic_desc		*cdesc;
-	struct dw_cyclic_desc		*retval = NULL;
-	struct dw_desc			*desc;
-	struct dw_desc			*last = NULL;
-	u8				lms = DWC_LLP_LMS(dwc->dws.m_master);
-	unsigned long			was_cyclic;
-	unsigned int			reg_width;
-	unsigned int			periods;
-	unsigned int			i;
-	unsigned long			flags;
-
-	spin_lock_irqsave(&dwc->lock, flags);
-	if (dwc->nollp) {
-		spin_unlock_irqrestore(&dwc->lock, flags);
-		dev_dbg(chan2dev(&dwc->chan),
-				"channel doesn't support LLP transfers\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
-		spin_unlock_irqrestore(&dwc->lock, flags);
-		dev_dbg(chan2dev(&dwc->chan),
-				"queue and/or active list are not empty\n");
-		return ERR_PTR(-EBUSY);
-	}
-
-	was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
-	spin_unlock_irqrestore(&dwc->lock, flags);
-	if (was_cyclic) {
-		dev_dbg(chan2dev(&dwc->chan),
-				"channel already prepared for cyclic DMA\n");
-		return ERR_PTR(-EBUSY);
-	}
-
-	retval = ERR_PTR(-EINVAL);
-
-	if (unlikely(!is_slave_direction(direction)))
-		goto out_err;
-
-	dwc->direction = direction;
-
-	if (direction == DMA_MEM_TO_DEV)
-		reg_width = __ffs(sconfig->dst_addr_width);
-	else
-		reg_width = __ffs(sconfig->src_addr_width);
-
-	periods = buf_len / period_len;
-
-	/* Check for too big/unaligned periods and unaligned DMA buffer. */
-	if (period_len > (dwc->block_size << reg_width))
-		goto out_err;
-	if (unlikely(period_len & ((1 << reg_width) - 1)))
-		goto out_err;
-	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
-		goto out_err;
-
-	retval = ERR_PTR(-ENOMEM);
-
-	cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
-	if (!cdesc)
-		goto out_err;
-
-	cdesc->desc = kzalloc(sizeof(struct dw_desc *) * periods, GFP_KERNEL);
-	if (!cdesc->desc)
-		goto out_err_alloc;
-
-	for (i = 0; i < periods; i++) {
-		desc = dwc_desc_get(dwc);
-		if (!desc)
-			goto out_err_desc_get;
-
-		switch (direction) {
-		case DMA_MEM_TO_DEV:
-			lli_write(desc, dar, sconfig->dst_addr);
-			lli_write(desc, sar, buf_addr + period_len * i);
-			lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
-				| DWC_CTLL_DST_WIDTH(reg_width)
-				| DWC_CTLL_SRC_WIDTH(reg_width)
-				| DWC_CTLL_DST_FIX
-				| DWC_CTLL_SRC_INC
-				| DWC_CTLL_INT_EN));
-
-			lli_set(desc, ctllo, sconfig->device_fc ?
-					DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
-					DWC_CTLL_FC(DW_DMA_FC_D_M2P));
-
-			break;
-		case DMA_DEV_TO_MEM:
-			lli_write(desc, dar, buf_addr + period_len * i);
-			lli_write(desc, sar, sconfig->src_addr);
-			lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
-				| DWC_CTLL_SRC_WIDTH(reg_width)
-				| DWC_CTLL_DST_WIDTH(reg_width)
-				| DWC_CTLL_DST_INC
-				| DWC_CTLL_SRC_FIX
-				| DWC_CTLL_INT_EN));
-
-			lli_set(desc, ctllo, sconfig->device_fc ?
-					DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
-					DWC_CTLL_FC(DW_DMA_FC_D_P2M));
-
-			break;
-		default:
-			break;
-		}
-
-		lli_write(desc, ctlhi, period_len >> reg_width);
-		cdesc->desc[i] = desc;
-
-		if (last)
-			lli_write(last, llp, desc->txd.phys | lms);
-
-		last = desc;
-	}
-
-	/* Let's make a cyclic list */
-	lli_write(last, llp, cdesc->desc[0]->txd.phys | lms);
-
-	dev_dbg(chan2dev(&dwc->chan),
-			"cyclic prepared buf %pad len %zu period %zu periods %d\n",
-			&buf_addr, buf_len, period_len, periods);
-
-	cdesc->periods = periods;
-	dwc->cdesc = cdesc;
-
-	return cdesc;
-
-out_err_desc_get:
-	while (i--)
-		dwc_desc_put(dwc, cdesc->desc[i]);
-out_err_alloc:
-	kfree(cdesc);
-out_err:
-	clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
-	return (struct dw_cyclic_desc *)retval;
-}
-EXPORT_SYMBOL(dw_dma_cyclic_prep);
-
-/**
- * dw_dma_cyclic_free - free a prepared cyclic DMA transfer
- * @chan: the DMA channel to free
- */
-void dw_dma_cyclic_free(struct dma_chan *chan)
-{
-	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
-	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
-	struct dw_cyclic_desc	*cdesc = dwc->cdesc;
-	unsigned int		i;
-	unsigned long		flags;
-
-	dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
-
-	if (!cdesc)
-		return;
-
-	spin_lock_irqsave(&dwc->lock, flags);
-
-	dwc_chan_disable(dw, dwc);
-
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
-	dma_writel(dw, CLEAR.ERROR, dwc->mask);
-	dma_writel(dw, CLEAR.XFER, dwc->mask);
-
-	spin_unlock_irqrestore(&dwc->lock, flags);
-
-	for (i = 0; i < cdesc->periods; i++)
-		dwc_desc_put(dwc, cdesc->desc[i]);
-
-	kfree(cdesc->desc);
-	kfree(cdesc);
-
-	dwc->cdesc = NULL;
-
-	clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
-}
-EXPORT_SYMBOL(dw_dma_cyclic_free);
-
-/*----------------------------------------------------------------------*/
-
 int dw_dma_probe(struct dw_dma_chip *chip)
 {
 	struct dw_dma_platform_data *pdata;
@@ -1642,7 +1314,7 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 		if (autocfg) {
 			unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
 			void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
-			unsigned int dwc_params = dma_readl_native(addr);
+			unsigned int dwc_params = readl(addr);
 
 			dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
 					   dwc_params);
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 32a328721c88..09e7dfdbb790 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -116,20 +116,6 @@ struct dw_dma_regs {
 	DW_REG(GLOBAL_CFG);
 };
 
-/*
- * Big endian I/O access when reading and writing to the DMA controller
- * registers.  This is needed on some platforms, like the Atmel AVR32
- * architecture.
- */
-
-#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
-#define dma_readl_native ioread32be
-#define dma_writel_native iowrite32be
-#else
-#define dma_readl_native readl
-#define dma_writel_native writel
-#endif
-
 /* Bitfields in DW_PARAMS */
 #define DW_PARAMS_NR_CHAN	8		/* number of channels */
 #define DW_PARAMS_NR_MASTER	11		/* number of AHB masters */
@@ -280,7 +266,6 @@ struct dw_dma_chan {
 	unsigned long		flags;
 	struct list_head	active_list;
 	struct list_head	queue;
-	struct dw_cyclic_desc	*cdesc;
 
 	unsigned int		descs_allocated;
 
@@ -302,9 +287,9 @@ __dwc_regs(struct dw_dma_chan *dwc)
 }
 
 #define channel_readl(dwc, name) \
-	dma_readl_native(&(__dwc_regs(dwc)->name))
+	readl(&(__dwc_regs(dwc)->name))
 #define channel_writel(dwc, name, val) \
-	dma_writel_native((val), &(__dwc_regs(dwc)->name))
+	writel((val), &(__dwc_regs(dwc)->name))
 
 static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
 {
@@ -333,9 +318,9 @@ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
 }
 
 #define dma_readl(dw, name) \
-	dma_readl_native(&(__dw_regs(dw)->name))
+	readl(&(__dw_regs(dw)->name))
 #define dma_writel(dw, name, val) \
-	dma_writel_native((val), &(__dw_regs(dw)->name))
+	writel((val), &(__dw_regs(dw)->name))
 
 #define idma32_readq(dw, name)				\
 	hi_lo_readq(&(__dw_regs(dw)->name))
@@ -352,43 +337,30 @@ static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
 	return container_of(ddev, struct dw_dma, dma);
 }
 
-#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
-typedef __be32 __dw32;
-#else
-typedef __le32 __dw32;
-#endif
-
 /* LLI == Linked List Item; a.k.a. DMA block descriptor */
 struct dw_lli {
 	/* values that are not changed by hardware */
-	__dw32		sar;
-	__dw32		dar;
-	__dw32		llp;		/* chain to next lli */
-	__dw32		ctllo;
+	__le32		sar;
+	__le32		dar;
+	__le32		llp;		/* chain to next lli */
+	__le32		ctllo;
 	/* values that may get written back: */
-	__dw32		ctlhi;
+	__le32		ctlhi;
 	/* sstat and dstat can snapshot peripheral register state.
 	 * silicon config may discard either or both...
 	 */
-	__dw32		sstat;
-	__dw32		dstat;
+	__le32		sstat;
+	__le32		dstat;
 };
 
 struct dw_desc {
 	/* FIRST values the hardware uses */
 	struct dw_lli			lli;
 
-#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
-#define lli_set(d, reg, v)		((d)->lli.reg |= cpu_to_be32(v))
-#define lli_clear(d, reg, v)		((d)->lli.reg &= ~cpu_to_be32(v))
-#define lli_read(d, reg)		be32_to_cpu((d)->lli.reg)
-#define lli_write(d, reg, v)		((d)->lli.reg = cpu_to_be32(v))
-#else
 #define lli_set(d, reg, v)		((d)->lli.reg |= cpu_to_le32(v))
 #define lli_clear(d, reg, v)		((d)->lli.reg &= ~cpu_to_le32(v))
 #define lli_read(d, reg)		le32_to_cpu((d)->lli.reg)
 #define lli_write(d, reg, v)		((d)->lli.reg = cpu_to_le32(v))
-#endif
 
 	/* THEN values for driver housekeeping */
 	struct list_head		desc_node;
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index b63b25814d77..e166cac8e870 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -50,25 +50,4 @@ static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
 static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; }
 #endif /* CONFIG_DW_DMAC_CORE */
 
-/* DMA API extensions */
-struct dw_desc;
-
-struct dw_cyclic_desc {
-	struct dw_desc	**desc;
-	unsigned long	periods;
-	void		(*period_callback)(void *param);
-	void		*period_callback_param;
-};
-
-struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
-		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_transfer_direction direction);
-void dw_dma_cyclic_free(struct dma_chan *chan);
-int dw_dma_cyclic_start(struct dma_chan *chan);
-void dw_dma_cyclic_stop(struct dma_chan *chan);
-
-dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
-
-dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
-
 #endif /* _DMA_DW_H */
-- 
cgit v1.2.3


From c512f36b581862df20acef050c3e1a875166bd6f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:31:19 +0200
Subject: sunrpc: properly type argument to kxdreproc_t

Pass struct rpc_request as the first argument instead of an untyped blob,
and mark the data object as const.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/sunrpc/xdr.h | 5 ++++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 054c8cde18f3..290f189de200 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -17,6 +17,8 @@
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
+struct rpc_rqst;
+
 /*
  * Buffer adjustment
  */
@@ -222,7 +224,8 @@ struct xdr_stream {
 /*
  * These are the xdr_stream style generic XDR encode and decode functions.
  */
-typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj);
 typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b5cb921775a0..9fee20dc0c80 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2476,7 +2476,8 @@ out_overflow:
 	goto out_garbage;
 }
 
-static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj)
 {
 }
 
-- 
cgit v1.2.3


From 73c8dc133afb0cbe72a9234894ea72c2a0e71a73 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:58:11 +0200
Subject: sunrpc: properly type argument to kxdrdproc_t

Pass struct rpc_request as the first argument instead of an untyped blob.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xdr.h | 3 ++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 290f189de200..ed0fbf0d8d0f 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -226,7 +226,8 @@ struct xdr_stream {
  */
 typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		const void *obj);
-typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9fee20dc0c80..964d5c4a1b60 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2481,7 +2481,8 @@ static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 1c5876ddbdb401f814ef717394826e7dfb6704d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:27:10 +0200
Subject: sunrpc: move p_count out of struct rpc_procinfo

p_count is the only writeable memeber of struct rpc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct rpc_procinfo, and into a
separate writable array that is pointed to by struct rpc_version and
indexed by p_statidx.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/clnt4xdr.c                  |  2 ++
 fs/lockd/clntxdr.c                   |  4 ++++
 fs/lockd/mon.c                       |  4 +++-
 fs/nfs/mount_clnt.c                  |  5 ++++-
 fs/nfs/nfs2xdr.c                     |  4 +++-
 fs/nfs/nfs3xdr.c                     |  6 +++++-
 fs/nfs/nfs4xdr.c                     |  4 +++-
 fs/nfsd/nfs4callback.c               |  4 +++-
 include/linux/sunrpc/clnt.h          |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  3 ++-
 net/sunrpc/clnt.c                    |  6 ++++--
 net/sunrpc/rpcb_clnt.c               | 12 +++++++++---
 net/sunrpc/stats.c                   |  3 +--
 13 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f0ab7a99dd23..7c255d1d7c64 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -602,8 +602,10 @@ static struct rpc_procinfo	nlm4_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version4_counts[ARRAY_SIZE(nlm4_procedures)];
 const struct rpc_version nlm_version4 = {
 	.number		= 4,
 	.nrprocs	= ARRAY_SIZE(nlm4_procedures),
 	.procs		= nlm4_procedures,
+	.counts		= nlm_version4_counts,
 };
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index bd8a976785ae..39500c5743a5 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -600,16 +600,20 @@ static struct rpc_procinfo	nlm_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version1_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version1_counts,
 };
 
+static unsigned int nlm_version3_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version3_counts,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 62424e929a7f..fe4ec82764fe 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -552,10 +552,12 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 };
 
+static unsigned int nsm_version1_counts[ARRAY_SIZE(nsm_procedures)];
 static const struct rpc_version nsm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nsm_procedures),
-	.procs		= nsm_procedures
+	.procs		= nsm_procedures,
+	.counts		= nsm_version1_counts,
 };
 
 static const struct rpc_version *nsm_version[] = {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 806657d65074..d25914aa8bf9 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -504,17 +504,20 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 };
 
-
+static unsigned int mnt_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
+	.counts		= mnt_counts,
 };
 
+static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
+	.counts		= mnt3_counts,
 };
 
 static const struct rpc_version *mnt_version[] = {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a299648ea321..16b4526299c1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1170,8 +1170,10 @@ struct rpc_procinfo	nfs_procedures[] = {
 	PROC(STATFS,	fhandle,	statfsres,	0),
 };
 
+static unsigned int nfs_version2_counts[ARRAY_SIZE(nfs_procedures)];
 const struct rpc_version nfs_version2 = {
 	.number			= 2,
 	.nrprocs		= ARRAY_SIZE(nfs_procedures),
-	.procs			= nfs_procedures
+	.procs			= nfs_procedures,
+	.counts			= nfs_version2_counts,
 };
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index cc272eb8be3e..a017ec5c7a9d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2578,10 +2578,12 @@ struct rpc_procinfo	nfs3_procedures[] = {
 	PROC(COMMIT,		commit,		commit,		5),
 };
 
+static unsigned int nfs_version3_counts[ARRAY_SIZE(nfs3_procedures)];
 const struct rpc_version nfs_version3 = {
 	.number			= 3,
 	.nrprocs		= ARRAY_SIZE(nfs3_procedures),
-	.procs			= nfs3_procedures
+	.procs			= nfs3_procedures,
+	.counts			= nfs_version3_counts,
 };
 
 #ifdef CONFIG_NFS_V3_ACL
@@ -2606,10 +2608,12 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 };
 
+static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
 	.nrprocs		= sizeof(nfs3_acl_procedures)/
 				  sizeof(nfs3_acl_procedures[0]),
 	.procs			= nfs3_acl_procedures,
+	.counts			= nfs3_acl_counts,
 };
 #endif  /* CONFIG_NFS_V3_ACL */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 797f3ce75286..40cf5529e65f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7661,10 +7661,12 @@ struct rpc_procinfo	nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_2 */
 };
 
+static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
 const struct rpc_version nfs_version4 = {
 	.number			= 4,
 	.nrprocs		= ARRAY_SIZE(nfs4_procedures),
-	.procs			= nfs4_procedures
+	.procs			= nfs4_procedures,
+	.counts			= nfs_version4_counts,
 };
 
 /*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a2bedbd05b2b..afa961fe073c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -705,6 +705,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 };
 
+static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
 static struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
@@ -715,7 +716,8 @@ static struct rpc_version nfs_cb_version4 = {
  */
 	.number			= 1,
 	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
-	.procs			= nfs4_cb_procedures
+	.procs			= nfs4_cb_procedures,
+	.counts			= nfs4_cb_counts,
 };
 
 static const struct rpc_version *nfs_cb_version[] = {
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecba0dde..c75ba37151fe 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -88,6 +88,7 @@ struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
 	struct rpc_procinfo *	procs;		/* procedure array */
+	unsigned int		*counts;	/* call counts */
 };
 
 /*
@@ -99,7 +100,6 @@ struct rpc_procinfo {
 	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
-	unsigned int		p_count;	/* call count */
 	unsigned int		p_timer;	/* Which RTT timer to use */
 	u32			p_statidx;	/* Which procedure to account */
 	const char *		p_name;		/* name of procedure */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index a80b8e607478..f8729b647605 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -364,11 +364,12 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
 /*
  * Initialization stuff
  */
-
+static unsigned int gssp_version1_counts[ARRAY_SIZE(gssp_procedures)];
 static const struct rpc_version gssp_version1 = {
 	.number		= GSSPROXY_VERS_1,
 	.nrprocs	= ARRAY_SIZE(gssp_procedures),
 	.procs		= gssp_procedures,
+	.counts		= gssp_version1_counts,
 };
 
 static const struct rpc_version *gssp_version[] = {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 964d5c4a1b60..f2d1f971247b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1517,14 +1517,16 @@ static void
 call_start(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	int idx = task->tk_msg.rpc_proc->p_statidx;
 
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
-	/* Increment call count */
-	task->tk_msg.rpc_proc->p_count++;
+	/* Increment call count (version might not be valid for ping) */
+	if (clnt->cl_program->version[clnt->cl_vers])
+		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index f67b9e2897b4..9d47b9d3bbee 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -1117,22 +1117,28 @@ static const struct rpcb_info rpcb_next_version6[] = {
 	},
 };
 
+static unsigned int rpcb_version2_counts[ARRAY_SIZE(rpcb_procedures2)];
 static const struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
-	.procs		= rpcb_procedures2
+	.procs		= rpcb_procedures2,
+	.counts		= rpcb_version2_counts,
 };
 
+static unsigned int rpcb_version3_counts[ARRAY_SIZE(rpcb_procedures3)];
 static const struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
-	.procs		= rpcb_procedures3
+	.procs		= rpcb_procedures3,
+	.counts		= rpcb_version3_counts,
 };
 
+static unsigned int rpcb_version4_counts[ARRAY_SIZE(rpcb_procedures4)];
 static const struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
-	.procs		= rpcb_procedures4
+	.procs		= rpcb_procedures4,
+	.counts		= rpcb_version4_counts,
 };
 
 static const struct rpc_version *rpcb_version[] = {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index caeb01ad2b5a..91c84d18bf9a 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -55,8 +55,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
 		seq_printf(seq, "proc%u %u",
 					vers->number, vers->nrprocs);
 		for (j = 0; j < vers->nrprocs; j++)
-			seq_printf(seq, " %u",
-					vers->procs[j].p_count);
+			seq_printf(seq, " %u", vers->counts[j]);
 		seq_putc(seq, '\n');
 	}
 	return 0;
-- 
cgit v1.2.3


From 499b4988109e91b76f231fb1b4f1e53ec3260686 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:36:49 +0200
Subject: sunrpc: mark all struct rpc_procinfo instances as const

struct rpc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c                  |  2 +-
 fs/lockd/clntxdr.c                   |  2 +-
 fs/lockd/mon.c                       |  2 +-
 fs/nfs/internal.h                    |  6 +++---
 fs/nfs/mount_clnt.c                  |  4 ++--
 fs/nfs/nfs2xdr.c                     |  2 +-
 fs/nfs/nfs3xdr.c                     |  4 ++--
 fs/nfs/nfs4_fs.h                     |  2 +-
 fs/nfs/nfs4xdr.c                     |  2 +-
 fs/nfsd/nfs4callback.c               |  2 +-
 include/linux/sunrpc/clnt.h          |  4 ++--
 include/linux/sunrpc/sched.h         |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  2 +-
 net/sunrpc/clnt.c                    |  4 ++--
 net/sunrpc/rpcb_clnt.c               | 19 ++++++++++---------
 net/sunrpc/stats.c                   |  2 +-
 16 files changed, 31 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 7c255d1d7c64..c349fc0f9b80 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -584,7 +584,7 @@ out:
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm4_procedures[] = {
+static const struct rpc_procinfo nlm4_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 39500c5743a5..3b4724a6c4ee 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -582,7 +582,7 @@ out:
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm_procedures[] = {
+static const struct rpc_procinfo nlm_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index fe4ec82764fe..9d8166c39c54 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -531,7 +531,7 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
 
-static struct rpc_procinfo	nsm_procedures[] = {
+static const struct rpc_procinfo nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= nsm_xdr_enc_mon,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e9b4c3320e37..c21254924389 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -270,12 +270,12 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 }
 
 /* nfs2xdr.c */
-extern struct rpc_procinfo nfs_procedures[];
+extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
 /* nfs3xdr.c */
-extern struct rpc_procinfo nfs3_procedures[];
+extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
@@ -292,7 +292,7 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
 
 /* nfs4proc.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 #endif
 
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d25914aa8bf9..3efe946672be 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -466,7 +466,7 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 	return decode_auth_flavors(xdr, res);
 }
 
-static struct rpc_procinfo mnt_procedures[] = {
+static const struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
@@ -485,7 +485,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 };
 
-static struct rpc_procinfo mnt3_procedures[] = {
+static const struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 16b4526299c1..c8a7e98c1371 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1152,7 +1152,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 	.p_statidx  =  NFSPROC_##proc,					\
 	.p_name     =  #proc,						\
 	}
-struct rpc_procinfo	nfs_procedures[] = {
+const struct rpc_procinfo nfs_procedures[] = {
 	PROC(GETATTR,	fhandle,	attrstat,	1),
 	PROC(SETATTR,	sattrargs,	attrstat,	0),
 	PROC(LOOKUP,	diropargs,	diropres,	2),
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 85ff1187e637..670eddb3ae36 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2554,7 +2554,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 	.p_name      = #proc,						\
 	}
 
-struct rpc_procinfo	nfs3_procedures[] = {
+const struct rpc_procinfo nfs3_procedures[] = {
 	PROC(GETATTR,		getattr,	getattr,	1),
 	PROC(SETATTR,		setattr,	setattr,	0),
 	PROC(LOOKUP,		lookup,		lookup,		2),
@@ -2587,7 +2587,7 @@ const struct rpc_version nfs_version3 = {
 };
 
 #ifdef CONFIG_NFS_V3_ACL
-static struct rpc_procinfo	nfs3_acl_procedures[] = {
+static const struct rpc_procinfo nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = nfs3_xdr_enc_getacl3args,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index af285cc27ccf..9b0cf3872722 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -493,7 +493,7 @@ static inline void nfs4_unregister_sysctl(void)
 #endif
 
 /* nfs4xdr.c */
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 40cf5529e65f..0f1f290c97cd 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7594,7 +7594,7 @@ nfs4_stat_to_errno(int stat)
 	.p_name = #proc,	\
 }
 
-struct rpc_procinfo	nfs4_procedures[] = {
+const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(READ,		enc_read,		dec_read),
 	PROC(WRITE,		enc_write,		dec_write),
 	PROC(COMMIT,		enc_commit,		dec_commit),
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index afa961fe073c..ac10f78c0fb3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -696,7 +696,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 	.p_name    = #proc,						\
 }
 
-static struct rpc_procinfo nfs4_cb_procedures[] = {
+static const struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
 	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
 #ifdef CONFIG_NFSD_PNFS
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c75ba37151fe..55ef67bea06b 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -39,7 +39,7 @@ struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
-	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
+	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
 				cl_maxproc;	/* max procedure number */
@@ -87,7 +87,7 @@ struct rpc_program {
 struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
-	struct rpc_procinfo *	procs;		/* procedure array */
+	const struct rpc_procinfo *procs;	/* procedure array */
 	unsigned int		*counts;	/* call counts */
 };
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec..ed60253abd0a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -22,7 +22,7 @@
  */
 struct rpc_procinfo;
 struct rpc_message {
-	struct rpc_procinfo *	rpc_proc;	/* Procedure information */
+	const struct rpc_procinfo *rpc_proc;	/* Procedure information */
 	void *			rpc_argp;	/* Arguments */
 	void *			rpc_resp;	/* Result */
 	struct rpc_cred *	rpc_cred;	/* Credentials */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f8729b647605..46b295e4f2b8 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -63,7 +63,7 @@ enum {
 	.p_name   = #proc,				\
 }
 
-static struct rpc_procinfo gssp_procedures[] = {
+static const struct rpc_procinfo gssp_procedures[] = {
 	PROC(INDICATE_MECHS, indicate_mechs),
         PROC(GET_CALL_CONTEXT, get_call_context),
         PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f2d1f971247b..2e49d1f892b7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1674,7 +1674,7 @@ call_allocate(struct rpc_task *task)
 	unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
-	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
 	int status;
 
 	dprint_status(task);
@@ -2489,7 +2489,7 @@ static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	return 0;
 }
 
-static struct rpc_procinfo rpcproc_null = {
+static const struct rpc_procinfo rpcproc_null = {
 	.p_encode = rpcproc_encode_null,
 	.p_decode = rpcproc_decode_null,
 };
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 9d47b9d3bbee..ea0676f199c8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -128,13 +128,13 @@ struct rpcbind_args {
 	int			r_status;
 };
 
-static struct rpc_procinfo rpcb_procedures2[];
-static struct rpc_procinfo rpcb_procedures3[];
-static struct rpc_procinfo rpcb_procedures4[];
+static const struct rpc_procinfo rpcb_procedures2[];
+static const struct rpc_procinfo rpcb_procedures3[];
+static const struct rpc_procinfo rpcb_procedures4[];
 
 struct rpcb_info {
 	u32			rpc_vers;
-	struct rpc_procinfo *	rpc_proc;
+	const struct rpc_procinfo *rpc_proc;
 };
 
 static const struct rpcb_info rpcb_next_version[];
@@ -620,7 +620,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
 	return -EAFNOSUPPORT;
 }
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt,
+		struct rpcbind_args *map, const struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
 		.rpc_proc = proc,
@@ -671,7 +672,7 @@ static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt;
-	struct rpc_procinfo *proc;
+	const struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -994,7 +995,7 @@ out_fail:
  * since the Linux kernel RPC code requires only these.
  */
 
-static struct rpc_procinfo rpcb_procedures2[] = {
+static const struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_mapping,
@@ -1027,7 +1028,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures3[] = {
+static const struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
@@ -1060,7 +1061,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures4[] = {
+static const struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 91c84d18bf9a..8b6c35ae1d57 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -191,7 +191,7 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
 EXPORT_SYMBOL_GPL(rpc_count_iostats);
 
 static void _print_name(struct seq_file *seq, unsigned int op,
-			struct rpc_procinfo *procs)
+			const struct rpc_procinfo *procs)
 {
 	if (procs[op].p_name)
 		seq_printf(seq, "\t%12s: ", procs[op].p_name);
-- 
cgit v1.2.3


From a6beb73272b4c0108e41bc7c7b5a447ae6c92863 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 17:35:49 +0200
Subject: sunrpc: properly type pc_func callbacks

Drop the argp and resp arguments as they can trivially be derived from
the rqstp argument.  With that all functions now have the same prototype,
and we can remove the unsafe casting to svc_procfunc as well as the
svc_procfunc typedef itself.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        | 118 ++++++++++++++++++++++-------------
 fs/lockd/svcproc.c         | 118 ++++++++++++++++++++++-------------
 fs/nfs/callback_xdr.c      |   7 ++-
 fs/nfsd/nfs2acl.c          |  25 ++++----
 fs/nfsd/nfs3acl.c          |  15 ++---
 fs/nfsd/nfs3proc.c         | 151 ++++++++++++++++++++++++++-------------------
 fs/nfsd/nfs4proc.c         |   9 +--
 fs/nfsd/nfsproc.c          | 104 +++++++++++++++++--------------
 fs/nfsd/nfssvc.c           |   2 +-
 include/linux/sunrpc/svc.h |   4 +-
 net/sunrpc/svc.c           |   2 +-
 11 files changed, 328 insertions(+), 227 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 09c576f26c7b..3e4cba029d3d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -62,7 +62,7 @@ no_locks:
  * NULL: Test for presence of service
  */
 static __be32
-nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlm4svc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -72,9 +72,9 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -99,9 +99,15 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlm4svc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlm4svc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -141,9 +147,15 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlm4svc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -170,13 +182,19 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -203,14 +221,21 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -219,6 +244,12 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -243,9 +274,10 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *,  struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -261,7 +293,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -273,48 +305,44 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, __nlm4svc_proc_test);
 }
 
-static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, __nlm4svc_proc_lock);
 }
 
-static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, __nlm4svc_proc_cancel);
 }
 
-static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlm4svc_proc_unlock);
 }
 
-static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, __nlm4svc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlm4svc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -345,9 +373,10 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -378,22 +407,23 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlm4svc_proc_lock(rqstp, argp, resp);
+	return nlm4svc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlm4svc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -409,9 +439,10 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -429,9 +460,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
         if (!nlmsvc_ops)
                 return rpc_success;
 
@@ -463,7 +495,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlm4svc_proc_##name,	\
+ { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index fb26b9f522e7..3add50661fab 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -92,7 +92,7 @@ no_locks:
  * NULL: Test for presence of service
  */
 static __be32
-nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlmsvc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -102,9 +102,9 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -130,9 +130,15 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlmsvc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlmsvc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -172,9 +178,15 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlmsvc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -202,13 +214,19 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -236,14 +254,21 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -252,6 +277,12 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -284,9 +315,10 @@ static const struct rpc_call_ops nlmsvc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *, struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -302,7 +334,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -314,50 +346,46 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, __nlmsvc_proc_test);
 }
 
-static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, __nlmsvc_proc_lock);
 }
 
-static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, __nlmsvc_proc_cancel);
 }
 
 static __be32
-nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlmsvc_proc_unlock);
 }
 
 static __be32
-nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, __nlmsvc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlmsvc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -388,9 +416,10 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -421,22 +450,23 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlmsvc_proc_lock(rqstp, argp, resp);
+	return nlmsvc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlmsvc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -452,9 +482,10 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlmsvc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -472,9 +503,10 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
 	if (!nlmsvc_ops)
 		return rpc_success;
 
@@ -505,7 +537,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlmsvc_proc_##name,	\
+ { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 287c02202b25..5a14bdaa5986 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -53,7 +53,7 @@ struct callback_op {
 
 static struct callback_op callback_ops[];
 
-static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 {
 	return htonl(NFS4_OK);
 }
@@ -880,7 +880,7 @@ encode_hdr:
 /*
  * Decode, process and encode a COMPOUND
  */
-static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 {
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
@@ -916,7 +916,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(nops, rqstp, &xdr_in,
-				    argp, &xdr_out, resp, &cps);
+				    rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
+				    &cps);
 		nops++;
 	}
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12933d07204c..4b7f84fa1fa5 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -19,7 +19,7 @@
  * NULL call.
  */
 static __be32
-nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsacld_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -27,9 +27,10 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -87,10 +88,10 @@ fail:
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -141,9 +142,10 @@ out_errno:
 /*
  * Check file attributes
  */
-static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
-		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -158,9 +160,10 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 /*
  * Check file access
  */
-static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-		struct nfsd3_accessres *resp)
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
@@ -347,7 +350,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsacld_proc_##name,		\
+	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index db988a229b3a..5e42004035e0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -18,7 +18,7 @@
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -26,9 +26,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -80,10 +81,10 @@ fail:
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd3_attrstat *resp)
+static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -239,7 +240,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsd3_proc_##name,		\
+	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 24e035190770..4a2bae07cfbf 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -31,7 +31,7 @@ static int	nfs3_ftypes[] = {
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -40,9 +40,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * Get a file's attributes
  */
 static __be32
-nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					   struct nfsd3_attrstat *resp)
+nfsd3_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
@@ -63,9 +64,10 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * Set a file's attributes
  */
 static __be32
-nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
-					   struct nfsd3_attrstat  *resp)
+nfsd3_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd3_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
@@ -81,9 +83,10 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
  * Look up a path name component
  */
 static __be32
-nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_diropres  *resp)
+nfsd3_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
@@ -105,9 +108,10 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Check file access
  */
 static __be32
-nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-					  struct nfsd3_accessres *resp)
+nfsd3_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
@@ -124,9 +128,10 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
-					   struct nfsd3_readlinkres *resp)
+nfsd3_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
@@ -142,9 +147,10 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
  * Read a portion of a file.
  */
 static __be32
-nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
-				        struct nfsd3_readres  *resp)
+nfsd3_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readargs *argp = rqstp->rq_argp;
+	struct nfsd3_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 	unsigned long cnt = min(argp->count, max_blocksize);
@@ -179,9 +185,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
  * Write data to a file
  */
 static __be32
-nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
-					 struct nfsd3_writeres  *resp)
+nfsd3_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd3_writeargs *argp = rqstp->rq_argp;
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -206,9 +213,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
  * first reports about SunOS compatibility problems start to pour in...
  */
 static __be32
-nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					  struct nfsd3_diropres   *resp)
+nfsd3_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp, *newfhp = NULL;
 	struct iattr	*attr;
 	__be32		nfserr;
@@ -243,9 +251,10 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
  * Make directory. This operation is not idempotent.
  */
 static __be32
-nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					 struct nfsd3_diropres   *resp)
+nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
@@ -263,9 +272,10 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 }
 
 static __be32
-nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
-					   struct nfsd3_diropres    *resp)
+nfsd3_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
@@ -284,9 +294,10 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
  * Make socket/fifo/device.
  */
 static __be32
-nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
-					 struct nfsd3_diropres  *resp)
+nfsd3_proc_mknod(struct svc_rqst *rqstp)
 {
+	struct nfsd3_mknodargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int type;
 	dev_t	rdev = 0;
@@ -321,9 +332,10 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
  * Remove file/fifo/socket etc.
  */
 static __be32
-nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_attrstat  *resp)
+nfsd3_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
@@ -342,9 +354,10 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					 struct nfsd3_attrstat  *resp)
+nfsd3_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
@@ -359,9 +372,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 }
 
 static __be32
-nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
-					  struct nfsd3_renameres  *resp)
+nfsd3_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd3_renameargs *argp = rqstp->rq_argp;
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
@@ -381,9 +395,10 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 }
 
 static __be32
-nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
-					struct nfsd3_linkres  *resp)
+nfsd3_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd3_linkargs *argp = rqstp->rq_argp;
+	struct nfsd3_linkres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK(3)     %s ->\n",
@@ -404,9 +419,10 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					   struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32		nfserr;
 	int		count;
 
@@ -440,9 +456,10 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * For now, we choose to ignore the dircount parameter.
  */
 static __be32
-nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					       struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
@@ -507,9 +524,10 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * Get file system stats
  */
 static __be32
-nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsstatres *resp)
+nfsd3_proc_fsstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: FSSTAT(3)   %s\n",
@@ -524,9 +542,10 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get file system info
  */
 static __be32
-nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsinfores *resp)
+nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
@@ -567,9 +586,10 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get pathconf info for the specified file
  */
 static __be32
-nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
-					     struct nfsd3_pathconfres *resp)
+nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: PATHCONF(3) %s\n",
@@ -610,9 +630,10 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
  * Commit a file (range) to stable storage.
  */
 static __be32
-nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
-					   struct nfsd3_commitres  *resp)
+nfsd3_proc_commit(struct svc_rqst *rqstp)
 {
+	struct nfsd3_commitargs *argp = rqstp->rq_argp;
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
@@ -655,7 +676,7 @@ struct nfsd3_voidargs { int dummy; };
 
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_null,
+		.pc_func = nfsd3_proc_null,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
@@ -663,7 +684,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST,
 	},
 	[NFS3PROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_getattr,
+		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -673,7 +694,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFS3PROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_setattr,
+		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -683,7 +704,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_lookup,
+		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -693,7 +714,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+FH+pAT+pAT,
 	},
 	[NFS3PROC_ACCESS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_access,
+		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -703,7 +724,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1,
 	},
 	[NFS3PROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readlink,
+		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -713,7 +734,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
 	},
 	[NFS3PROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_read,
+		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -723,7 +744,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
 	},
 	[NFS3PROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_write,
+		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -733,7 +754,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+4,
 	},
 	[NFS3PROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_create,
+		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -743,7 +764,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mkdir,
+		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -753,7 +774,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_symlink,
+		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -763,7 +784,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKNOD] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mknod,
+		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -773,7 +794,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_remove,
+		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -783,7 +804,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rmdir,
+		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -793,7 +814,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rename,
+		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -803,7 +824,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+WC,
 	},
 	[NFS3PROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_link,
+		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -813,7 +834,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+WC,
 	},
 	[NFS3PROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdir,
+		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -822,7 +843,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_READDIRPLUS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
+		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -831,7 +852,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_FSSTAT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsstat,
+		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -840,7 +861,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+2*6+1,
 	},
 	[NFS3PROC_FSINFO] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
+		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -849,7 +870,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+12,
 	},
 	[NFS3PROC_PATHCONF] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_pathconf,
+		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -858,7 +879,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+6,
 	},
 	[NFS3PROC_COMMIT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_commit,
+		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 695e17605b67..7a0112bc3531 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1510,7 +1510,7 @@ out:
  * NULL call.
  */
 static __be32
-nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd4_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -1524,6 +1524,7 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+
 typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
 typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
 
@@ -1673,10 +1674,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp, void *arg, void *res)
+nfsd4_proc_compound(struct svc_rqst *rqstp)
 {
-	struct nfsd4_compoundargs *args = arg;
-	struct nfsd4_compoundres *resp = res;
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
 	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 03a7e9da4da0..448505b939db 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -17,7 +17,7 @@ typedef struct svc_buf	svc_buf;
 
 
 static __be32
-nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -39,9 +39,10 @@ nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					  struct nfsd_attrstat *resp)
+nfsd_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -56,9 +57,10 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
-					  struct nfsd_attrstat  *resp)
+nfsd_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct iattr *iap = &argp->attrs;
 	struct svc_fh *fhp;
 	__be32 nfserr;
@@ -122,9 +124,10 @@ done:
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 struct nfsd_diropres  *resp)
+nfsd_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP   %s %.*s\n",
@@ -142,9 +145,10 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
-					   struct nfsd_readlinkres *resp)
+nfsd_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
@@ -162,9 +166,10 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
-				       struct nfsd_readres  *resp)
+nfsd_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd_readargs *argp = rqstp->rq_argp;
+	struct nfsd_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
@@ -200,9 +205,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
-					struct nfsd_attrstat  *resp)
+nfsd_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd_writeargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -222,9 +228,10 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
  * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
  */
 static __be32
-nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					 struct nfsd_diropres   *resp)
+nfsd_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp = &argp->fh;
 	svc_fh		*newfhp = &resp->fh;
 	struct iattr	*attr = &argp->attrs;
@@ -377,9 +384,9 @@ done:
 }
 
 static __be32
-nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 void		       *resp)
+nfsd_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
@@ -392,9 +399,9 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 }
 
 static __be32
-nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
-				  	 void		        *resp)
+nfsd_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd_renameargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME   %s %.*s -> \n",
@@ -410,9 +417,9 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 }
 
 static __be32
-nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
-				void			    *resp)
+nfsd_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd_linkargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK     %s ->\n",
@@ -430,9 +437,9 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 }
 
 static __be32
-nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
-				          void			  *resp)
+nfsd_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_symlinkargs *argp = rqstp->rq_argp;
 	struct svc_fh	newfh;
 	__be32		nfserr;
 
@@ -460,9 +467,10 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					struct nfsd_diropres   *resp)
+nfsd_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -484,9 +492,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-				 	void		      *resp)
+nfsd_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -500,9 +508,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
-					  struct nfsd_readdirres  *resp)
+nfsd_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	int		count;
 	__be32		nfserr;
 	loff_t		offset;
@@ -540,9 +549,10 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
  * Get file system info
  */
 static __be32
-nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
-					  struct nfsd_statfsres *resp)
+nfsd_proc_statfs(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
@@ -565,7 +575,7 @@ struct nfsd_void { int dummy; };
 
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd_proc_null,
+		.pc_func = nfsd_proc_null,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
@@ -574,7 +584,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_getattr,
+		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -584,7 +594,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_setattr,
+		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -602,7 +612,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd_proc_lookup,
+		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -612,7 +622,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readlink,
+		.pc_func = nfsd_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
@@ -621,7 +631,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
 	},
 	[NFSPROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd_proc_read,
+		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -639,7 +649,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_write,
+		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -649,7 +659,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_create,
+		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -659,7 +669,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_remove,
+		.pc_func = nfsd_proc_remove,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -668,7 +678,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rename,
+		.pc_func = nfsd_proc_rename,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
@@ -677,7 +687,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_link,
+		.pc_func = nfsd_proc_link,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
@@ -686,7 +696,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_symlink,
+		.pc_func = nfsd_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
@@ -695,7 +705,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_mkdir,
+		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -705,7 +715,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rmdir,
+		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -714,7 +724,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readdir,
+		.pc_func = nfsd_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
@@ -722,7 +732,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFSPROC_STATFS] = {
-		.pc_func = (svc_procfunc) nfsd_proc_statfs,
+		.pc_func = nfsd_proc_statfs,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59979f0bbd4b..d64895fd8d25 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -827,7 +827,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
 	/* Now call the procedure handler, and encode NFS status. */
-	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	nfserr = proc->pc_func(rqstp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
 	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 		dprintk("nfsd: Dropping request; may be revisited later\n");
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 94631026f79c..5c222af2db41 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -418,9 +418,9 @@ struct svc_version {
 /*
  * RPC procedure info
  */
-typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
 struct svc_procedure {
-	svc_procfunc		pc_func;	/* process the request */
+	/* process the request: */
+	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	kxdrproc_t		pc_release;	/* XDR free result */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc0f5a0ecbdc..95335455ad38 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1281,7 +1281,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
 			goto err_garbage;
 
-		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+		*statp = procp->pc_func(rqstp);
 
 		/* Encode reply */
 		if (*statp == rpc_drop_reply ||
-- 
cgit v1.2.3


From 8537488b5a2f33980e33f654b0a515304de2b267 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 18:48:24 +0200
Subject: sunrpc: properly type pc_release callbacks

Drop the p and resp arguments as they are always NULL or can trivially
be derived from the rqstp argument.  With that all functions now have the
same prototype, and we can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs2acl.c          | 22 +++++++++++-----------
 fs/nfsd/nfs3acl.c          |  8 ++++----
 fs/nfsd/nfs3proc.c         | 36 ++++++++++++++++++------------------
 fs/nfsd/nfs3xdr.c          | 16 ++++++++--------
 fs/nfsd/nfs4xdr.c          |  4 +---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfsxdr.c           |  8 ++++----
 fs/nfsd/xdr.h              |  2 +-
 fs/nfsd/xdr3.h             |  6 ++----
 fs/nfsd/xdr4.h             |  2 +-
 include/linux/sunrpc/svc.h |  3 ++-
 net/sunrpc/svc.c           |  8 ++++----
 12 files changed, 63 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4b7f84fa1fa5..302441027f50 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -318,27 +318,27 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
-static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
-               struct nfsd3_accessres *resp)
+static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 {
-       fh_put(&resp->fh);
-       return 1;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+	fh_put(&resp->fh);
 }
 
 #define nfsaclsvc_decode_voidargs	NULL
@@ -353,7 +353,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
+	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e42004035e0..56cdff4e954c 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -223,13 +223,13 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
 #define nfs3svc_decode_voidargs		NULL
@@ -243,7 +243,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
+	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 4a2bae07cfbf..f0cccc0768ce 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -687,7 +687,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
@@ -727,7 +727,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -737,7 +737,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -747,7 +747,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -757,7 +757,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -767,7 +767,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -777,7 +777,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -787,7 +787,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -797,7 +797,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -807,7 +807,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -817,7 +817,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -827,7 +827,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -837,7 +837,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -846,7 +846,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -882,7 +882,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 12feac6ee2fd..7fd361a75287 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1114,19 +1114,19 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-int
-nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+void
+nfs3svc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-int
-nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fhandle_pair *resp)
+void
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp)
 {
+	struct nfsd3_fhandle_pair *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh1);
 	fh_put(&resp->fh2);
-	return 1;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 26780d53a6f9..5aa847bdfc63 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4543,9 +4543,8 @@ nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
         return xdr_ressize_check(rqstp, p);
 }
 
-int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
+void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 {
-	struct svc_rqst *rqstp = rq;
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	if (args->ops != args->iops) {
@@ -4559,7 +4558,6 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
 		args->to_free = tb->next;
 		kfree(tb);
 	}
-	return 1;
 }
 
 int
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 448505b939db..dc32e0f8480d 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -587,7 +587,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
@@ -597,7 +597,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -615,7 +615,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -634,7 +634,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -652,7 +652,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -662,7 +662,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -708,7 +708,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6a4947a3f4fa..de7b07ee489e 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -550,10 +550,10 @@ nfssvc_encode_entry(void *ccdv, const char *name,
 /*
  * XDR release functions
  */
-int
-nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_fhandle *resp)
+void
+nfssvc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 4f0481d63804..2c21fa843fbf 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -164,7 +164,7 @@ int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
 
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+void nfssvc_release_fhandle(struct svc_rqst *);
 
 /* Helper functions for NFSv2 ACL code */
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 335e04aaf7db..23fe456a223b 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -330,10 +330,8 @@ int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
 int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
 				struct nfsd3_commitres *);
 
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
+void nfs3svc_release_fhandle(struct svc_rqst *);
+void nfs3svc_release_fhandle2(struct svc_rqst *);
 int nfs3svc_encode_entry(void *, const char *name,
 				int namlen, loff_t offset, u64 ino,
 				unsigned int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 8fda4abdf3b1..a158579d55a2 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -743,7 +743,7 @@ extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *,
 		struct nfsd4_release_lockowner *rlockowner);
-extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp);
+extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
 extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5c222af2db41..1381e1343640 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -423,7 +423,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
-	kxdrproc_t		pc_release;	/* XDR free result */
+	/* XDR free result: */
+	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
 	unsigned int		pc_count;	/* call count */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 95335455ad38..4611cb7adc04 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1287,12 +1287,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (*statp == rpc_drop_reply ||
 		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 		if (*statp == rpc_autherr_badcred) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
 		if (*statp == rpc_success &&
@@ -1307,7 +1307,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (!versp->vs_dispatch(rqstp, statp)) {
 			/* Release reply info */
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 	}
@@ -1318,7 +1318,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Release reply info */
 	if (procp->pc_release)
-		procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+		procp->pc_release(rqstp);
 
 	if (procp->pc_encode == NULL)
 		goto dropit;
-- 
cgit v1.2.3


From 026fec7e7c4723b5f26a753bbcad69f68c8299d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:01:48 +0200
Subject: sunrpc: properly type pc_decode callbacks

Drop the argp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 29 ++++++++++++------
 fs/lockd/xdr4.c            | 29 ++++++++++++------
 fs/nfs/callback_xdr.c      |  4 +--
 fs/nfsd/nfs2acl.c          | 21 +++++++------
 fs/nfsd/nfs3acl.c          | 11 +++----
 fs/nfsd/nfs3proc.c         | 42 +++++++++++++-------------
 fs/nfsd/nfs3xdr.c          | 74 +++++++++++++++++++++++++++-------------------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfs4xdr.c          |  4 ++-
 fs/nfsd/nfsproc.c          | 36 +++++++++++-----------
 fs/nfsd/nfssvc.c           |  5 ++--
 fs/nfsd/nfsxdr.c           | 53 ++++++++++++++++++++-------------
 fs/nfsd/xdr.h              | 34 ++++++++-------------
 fs/nfsd/xdr3.h             | 47 ++++++++++-------------------
 fs/nfsd/xdr4.h             |  3 +-
 include/linux/lockd/xdr.h  | 18 +++++------
 include/linux/lockd/xdr4.h | 18 +++++------
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  9 ++++--
 21 files changed, 237 insertions(+), 209 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 3e4cba029d3d..804744f7528c 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -496,7 +496,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
-   .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
+   .pc_decode	= nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3add50661fab..204a698f7d41 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -538,7 +538,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
-   .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
+   .pc_decode	= nlmsvc_decode_##xargt,		\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5b651daad518..b57af63fba56 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -182,8 +182,9 @@ nlm_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -207,8 +208,9 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -227,8 +229,9 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -243,8 +246,10 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm_decode_lock(p, &argp->lock)))
 		return 0;
@@ -253,8 +258,9 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -293,8 +299,9 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -305,8 +312,10 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -316,8 +325,10 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -325,7 +336,7 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index dfa4789cd460..46e18598a15c 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -179,8 +179,9 @@ nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -204,8 +205,9 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -224,8 +226,9 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -240,8 +243,10 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm4_decode_lock(p, &argp->lock)))
 		return 0;
@@ -250,8 +255,9 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -290,8 +296,9 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -302,8 +309,10 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -313,8 +322,10 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -322,7 +333,7 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 5a14bdaa5986..23ecbf7a40c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -58,7 +58,7 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
@@ -998,7 +998,7 @@ static struct callback_op callback_ops[] = {
 static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
-		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_decode = nfs4_decode_void,
 		.pc_encode = (kxdrproc_t)nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 302441027f50..bcfdaa83ee6c 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -182,9 +182,10 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 /*
  * XDR decode functions
  */
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *argp)
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -194,9 +195,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *argp)
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -220,18 +221,20 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_fhandle *argp)
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessargs *argp)
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -351,7 +354,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 56cdff4e954c..4e68d6b5f409 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -124,9 +124,10 @@ out:
 /*
  * XDR decode functions
  */
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *args)
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
 	p = nfs3svc_decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -136,9 +137,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *args)
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -241,7 +242,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+	.pc_decode	= nfs3svc_decode_##argt##args,			\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f0cccc0768ce..ed83e8a9e7b4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -685,7 +685,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -695,7 +695,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
+		.pc_decode = nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
@@ -705,7 +705,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -715,7 +715,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
+		.pc_decode = nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
+		.pc_decode = nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
@@ -735,7 +735,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
+		.pc_decode = nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
@@ -745,7 +745,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
+		.pc_decode = nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
@@ -755,7 +755,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
+		.pc_decode = nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
@@ -765,7 +765,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
+		.pc_decode = nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
@@ -775,7 +775,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
+		.pc_decode = nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
@@ -785,7 +785,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
+		.pc_decode = nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
@@ -795,7 +795,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -805,7 +805,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -815,7 +815,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
+		.pc_decode = nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
@@ -825,7 +825,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
+		.pc_decode = nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
@@ -835,7 +835,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
+		.pc_decode = nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
@@ -844,7 +844,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
+		.pc_decode = nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
@@ -853,7 +853,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
@@ -862,7 +862,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
@@ -871,7 +871,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
@@ -880,7 +880,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
+		.pc_decode = nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7fd361a75287..be8bf8af9917 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -273,8 +273,10 @@ void fill_post_wcc(struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -282,9 +284,10 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *a
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_sattrargs *args)
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -300,9 +303,10 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropargs *args)
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -311,9 +315,10 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessargs *args)
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -323,9 +328,9 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readargs *args)
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
@@ -356,9 +361,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeargs *args)
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	unsigned int len, v, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 	struct kvec *head = rqstp->rq_arg.head;
@@ -416,9 +421,10 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -438,10 +444,12 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 
 	return xdr_argsize_check(rqstp, p);
 }
+
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh)) ||
 	    !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -451,9 +459,9 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_symlinkargs *args)
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
 	unsigned int len, avail;
 	char *old, *new;
 	struct kvec *vec;
@@ -503,9 +511,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_mknodargs *args)
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -525,9 +534,10 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameargs *args)
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -538,9 +548,10 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkargs *args)
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -552,9 +563,10 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkargs *args)
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -564,9 +576,9 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -585,9 +597,9 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
@@ -613,9 +625,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitargs *args)
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7a0112bc3531..6ff434b77a9e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2528,7 +2528,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
-		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
+		.pc_decode = nfs4svc_decode_compoundargs,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5aa847bdfc63..3a7e117bd11e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4561,8 +4561,10 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
 	if (rqstp->rq_arg.head[0].iov_len % 4) {
 		/* client is nuts */
 		dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index dc32e0f8480d..d351d0ef6d34 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -576,7 +576,7 @@ struct nfsd_void { int dummy; };
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -585,7 +585,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
@@ -595,7 +595,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
+		.pc_decode = nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
@@ -604,7 +604,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_ROOT] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -613,7 +613,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -623,7 +623,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
+		.pc_decode = nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
@@ -632,7 +632,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
+		.pc_decode = nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
@@ -641,7 +641,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
 	},
 	[NFSPROC_WRITECACHE] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -650,7 +650,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
+		.pc_decode = nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
@@ -660,7 +660,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -670,7 +670,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -679,7 +679,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
+		.pc_decode = nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -688,7 +688,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
+		.pc_decode = nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
+		.pc_decode = nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
+		.pc_decode = nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
@@ -733,7 +733,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d64895fd8d25..3e00499d7ad7 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -801,9 +801,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
 	/* Decode arguments */
-	xdr = proc->pc_decode;
-	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
-			rqstp->rq_argp)) {
+	if (proc->pc_decode &&
+	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
 		dprintk("nfsd: failed to decode arguments!\n");
 		*statp = rpc_garbage_args;
 		return 1;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index de7b07ee489e..0957ceebe1aa 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -206,14 +206,16 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
  * XDR decode functions
  */
 int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -221,9 +223,10 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *ar
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_sattrargs *args)
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -233,9 +236,10 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropargs *args)
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -244,9 +248,9 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readargs *args)
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	p = decode_fh(p, &args->fh);
@@ -279,9 +283,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_writeargs *args)
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_writeargs *args = rqstp->rq_argp;
 	unsigned int len, hdr, dlen;
 	struct kvec *head = rqstp->rq_arg.head;
 	int v;
@@ -335,9 +339,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_createargs *args)
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_createargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->fh))
 	    || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -347,9 +352,10 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_renameargs *args)
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -360,8 +366,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -373,9 +381,10 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_linkargs *args)
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -385,9 +394,10 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_symlinkargs *args)
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_symlinkargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->ffh))
 	    || !(p = decode_filename(p, &args->fname, &args->flen))
 	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
@@ -398,9 +408,10 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirargs *args)
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 2c21fa843fbf..8eeb752cf6f8 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -131,28 +131,18 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
+int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
 int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
 int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 23fe456a223b..f79be4c42e4a 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -269,37 +269,22 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
 int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
 				struct nfsd3_attrstat *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a158579d55a2..2a53c1233884 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -683,8 +683,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
 int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
 		struct nfsd4_compoundres *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d39ed1cc5fbf..0416600844ce 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -95,19 +95,19 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index e58c88b52ce1..951bbe31fdb8 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -23,19 +23,19 @@
 
 
-int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1381e1343640..047f04411dd4 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -421,7 +421,8 @@ struct svc_version {
 struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
-	kxdrproc_t		pc_decode;	/* XDR decode args */
+	/* XDR decode args: */
+	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4611cb7adc04..18024c1b9b7b 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1276,9 +1276,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Call the function that processes the request. */
 	if (!versp->vs_dispatch) {
-		/* Decode arguments */
-		xdr = procp->pc_decode;
-		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
+		/*
+		 * Decode arguments
+		 * XXX: why do we ignore the return value?
+		 */
+		if (procp->pc_decode &&
+		    !procp->pc_decode(rqstp, argv->iov_base))
 			goto err_garbage;
 
 		*statp = procp->pc_func(rqstp);
-- 
cgit v1.2.3


From 63f8de37951a64cc24479eafd33085537e088075 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:42:02 +0200
Subject: sunrpc: properly type pc_encode callbacks

Drop the resp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 14 ++++++---
 fs/lockd/xdr4.c            | 14 ++++++---
 fs/nfs/callback_xdr.c      |  6 ++--
 fs/nfsd/nfs2acl.c          | 18 ++++++-----
 fs/nfsd/nfs3acl.c          | 11 +++----
 fs/nfsd/nfs3proc.c         | 44 +++++++++++++--------------
 fs/nfsd/nfs3xdr.c          | 74 +++++++++++++++++++++++++++-------------------
 fs/nfsd/nfs4proc.c         |  4 +--
 fs/nfsd/nfs4xdr.c          |  5 ++--
 fs/nfsd/nfsproc.c          | 36 +++++++++++-----------
 fs/nfsd/nfssvc.c           |  5 +---
 fs/nfsd/nfsxdr.c           | 31 +++++++++++--------
 fs/nfsd/xdr.h              | 14 ++++-----
 fs/nfsd/xdr3.h             | 45 ++++++++++------------------
 fs/nfsd/xdr4.h             |  5 ++--
 include/linux/lockd/xdr.h  |  8 ++---
 include/linux/lockd/xdr4.h |  8 ++---
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  6 ++--
 21 files changed, 185 insertions(+), 170 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 804744f7528c..fed016155791 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -497,7 +497,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= nlm4svc_decode_##xargt,	\
-   .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
+   .pc_encode	= nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 204a698f7d41..14648b051eba 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -539,7 +539,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= nlmsvc_decode_##xargt,		\
-   .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
+   .pc_encode	= nlmsvc_encode_##xrest,		\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b57af63fba56..442bbd0b0b29 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -200,8 +200,10 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -280,8 +282,10 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -290,8 +294,10 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -342,7 +348,7 @@ nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 46e18598a15c..2a0cd5679c49 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -197,8 +197,10 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -277,8 +279,10 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -287,8 +291,10 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -339,7 +345,7 @@ nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23ecbf7a40c1..acf75dc63e14 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -63,7 +63,7 @@ static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -999,12 +999,12 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
 	[CB_COMPOUND] = {
 		.pc_func = nfs4_callback_compound,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_argsize = 256,
 		.pc_ressize = 256,
 		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index bcfdaa83ee6c..fc6b179c8fff 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -251,15 +251,15 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
  * There must be an encoding function for void results so svc_process
  * will work properly.
  */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
 	struct kvec *head = rqstp->rq_res.head;
@@ -302,17 +302,19 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessres *resp)
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->access);
 	return xdr_ressize_check(rqstp, p);
@@ -355,7 +357,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
-	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+	.pc_encode	= nfsaclsvc_encode_##rest##res,			\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 4e68d6b5f409..9437b758cbfd 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -168,9 +168,9 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
@@ -213,9 +213,10 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_attrstat *resp)
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
 
 	return xdr_ressize_check(rqstp, p);
@@ -243,7 +244,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= nfs3svc_decode_##argt##args,			\
-	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+	.pc_encode	= nfs3svc_encode_##rest##res,			\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index ed83e8a9e7b4..17c90c41a3a6 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -677,7 +677,7 @@ struct nfsd3_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+		.pc_encode = nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -686,7 +686,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
+		.pc_encode = nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
@@ -696,7 +696,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = nfs3svc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
+		.pc_encode = nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = nfs3svc_decode_accessargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
+		.pc_encode = nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = nfs3svc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
+		.pc_encode = nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
@@ -736,7 +736,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = nfs3svc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
+		.pc_encode = nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
@@ -746,7 +746,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = nfs3svc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
+		.pc_encode = nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
@@ -756,7 +756,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = nfs3svc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -766,7 +766,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = nfs3svc_decode_mkdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -776,7 +776,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = nfs3svc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -786,7 +786,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = nfs3svc_decode_mknodargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -796,7 +796,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -806,7 +806,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -816,7 +816,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = nfs3svc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
+		.pc_encode = nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
@@ -826,7 +826,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = nfs3svc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
+		.pc_encode = nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
@@ -836,7 +836,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = nfs3svc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -845,7 +845,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = nfs3svc_decode_readdirplusargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -854,7 +854,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+		.pc_encode = nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -863,7 +863,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+		.pc_encode = nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
 		.pc_cachetype = RC_NOCACHE,
@@ -872,7 +872,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+		.pc_encode = nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
 		.pc_cachetype = RC_NOCACHE,
@@ -881,7 +881,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = nfs3svc_decode_commitargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
+		.pc_encode = nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index be8bf8af9917..349e355edc73 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -645,16 +645,17 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
  * will work properly.
  */
 int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETATTR */
 int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		lease_get_mtime(d_inode(resp->fh.fh_dentry),
 				&resp->stat.mtime);
@@ -665,18 +666,20 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* LOOKUP */
 int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		p = encode_fh(p, &resp->fh);
 		p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -687,9 +690,10 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessres *resp)
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0)
 		*p++ = htonl(resp->access);
@@ -698,9 +702,10 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkres *resp)
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->len);
@@ -719,9 +724,10 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readres *resp)
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
@@ -743,9 +749,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeres *resp)
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -760,9 +766,10 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		*p++ = xdr_one;
 		p = encode_fh(p, &resp->fh);
@@ -774,9 +781,10 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameres *resp)
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->ffh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -784,9 +792,10 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkres *resp)
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -794,9 +803,10 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirres *resp)
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 
 	if (resp->status == 0) {
@@ -1044,9 +1054,9 @@ nfs3svc_encode_entry_plus(void *cd, const char *name,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsstatres *resp)
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	struct kstatfs	*s = &resp->stats;
 	u64		bs = s->f_bsize;
 
@@ -1066,9 +1076,10 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsinfores *resp)
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1090,9 +1101,10 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_pathconfres *resp)
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1109,9 +1121,9 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitres *resp)
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6ff434b77a9e..ad0622efae4e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2520,7 +2520,7 @@ struct nfsd4_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+		.pc_encode = nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -2529,7 +2529,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
 		.pc_decode = nfs4svc_decode_compoundargs,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+		.pc_encode = nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
 		.pc_release = nfsd4_release_compoundargs,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3a7e117bd11e..54e212e3541e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4538,7 +4538,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 }
 
 int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
         return xdr_ressize_check(rqstp, p);
 }
@@ -4584,11 +4584,12 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 {
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_buf *buf = resp->xdr.buf;
 
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index d351d0ef6d34..0ef88d0e67d9 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -577,7 +577,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -586,7 +586,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -596,7 +596,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = nfssvc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -605,7 +605,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_ROOT] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -614,7 +614,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -624,7 +624,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
 		.pc_decode = nfssvc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
+		.pc_encode = nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -633,7 +633,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = nfssvc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
+		.pc_encode = nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
@@ -642,7 +642,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITECACHE] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -651,7 +651,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = nfssvc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -661,7 +661,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -671,7 +671,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -680,7 +680,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
 		.pc_decode = nfssvc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -689,7 +689,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
 		.pc_decode = nfssvc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -698,7 +698,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
 		.pc_decode = nfssvc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
 		.pc_decode = nfssvc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
+		.pc_encode = nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -734,7 +734,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
+		.pc_encode = nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3e00499d7ad7..555233664124 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -782,7 +782,6 @@ int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	struct svc_procedure	*proc;
-	kxdrproc_t		xdr;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
@@ -841,9 +840,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * For NFSv2, additional info is never returned in case of an error.
 	 */
 	if (!(nfserr && rqstp->rq_vers == 2)) {
-		xdr = proc->pc_encode;
-		if (xdr && !xdr(rqstp, nfserrp,
-				rqstp->rq_resp)) {
+		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
 			/* Failed to encode result. Release cache entry */
 			dprintk("nfsd: failed to encode result!\n");
 			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 0957ceebe1aa..bb1998c5ae61 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -429,32 +429,35 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
  * XDR encode functions
  */
 int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_attrstat *resp)
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropres *resp)
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropres *resp = rqstp->rq_resp;
+
 	p = encode_fh(p, &resp->fh);
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readlinkres *resp)
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
+
 	*p++ = htonl(resp->len);
 	xdr_ressize_check(rqstp, p);
 	rqstp->rq_res.page_len = resp->len;
@@ -468,9 +471,10 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readres *resp)
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readres *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
@@ -487,9 +491,10 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirres *resp)
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
+
 	xdr_ressize_check(rqstp, p);
 	p = resp->buffer;
 	*p++ = 0;			/* no more entries */
@@ -500,9 +505,9 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_statfsres *resp)
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
 
 	*p++ = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 8eeb752cf6f8..457ce45e5084 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -143,13 +143,13 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
 
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index f79be4c42e4a..80d7da620e91 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -285,35 +285,22 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2a53c1233884..eb7f9239304f 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -682,10 +682,9 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
 int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 0416600844ce..7acbecc21a40 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -96,16 +96,16 @@ struct nlm_reboot {
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
 int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 951bbe31fdb8..bf1645609225 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -24,16 +24,16 @@
 
 
 int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 047f04411dd4..6cfe41db7f31 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -423,7 +423,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
 	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
-	kxdrproc_t		pc_encode;	/* XDR encode result */
+	/* XDR encode result: */
+	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 18024c1b9b7b..aa643a29fdc6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1154,7 +1154,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	struct svc_version	*versp = NULL;	/* compiler food */
 	struct svc_procedure	*procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
-	kxdrproc_t		xdr;
 	__be32			*statp;
 	u32			prog, vers, proc;
 	__be32			auth_stat, rpc_stat;
@@ -1298,9 +1297,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
-		if (*statp == rpc_success &&
-		    (xdr = procp->pc_encode) &&
-		    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+		if (*statp == rpc_success && procp->pc_encode &&
+		    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
 			dprintk("svc: failed to encode reply\n");
 			/* serv->sv_stats->rpcsystemerr++; */
 			*statp = rpc_system_err;
-- 
cgit v1.2.3


From 35f297e5370bd511a171f7de0c5a31ee661f2e7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:56:10 +0200
Subject: sunrpc: remove kxdrproc_t

Remove the now unused typedef.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/sunrpc/xdr.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index ed0fbf0d8d0f..261b48a2701d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -34,13 +34,6 @@ struct xdr_netobj {
 	u8 *			data;
 };
 
-/*
- * This is the legacy generic XDR function. rqstp is either a rpc_rqst
- * (client side) or svc_rqst pointer (server side).
- * Encode functions always assume there's enough room in the buffer.
- */
-typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
-
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
-- 
cgit v1.2.3


From 7fd38af9cae6aef1dfd28a7d1bd214eb5ddb7d53 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:40:27 +0200
Subject: sunrpc: move pc_count out of struct svc_procinfo

pc_count is the only writeable memeber of struct svc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct svc_procinfo, and into a
separate writable array that is pointed to by struct svc_version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc.c             |  6 ++++++
 fs/nfs/callback_xdr.c      |  4 ++++
 fs/nfsd/nfs2acl.c          |  2 ++
 fs/nfsd/nfs3acl.c          |  2 ++
 fs/nfsd/nfs3proc.c         |  2 ++
 fs/nfsd/nfs4proc.c         |  2 ++
 fs/nfsd/nfsproc.c          |  2 ++
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/stats.c         | 11 ++++++-----
 net/sunrpc/svc.c           |  2 +-
 10 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5d481e8a1b5d..cc6abe6280bc 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -739,23 +739,29 @@ module_exit(exit_nlm);
 /*
  * Define NLM program and procedures
  */
+static unsigned int nlmsvc_version1_count[17];
 static struct svc_version	nlmsvc_version1 = {
 		.vs_vers	= 1,
 		.vs_nproc	= 17,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version1_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
+static unsigned int nlmsvc_version3_count[24];
 static struct svc_version	nlmsvc_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version3_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
+static unsigned int nlmsvc_version4_count[24];
 static struct svc_version	nlmsvc_version4 = {
 		.vs_vers	= 4,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures4,
+		.vs_count	= nlmsvc_version4_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index acf75dc63e14..ecd46b8c0985 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1011,20 +1011,24 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	}
 };
 
+static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
 
+static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fc6b179c8fff..026edfe73fd5 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -378,10 +378,12 @@ static struct svc_procedure		nfsd_acl_procedures2[] = {
   PROC(access,	access,		access,		access,   RC_NOCACHE, ST+AT+1),
 };
 
+static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
 struct svc_version	nfsd_acl_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 5,
 		.vs_proc	= nfsd_acl_procedures2,
+		.vs_count	= nfsd_acl_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9437b758cbfd..73c0970ccefb 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -263,10 +263,12 @@ static struct svc_procedure		nfsd_acl_procedures3[] = {
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
 };
 
+static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
 struct svc_version	nfsd_acl_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 3,
 		.vs_proc	= nfsd_acl_procedures3,
+		.vs_count	= nfsd_acl_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 17c90c41a3a6..b5823802e278 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -890,10 +890,12 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
 struct svc_version	nfsd_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 22,
 		.vs_proc	= nfsd_procedures3,
+		.vs_count	= nfsd_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a947dcef5e4e..bad5fec0ebc7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2554,10 +2554,12 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
 struct svc_version	nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
+	.vs_count		= nfsd_count3,
 	.vs_dispatch		= nfsd_dispatch,
 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
 	.vs_rpcb_optnl		= true,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0ef88d0e67d9..44b157553733 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -743,10 +743,12 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 };
 
 
+static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
 struct svc_version	nfsd_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 18,
 		.vs_proc	= nfsd_procedures2,
+		.vs_count	= nfsd_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6cfe41db7f31..9f00384153f4 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -397,6 +397,7 @@ struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
 	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
 	/* Don't register with rpcbind */
@@ -429,7 +430,6 @@ struct svc_procedure {
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
-	unsigned int		pc_count;	/* call count */
 	unsigned int		pc_cachetype;	/* cache info (NFS) */
 	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
 };
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 8b6c35ae1d57..1e671333c3d5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -77,9 +77,9 @@ static const struct file_operations rpc_proc_fops = {
 /*
  * Get RPC server stats
  */
-void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
+void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
+{
 	const struct svc_program *prog = statp->program;
-	const struct svc_procedure *proc;
 	const struct svc_version *vers;
 	unsigned int i, j;
 
@@ -98,11 +98,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
 			statp->rpcbadclnt);
 
 	for (i = 0; i < prog->pg_nvers; i++) {
-		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+		vers = prog->pg_vers[i];
+		if (!vers)
 			continue;
 		seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
-		for (j = 0; j < vers->vs_nproc; j++, proc++)
-			seq_printf(seq, " %u", proc->pc_count);
+		for (j = 0; j < vers->vs_nproc; j++)
+			seq_printf(seq, " %u", vers->vs_count[j]);
 		seq_putc(seq, '\n');
 	}
 }
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa643a29fdc6..6452592194ac 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1261,7 +1261,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	svc_putnl(resv, RPC_SUCCESS);
 
 	/* Bump per-procedure stats counter */
-	procp->pc_count++;
+	versp->vs_count[proc]++;
 
 	/* Initialize storage for argp and resp */
 	memset(rqstp->rq_argp, 0, procp->pc_argsize);
-- 
cgit v1.2.3


From 860bda29b99afdc072a7a796fe81185f7ae85deb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:11:49 +0200
Subject: sunrpc: mark all struct svc_procinfo instances as const

struct svc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c         | 2 +-
 fs/lockd/svcproc.c          | 2 +-
 fs/nfs/callback_xdr.c       | 2 +-
 fs/nfsd/nfs2acl.c           | 2 +-
 fs/nfsd/nfs3acl.c           | 2 +-
 fs/nfsd/nfs3proc.c          | 2 +-
 fs/nfsd/nfs4proc.c          | 2 +-
 fs/nfsd/nfsproc.c           | 2 +-
 fs/nfsd/nfssvc.c            | 4 ++--
 include/linux/lockd/lockd.h | 4 ++--
 include/linux/sunrpc/svc.h  | 4 ++--
 net/sunrpc/svc.c            | 2 +-
 12 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index fed016155791..82925f17ec45 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -507,7 +507,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)				/* netobj */
 #define	St	1					/* status */
 #define	Rg	4					/* range (offset + length) */
-struct svc_procedure		nlmsvc_procedures4[] = {
+const struct svc_procedure nlmsvc_procedures4[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 14648b051eba..07915162581d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -551,7 +551,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)			/* Net Obj */
 #define	Rg	2				/* range - offset + size */
 
-struct svc_procedure		nlmsvc_procedures[] = {
+const struct svc_procedure nlmsvc_procedures[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ecd46b8c0985..ae249f27297f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -995,7 +995,7 @@ static struct callback_op callback_ops[] = {
 /*
  * Define NFS4 callback procedures
  */
-static struct svc_procedure nfs4_callback_procedures1[] = {
+static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 026edfe73fd5..c3f6b8a6b659 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -370,7 +370,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures2[] = {
+static const struct svc_procedure nfsd_acl_procedures2[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		attrstat,	attrstat, RC_NOCACHE, ST+AT),
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 73c0970ccefb..1a482ac9d4e9 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -257,7 +257,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures3[] = {
+static const struct svc_procedure nfsd_acl_procedures3[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b5823802e278..96e0e6a2af51 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -674,7 +674,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define WC (7+pAT)	/* WCC attributes */
 
-static struct svc_procedure		nfsd_procedures3[22] = {
+static const struct svc_procedure nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
 		.pc_encode = nfs3svc_encode_voidres,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bad5fec0ebc7..a4d8aa3abc63 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2533,7 +2533,7 @@ static const char *nfsd4_op_name(unsigned opnum)
 #define nfsd4_voidres			nfsd4_voidargs
 struct nfsd4_voidargs { int dummy; };
 
-static struct svc_procedure		nfsd_procedures4[2] = {
+static const struct svc_procedure nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
 		.pc_encode = nfs4svc_encode_voidres,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 44b157553733..a68b686fda12 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -573,7 +573,7 @@ struct nfsd_void { int dummy; };
 #define FH 8		/* filehandle */
 #define	AT 18		/* attributes */
 
-static struct svc_procedure		nfsd_procedures2[18] = {
+static const struct svc_procedure nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 555233664124..379b310c445d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -756,7 +756,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
  * problem, we enforce these assumptions here:
  */
 static bool nfs_request_too_big(struct svc_rqst *rqstp,
-				struct svc_procedure *proc)
+				const struct svc_procedure *proc)
 {
 	/*
 	 * The ACL code has more careful bounds-checking and is not
@@ -781,7 +781,7 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
 int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
-	struct svc_procedure	*proc;
+	const struct svc_procedure *proc;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 41f7b6a04d69..3eca67728366 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -192,9 +192,9 @@ struct nlm_block {
  * Global variables
  */
 extern const struct rpc_program	nlm_program;
-extern struct svc_procedure	nlmsvc_procedures[];
+extern const struct svc_procedure nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
-extern struct svc_procedure	nlmsvc_procedures4[];
+extern const struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 9f00384153f4..984e6b9c3043 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -237,7 +237,7 @@ struct svc_rqst {
 
 	struct svc_serv *	rq_server;	/* RPC service definition */
 	struct svc_pool *	rq_pool;	/* thread pool */
-	struct svc_procedure *	rq_procinfo;	/* procedure info */
+	const struct svc_procedure *rq_procinfo;/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
@@ -396,7 +396,7 @@ struct svc_program {
 struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
-	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	const struct svc_procedure *vs_proc;	/* per-procedure info */
 	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6452592194ac..049963d676a7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1152,7 +1152,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
 	struct svc_version	*versp = NULL;	/* compiler food */
-	struct svc_procedure	*procp = NULL;
+	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
 	u32			prog, vers, proc;
-- 
cgit v1.2.3


From e9679189e34b25a1b9aa77fe37d331559d1544af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:21:37 +0200
Subject: sunrpc: mark all struct svc_version instances as const

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc.c             | 38 +++++++++++++++++++-------------------
 fs/nfs/callback.c          |  2 +-
 fs/nfs/callback_xdr.c      |  4 ++--
 fs/nfs/internal.h          |  4 ++--
 fs/nfs/nfs4_fs.h           |  4 ++--
 fs/nfsd/nfs2acl.c          | 14 +++++++-------
 fs/nfsd/nfs3acl.c          | 14 +++++++-------
 fs/nfsd/nfs3proc.c         | 14 +++++++-------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfsd.h             |  6 +++---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfssvc.c           |  8 ++++----
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc.c           |  4 ++--
 14 files changed, 65 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index cc6abe6280bc..726b6cecf430 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -740,32 +740,32 @@ module_exit(exit_nlm);
  * Define NLM program and procedures
  */
 static unsigned int nlmsvc_version1_count[17];
-static struct svc_version	nlmsvc_version1 = {
-		.vs_vers	= 1,
-		.vs_nproc	= 17,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version1_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version1 = {
+	.vs_vers	= 1,
+	.vs_nproc	= 17,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version1_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 static unsigned int nlmsvc_version3_count[24];
-static struct svc_version	nlmsvc_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version3_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version3_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
 static unsigned int nlmsvc_version4_count[24];
-static struct svc_version	nlmsvc_version4 = {
-		.vs_vers	= 4,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures4,
-		.vs_count	= nlmsvc_version4_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version4 = {
+	.vs_vers	= 4,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures4,
+	.vs_count	= nlmsvc_version4_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
-static struct svc_version *	nlmsvc_version[] = {
+static const struct svc_version *nlmsvc_version[] = {
 	[1] = &nlmsvc_version1,
 	[3] = &nlmsvc_version3,
 #ifdef CONFIG_LOCKD_V4
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 73a1f928226c..34323877ec13 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -439,7 +439,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 /*
  * Define NFS4 callback program
  */
-static struct svc_version *nfs4_callback_version[] = {
+static const struct svc_version *nfs4_callback_version[] = {
 	[1] = &nfs4_callback_version1,
 	[4] = &nfs4_callback_version4,
 };
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ae249f27297f..01a430e51daa 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1012,7 +1012,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
 };
 
 static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version1 = {
+const struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
@@ -1024,7 +1024,7 @@ struct svc_version nfs4_callback_version1 = {
 };
 
 static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version4 = {
+const struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c21254924389..9976d8498cf3 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -225,8 +225,8 @@ static inline void nfs_fs_proc_exit(void)
 #endif
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 struct nfs_pageio_descriptor;
 /* pagelist.c */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b0cf3872722..40bd05f05e74 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -498,8 +498,8 @@ extern const struct rpc_procinfo nfs4_procedures[];
 struct nfs4_mount_data;
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
 {
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index c3f6b8a6b659..6276ec8608b0 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -379,11 +379,11 @@ static const struct svc_procedure nfsd_acl_procedures2[] = {
 };
 
 static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
-struct svc_version	nfsd_acl_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 5,
-		.vs_proc	= nfsd_acl_procedures2,
-		.vs_count	= nfsd_acl_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 5,
+	.vs_proc	= nfsd_acl_procedures2,
+	.vs_count	= nfsd_acl_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 1a482ac9d4e9..01976529f042 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -264,12 +264,12 @@ static const struct svc_procedure nfsd_acl_procedures3[] = {
 };
 
 static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
-struct svc_version	nfsd_acl_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 3,
-		.vs_proc	= nfsd_acl_procedures3,
-		.vs_count	= nfsd_acl_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 3,
+	.vs_proc	= nfsd_acl_procedures3,
+	.vs_count	= nfsd_acl_count3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 96e0e6a2af51..2cb56a0d6625 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -891,11 +891,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
-struct svc_version	nfsd_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 22,
-		.vs_proc	= nfsd_procedures3,
-		.vs_count	= nfsd_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 22,
+	.vs_proc	= nfsd_procedures3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_count	= nfsd_count3,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a4d8aa3abc63..e814c1946f6e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2555,7 +2555,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
-struct svc_version	nfsd_version4 = {
+const struct svc_version nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d96606801d47..b9c538ab7a59 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -60,7 +60,7 @@ struct readdir_cd {
 
 
 extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
+extern const struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
 extern struct mutex		nfsd_mutex;
 extern spinlock_t		nfsd_drc_lock;
@@ -86,12 +86,12 @@ void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
+extern const struct svc_version nfsd_acl_version2;
 #else
 #define nfsd_acl_version2 NULL
 #endif
 #ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
+extern const struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a68b686fda12..5076ae2b8258 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -744,13 +744,13 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 
 
 static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
-struct svc_version	nfsd_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 18,
-		.vs_proc	= nfsd_procedures2,
-		.vs_count	= nfsd_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
+const struct svc_version nfsd_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 18,
+	.vs_proc	= nfsd_procedures2,
+	.vs_count	= nfsd_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 379b310c445d..063ae7de2c12 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -68,14 +68,14 @@ unsigned long	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
-static struct svc_version *	nfsd_acl_version[] = {
+static const struct svc_version *nfsd_acl_version[] = {
 	[2] = &nfsd_acl_version2,
 	[3] = &nfsd_acl_version3,
 };
 
 #define NFSD_ACL_MINVERS            2
 #define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
-static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
+static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
 
 static struct svc_program	nfsd_acl_program = {
 	.pg_prog		= NFS_ACL_PROGRAM,
@@ -92,7 +92,7 @@ static struct svc_stat	nfsd_acl_svcstats = {
 };
 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
 
-static struct svc_version *	nfsd_version[] = {
+static const struct svc_version *nfsd_version[] = {
 	[2] = &nfsd_version2,
 #if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
@@ -104,7 +104,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_MINVERS    	2
 #define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
-static struct svc_version *nfsd_versions[NFSD_NRVERS];
+static const struct svc_version *nfsd_versions[NFSD_NRVERS];
 
 struct svc_program		nfsd_program = {
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 984e6b9c3043..e85267899753 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -383,7 +383,7 @@ struct svc_program {
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* highest version */
 	unsigned int		pg_nvers;	/* number of versions */
-	struct svc_version **	pg_vers;	/* version array */
+	const struct svc_version **pg_vers;	/* version array */
 	char *			pg_name;	/* service name */
 	char *			pg_class;	/* class name: services sharing authentication */
 	struct svc_stat *	pg_stats;	/* rpc statistics */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 049963d676a7..45b4f2d2e3bd 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1008,7 +1008,7 @@ int svc_register(const struct svc_serv *serv, struct net *net,
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	struct svc_version	*vers;
+	const struct svc_version *vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -1151,7 +1151,7 @@ static int
 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
-	struct svc_version	*versp = NULL;	/* compiler food */
+	const struct svc_version *versp = NULL;	/* compiler food */
 	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
-- 
cgit v1.2.3


From cfc5604c488ccd17936b69008af0c9ae050f4a08 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:46 +0200
Subject: mtd: spi-nor: introduce SPI 1-2-2 and SPI 1-4-4 protocols

This patch changes the prototype of spi_nor_scan(): its 3rd parameter
is replaced by a 'struct spi_nor_hwcaps' pointer, which tells the spi-nor
framework about the actual hardware capabilities supported by the SPI
controller and its driver.

Besides, this patch also introduces a new 'struct spi_nor_flash_parameter'
telling the spi-nor framework about the hardware capabilities supported by
the SPI flash memory and the associated settings required to use those
hardware caps.

Then, to improve the readability of spi_nor_scan(), the discovery of the
memory settings and the memory initialization are now split into two
dedicated functions.

1 - spi_nor_init_params()

The spi_nor_init_params() function is responsible for initializing the
'struct spi_nor_flash_parameter'. Currently this structure is filled with
legacy values but further patches will allow to override some parameter
values dynamically, for instance by reading the JESD216 Serial Flash
Discoverable Parameter (SFDP) tables from the SPI memory.
The spi_nor_init_params() function only deals with the hardware
capabilities of the SPI flash memory: especially it doesn't care about
the hardware capabilities supported by the SPI controller.

2 - spi_nor_setup()

The second function is called once the 'struct spi_nor_flash_parameter'
has been initialized by spi_nor_init_params().
With both 'struct spi_nor_flash_parameter' and 'struct spi_nor_hwcaps',
the new argument of spi_nor_scan(), spi_nor_setup() computes the best
match between hardware caps supported by both the (Q)SPI memory and
controller hence selecting the relevant settings for (Fast) Read and Page
Program operations.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/devices/m25p80.c          |  21 +-
 drivers/mtd/spi-nor/aspeed-smc.c      |  23 +-
 drivers/mtd/spi-nor/atmel-quadspi.c   |  83 ++++---
 drivers/mtd/spi-nor/cadence-quadspi.c |  18 +-
 drivers/mtd/spi-nor/fsl-quadspi.c     |   6 +-
 drivers/mtd/spi-nor/hisi-sfc.c        |  31 ++-
 drivers/mtd/spi-nor/intel-spi.c       |   7 +-
 drivers/mtd/spi-nor/mtk-quadspi.c     |  15 +-
 drivers/mtd/spi-nor/nxp-spifi.c       |  22 +-
 drivers/mtd/spi-nor/spi-nor.c         | 440 ++++++++++++++++++++++++++--------
 drivers/mtd/spi-nor/stm32-quadspi.c   |  27 ++-
 include/linux/mtd/spi-nor.h           | 119 ++++++++-
 12 files changed, 613 insertions(+), 199 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index c4df3b1bded0..07073f4ce0bd 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -111,14 +111,7 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 
 static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor)
 {
-	switch (nor->flash_read) {
-	case SPI_NOR_DUAL:
-		return 2;
-	case SPI_NOR_QUAD:
-		return 4;
-	default:
-		return 0;
-	}
+	return spi_nor_get_protocol_data_nbits(nor->read_proto);
 }
 
 /*
@@ -196,7 +189,11 @@ static int m25p_probe(struct spi_device *spi)
 	struct flash_platform_data	*data;
 	struct m25p *flash;
 	struct spi_nor *nor;
-	enum read_mode mode = SPI_NOR_NORMAL;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	char *flash_name;
 	int ret;
 
@@ -222,9 +219,9 @@ static int m25p_probe(struct spi_device *spi)
 	flash->spi = spi;
 
 	if (spi->mode & SPI_RX_QUAD)
-		mode = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	else if (spi->mode & SPI_RX_DUAL)
-		mode = SPI_NOR_DUAL;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
 
 	if (data && data->name)
 		nor->mtd.name = data->name;
@@ -241,7 +238,7 @@ static int m25p_probe(struct spi_device *spi)
 	else
 		flash_name = spi->modalias;
 
-	ret = spi_nor_scan(nor, flash_name, mode);
+	ret = spi_nor_scan(nor, flash_name, &hwcaps);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index 56051d30f000..3f875c8d6339 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -585,14 +585,12 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 	 * TODO: Adjust clocks if fast read is supported and interpret
 	 * SPI-NOR flags to adjust controller settings.
 	 */
-	switch (chip->nor.flash_read) {
-	case SPI_NOR_NORMAL:
-		cmd = CONTROL_COMMAND_MODE_NORMAL;
-		break;
-	case SPI_NOR_FAST:
-		cmd = CONTROL_COMMAND_MODE_FREAD;
-		break;
-	default:
+	if (chip->nor.read_proto == SNOR_PROTO_1_1_1) {
+		if (chip->nor.read_dummy == 0)
+			cmd = CONTROL_COMMAND_MODE_NORMAL;
+		else
+			cmd = CONTROL_COMMAND_MODE_FREAD;
+	} else {
 		dev_err(chip->nor.dev, "unsupported SPI read mode\n");
 		return -EINVAL;
 	}
@@ -608,6 +606,11 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
 				  struct device_node *np, struct resource *r)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	const struct aspeed_smc_info *info = controller->info;
 	struct device *dev = controller->dev;
 	struct device_node *child;
@@ -671,11 +674,11 @@ static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
 			break;
 
 		/*
-		 * TODO: Add support for SPI_NOR_QUAD and SPI_NOR_DUAL
+		 * TODO: Add support for Dual and Quad SPI protocols
 		 * attach when board support is present as determined
 		 * by of property.
 		 */
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_NORMAL);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			break;
 
diff --git a/drivers/mtd/spi-nor/atmel-quadspi.c b/drivers/mtd/spi-nor/atmel-quadspi.c
index 47937d9beec6..ba76fa8f2031 100644
--- a/drivers/mtd/spi-nor/atmel-quadspi.c
+++ b/drivers/mtd/spi-nor/atmel-quadspi.c
@@ -275,14 +275,48 @@ static void atmel_qspi_debug_command(struct atmel_qspi *aq,
 
 static int atmel_qspi_run_command(struct atmel_qspi *aq,
 				  const struct atmel_qspi_command *cmd,
-				  u32 ifr_tfrtyp, u32 ifr_width)
+				  u32 ifr_tfrtyp, enum spi_nor_protocol proto)
 {
 	u32 iar, icr, ifr, sr;
 	int err = 0;
 
 	iar = 0;
 	icr = 0;
-	ifr = ifr_tfrtyp | ifr_width;
+	ifr = ifr_tfrtyp;
+
+	/* Set the SPI protocol */
+	switch (proto) {
+	case SNOR_PROTO_1_1_1:
+		ifr |= QSPI_IFR_WIDTH_SINGLE_BIT_SPI;
+		break;
+
+	case SNOR_PROTO_1_1_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_OUTPUT;
+		break;
+
+	case SNOR_PROTO_1_1_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_OUTPUT;
+		break;
+
+	case SNOR_PROTO_1_2_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_IO;
+		break;
+
+	case SNOR_PROTO_1_4_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_IO;
+		break;
+
+	case SNOR_PROTO_2_2_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_CMD;
+		break;
+
+	case SNOR_PROTO_4_4_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_CMD;
+		break;
+
+	default:
+		return -EINVAL;
+	}
 
 	/* Compute instruction parameters */
 	if (cmd->enable.bits.instruction) {
@@ -434,7 +468,7 @@ static int atmel_qspi_read_reg(struct spi_nor *nor, u8 opcode,
 	cmd.rx_buf = buf;
 	cmd.buf_len = len;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode,
@@ -450,7 +484,7 @@ static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode,
 	cmd.tx_buf = buf;
 	cmd.buf_len = len;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
@@ -469,7 +503,7 @@ static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
 	cmd.tx_buf = write_buf;
 	cmd.buf_len = len;
 	ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE_MEM,
-				     QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				     nor->write_proto);
 	return (ret < 0) ? ret : len;
 }
 
@@ -484,7 +518,7 @@ static int atmel_qspi_erase(struct spi_nor *nor, loff_t offs)
 	cmd.instruction = nor->erase_opcode;
 	cmd.address = (u32)offs;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
@@ -493,27 +527,8 @@ static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	struct atmel_qspi *aq = nor->priv;
 	struct atmel_qspi_command cmd;
 	u8 num_mode_cycles, num_dummy_cycles;
-	u32 ifr_width;
 	ssize_t ret;
 
-	switch (nor->flash_read) {
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
-		ifr_width = QSPI_IFR_WIDTH_SINGLE_BIT_SPI;
-		break;
-
-	case SPI_NOR_DUAL:
-		ifr_width = QSPI_IFR_WIDTH_DUAL_OUTPUT;
-		break;
-
-	case SPI_NOR_QUAD:
-		ifr_width = QSPI_IFR_WIDTH_QUAD_OUTPUT;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
 	if (nor->read_dummy >= 2) {
 		num_mode_cycles = 2;
 		num_dummy_cycles = nor->read_dummy - 2;
@@ -536,7 +551,7 @@ static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	cmd.rx_buf = read_buf;
 	cmd.buf_len = len;
 	ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ_MEM,
-				     ifr_width);
+				     nor->read_proto);
 	return (ret < 0) ? ret : len;
 }
 
@@ -590,6 +605,20 @@ static irqreturn_t atmel_qspi_interrupt(int irq, void *dev_id)
 
 static int atmel_qspi_probe(struct platform_device *pdev)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_2_2 |
+			SNOR_HWCAPS_READ_2_2_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_READ_1_4_4 |
+			SNOR_HWCAPS_READ_4_4_4 |
+			SNOR_HWCAPS_PP |
+			SNOR_HWCAPS_PP_1_1_4 |
+			SNOR_HWCAPS_PP_1_4_4 |
+			SNOR_HWCAPS_PP_4_4_4,
+	};
 	struct device_node *child, *np = pdev->dev.of_node;
 	struct atmel_qspi *aq;
 	struct resource *res;
@@ -679,7 +708,7 @@ static int atmel_qspi_probe(struct platform_device *pdev)
 	if (err)
 		goto disable_clk;
 
-	err = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+	err = spi_nor_scan(nor, NULL, &hwcaps);
 	if (err)
 		goto disable_clk;
 
diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 9f8102de1b16..40096d73536c 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -855,15 +855,14 @@ static int cqspi_set_protocol(struct spi_nor *nor, const int read)
 	f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
 
 	if (read) {
-		switch (nor->flash_read) {
-		case SPI_NOR_NORMAL:
-		case SPI_NOR_FAST:
+		switch (nor->read_proto) {
+		case SNOR_PROTO_1_1_1:
 			f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
 			break;
-		case SPI_NOR_DUAL:
+		case SNOR_PROTO_1_1_2:
 			f_pdata->data_width = CQSPI_INST_TYPE_DUAL;
 			break;
-		case SPI_NOR_QUAD:
+		case SNOR_PROTO_1_1_4:
 			f_pdata->data_width = CQSPI_INST_TYPE_QUAD;
 			break;
 		default:
@@ -1069,6 +1068,13 @@ static void cqspi_controller_init(struct cqspi_st *cqspi)
 
 static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct platform_device *pdev = cqspi->pdev;
 	struct device *dev = &pdev->dev;
 	struct cqspi_flash_pdata *f_pdata;
@@ -1123,7 +1129,7 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
 			goto err;
 		}
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			goto err;
 
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 1476135e0d50..f17d22435bfc 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -957,6 +957,10 @@ static void fsl_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
 
 static int fsl_qspi_probe(struct platform_device *pdev)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct device_node *np = pdev->dev.of_node;
 	struct device *dev = &pdev->dev;
 	struct fsl_qspi *q;
@@ -1065,7 +1069,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 		/* set the chip address for READID */
 		fsl_qspi_set_base_addr(q, nor);
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			goto mutex_failed;
 
diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c
index a286350627a6..d1106832b9d5 100644
--- a/drivers/mtd/spi-nor/hisi-sfc.c
+++ b/drivers/mtd/spi-nor/hisi-sfc.c
@@ -120,19 +120,24 @@ static inline int wait_op_finish(struct hifmc_host *host)
 		(reg & FMC_INT_OP_DONE), 0, FMC_WAIT_TIMEOUT);
 }
 
-static int get_if_type(enum read_mode flash_read)
+static int get_if_type(enum spi_nor_protocol proto)
 {
 	enum hifmc_iftype if_type;
 
-	switch (flash_read) {
-	case SPI_NOR_DUAL:
+	switch (proto) {
+	case SNOR_PROTO_1_1_2:
 		if_type = IF_TYPE_DUAL;
 		break;
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_2_2:
+		if_type = IF_TYPE_DIO;
+		break;
+	case SNOR_PROTO_1_1_4:
 		if_type = IF_TYPE_QUAD;
 		break;
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
+	case SNOR_PROTO_1_4_4:
+		if_type = IF_TYPE_QIO;
+		break;
+	case SNOR_PROTO_1_1_1:
 	default:
 		if_type = IF_TYPE_STD;
 		break;
@@ -253,7 +258,10 @@ static int hisi_spi_nor_dma_transfer(struct spi_nor *nor, loff_t start_off,
 	writel(FMC_DMA_LEN_SET(len), host->regbase + FMC_DMA_LEN);
 
 	reg = OP_CFG_FM_CS(priv->chipselect);
-	if_type = get_if_type(nor->flash_read);
+	if (op_type == FMC_OP_READ)
+		if_type = get_if_type(nor->read_proto);
+	else
+		if_type = get_if_type(nor->write_proto);
 	reg |= OP_CFG_MEM_IF_TYPE(if_type);
 	if (op_type == FMC_OP_READ)
 		reg |= OP_CFG_DUMMY_NUM(nor->read_dummy >> 3);
@@ -321,6 +329,13 @@ static ssize_t hisi_spi_nor_write(struct spi_nor *nor, loff_t to,
 static int hisi_spi_nor_register(struct device_node *np,
 				struct hifmc_host *host)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct device *dev = host->dev;
 	struct spi_nor *nor;
 	struct hifmc_priv *priv;
@@ -362,7 +377,7 @@ static int hisi_spi_nor_register(struct device_node *np,
 	nor->read = hisi_spi_nor_read;
 	nor->write = hisi_spi_nor_write;
 	nor->erase = NULL;
-	ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+	ret = spi_nor_scan(nor, NULL, &hwcaps);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index 986a3d020a3a..8a596bfeddff 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c
@@ -715,6 +715,11 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 struct intel_spi *intel_spi_probe(struct device *dev,
 	struct resource *mem, const struct intel_spi_boardinfo *info)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	struct mtd_partition part;
 	struct intel_spi *ispi;
 	int ret;
@@ -746,7 +751,7 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 	ispi->nor.write = intel_spi_write;
 	ispi->nor.erase = intel_spi_erase;
 
-	ret = spi_nor_scan(&ispi->nor, NULL, SPI_NOR_NORMAL);
+	ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_info(dev, "failed to locate the chip\n");
 		return ERR_PTR(ret);
diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index b6377707ce32..8a20ec4991c8 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -123,20 +123,20 @@ static void mt8173_nor_set_read_mode(struct mt8173_nor *mt8173_nor)
 {
 	struct spi_nor *nor = &mt8173_nor->nor;
 
-	switch (nor->flash_read) {
-	case SPI_NOR_FAST:
+	switch (nor->read_proto) {
+	case SNOR_PROTO_1_1_1:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA3_REG);
 		writeb(MTK_NOR_FAST_READ, mt8173_nor->base +
 		       MTK_NOR_CFG1_REG);
 		break;
-	case SPI_NOR_DUAL:
+	case SNOR_PROTO_1_1_2:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA3_REG);
 		writeb(MTK_NOR_DUAL_READ_EN, mt8173_nor->base +
 		       MTK_NOR_DUAL_REG);
 		break;
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_1_4:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA4_REG);
 		writeb(MTK_NOR_QUAD_READ_EN, mt8173_nor->base +
@@ -408,6 +408,11 @@ static int mt8173_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
 static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
 			struct device_node *flash_node)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_PP,
+	};
 	int ret;
 	struct spi_nor *nor;
 
@@ -426,7 +431,7 @@ static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
 	nor->write_reg = mt8173_nor_write_reg;
 	nor->mtd.name = "mtk_nor";
 	/* initialized with NULL */
-	ret = spi_nor_scan(nor, NULL, SPI_NOR_DUAL);
+	ret = spi_nor_scan(nor, NULL, &hwcaps);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c
index 73a14f40928b..15374216d4d9 100644
--- a/drivers/mtd/spi-nor/nxp-spifi.c
+++ b/drivers/mtd/spi-nor/nxp-spifi.c
@@ -240,13 +240,12 @@ static int nxp_spifi_erase(struct spi_nor *nor, loff_t offs)
 
 static int nxp_spifi_setup_memory_cmd(struct nxp_spifi *spifi)
 {
-	switch (spifi->nor.flash_read) {
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
+	switch (spifi->nor.read_proto) {
+	case SNOR_PROTO_1_1_1:
 		spifi->mcmd = SPIFI_CMD_FIELDFORM_ALL_SERIAL;
 		break;
-	case SPI_NOR_DUAL:
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_1_2:
+	case SNOR_PROTO_1_1_4:
 		spifi->mcmd = SPIFI_CMD_FIELDFORM_QUAD_DUAL_DATA;
 		break;
 	default:
@@ -274,7 +273,11 @@ static void nxp_spifi_dummy_id_read(struct spi_nor *nor)
 static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 				 struct device_node *np)
 {
-	enum read_mode flash_read;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	u32 ctrl, property;
 	u16 mode = 0;
 	int ret;
@@ -308,13 +311,12 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 
 	if (mode & SPI_RX_DUAL) {
 		ctrl |= SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_DUAL;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
 	} else if (mode & SPI_RX_QUAD) {
 		ctrl &= ~SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	} else {
 		ctrl |= SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_NORMAL;
 	}
 
 	switch (mode & (SPI_CPHA | SPI_CPOL)) {
@@ -351,7 +353,7 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 	 */
 	nxp_spifi_dummy_id_read(&spifi->nor);
 
-	ret = spi_nor_scan(&spifi->nor, NULL, flash_read);
+	ret = spi_nor_scan(&spifi->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_err(spifi->dev, "device scan failed\n");
 		return ret;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index dea8c9cbadf0..e653806070a1 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -149,24 +149,6 @@ static int read_cr(struct spi_nor *nor)
 	return val;
 }
 
-/*
- * Dummy Cycle calculation for different type of read.
- * It can be used to support more commands with
- * different dummy cycle requirements.
- */
-static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
-{
-	switch (nor->flash_read) {
-	case SPI_NOR_FAST:
-	case SPI_NOR_DUAL:
-	case SPI_NOR_QUAD:
-		return 8;
-	case SPI_NOR_NORMAL:
-		return 0;
-	}
-	return 0;
-}
-
 /*
  * Write status register 1 byte
  * Returns negative if error occurred.
@@ -1460,30 +1442,6 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
-{
-	int status;
-
-	switch (JEDEC_MFR(info)) {
-	case SNOR_MFR_MACRONIX:
-		status = macronix_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Macronix quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	case SNOR_MFR_MICRON:
-		return 0;
-	default:
-		status = spansion_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Spansion quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	}
-}
-
 static int spi_nor_check(struct spi_nor *nor)
 {
 	if (!nor->dev || !nor->read || !nor->write ||
@@ -1536,8 +1494,323 @@ static int s3an_nor_scan(const struct flash_info *info, struct spi_nor *nor)
 	return 0;
 }
 
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
+struct spi_nor_read_command {
+	u8			num_mode_clocks;
+	u8			num_wait_states;
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+struct spi_nor_pp_command {
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+enum spi_nor_read_command_index {
+	SNOR_CMD_READ,
+	SNOR_CMD_READ_FAST,
+
+	/* Dual SPI */
+	SNOR_CMD_READ_1_1_2,
+	SNOR_CMD_READ_1_2_2,
+	SNOR_CMD_READ_2_2_2,
+
+	/* Quad SPI */
+	SNOR_CMD_READ_1_1_4,
+	SNOR_CMD_READ_1_4_4,
+	SNOR_CMD_READ_4_4_4,
+
+	SNOR_CMD_READ_MAX
+};
+
+enum spi_nor_pp_command_index {
+	SNOR_CMD_PP,
+
+	/* Quad SPI */
+	SNOR_CMD_PP_1_1_4,
+	SNOR_CMD_PP_1_4_4,
+	SNOR_CMD_PP_4_4_4,
+
+	SNOR_CMD_PP_MAX
+};
+
+struct spi_nor_flash_parameter {
+	u64				size;
+	u32				page_size;
+
+	struct spi_nor_hwcaps		hwcaps;
+	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
+	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
+
+	int (*quad_enable)(struct spi_nor *nor);
+};
+
+static void
+spi_nor_set_read_settings(struct spi_nor_read_command *read,
+			  u8 num_mode_clocks,
+			  u8 num_wait_states,
+			  u8 opcode,
+			  enum spi_nor_protocol proto)
+{
+	read->num_mode_clocks = num_mode_clocks;
+	read->num_wait_states = num_wait_states;
+	read->opcode = opcode;
+	read->proto = proto;
+}
+
+static void
+spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
+			u8 opcode,
+			enum spi_nor_protocol proto)
+{
+	pp->opcode = opcode;
+	pp->proto = proto;
+}
+
+static int spi_nor_init_params(struct spi_nor *nor,
+			       const struct flash_info *info,
+			       struct spi_nor_flash_parameter *params)
+{
+	/* Set legacy flash parameters as default. */
+	memset(params, 0, sizeof(*params));
+
+	/* Set SPI NOR sizes. */
+	params->size = info->sector_size * info->n_sectors;
+	params->page_size = info->page_size;
+
+	/* (Fast) Read settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_READ;
+	spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ],
+				  0, 0, SPINOR_OP_READ,
+				  SNOR_PROTO_1_1_1);
+
+	if (!(info->flags & SPI_NOR_NO_FR)) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_FAST],
+					  0, 8, SPINOR_OP_READ_FAST,
+					  SNOR_PROTO_1_1_1);
+	}
+
+	if (info->flags & SPI_NOR_DUAL_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_2],
+					  0, 8, SPINOR_OP_READ_1_1_2,
+					  SNOR_PROTO_1_1_2);
+	}
+
+	if (info->flags & SPI_NOR_QUAD_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_4],
+					  0, 8, SPINOR_OP_READ_1_1_4,
+					  SNOR_PROTO_1_1_4);
+	}
+
+	/* Page Program settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_PP;
+	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
+				SPINOR_OP_PP, SNOR_PROTO_1_1_1);
+
+	/* Select the procedure to set the Quad Enable bit. */
+	if (params->hwcaps.mask & (SNOR_HWCAPS_READ_QUAD |
+				   SNOR_HWCAPS_PP_QUAD)) {
+		switch (JEDEC_MFR(info)) {
+		case SNOR_MFR_MACRONIX:
+			params->quad_enable = macronix_quad_enable;
+			break;
+
+		case SNOR_MFR_MICRON:
+			break;
+
+		default:
+			params->quad_enable = spansion_quad_enable;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int spi_nor_hwcaps2cmd(u32 hwcaps, const int table[][2], size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++)
+		if (table[i][0] == (int)hwcaps)
+			return table[i][1];
+
+	return -EINVAL;
+}
+
+static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
+{
+	static const int hwcaps_read2cmd[][2] = {
+		{ SNOR_HWCAPS_READ,		SNOR_CMD_READ },
+		{ SNOR_HWCAPS_READ_FAST,	SNOR_CMD_READ_FAST },
+		{ SNOR_HWCAPS_READ_1_1_2,	SNOR_CMD_READ_1_1_2 },
+		{ SNOR_HWCAPS_READ_1_2_2,	SNOR_CMD_READ_1_2_2 },
+		{ SNOR_HWCAPS_READ_2_2_2,	SNOR_CMD_READ_2_2_2 },
+		{ SNOR_HWCAPS_READ_1_1_4,	SNOR_CMD_READ_1_1_4 },
+		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
+		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
+	};
+
+	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
+				  ARRAY_SIZE(hwcaps_read2cmd));
+}
+
+static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
+{
+	static const int hwcaps_pp2cmd[][2] = {
+		{ SNOR_HWCAPS_PP,		SNOR_CMD_PP },
+		{ SNOR_HWCAPS_PP_1_1_4,		SNOR_CMD_PP_1_1_4 },
+		{ SNOR_HWCAPS_PP_1_4_4,		SNOR_CMD_PP_1_4_4 },
+		{ SNOR_HWCAPS_PP_4_4_4,		SNOR_CMD_PP_4_4_4 },
+	};
+
+	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_pp2cmd,
+				  ARRAY_SIZE(hwcaps_pp2cmd));
+}
+
+static int spi_nor_select_read(struct spi_nor *nor,
+			       const struct spi_nor_flash_parameter *params,
+			       u32 shared_hwcaps)
+{
+	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
+	const struct spi_nor_read_command *read;
+
+	if (best_match < 0)
+		return -EINVAL;
+
+	cmd = spi_nor_hwcaps_read2cmd(BIT(best_match));
+	if (cmd < 0)
+		return -EINVAL;
+
+	read = &params->reads[cmd];
+	nor->read_opcode = read->opcode;
+	nor->read_proto = read->proto;
+
+	/*
+	 * In the spi-nor framework, we don't need to make the difference
+	 * between mode clock cycles and wait state clock cycles.
+	 * Indeed, the value of the mode clock cycles is used by a QSPI
+	 * flash memory to know whether it should enter or leave its 0-4-4
+	 * (Continuous Read / XIP) mode.
+	 * eXecution In Place is out of the scope of the mtd sub-system.
+	 * Hence we choose to merge both mode and wait state clock cycles
+	 * into the so called dummy clock cycles.
+	 */
+	nor->read_dummy = read->num_mode_clocks + read->num_wait_states;
+	return 0;
+}
+
+static int spi_nor_select_pp(struct spi_nor *nor,
+			     const struct spi_nor_flash_parameter *params,
+			     u32 shared_hwcaps)
+{
+	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_PP_MASK) - 1;
+	const struct spi_nor_pp_command *pp;
+
+	if (best_match < 0)
+		return -EINVAL;
+
+	cmd = spi_nor_hwcaps_pp2cmd(BIT(best_match));
+	if (cmd < 0)
+		return -EINVAL;
+
+	pp = &params->page_programs[cmd];
+	nor->program_opcode = pp->opcode;
+	nor->write_proto = pp->proto;
+	return 0;
+}
+
+static int spi_nor_select_erase(struct spi_nor *nor,
+				const struct flash_info *info)
+{
+	struct mtd_info *mtd = &nor->mtd;
+
+#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
+	/* prefer "small sector" erase if possible */
+	if (info->flags & SECT_4K) {
+		nor->erase_opcode = SPINOR_OP_BE_4K;
+		mtd->erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
+		mtd->erasesize = 4096;
+	} else
+#endif
+	{
+		nor->erase_opcode = SPINOR_OP_SE;
+		mtd->erasesize = info->sector_size;
+	}
+	return 0;
+}
+
+static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
+			 const struct spi_nor_flash_parameter *params,
+			 const struct spi_nor_hwcaps *hwcaps)
+{
+	u32 ignored_mask, shared_mask;
+	bool enable_quad_io;
+	int err;
+
+	/*
+	 * Keep only the hardware capabilities supported by both the SPI
+	 * controller and the SPI flash memory.
+	 */
+	shared_mask = hwcaps->mask & params->hwcaps.mask;
+
+	/* SPI n-n-n protocols are not supported yet. */
+	ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
+			SNOR_HWCAPS_READ_4_4_4 |
+			SNOR_HWCAPS_PP_4_4_4);
+	if (shared_mask & ignored_mask) {
+		dev_dbg(nor->dev,
+			"SPI n-n-n protocols are not supported yet.\n");
+		shared_mask &= ~ignored_mask;
+	}
+
+	/* Select the (Fast) Read command. */
+	err = spi_nor_select_read(nor, params, shared_mask);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select read settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Select the Page Program command. */
+	err = spi_nor_select_pp(nor, params, shared_mask);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select write settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Select the Sector Erase command. */
+	err = spi_nor_select_erase(nor, info);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select erase settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Enable Quad I/O if needed. */
+	enable_quad_io = (spi_nor_get_protocol_width(nor->read_proto) == 4 ||
+			  spi_nor_get_protocol_width(nor->write_proto) == 4);
+	if (enable_quad_io && params->quad_enable) {
+		err = params->quad_enable(nor);
+		if (err) {
+			dev_err(nor->dev, "quad mode not supported\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+		 const struct spi_nor_hwcaps *hwcaps)
 {
+	struct spi_nor_flash_parameter params;
 	const struct flash_info *info = NULL;
 	struct device *dev = nor->dev;
 	struct mtd_info *mtd = &nor->mtd;
@@ -1549,6 +1822,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (ret)
 		return ret;
 
+	/* Reset SPI protocol for all commands. */
+	nor->reg_proto = SNOR_PROTO_1_1_1;
+	nor->read_proto = SNOR_PROTO_1_1_1;
+	nor->write_proto = SNOR_PROTO_1_1_1;
+
 	if (name)
 		info = spi_nor_match_id(name);
 	/* Try to auto-detect if chip name wasn't specified or not found */
@@ -1591,6 +1869,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (info->flags & SPI_S3AN)
 		nor->flags |=  SNOR_F_READY_XSR_RDY;
 
+	/* Parse the Serial Flash Discoverable Parameters table. */
+	ret = spi_nor_init_params(nor, info, &params);
+	if (ret)
+		return ret;
+
 	/*
 	 * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
 	 * with the software protection bits set
@@ -1611,7 +1894,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	mtd->type = MTD_NORFLASH;
 	mtd->writesize = 1;
 	mtd->flags = MTD_CAP_NORFLASH;
-	mtd->size = info->sector_size * info->n_sectors;
+	mtd->size = params.size;
 	mtd->_erase = spi_nor_erase;
 	mtd->_read = spi_nor_read;
 
@@ -1642,75 +1925,38 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (info->flags & NO_CHIP_ERASE)
 		nor->flags |= SNOR_F_NO_OP_CHIP_ERASE;
 
-#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
-	/* prefer "small sector" erase if possible */
-	if (info->flags & SECT_4K) {
-		nor->erase_opcode = SPINOR_OP_BE_4K;
-		mtd->erasesize = 4096;
-	} else if (info->flags & SECT_4K_PMC) {
-		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
-		mtd->erasesize = 4096;
-	} else
-#endif
-	{
-		nor->erase_opcode = SPINOR_OP_SE;
-		mtd->erasesize = info->sector_size;
-	}
-
 	if (info->flags & SPI_NOR_NO_ERASE)
 		mtd->flags |= MTD_NO_ERASE;
 
 	mtd->dev.parent = dev;
-	nor->page_size = info->page_size;
+	nor->page_size = params.page_size;
 	mtd->writebufsize = nor->page_size;
 
 	if (np) {
 		/* If we were instantiated by DT, use it */
 		if (of_property_read_bool(np, "m25p,fast-read"))
-			nor->flash_read = SPI_NOR_FAST;
+			params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 		else
-			nor->flash_read = SPI_NOR_NORMAL;
+			params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
 	} else {
 		/* If we weren't instantiated by DT, default to fast-read */
-		nor->flash_read = SPI_NOR_FAST;
+		params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 	}
 
 	/* Some devices cannot do fast-read, no matter what DT tells us */
 	if (info->flags & SPI_NOR_NO_FR)
-		nor->flash_read = SPI_NOR_NORMAL;
-
-	/* Quad/Dual-read mode takes precedence over fast/normal */
-	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
-		ret = set_quad_mode(nor, info);
-		if (ret) {
-			dev_err(dev, "quad mode not supported\n");
-			return ret;
-		}
-		nor->flash_read = SPI_NOR_QUAD;
-	} else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) {
-		nor->flash_read = SPI_NOR_DUAL;
-	}
-
-	/* Default commands */
-	switch (nor->flash_read) {
-	case SPI_NOR_QUAD:
-		nor->read_opcode = SPINOR_OP_READ_1_1_4;
-		break;
-	case SPI_NOR_DUAL:
-		nor->read_opcode = SPINOR_OP_READ_1_1_2;
-		break;
-	case SPI_NOR_FAST:
-		nor->read_opcode = SPINOR_OP_READ_FAST;
-		break;
-	case SPI_NOR_NORMAL:
-		nor->read_opcode = SPINOR_OP_READ;
-		break;
-	default:
-		dev_err(dev, "No Read opcode defined\n");
-		return -EINVAL;
-	}
+		params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
 
-	nor->program_opcode = SPINOR_OP_PP;
+	/*
+	 * Configure the SPI memory:
+	 * - select op codes for (Fast) Read, Page Program and Sector Erase.
+	 * - set the number of dummy cycles (mode cycles + wait states).
+	 * - set the SPI protocols for register and memory accesses.
+	 * - set the Quad Enable bit if needed (required by SPI x-y-4 protos).
+	 */
+	ret = spi_nor_setup(nor, info, &params, hwcaps);
+	if (ret)
+		return ret;
 
 	if (info->addr_width)
 		nor->addr_width = info->addr_width;
@@ -1732,8 +1978,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 		return -EINVAL;
 	}
 
-	nor->read_dummy = spi_nor_read_dummy_cycles(nor);
-
 	if (info->flags & SPI_S3AN) {
 		ret = s3an_nor_scan(info, nor);
 		if (ret)
diff --git a/drivers/mtd/spi-nor/stm32-quadspi.c b/drivers/mtd/spi-nor/stm32-quadspi.c
index ae45f81b8cd3..1056e7408d2a 100644
--- a/drivers/mtd/spi-nor/stm32-quadspi.c
+++ b/drivers/mtd/spi-nor/stm32-quadspi.c
@@ -192,15 +192,15 @@ static void stm32_qspi_set_framemode(struct spi_nor *nor,
 	cmd->framemode = CCR_IMODE_1;
 
 	if (read) {
-		switch (nor->flash_read) {
-		case SPI_NOR_NORMAL:
-		case SPI_NOR_FAST:
+		switch (nor->read_proto) {
+		default:
+		case SNOR_PROTO_1_1_1:
 			dmode = CCR_DMODE_1;
 			break;
-		case SPI_NOR_DUAL:
+		case SNOR_PROTO_1_1_2:
 			dmode = CCR_DMODE_2;
 			break;
-		case SPI_NOR_QUAD:
+		case SNOR_PROTO_1_1_4:
 			dmode = CCR_DMODE_4;
 			break;
 		}
@@ -480,7 +480,12 @@ static void stm32_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
 static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 				  struct device_node *np)
 {
-	u32 width, flash_read, presc, cs_num, max_rate = 0;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
+	u32 width, presc, cs_num, max_rate = 0;
 	struct stm32_qspi_flash *flash;
 	struct mtd_info *mtd;
 	int ret;
@@ -499,12 +504,10 @@ static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 		width = 1;
 
 	if (width == 4)
-		flash_read = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	else if (width == 2)
-		flash_read = SPI_NOR_DUAL;
-	else if (width == 1)
-		flash_read = SPI_NOR_NORMAL;
-	else
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+	else if (width != 1)
 		return -EINVAL;
 
 	flash = &qspi->flash[cs_num];
@@ -539,7 +542,7 @@ static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 	 */
 	flash->fsize = FSIZE_VAL(SZ_1K);
 
-	ret = spi_nor_scan(&flash->nor, NULL, flash_read);
+	ret = spi_nor_scan(&flash->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_err(qspi->dev, "device scan failed\n");
 		return ret;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f2a718030476..60db1585f94c 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -119,13 +119,63 @@
 /* Configuration Register bits. */
 #define CR_QUAD_EN_SPAN		BIT(1)	/* Spansion Quad I/O */
 
-enum read_mode {
-	SPI_NOR_NORMAL = 0,
-	SPI_NOR_FAST,
-	SPI_NOR_DUAL,
-	SPI_NOR_QUAD,
+/* Supported SPI protocols */
+#define SNOR_PROTO_INST_MASK	GENMASK(23, 16)
+#define SNOR_PROTO_INST_SHIFT	16
+#define SNOR_PROTO_INST(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_INST_SHIFT) & \
+	 SNOR_PROTO_INST_MASK)
+
+#define SNOR_PROTO_ADDR_MASK	GENMASK(15, 8)
+#define SNOR_PROTO_ADDR_SHIFT	8
+#define SNOR_PROTO_ADDR(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_ADDR_SHIFT) & \
+	 SNOR_PROTO_ADDR_MASK)
+
+#define SNOR_PROTO_DATA_MASK	GENMASK(7, 0)
+#define SNOR_PROTO_DATA_SHIFT	0
+#define SNOR_PROTO_DATA(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \
+	 SNOR_PROTO_DATA_MASK)
+
+#define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)	\
+	(SNOR_PROTO_INST(_inst_nbits) |				\
+	 SNOR_PROTO_ADDR(_addr_nbits) |				\
+	 SNOR_PROTO_DATA(_data_nbits))
+
+enum spi_nor_protocol {
+	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
+	SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2),
+	SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4),
+	SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2),
+	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
+	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
+	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
 };
 
+static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >>
+		SNOR_PROTO_INST_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_addr_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_ADDR_MASK)) >>
+		SNOR_PROTO_ADDR_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_data_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_DATA_MASK)) >>
+		SNOR_PROTO_DATA_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto)
+{
+	return spi_nor_get_protocol_data_nbits(proto);
+}
+
 #define SPI_NOR_MAX_CMD_SIZE	8
 enum spi_nor_ops {
 	SPI_NOR_OPS_READ = 0,
@@ -154,9 +204,11 @@ enum spi_nor_option_flags {
  * @read_opcode:	the read opcode
  * @read_dummy:		the dummy needed by the read operation
  * @program_opcode:	the program opcode
- * @flash_read:		the mode of the read
  * @sst_write_second:	used by the SST write operation
  * @flags:		flag options for the current SPI-NOR (SNOR_F_*)
+ * @read_proto:		the SPI protocol for read operations
+ * @write_proto:	the SPI protocol for write operations
+ * @reg_proto		the SPI protocol for read_reg/write_reg/erase operations
  * @cmd_buf:		used by the write_reg
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
@@ -185,7 +237,9 @@ struct spi_nor {
 	u8			read_opcode;
 	u8			read_dummy;
 	u8			program_opcode;
-	enum read_mode		flash_read;
+	enum spi_nor_protocol	read_proto;
+	enum spi_nor_protocol	write_proto;
+	enum spi_nor_protocol	reg_proto;
 	bool			sst_write_second;
 	u32			flags;
 	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
@@ -219,11 +273,57 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
 	return mtd_get_of_node(&nor->mtd);
 }
 
+/**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
+ * supported by the SPI controller (bus master).
+ * @mask:		the bitmask listing all the supported hw capabilies
+ */
+struct spi_nor_hwcaps {
+	u32	mask;
+};
+
+/*
+ *(Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * As a matter of performances, it is relevant to use Quad SPI protocols first,
+ * then Dual SPI protocols before Fast Read and lastly (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK		GENMASK(7, 0)
+#define SNOR_HWCAPS_READ		BIT(0)
+#define SNOR_HWCAPS_READ_FAST		BIT(1)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(4, 2)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(2)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(3)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(4)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(7, 5)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(5)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(6)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(7)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK	GENMASK(19, 16)
+#define SNOR_HWCAPS_PP		BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4	BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4	BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4	BIT(19)
+
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
  * @name:	the chip type name
- * @mode:	the read mode supported by the driver
+ * @hwcaps:	the hardware capabilities supported by the controller driver
  *
  * The drivers can use this fuction to scan the SPI NOR.
  * In the scanning, it will try to get all the necessary information to
@@ -233,6 +333,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
  *
  * Return: 0 for success, others for failure.
  */
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode);
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+		 const struct spi_nor_hwcaps *hwcaps);
 
 #endif
-- 
cgit v1.2.3


From 15f55331527b1422eae683477f8a31fdfae93316 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:48 +0200
Subject: mtd: spi-nor: introduce Double Transfer Rate (DTR) SPI protocols

This patch introduces support to Double Transfer Rate (DTR) SPI protocols.
DTR is used only for Fast Read operations.

According to manufacturer datasheets, whatever the number of I/O lines
used during instruction (x) and address/mode/dummy (y) clock cycles, DTR
is used only during data (z) clock cycles of SPI x-y-z protocols.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 10 +++++++++
 include/linux/mtd/spi-nor.h   | 48 +++++++++++++++++++++++++++++++++----------
 2 files changed, 47 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index e653806070a1..2062a3abba72 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -203,6 +203,10 @@ static inline u8 spi_nor_convert_3to4_read(u8 opcode)
 		{ SPINOR_OP_READ_1_2_2,	SPINOR_OP_READ_1_2_2_4B },
 		{ SPINOR_OP_READ_1_1_4,	SPINOR_OP_READ_1_1_4_4B },
 		{ SPINOR_OP_READ_1_4_4,	SPINOR_OP_READ_1_4_4_4B },
+
+		{ SPINOR_OP_READ_1_1_1_DTR,	SPINOR_OP_READ_1_1_1_DTR_4B },
+		{ SPINOR_OP_READ_1_2_2_DTR,	SPINOR_OP_READ_1_2_2_DTR_4B },
+		{ SPINOR_OP_READ_1_4_4_DTR,	SPINOR_OP_READ_1_4_4_DTR_4B },
 	};
 
 	return spi_nor_convert_opcode(opcode, spi_nor_3to4_read,
@@ -1509,16 +1513,19 @@ struct spi_nor_pp_command {
 enum spi_nor_read_command_index {
 	SNOR_CMD_READ,
 	SNOR_CMD_READ_FAST,
+	SNOR_CMD_READ_1_1_1_DTR,
 
 	/* Dual SPI */
 	SNOR_CMD_READ_1_1_2,
 	SNOR_CMD_READ_1_2_2,
 	SNOR_CMD_READ_2_2_2,
+	SNOR_CMD_READ_1_2_2_DTR,
 
 	/* Quad SPI */
 	SNOR_CMD_READ_1_1_4,
 	SNOR_CMD_READ_1_4_4,
 	SNOR_CMD_READ_4_4_4,
+	SNOR_CMD_READ_1_4_4_DTR,
 
 	SNOR_CMD_READ_MAX
 };
@@ -1646,12 +1653,15 @@ static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
 	static const int hwcaps_read2cmd[][2] = {
 		{ SNOR_HWCAPS_READ,		SNOR_CMD_READ },
 		{ SNOR_HWCAPS_READ_FAST,	SNOR_CMD_READ_FAST },
+		{ SNOR_HWCAPS_READ_1_1_1_DTR,	SNOR_CMD_READ_1_1_1_DTR },
 		{ SNOR_HWCAPS_READ_1_1_2,	SNOR_CMD_READ_1_1_2 },
 		{ SNOR_HWCAPS_READ_1_2_2,	SNOR_CMD_READ_1_2_2 },
 		{ SNOR_HWCAPS_READ_2_2_2,	SNOR_CMD_READ_2_2_2 },
+		{ SNOR_HWCAPS_READ_1_2_2_DTR,	SNOR_CMD_READ_1_2_2_DTR },
 		{ SNOR_HWCAPS_READ_1_1_4,	SNOR_CMD_READ_1_1_4 },
 		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
 		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
+		{ SNOR_HWCAPS_READ_1_4_4_DTR,	SNOR_CMD_READ_1_4_4_DTR },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 60db1585f94c..313dbe56f31a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -73,6 +73,15 @@
 #define SPINOR_OP_BE_32K_4B	0x5c	/* Erase 32KiB block */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
+/* Double Transfer Rate opcodes - defined in JEDEC JESD216B. */
+#define SPINOR_OP_READ_1_1_1_DTR	0x0d
+#define SPINOR_OP_READ_1_2_2_DTR	0xbd
+#define SPINOR_OP_READ_1_4_4_DTR	0xed
+
+#define SPINOR_OP_READ_1_1_1_DTR_4B	0x0e
+#define SPINOR_OP_READ_1_2_2_DTR_4B	0xbe
+#define SPINOR_OP_READ_1_4_4_DTR_4B	0xee
+
 /* Used for SST flashes only. */
 #define SPINOR_OP_BP		0x02	/* Byte program */
 #define SPINOR_OP_WRDI		0x04	/* Write disable */
@@ -138,10 +147,15 @@
 	((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \
 	 SNOR_PROTO_DATA_MASK)
 
+#define SNOR_PROTO_IS_DTR	BIT(24)	/* Double Transfer Rate */
+
 #define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)	\
 	(SNOR_PROTO_INST(_inst_nbits) |				\
 	 SNOR_PROTO_ADDR(_addr_nbits) |				\
 	 SNOR_PROTO_DATA(_data_nbits))
+#define SNOR_PROTO_DTR(_inst_nbits, _addr_nbits, _data_nbits)	\
+	(SNOR_PROTO_IS_DTR |					\
+	 SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits))
 
 enum spi_nor_protocol {
 	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
@@ -151,8 +165,17 @@ enum spi_nor_protocol {
 	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
 	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
 	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
+
+	SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1),
+	SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
+	SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
 };
 
+static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
+{
+	return !!(proto & SNOR_PROTO_IS_DTR);
+}
+
 static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto)
 {
 	return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >>
@@ -288,19 +311,22 @@ struct spi_nor_hwcaps {
  * As a matter of performances, it is relevant to use Quad SPI protocols first,
  * then Dual SPI protocols before Fast Read and lastly (Slow) Read.
  */
-#define SNOR_HWCAPS_READ_MASK		GENMASK(7, 0)
+#define SNOR_HWCAPS_READ_MASK		GENMASK(10, 0)
 #define SNOR_HWCAPS_READ		BIT(0)
 #define SNOR_HWCAPS_READ_FAST		BIT(1)
-
-#define SNOR_HWCAPS_READ_DUAL		GENMASK(4, 2)
-#define SNOR_HWCAPS_READ_1_1_2		BIT(2)
-#define SNOR_HWCAPS_READ_1_2_2		BIT(3)
-#define SNOR_HWCAPS_READ_2_2_2		BIT(4)
-
-#define SNOR_HWCAPS_READ_QUAD		GENMASK(7, 5)
-#define SNOR_HWCAPS_READ_1_1_4		BIT(5)
-#define SNOR_HWCAPS_READ_1_4_4		BIT(6)
-#define SNOR_HWCAPS_READ_4_4_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
 
 /*
  * Page Program capabilities.
-- 
cgit v1.2.3


From fe488a5e48c69204c3b1ad6fa3282e12dbfaabe7 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:49 +0200
Subject: mtd: spi-nor: introduce Octo SPI protocols

This patch starts adding support to Octo SPI protocols (SPI x-y-8).

Op codes for Fast Read and/or Page Program operations using Octo SPI
protocols are not known yet (no JEDEC specification has defined them yet)
but we'd rather introduce the Octo SPI protocols now so it's done as it
should be.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 22 +++++++++++++++++++++-
 include/linux/mtd/spi-nor.h   | 26 +++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 2062a3abba72..060a59e716be 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1527,6 +1527,12 @@ enum spi_nor_read_command_index {
 	SNOR_CMD_READ_4_4_4,
 	SNOR_CMD_READ_1_4_4_DTR,
 
+	/* Octo SPI */
+	SNOR_CMD_READ_1_1_8,
+	SNOR_CMD_READ_1_8_8,
+	SNOR_CMD_READ_8_8_8,
+	SNOR_CMD_READ_1_8_8_DTR,
+
 	SNOR_CMD_READ_MAX
 };
 
@@ -1538,6 +1544,11 @@ enum spi_nor_pp_command_index {
 	SNOR_CMD_PP_1_4_4,
 	SNOR_CMD_PP_4_4_4,
 
+	/* Octo SPI */
+	SNOR_CMD_PP_1_1_8,
+	SNOR_CMD_PP_1_8_8,
+	SNOR_CMD_PP_8_8_8,
+
 	SNOR_CMD_PP_MAX
 };
 
@@ -1662,6 +1673,10 @@ static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
 		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
 		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
 		{ SNOR_HWCAPS_READ_1_4_4_DTR,	SNOR_CMD_READ_1_4_4_DTR },
+		{ SNOR_HWCAPS_READ_1_1_8,	SNOR_CMD_READ_1_1_8 },
+		{ SNOR_HWCAPS_READ_1_8_8,	SNOR_CMD_READ_1_8_8 },
+		{ SNOR_HWCAPS_READ_8_8_8,	SNOR_CMD_READ_8_8_8 },
+		{ SNOR_HWCAPS_READ_1_8_8_DTR,	SNOR_CMD_READ_1_8_8_DTR },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
@@ -1675,6 +1690,9 @@ static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
 		{ SNOR_HWCAPS_PP_1_1_4,		SNOR_CMD_PP_1_1_4 },
 		{ SNOR_HWCAPS_PP_1_4_4,		SNOR_CMD_PP_1_4_4 },
 		{ SNOR_HWCAPS_PP_4_4_4,		SNOR_CMD_PP_4_4_4 },
+		{ SNOR_HWCAPS_PP_1_1_8,		SNOR_CMD_PP_1_1_8 },
+		{ SNOR_HWCAPS_PP_1_8_8,		SNOR_CMD_PP_1_8_8 },
+		{ SNOR_HWCAPS_PP_8_8_8,		SNOR_CMD_PP_8_8_8 },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_pp2cmd,
@@ -1772,7 +1790,9 @@ static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
 	/* SPI n-n-n protocols are not supported yet. */
 	ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
 			SNOR_HWCAPS_READ_4_4_4 |
-			SNOR_HWCAPS_PP_4_4_4);
+			SNOR_HWCAPS_READ_8_8_8 |
+			SNOR_HWCAPS_PP_4_4_4 |
+			SNOR_HWCAPS_PP_8_8_8);
 	if (shared_mask & ignored_mask) {
 		dev_dbg(nor->dev,
 			"SPI n-n-n protocols are not supported yet.\n");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 313dbe56f31a..55faa2f07cca 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -161,14 +161,18 @@ enum spi_nor_protocol {
 	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
 	SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2),
 	SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4),
+	SNOR_PROTO_1_1_8 = SNOR_PROTO_STR(1, 1, 8),
 	SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2),
 	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
+	SNOR_PROTO_1_8_8 = SNOR_PROTO_STR(1, 8, 8),
 	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
 	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
+	SNOR_PROTO_8_8_8 = SNOR_PROTO_STR(8, 8, 8),
 
 	SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1),
 	SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
 	SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
+	SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8),
 };
 
 static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
@@ -308,10 +312,11 @@ struct spi_nor_hwcaps {
 /*
  *(Fast) Read capabilities.
  * MUST be ordered by priority: the higher bit position, the higher priority.
- * As a matter of performances, it is relevant to use Quad SPI protocols first,
- * then Dual SPI protocols before Fast Read and lastly (Slow) Read.
+ * As a matter of performances, it is relevant to use Octo SPI protocols first,
+ * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
+ * (Slow) Read.
  */
-#define SNOR_HWCAPS_READ_MASK		GENMASK(10, 0)
+#define SNOR_HWCAPS_READ_MASK		GENMASK(14, 0)
 #define SNOR_HWCAPS_READ		BIT(0)
 #define SNOR_HWCAPS_READ_FAST		BIT(1)
 #define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
@@ -328,16 +333,22 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_READ_4_4_4		BIT(9)
 #define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
 
+#define SNOR_HWCPAS_READ_OCTO		GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
+
 /*
  * Page Program capabilities.
  * MUST be ordered by priority: the higher bit position, the higher priority.
- * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the
+ * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
  * legacy SPI 1-1-1 protocol.
  * Note that Dual Page Programs are not supported because there is no existing
  * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
  * implements such commands.
  */
-#define SNOR_HWCAPS_PP_MASK	GENMASK(19, 16)
+#define SNOR_HWCAPS_PP_MASK	GENMASK(22, 16)
 #define SNOR_HWCAPS_PP		BIT(16)
 
 #define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
@@ -345,6 +356,11 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_PP_1_4_4	BIT(18)
 #define SNOR_HWCAPS_PP_4_4_4	BIT(19)
 
+#define SNOR_HWCAPS_PP_OCTO	GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_1_1_8	BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8	BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8	BIT(22)
+
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
-- 
cgit v1.2.3


From b5dceda1f7ef66cba6f8d766502f242a27f96e6d Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@broadcom.com>
Date: Mon, 15 May 2017 10:34:52 +0530
Subject: lib/raid6: Add log-of-2 table for RAID6 HW requiring disk position

The raid6_gfexp table represents {2}^n values for 0 <= n < 256. The
Linux async_tx framework pass values from raid6_gfexp as coefficients
for each source to prep_dma_pq() callback of DMA channel with PQ
capability. This creates problem for RAID6 offload engines (such as
Broadcom SBA) which take disk position (i.e. log of {2}) instead of
multiplicative cofficients from raid6_gfexp table.

This patch adds raid6_gflog table having log-of-2 value for any given
x such that 0 <= x < 256. For any given disk coefficient x, the
corresponding disk position is given by raid6_gflog[x]. The RAID6
offload engine driver can use this newly added raid6_gflog table to
get disk position from multiplicative coefficient.

Signed-off-by: Anup Patel <anup.patel@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Acked-by: Shaohua Li <shli@fb.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/raid/pq.h |  1 +
 lib/raid6/mktables.c    | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bbaaa1bf..30f945329818 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -142,6 +142,7 @@ int raid6_select_algo(void);
 extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
 extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
 extern const u8 raid6_gfexp[256]      __attribute__((aligned(256)));
+extern const u8 raid6_gflog[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfinv[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfexi[256]      __attribute__((aligned(256)));
 
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
index 39787db588b0..e824d088f72c 100644
--- a/lib/raid6/mktables.c
+++ b/lib/raid6/mktables.c
@@ -125,6 +125,26 @@ int main(int argc, char *argv[])
 	printf("EXPORT_SYMBOL(raid6_gfexp);\n");
 	printf("#endif\n");
 
+	/* Compute log-of-2 table */
+	printf("\nconst u8 __attribute__((aligned(256)))\n"
+	       "raid6_gflog[256] =\n" "{\n");
+	for (i = 0; i < 256; i += 8) {
+		printf("\t");
+		for (j = 0; j < 8; j++) {
+			v = 255;
+			for (k = 0; k < 256; k++)
+				if (exptbl[k] == (i + j)) {
+					v = k;
+					break;
+				}
+			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+		}
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gflog);\n");
+	printf("#endif\n");
+
 	/* Compute inverse table x^-1 == x^254 */
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
 	       "raid6_gfinv[256] =\n" "{\n");
-- 
cgit v1.2.3


From c953d63548207a085abcb12a15fefc8a11ffdf0a Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Tue, 16 May 2017 09:30:18 +0800
Subject: ebtables: arpreply: Add the standard target sanity check

The info->target comes from userspace and it would be used directly.
So we need to add the sanity check to make sure it is a valid standard
target, although the ebtables tool has already checked it. Kernel needs
to validate anything coming from userspace.

If the target is set as an evil value, it would break the ebtables
and cause a panic. Because the non-standard target is treated as one
offset.

Now add one helper function ebt_invalid_target, and we would replace
the macro INVALID_TARGET later.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 5 +++++
 net/bridge/netfilter/ebt_arpreply.c       | 3 +++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a30efb437e6d..e0cbf17af780 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 /* True if the target is not a standard target */
 #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
+static inline bool ebt_invalid_target(int target)
+{
+	return (target < -NUM_STANDARD_TARGETS || target >= 0);
+}
+
 #endif
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 5929309beaa1..db85230e49c3 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
 		return -EINVAL;
+	if (ebt_invalid_target(info->target))
+		return -EINVAL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7b4ff1adb57ad96d8f12a05d8c661a3d8c4d2be1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 11 May 2017 10:17:45 -0300
Subject: mutex, futex: adjust kernel-doc markups to generate ReST

There are a few issues on some kernel-doc markups that was
causing troubles with kernel-doc output on ReST format:

./kernel/futex.c:492: WARNING: Inline emphasis start-string without end-string.
./kernel/futex.c:1264: WARNING: Block quote ends without a blank line; unexpected unindent.
./kernel/futex.c:1721: WARNING: Block quote ends without a blank line; unexpected unindent.
./kernel/futex.c:2338: WARNING: Block quote ends without a blank line; unexpected unindent.
./kernel/futex.c:2426: WARNING: Block quote ends without a blank line; unexpected unindent.
./kernel/futex.c:2899: WARNING: Block quote ends without a blank line; unexpected unindent.
./kernel/futex.c:2972: WARNING: Block quote ends without a blank line; unexpected unindent.

Fix them.

No functional changes.

Acked-by: Darren Hart (VMware) <dvhart@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/linux/mutex.h  |  6 +++---
 kernel/futex.c         | 40 ++++++++++++++++++++--------------------
 kernel/locking/mutex.c |  6 +++---
 3 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 1127fe31645d..ffcba1f337da 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -214,9 +214,9 @@ enum mutex_trylock_recursive_enum {
  * raisins, and once those are gone this will be removed.
  *
  * Returns:
- *  MUTEX_TRYLOCK_FAILED    - trylock failed,
- *  MUTEX_TRYLOCK_SUCCESS   - lock acquired,
- *  MUTEX_TRYLOCK_RECURSIVE - we already owned the lock.
+ *  - MUTEX_TRYLOCK_FAILED    - trylock failed,
+ *  - MUTEX_TRYLOCK_SUCCESS   - lock acquired,
+ *  - MUTEX_TRYLOCK_RECURSIVE - we already owned the lock.
  */
 static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum
 mutex_trylock_recursive(struct mutex *lock)
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a6cf6b..b8ae87d227da 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -488,7 +488,7 @@ static void drop_futex_key_refs(union futex_key *key)
  *
  * Return: a negative error code or 0
  *
- * The key words are stored in *key on success.
+ * The key words are stored in @key on success.
  *
  * For shared mappings, it's (page->index, file_inode(vma->vm_file),
  * offset_within_page).  For private mappings, it's (uaddr, current->mm).
@@ -1259,9 +1259,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
  * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Return:
- *  0 - ready to wait;
- *  1 - acquired the lock;
- * <0 - error
+ *  -  0 - ready to wait;
+ *  -  1 - acquired the lock;
+ *  - <0 - error
  *
  * The hb->lock and futex_key refs shall be held by the caller.
  */
@@ -1717,9 +1717,9 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  * hb1 and hb2 must be held by the caller.
  *
  * Return:
- *  0 - failed to acquire the lock atomically;
- * >0 - acquired the lock, return value is vpid of the top_waiter
- * <0 - error
+ *  -  0 - failed to acquire the lock atomically;
+ *  - >0 - acquired the lock, return value is vpid of the top_waiter
+ *  - <0 - error
  */
 static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 				 struct futex_hash_bucket *hb1,
@@ -1785,8 +1785,8 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
  * uaddr2 atomically on behalf of the top waiter.
  *
  * Return:
- * >=0 - on success, the number of tasks requeued or woken;
- *  <0 - on error
+ *  - >=0 - on success, the number of tasks requeued or woken;
+ *  -  <0 - on error
  */
 static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
@@ -2142,8 +2142,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
  * be paired with exactly one earlier call to queue_me().
  *
  * Return:
- *   1 - if the futex_q was still queued (and we removed unqueued it);
- *   0 - if the futex_q was already removed by the waking thread
+ *  - 1 - if the futex_q was still queued (and we removed unqueued it);
+ *  - 0 - if the futex_q was already removed by the waking thread
  */
 static int unqueue_me(struct futex_q *q)
 {
@@ -2333,9 +2333,9 @@ static long futex_wait_restart(struct restart_block *restart);
  * acquire the lock. Must be called with the hb lock held.
  *
  * Return:
- *  1 - success, lock taken;
- *  0 - success, lock not taken;
- * <0 - on error (-EFAULT)
+ *  -  1 - success, lock taken;
+ *  -  0 - success, lock not taken;
+ *  - <0 - on error (-EFAULT)
  */
 static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
@@ -2422,8 +2422,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  * with no q.key reference on failure.
  *
  * Return:
- *  0 - uaddr contains val and hb has been locked;
- * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
+ *  -  0 - uaddr contains val and hb has been locked;
+ *  - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
  */
 static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 			   struct futex_q *q, struct futex_hash_bucket **hb)
@@ -2895,8 +2895,8 @@ pi_faulted:
  * called with the hb lock held.
  *
  * Return:
- *  0 = no early wakeup detected;
- * <0 = -ETIMEDOUT or -ERESTARTNOINTR
+ *  -  0 = no early wakeup detected;
+ *  - <0 = -ETIMEDOUT or -ERESTARTNOINTR
  */
 static inline
 int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
@@ -2968,8 +2968,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * If 4 or 7, we cleanup and return with -ETIMEDOUT.
  *
  * Return:
- *  0 - On success;
- * <0 - On error
+ *  -  0 - On success;
+ *  - <0 - On error
  */
 static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 				 u32 val, ktime_t *abs_time, u32 bitset,
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 198527a62149..858a07590e39 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -227,9 +227,9 @@ static void __sched __mutex_lock_slowpath(struct mutex *lock);
  * (or statically defined) before it can be locked. memset()-ing
  * the mutex to 0 is not allowed.
  *
- * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
- *   checks that will enforce the restrictions and will also do
- *   deadlock debugging. )
+ * (The CONFIG_DEBUG_MUTEXES .config option turns on debugging
+ * checks that will enforce the restrictions and will also do
+ * deadlock debugging)
  *
  * This function is similar to (but not equivalent to) down().
  */
-- 
cgit v1.2.3


From 771b00a84be46d10e3f74af2d86d226302c907c5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 12 May 2017 09:19:29 -0300
Subject: net: skbuff.h: properly escape a macro name on kernel-doc

The "%" escape code of kernel-doc only handle letters. It
doesn't handle special chars. So, use the ``literal``
notation. That fixes this warning:

./include/linux/skbuff.h:2695: WARNING: Inline literal start-string without end-string.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a098d95b3d84..25b1659c832a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2691,7 +2691,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
  * @offset: the offset within the fragment (starting at the
  *          fragment's own offset)
  * @size: the number of bytes to map
- * @dir: the direction of the mapping (%PCI_DMA_*)
+ * @dir: the direction of the mapping (``PCI_DMA_*``)
  *
  * Maps the page associated with @frag to @device.
  */
-- 
cgit v1.2.3


From d651983dde41a854e25664d98cbfc999d55785a8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 12 May 2017 09:35:46 -0300
Subject: net: fix some identation issues at kernel-doc markups

Sphinx is very pedantic with regards to identation and
escape sequences:

  ./include/net/sock.h:1967: ERROR: Unexpected indentation.
  ./include/net/sock.h:1969: ERROR: Unexpected indentation.
  ./include/net/sock.h:1970: WARNING: Block quote ends without a blank line; unexpected unindent.
  ./include/net/sock.h:1971: WARNING: Block quote ends without a blank line; unexpected unindent.
  ./include/net/sock.h:2268: WARNING: Inline emphasis start-string without end-string.
  ./net/core/sock.c:2686: ERROR: Unexpected indentation.
  ./net/core/sock.c:2687: WARNING: Block quote ends without a blank line; unexpected unindent.
  ./net/core/datagram.c:182: WARNING: Inline emphasis start-string without end-string.
  ./include/linux/netdevice.h:1444: ERROR: Unexpected indentation.
  ./drivers/net/phy/phy.c:381: ERROR: Unexpected indentation.
  ./drivers/net/phy/phy.c:382: WARNING: Block quote ends without a blank line; unexpected unindent.

- Fix spacing where needed;
- Properly escape constants;
- Use a literal block for a race description.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/net/phy/phy.c     | 1 +
 include/linux/netdevice.h | 9 +++++----
 include/net/sock.h        | 9 ++++-----
 net/core/datagram.c       | 2 +-
 net/core/sock.c           | 7 +++++--
 5 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 82ab8fb82587..709b8be53443 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -377,6 +377,7 @@ static void phy_sanitize_settings(struct phy_device *phydev)
  * @cmd: ethtool_cmd
  *
  * A few notes about parameter checking:
+ *
  * - We don't set port or transceiver, so we don't care what they
  *   were set to.
  * - phy_start_aneg() will make sure forced settings are sane, and
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c23bd2efb56..56d54b6fac45 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1433,13 +1433,14 @@ enum netdev_priv_flags {
 
 /**
  *	struct net_device - The DEVICE structure.
- *		Actually, this whole structure is a big mistake.  It mixes I/O
- *		data with strictly "high-level" data, and it has to know about
- *		almost every data structure used in the INET module.
+ *
+ *	Actually, this whole structure is a big mistake.  It mixes I/O
+ *	data with strictly "high-level" data, and it has to know about
+ *	almost every data structure used in the INET module.
  *
  *	@name:	This is the first field of the "visible" part of this structure
  *		(i.e. as seen by users in the "Space.c" file).  It is the name
- *	 	of the interface.
+ *		of the interface.
  *
  *	@name_hlist: 	Device name hash chain, please keep it close to name[]
  *	@ifalias:	SNMP alias
diff --git a/include/net/sock.h b/include/net/sock.h
index 66349e49d468..9ca99b5c1328 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1953,11 +1953,10 @@ static inline bool sk_has_allocations(const struct sock *sk)
  * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
- * Consider following tcp code paths:
+ * Consider following tcp code paths::
  *
- * CPU1                  CPU2
- *
- * sys_select            receive packet
+ *   CPU1                CPU2
+ *   sys_select          receive packet
  *   ...                 ...
  *   __add_wait_queue    update tp->rcv_nxt
  *   ...                 ...
@@ -2264,7 +2263,7 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
  * @tsflags:	timestamping flags to use
  * @tx_flags:	completed with instructions for time stamping
  *
- * Note : callers should take care of initial *tx_flags value (usually 0)
+ * Note: callers should take care of initial ``*tx_flags`` value (usually 0)
  */
 static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags,
 				     __u8 *tx_flags)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..4dd594741b6d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -181,7 +181,7 @@ done:
  *
  *	This function will lock the socket if a skb is returned, so
  *	the caller needs to unlock the socket in that case (usually by
- *	calling skb_free_datagram). Returns NULL with *err set to
+ *	calling skb_free_datagram). Returns NULL with @err set to
  *	-EAGAIN if no data was available or to some other value if an
  *	error was detected.
  *
diff --git a/net/core/sock.c b/net/core/sock.c
index 79c6aee6af9b..6adc69edfdd6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2682,9 +2682,12 @@ EXPORT_SYMBOL(release_sock);
  * @sk: socket
  *
  * This version should be used for very small section, where process wont block
- * return false if fast path is taken
+ * return false if fast path is taken:
+ *
  *   sk_lock.slock locked, owned = 0, BH disabled
- * return true if slow path is taken
+ *
+ * return true if slow path is taken:
+ *
  *   sk_lock.slock unlocked, owned = 1, BH enabled
  */
 bool lock_sock_fast(struct sock *sk)
-- 
cgit v1.2.3


From b6f6c29454d236e85f2912cb0f9366825ca1b0be Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sat, 13 May 2017 07:40:36 -0300
Subject: mtd: adjust kernel-docs to avoid Sphinx/kerneldoc warnings

./drivers/mtd/nand/nand_bbt.c:1: warning: no structured comments found
./include/linux/mtd/nand.h:785: ERROR: Unexpected indentation.
./drivers/mtd/nand/nand_base.c:449: WARNING: Definition list ends without a blank line; unexpected unindent.
./drivers/mtd/nand/nand_base.c:1161: ERROR: Unexpected indentation.
./drivers/mtd/nand/nand_base.c:1162: WARNING: Block quote ends without a blank line; unexpected unindent.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/driver-api/mtdnand.rst | 3 ---
 drivers/mtd/nand/nand_base.c         | 7 +++++--
 include/linux/mtd/nand.h             | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index 7c19795ebb4a..e9afa586d15e 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -970,9 +970,6 @@ hints" for an explanation.
 .. kernel-doc:: drivers/mtd/nand/nand_base.c
    :export:
 
-.. kernel-doc:: drivers/mtd/nand/nand_bbt.c
-   :export:
-
 .. kernel-doc:: drivers/mtd/nand/nand_ecc.c
    :export:
 
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b0524f8accb6..c8988c01e0d7 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -442,10 +442,12 @@ static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
  * specify how to write bad block markers to OOB (chip->block_markbad).
  *
  * We try operations in the following order:
+ *
  *  (1) erase the affected block, to allow OOB marker to be written cleanly
  *  (2) write bad block marker to OOB area of affected block (unless flag
  *      NAND_BBT_NO_OOB_BBM is present)
  *  (3) update the BBT
+ *
  * Note that we retain the first error encountered in (2) or (3), finish the
  * procedures, and dump the error in the end.
 */
@@ -1155,9 +1157,10 @@ int nand_reset(struct nand_chip *chip, int chipnr)
  * @mtd: mtd info
  * @ofs: offset to start unlock from
  * @len: length to unlock
- * @invert: when = 0, unlock the range of blocks within the lower and
+ * @invert:
+ *        - when = 0, unlock the range of blocks within the lower and
  *                    upper boundary address
- *          when = 1, unlock the range of blocks outside the boundaries
+ *        - when = 1, unlock the range of blocks outside the boundaries
  *                    of the lower and upper boundary address
  *
  * Returs unlock status.
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9591e0fbe5bd..3d5b20379ba3 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -779,7 +779,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *			Minimum amount of bit errors per @ecc_step_ds guaranteed
  *			to be correctable. If unknown, set to zero.
  * @ecc_step_ds:	[INTERN] ECC step required by the @ecc_strength_ds,
- *                      also from the datasheet. It is the recommended ECC step
+ *			also from the datasheet. It is the recommended ECC step
  *			size, if known; if unknown, set to zero.
  * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is
  *			      set to the actually used ONFI mode if the chip is
-- 
cgit v1.2.3


From 19285f3c4669c8b0cea8fb6c452c83db9e6386be Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 11:52:56 -0300
Subject: ata: update references for libata documentation

The libata documentation is now using ReST. Update references
to it to point to the new place.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/ata/acard-ahci.c    | 2 +-
 drivers/ata/ahci.c          | 2 +-
 drivers/ata/ahci.h          | 2 +-
 drivers/ata/ata_piix.c      | 2 +-
 drivers/ata/libahci.c       | 2 +-
 drivers/ata/libata-core.c   | 2 +-
 drivers/ata/libata-eh.c     | 2 +-
 drivers/ata/libata-scsi.c   | 2 +-
 drivers/ata/libata-sff.c    | 2 +-
 drivers/ata/libata.h        | 2 +-
 drivers/ata/pata_pdc2027x.c | 2 +-
 drivers/ata/pdc_adma.c      | 2 +-
 drivers/ata/sata_nv.c       | 2 +-
 drivers/ata/sata_promise.c  | 2 +-
 drivers/ata/sata_promise.h  | 2 +-
 drivers/ata/sata_qstor.c    | 2 +-
 drivers/ata/sata_sil.c      | 2 +-
 drivers/ata/sata_sis.c      | 2 +-
 drivers/ata/sata_svw.c      | 2 +-
 drivers/ata/sata_sx4.c      | 2 +-
 drivers/ata/sata_uli.c      | 2 +-
 drivers/ata/sata_via.c      | 2 +-
 drivers/ata/sata_vsc.c      | 2 +-
 include/linux/ata.h         | 2 +-
 include/linux/libata.h      | 2 +-
 25 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index ed6a30cd681a..940ddbc59aa7 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -25,7 +25,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2fc52407306c..fd712e0e9d87 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -24,7 +24,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 5db6ab261643..30f67a1a4f54 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -24,7 +24,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index ffbe625e6fd2..8401c3b5be92 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -33,7 +33,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available at http://developer.intel.com/
  *
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 3159f9e66d8f..6154f0e2b81a 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -24,7 +24,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2d83b8c75965..55aaa2e4c683 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/ and
  *  http://www.sata-io.org/
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index ef68232b5222..7e33e200aae5 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/ and
  *  http://www.sata-io.org/
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index dcd38d9e9804..b0866f040d1f 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from
  *  - http://www.t10.org/
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 274d6d7193d7..052921352f31 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/ and
  *  http://www.sata-io.org/
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 120fce0befd3..5afe35baf61b 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -21,7 +21,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index d9ef9e276225..82bfd51692f3 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -17,7 +17,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware information only available under NDA.
  *
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index 64d682c6ee57..f1e873a37465 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -21,7 +21,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *
  *  Supports ATA disks in single-packet ADMA mode.
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 734f563b8d37..8c683ddd0f58 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -21,7 +21,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  No hardware documentation available outside of NVIDIA.
  *  This driver programs the NVIDIA SATA controller in a similar
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 0fa211e2831c..d032bf657f70 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware information only available under NDA.
  *
diff --git a/drivers/ata/sata_promise.h b/drivers/ata/sata_promise.h
index 00d6000e546f..61633ef5ed72 100644
--- a/drivers/ata/sata_promise.h
+++ b/drivers/ata/sata_promise.h
@@ -20,7 +20,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index af987a4f33d1..1fe941688e95 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -23,7 +23,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 29bcff086bce..ed76f070d21e 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Documentation for SiI 3112:
  *  http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index d1637ac40a73..30f4f35f36d4 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -24,7 +24,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index ff614be55d0f..0fd6ac7e57ba 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -30,7 +30,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 48301cb3a316..405e606a234d 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -24,7 +24,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index 08f98c3ed5c8..4f6e8d8156de 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -18,7 +18,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index f3f538eec7b3..22e96fc77d09 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index 183eb52085df..9648127cca70 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -26,7 +26,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Vitesse hardware documentation presumably available under NDA.
  *  Intel 31244 (same hardware interface) documentation presumably
diff --git a/include/linux/ata.h b/include/linux/ata.h
index ad7d9ee89ff0..73fe18edfdaf 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -20,7 +20,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/
  *
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c9a69fc8821e..9e6633235ad7 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -19,7 +19,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
-- 
cgit v1.2.3


From e1b4fc7add72f565d9c35066d85108346e01d3e9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 12:04:55 -0300
Subject: fs: update location of filesystems documentation

The filesystem documentation was moved from DocBook to
Documentation/filesystems/. Update it at the sources.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 fs/debugfs/file.c       | 2 +-
 include/linux/debugfs.h | 2 +-
 lib/Kconfig.debug       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 354e2ab62031..6dabc4a10396 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
  *	2 as published by the Free Software Foundation.
  *
  *  debugfs is for people to use instead of /proc or /sys.
- *  See Documentation/DocBook/filesystems for more details.
+ *  See Documentation/filesystems/ for more details.
  *
  */
 
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 9174b0d28582..aa86e6d8c1aa 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -9,7 +9,7 @@
  *	2 as published by the Free Software Foundation.
  *
  *  debugfs is for people to use instead of /proc or /sys.
- *  See Documentation/DocBook/filesystems for more details.
+ *  See Documentation/filesystems/ for more details.
  */
 
 #ifndef _DEBUGFS_H_
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4587ebe52c7..b7882d45f48e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -286,7 +286,7 @@ config DEBUG_FS
 	  write to these files.
 
 	  For detailed documentation on the debugfs API, see
-	  Documentation/DocBook/filesystems.
+	  Documentation/filesystems/.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3


From 12e84c71b7d4ee38d51377fd494ac748ee4e6912 Mon Sep 17 00:00:00 2001
From: Okash Khawaja <okash.khawaja@gmail.com>
Date: Mon, 15 May 2017 18:45:32 +0100
Subject: tty: export tty_open_by_driver

This exports tty_open_by_driver so that it can be called from other
places inside the kernel. The checks for null file pointer are based on
Alan Cox's patch here:
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1215095.html.
Description below is quoted from it:

"[RFC] tty_port: allow a port to be opened with a tty that has no file handle

    Let us create tty objects entirely in kernel space. Untested proposal to
    show why all the ideas around rewriting half the uart stack are not needed.

    With this a kernel created non file backed tty object could be used to handle
    data, and set terminal modes. Not all ldiscs can cope with this as N_TTY in
    particular has to work back to the fs/tty layer.

    The tty_port code is however otherwise clean of file handles as far as I can
    tell as is the low level tty port write path used by the ldisc, the
    configuration low level interfaces and most of the ldiscs.

    Currently you don't have any exposure to see tty hangups because those are
    built around the file layer. However a) it's a fixed port so you probably
    don't care about that b) if you do we can add a callback and c) you almost
    certainly don't want the userspace tear down/rebuild behaviour anyway.

    This should however be sufficient if we wanted for example to enumerate all
    the bluetooth bound fixed ports via ACPI and make them directly available.

    It doesn't deal with the case of a user opening a port that's also kernel
    opened and that would need some locking out (so it returned EBUSY if bound
    to a kernel device of some kind). That needs resolving along with how you
    "up" or "down" your new bluetooth device, or enumerate it while providing
    the existing tty API to avoid regressions (and to debug)."

The exported funtion is used later in this patch set to gain access to tty_struct.

[changed export symbol level - gkh]

Signed-off-by: Okash Khawaja <okash.khawaja@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c | 10 +++++++---
 include/linux/tty.h  |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 0c150b5a9dd6..49abf04c90b2 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1083,7 +1083,10 @@ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
 	struct tty_struct *tty;
 
 	if (driver->ops->lookup)
-		tty = driver->ops->lookup(driver, file, idx);
+		if (!file)
+			tty = ERR_PTR(-EIO);
+		else
+			tty = driver->ops->lookup(driver, file, idx);
 	else
 		tty = driver->ttys[idx];
 
@@ -1715,7 +1718,7 @@ static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp,
 		struct tty_driver *console_driver = console_device(index);
 		if (console_driver) {
 			driver = tty_driver_kref_get(console_driver);
-			if (driver) {
+			if (driver && filp) {
 				/* Don't let /dev/console block */
 				filp->f_flags |= O_NONBLOCK;
 				break;
@@ -1748,7 +1751,7 @@ static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp,
  *	  - concurrent tty driver removal w/ lookup
  *	  - concurrent tty removal from driver table
  */
-static struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode,
+struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode,
 					     struct file *filp)
 {
 	struct tty_struct *tty;
@@ -1793,6 +1796,7 @@ out:
 	tty_driver_kref_put(driver);
 	return tty;
 }
+EXPORT_SYMBOL_GPL(tty_open_by_driver);
 
 /**
  *	tty_open		-	open a tty device
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d07cd2105a6c..c9f9fd2c4eef 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -400,6 +400,8 @@ extern struct tty_struct *get_current_tty(void);
 /* tty_io.c */
 extern int __init tty_init(void);
 extern const char *tty_name(const struct tty_struct *tty);
+extern struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode,
+		struct file *filp);
 #else
 static inline void tty_kref_put(struct tty_struct *tty)
 { }
-- 
cgit v1.2.3


From 9bb9a39ce51eae886575251e87d9292f679e3e32 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 09:16:37 -0300
Subject: ata: update references for libata documentation

The libata documentation is now using ReST. Update references
to it to point to the new place.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/acard-ahci.c    | 2 +-
 drivers/ata/ahci.c          | 2 +-
 drivers/ata/ahci.h          | 2 +-
 drivers/ata/ata_piix.c      | 2 +-
 drivers/ata/libahci.c       | 2 +-
 drivers/ata/libata-core.c   | 2 +-
 drivers/ata/libata-eh.c     | 2 +-
 drivers/ata/libata-scsi.c   | 2 +-
 drivers/ata/libata-sff.c    | 2 +-
 drivers/ata/libata.h        | 2 +-
 drivers/ata/pata_pdc2027x.c | 2 +-
 drivers/ata/pdc_adma.c      | 2 +-
 drivers/ata/sata_nv.c       | 2 +-
 drivers/ata/sata_promise.c  | 2 +-
 drivers/ata/sata_promise.h  | 2 +-
 drivers/ata/sata_qstor.c    | 2 +-
 drivers/ata/sata_sil.c      | 2 +-
 drivers/ata/sata_sis.c      | 2 +-
 drivers/ata/sata_svw.c      | 2 +-
 drivers/ata/sata_sx4.c      | 2 +-
 drivers/ata/sata_uli.c      | 2 +-
 drivers/ata/sata_via.c      | 2 +-
 drivers/ata/sata_vsc.c      | 2 +-
 include/linux/ata.h         | 2 +-
 include/linux/libata.h      | 2 +-
 25 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index ed6a30cd681a..940ddbc59aa7 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -25,7 +25,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2fc52407306c..fd712e0e9d87 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -24,7 +24,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 5db6ab261643..30f67a1a4f54 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -24,7 +24,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index ffbe625e6fd2..8401c3b5be92 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -33,7 +33,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available at http://developer.intel.com/
  *
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 3159f9e66d8f..6154f0e2b81a 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -24,7 +24,7 @@
  *
  *
  * libata documentation is available via 'make {ps|pdf}docs',
- * as Documentation/DocBook/libata.*
+ * as Documentation/driver-api/libata.rst
  *
  * AHCI hardware documentation:
  * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2d83b8c75965..55aaa2e4c683 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/ and
  *  http://www.sata-io.org/
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index ef68232b5222..7e33e200aae5 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/ and
  *  http://www.sata-io.org/
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index dcd38d9e9804..b0866f040d1f 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from
  *  - http://www.t10.org/
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index baa24a9a0e68..cc2f2e35f4c2 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/ and
  *  http://www.sata-io.org/
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 120fce0befd3..5afe35baf61b 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -21,7 +21,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index d9ef9e276225..82bfd51692f3 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -17,7 +17,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware information only available under NDA.
  *
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index 64d682c6ee57..f1e873a37465 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -21,7 +21,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *
  *  Supports ATA disks in single-packet ADMA mode.
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 734f563b8d37..8c683ddd0f58 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -21,7 +21,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  No hardware documentation available outside of NVIDIA.
  *  This driver programs the NVIDIA SATA controller in a similar
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 0fa211e2831c..d032bf657f70 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware information only available under NDA.
  *
diff --git a/drivers/ata/sata_promise.h b/drivers/ata/sata_promise.h
index 00d6000e546f..61633ef5ed72 100644
--- a/drivers/ata/sata_promise.h
+++ b/drivers/ata/sata_promise.h
@@ -20,7 +20,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index af987a4f33d1..1fe941688e95 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -23,7 +23,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 29bcff086bce..ed76f070d21e 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Documentation for SiI 3112:
  *  http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index d1637ac40a73..30f4f35f36d4 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -24,7 +24,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index ff614be55d0f..0fd6ac7e57ba 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -30,7 +30,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 48301cb3a316..405e606a234d 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -24,7 +24,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index 08f98c3ed5c8..4f6e8d8156de 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -18,7 +18,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index f3f538eec7b3..22e96fc77d09 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -25,7 +25,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available under NDA.
  *
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index 183eb52085df..9648127cca70 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -26,7 +26,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Vitesse hardware documentation presumably available under NDA.
  *  Intel 31244 (same hardware interface) documentation presumably
diff --git a/include/linux/ata.h b/include/linux/ata.h
index ad7d9ee89ff0..73fe18edfdaf 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -20,7 +20,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  *  Hardware documentation available from http://www.t13.org/
  *
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c9a69fc8821e..9e6633235ad7 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -19,7 +19,7 @@
  *
  *
  *  libata documentation is available via 'make {ps|pdf}docs',
- *  as Documentation/DocBook/libata.*
+ *  as Documentation/driver-api/libata.rst
  *
  */
 
-- 
cgit v1.2.3


From 138bc7969c24c6cbba28e919c2376ad10a46fc60 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 7 Apr 2017 19:22:06 -0700
Subject: iio: hid-sensor-hub: Implement batch mode

HID sensor hubs using Integrated Senor Hub (ISH) has added capability to
support batch mode. This allows host processor to go to sleep for extended
duration, while the sensor hub is storing samples in its internal buffers.

'Commit f4f4673b7535 ("iio: add support for hardware fifo")' implements
feature in IIO core to implement such feature. This feature is used in
bmc150-accel-core.c to implement batch mode. This implementation allows
software device buffer watermark to be used as a hint to adjust hardware
FIFO.

But HID sensor hubs don't allow to change internal buffer size of FIFOs.
Instead an additional usage id to set "maximum report latency" is defined.
This allows host to go to sleep upto this latency period without getting
any report. Since there is no ABI to set this latency, a new attribute
"hwfifo_timeout" is added so that user mode can specify a latency.

This change checks presence of usage id to get/set maximum report latency
and if present, it will expose hwfifo_timeout.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio            | 11 +++
 .../iio/common/hid-sensors/hid-sensor-attributes.c | 44 ++++++++++++
 .../iio/common/hid-sensors/hid-sensor-trigger.c    | 80 ++++++++++++++++++++++
 include/linux/hid-sensor-hub.h                     |  5 ++
 include/linux/hid-sensor-ids.h                     |  3 +
 5 files changed, 143 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 8c24d0892f61..2db2cdf42d54 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1425,6 +1425,17 @@ Description:
 		guarantees that the hardware fifo is flushed to the device
 		buffer.
 
+What:		/sys/bus/iio/devices/iio:device*/buffer/hwfifo_timeout
+KernelVersion:	4.12
+Contact:	linux-iio@vger.kernel.org
+Description:
+		A read/write property to provide capability to delay reporting of
+		samples till a timeout is reached. This allows host processors to
+		sleep, while the sensor is storing samples in its internal fifo.
+		The maximum timeout in seconds can be specified by setting
+		hwfifo_timeout.The current delay can be read by reading
+		hwfifo_timeout. A value of 0 means that there is no timeout.
+
 What:		/sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_watermark
 KernelVersion: 4.2
 Contact:	linux-iio@vger.kernel.org
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 0e05f6d1e761..f5d4d786e193 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -410,6 +410,48 @@ int hid_sensor_get_reporting_interval(struct hid_sensor_hub_device *hsdev,
 
 }
 
+static void hid_sensor_get_report_latency_info(struct hid_sensor_hub_device *hsdev,
+					       u32 usage_id,
+					       struct hid_sensor_common *st)
+{
+	sensor_hub_input_get_attribute_info(hsdev, HID_FEATURE_REPORT,
+					    usage_id,
+					    HID_USAGE_SENSOR_PROP_REPORT_LATENCY,
+					    &st->report_latency);
+
+	hid_dbg(hsdev->hdev, "Report latency attributes: %x:%x\n",
+		st->report_latency.index, st->report_latency.report_id);
+}
+
+int hid_sensor_get_report_latency(struct hid_sensor_common *st)
+{
+	int ret;
+	int value;
+
+	ret = sensor_hub_get_feature(st->hsdev, st->report_latency.report_id,
+				     st->report_latency.index, sizeof(value),
+				     &value);
+	if (ret < 0)
+		return ret;
+
+	return value;
+}
+EXPORT_SYMBOL(hid_sensor_get_report_latency);
+
+int hid_sensor_set_report_latency(struct hid_sensor_common *st, int latency_ms)
+{
+	return sensor_hub_set_feature(st->hsdev, st->report_latency.report_id,
+				      st->report_latency.index,
+				      sizeof(latency_ms), &latency_ms);
+}
+EXPORT_SYMBOL(hid_sensor_set_report_latency);
+
+bool hid_sensor_batch_mode_supported(struct hid_sensor_common *st)
+{
+	return st->report_latency.index > 0 && st->report_latency.report_id > 0;
+}
+EXPORT_SYMBOL(hid_sensor_batch_mode_supported);
+
 int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
 					struct hid_sensor_common *st)
@@ -451,6 +493,8 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 	} else
 		st->timestamp_ns_scale = 1000000000;
 
+	hid_sensor_get_report_latency_info(hsdev, usage_id, st);
+
 	hid_dbg(hsdev->hdev, "common attributes: %x:%x, %x:%x, %x:%x %x:%x %x:%x\n",
 		st->poll.index, st->poll.report_id,
 		st->report_state.index, st->report_state.report_id,
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index 0b5dea050239..16ade0a0327b 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -26,9 +26,84 @@
 #include <linux/hid-sensor-hub.h>
 #include <linux/iio/iio.h>
 #include <linux/iio/trigger.h>
+#include <linux/iio/buffer.h>
 #include <linux/iio/sysfs.h>
 #include "hid-sensor-trigger.h"
 
+static ssize_t _hid_sensor_set_report_latency(struct device *dev,
+					      struct device_attribute *attr,
+					      const char *buf, size_t len)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct hid_sensor_common *attrb = iio_device_get_drvdata(indio_dev);
+	int integer, fract, ret;
+	int latency;
+
+	ret = iio_str_to_fixpoint(buf, 100000, &integer, &fract);
+	if (ret)
+		return ret;
+
+	latency = integer * 1000 + fract / 1000;
+	ret = hid_sensor_set_report_latency(attrb, latency);
+	if (ret < 0)
+		return len;
+
+	attrb->latency_ms = hid_sensor_get_report_latency(attrb);
+
+	return len;
+}
+
+static ssize_t _hid_sensor_get_report_latency(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct hid_sensor_common *attrb = iio_device_get_drvdata(indio_dev);
+	int latency;
+
+	latency = hid_sensor_get_report_latency(attrb);
+	if (latency < 0)
+		return latency;
+
+	return sprintf(buf, "%d.%06u\n", latency / 1000, (latency % 1000) * 1000);
+}
+
+static ssize_t _hid_sensor_get_fifo_state(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct hid_sensor_common *attrb = iio_device_get_drvdata(indio_dev);
+	int latency;
+
+	latency = hid_sensor_get_report_latency(attrb);
+	if (latency < 0)
+		return latency;
+
+	return sprintf(buf, "%d\n", !!latency);
+}
+
+static IIO_DEVICE_ATTR(hwfifo_timeout, 0644,
+		       _hid_sensor_get_report_latency,
+		       _hid_sensor_set_report_latency, 0);
+static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
+		       _hid_sensor_get_fifo_state, NULL, 0);
+
+static const struct attribute *hid_sensor_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_timeout.dev_attr.attr,
+	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+	NULL,
+};
+
+static void hid_sensor_setup_batch_mode(struct iio_dev *indio_dev,
+					struct hid_sensor_common *st)
+{
+	if (!hid_sensor_batch_mode_supported(st))
+		return;
+
+	iio_buffer_set_attrs(indio_dev->buffer, hid_sensor_fifo_attributes);
+}
+
 static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 {
 	int state_val;
@@ -141,6 +216,9 @@ static void hid_sensor_set_power_work(struct work_struct *work)
 				       sizeof(attrb->raw_hystersis),
 				       &attrb->raw_hystersis);
 
+	if (attrb->latency_ms > 0)
+		hid_sensor_set_report_latency(attrb, attrb->latency_ms);
+
 	_hid_sensor_power_state(attrb, true);
 }
 
@@ -192,6 +270,8 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 	attrb->trigger = trig;
 	indio_dev->trig = iio_trigger_get(trig);
 
+	hid_sensor_setup_batch_mode(indio_dev, attrb);
+
 	ret = pm_runtime_set_active(&indio_dev->dev);
 	if (ret)
 		goto error_unreg_trigger;
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index f32d7c392c1e..fc7aae64dcde 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -233,12 +233,14 @@ struct hid_sensor_common {
 	atomic_t user_requested_state;
 	int poll_interval;
 	int raw_hystersis;
+	int latency_ms;
 	struct iio_trigger *trigger;
 	int timestamp_ns_scale;
 	struct hid_sensor_hub_attribute_info poll;
 	struct hid_sensor_hub_attribute_info report_state;
 	struct hid_sensor_hub_attribute_info power_state;
 	struct hid_sensor_hub_attribute_info sensitivity;
+	struct hid_sensor_hub_attribute_info report_latency;
 	struct work_struct work;
 };
 
@@ -276,5 +278,8 @@ s32 hid_sensor_read_poll_value(struct hid_sensor_common *st);
 
 int64_t hid_sensor_convert_timestamp(struct hid_sensor_common *st,
 				     int64_t raw_value);
+bool hid_sensor_batch_mode_supported(struct hid_sensor_common *st);
+int hid_sensor_set_report_latency(struct hid_sensor_common *st, int latency);
+int hid_sensor_get_report_latency(struct hid_sensor_common *st);
 
 #endif
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 5af62c7e49f3..76033e0420a7 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -152,6 +152,9 @@
 #define HID_USAGE_SENSOR_PROP_REPORT_STATE			0x200316
 #define HID_USAGE_SENSOR_PROY_POWER_STATE			0x200319
 
+/* Batch mode selectors */
+#define HID_USAGE_SENSOR_PROP_REPORT_LATENCY			0x20031B
+
 /* Per data field properties */
 #define HID_USAGE_SENSOR_DATA_MOD_NONE					0x00
 #define HID_USAGE_SENSOR_DATA_MOD_CHANGE_SENSITIVITY_ABS		0x1000
-- 
cgit v1.2.3


From 65101aeca52241a05e66f23c96eb896c9412718d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 16 May 2017 11:20:13 +0200
Subject: net/sock: factor out dequeue/peek with offset code

And update __sk_queue_drop_skb() to work on the specified queue.
This will help the udp protocol to use an additional private
rx queue in a later patch.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  7 ++++
 include/net/sock.h     |  4 +--
 net/core/datagram.c    | 90 ++++++++++++++++++++++++++++----------------------
 3 files changed, 60 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a098d95b3d84..bfc7892f6c33 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3056,6 +3056,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 
 int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
 				const struct sk_buff *skb);
+struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
+					  struct sk_buff_head *queue,
+					  unsigned int flags,
+					  void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb),
+					  int *peeked, int *off, int *err,
+					  struct sk_buff **last);
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
 					void (*destructor)(struct sock *sk,
 							   struct sk_buff *skb),
diff --git a/include/net/sock.h b/include/net/sock.h
index f33e3d134e0b..42264035dec0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2035,8 +2035,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 
 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
-			unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+			struct sk_buff *skb, unsigned int flags,
 			void (*destructor)(struct sock *sk,
 					   struct sk_buff *skb));
 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..a4592b43b40d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -161,6 +161,43 @@ done:
 	return skb;
 }
 
+struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
+					  struct sk_buff_head *queue,
+					  unsigned int flags,
+					  void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb),
+					  int *peeked, int *off, int *err,
+					  struct sk_buff **last)
+{
+	struct sk_buff *skb;
+
+	*last = queue->prev;
+	skb_queue_walk(queue, skb) {
+		if (flags & MSG_PEEK) {
+			if (*off >= skb->len && (skb->len || *off ||
+						 skb->peeked)) {
+				*off -= skb->len;
+				continue;
+			}
+			if (!skb->len) {
+				skb = skb_set_peeked(skb);
+				if (unlikely(IS_ERR(skb))) {
+					*err = PTR_ERR(skb);
+					return skb;
+				}
+			}
+			*peeked = 1;
+			atomic_inc(&skb->users);
+		} else {
+			__skb_unlink(skb, queue);
+			if (destructor)
+				destructor(sk, skb);
+		}
+		return skb;
+	}
+	return NULL;
+}
+
 /**
  *	__skb_try_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
@@ -216,46 +253,20 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 
 	*peeked = 0;
 	do {
+		int _off = *off;
+
 		/* Again only user level code calls this function, so nothing
 		 * interrupt level will suddenly eat the receive_queue.
 		 *
 		 * Look at current nfs client by the way...
 		 * However, this function was correct in any case. 8)
 		 */
-		int _off = *off;
-
-		*last = (struct sk_buff *)queue;
 		spin_lock_irqsave(&queue->lock, cpu_flags);
-		skb_queue_walk(queue, skb) {
-			*last = skb;
-			if (flags & MSG_PEEK) {
-				if (_off >= skb->len && (skb->len || _off ||
-							 skb->peeked)) {
-					_off -= skb->len;
-					continue;
-				}
-				if (!skb->len) {
-					skb = skb_set_peeked(skb);
-					if (IS_ERR(skb)) {
-						error = PTR_ERR(skb);
-						spin_unlock_irqrestore(&queue->lock,
-								       cpu_flags);
-						goto no_packet;
-					}
-				}
-				*peeked = 1;
-				atomic_inc(&skb->users);
-			} else {
-				__skb_unlink(skb, queue);
-				if (destructor)
-					destructor(sk, skb);
-			}
-			spin_unlock_irqrestore(&queue->lock, cpu_flags);
-			*off = _off;
-			return skb;
-		}
-
+		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
+						peeked, &_off, err, last);
 		spin_unlock_irqrestore(&queue->lock, cpu_flags);
+		if (skb)
+			return skb;
 
 		if (!sk_can_busy_loop(sk))
 			break;
@@ -335,8 +346,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
 }
 EXPORT_SYMBOL(__skb_free_datagram_locked);
 
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
-			unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+			struct sk_buff *skb, unsigned int flags,
 			void (*destructor)(struct sock *sk,
 					   struct sk_buff *skb))
 {
@@ -344,15 +355,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
 
 	if (flags & MSG_PEEK) {
 		err = -ENOENT;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
+		spin_lock_bh(&sk_queue->lock);
+		if (skb == skb_peek(sk_queue)) {
+			__skb_unlink(skb, sk_queue);
 			atomic_dec(&skb->users);
 			if (destructor)
 				destructor(sk, skb);
 			err = 0;
 		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk_queue->lock);
 	}
 
 	atomic_inc(&sk->sk_drops);
@@ -383,7 +394,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
 
 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
-	int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
+	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
+				      NULL);
 
 	kfree_skb(skb);
 	sk_mem_reclaim_partial(sk);
-- 
cgit v1.2.3


From 2276f58ac5890e58d2b6a48b95493faff7347e3a Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 16 May 2017 11:20:14 +0200
Subject: udp: use a separate rx queue for packet reception

under udp flood the sk_receive_queue spinlock is heavily contended.
This patch try to reduce the contention on such lock adding a
second receive queue to the udp sockets; recvmsg() looks first
in such queue and, only if empty, tries to fetch the data from
sk_receive_queue. The latter is spliced into the newly added
queue every time the receive path has to acquire the
sk_receive_queue lock.

The accounting of forward allocated memory is still protected with
the sk_receive_queue lock, so udp_rmem_release() needs to acquire
both locks when the forward deficit is flushed.

On specific scenarios we can end up acquiring and releasing the
sk_receive_queue lock multiple times; that will be covered by
the next patch

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h   |   3 ++
 include/net/udp.h     |   9 +---
 include/net/udplite.h |   2 +-
 net/ipv4/udp.c        | 138 ++++++++++++++++++++++++++++++++++++++++++++------
 net/ipv6/udp.c        |   3 +-
 5 files changed, 131 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 6cb4061a720d..eaea63bc79bb 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -80,6 +80,9 @@ struct udp_sock {
 						struct sk_buff *skb,
 						int nhoff);
 
+	/* udp_recvmsg try to use this before splicing sk_receive_queue */
+	struct sk_buff_head	reader_queue ____cacheline_aligned_in_smp;
+
 	/* This field is dirtied by udp_recvmsg() */
 	int		forward_deficit;
 };
diff --git a/include/net/udp.h b/include/net/udp.h
index 3391dbd73959..1468dbd0f09a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -249,13 +249,8 @@ void udp_destruct_sock(struct sock *sk);
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
 void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
-static inline struct sk_buff *
-__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked,
-	       int *off, int *err)
-{
-	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				   udp_skb_destructor, peeked, off, err);
-}
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
+			       int noblock, int *peeked, int *off, int *err);
 static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
 					   int noblock, int *err)
 {
diff --git a/include/net/udplite.h b/include/net/udplite.h
index ea340524f99b..b7a18f63d86d 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -26,8 +26,8 @@ static __inline__ int udplite_getfrag(void *from, char *to, int  offset,
 /* Designate sk as UDP-Lite socket */
 static inline int udplite_sk_init(struct sock *sk)
 {
+	udp_init_sock(sk);
 	udp_sk(sk)->pcflag = UDPLITE_BIT;
-	sk->sk_destruct = udp_destruct_sock;
 	return 0;
 }
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ea6e4cff9faf..492c76be9230 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1167,19 +1167,24 @@ out:
 static void udp_rmem_release(struct sock *sk, int size, int partial)
 {
 	struct udp_sock *up = udp_sk(sk);
+	struct sk_buff_head *sk_queue;
 	int amt;
 
 	if (likely(partial)) {
 		up->forward_deficit += size;
 		size = up->forward_deficit;
 		if (size < (sk->sk_rcvbuf >> 2) &&
-		    !skb_queue_empty(&sk->sk_receive_queue))
+		    !skb_queue_empty(&up->reader_queue))
 			return;
 	} else {
 		size += up->forward_deficit;
 	}
 	up->forward_deficit = 0;
 
+	/* acquire the sk_receive_queue for fwd allocated memory scheduling */
+	sk_queue = &sk->sk_receive_queue;
+	spin_lock(&sk_queue->lock);
+
 	sk->sk_forward_alloc += size;
 	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
 	sk->sk_forward_alloc -= amt;
@@ -1188,9 +1193,14 @@ static void udp_rmem_release(struct sock *sk, int size, int partial)
 		__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
 
 	atomic_sub(size, &sk->sk_rmem_alloc);
+
+	/* this can save us from acquiring the rx queue lock on next receive */
+	skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
+
+	spin_unlock(&sk_queue->lock);
 }
 
-/* Note: called with sk_receive_queue.lock held.
+/* Note: called with reader_queue.lock held.
  * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
  * This avoids a cache line miss while receive_queue lock is held.
  * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
@@ -1306,10 +1316,12 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
 void udp_destruct_sock(struct sock *sk)
 {
 	/* reclaim completely the forward allocated memory */
+	struct udp_sock *up = udp_sk(sk);
 	unsigned int total = 0;
 	struct sk_buff *skb;
 
-	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+	skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
+	while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
 		total += skb->truesize;
 		kfree_skb(skb);
 	}
@@ -1321,6 +1333,7 @@ EXPORT_SYMBOL_GPL(udp_destruct_sock);
 
 int udp_init_sock(struct sock *sk)
 {
+	skb_queue_head_init(&udp_sk(sk)->reader_queue);
 	sk->sk_destruct = udp_destruct_sock;
 	return 0;
 }
@@ -1338,6 +1351,26 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
 
+static struct sk_buff *__first_packet_length(struct sock *sk,
+					     struct sk_buff_head *rcvq,
+					     int *total)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_peek(rcvq)) != NULL &&
+	       udp_lib_checksum_complete(skb)) {
+		__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+				IS_UDPLITE(sk));
+		__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+				IS_UDPLITE(sk));
+		atomic_inc(&sk->sk_drops);
+		__skb_unlink(skb, rcvq);
+		*total += skb->truesize;
+		kfree_skb(skb);
+	}
+	return skb;
+}
+
 /**
  *	first_packet_length	- return length of first packet in receive queue
  *	@sk: socket
@@ -1347,22 +1380,20 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
  */
 static int first_packet_length(struct sock *sk)
 {
-	struct sk_buff_head *rcvq = &sk->sk_receive_queue;
+	struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
+	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
 	struct sk_buff *skb;
 	int total = 0;
 	int res;
 
 	spin_lock_bh(&rcvq->lock);
-	while ((skb = skb_peek(rcvq)) != NULL &&
-		udp_lib_checksum_complete(skb)) {
-		__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
-				IS_UDPLITE(sk));
-		__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
-				IS_UDPLITE(sk));
-		atomic_inc(&sk->sk_drops);
-		__skb_unlink(skb, rcvq);
-		total += skb->truesize;
-		kfree_skb(skb);
+	skb = __first_packet_length(sk, rcvq, &total);
+	if (!skb && !skb_queue_empty(sk_queue)) {
+		spin_lock(&sk_queue->lock);
+		skb_queue_splice_tail_init(sk_queue, rcvq);
+		spin_unlock(&sk_queue->lock);
+
+		skb = __first_packet_length(sk, rcvq, &total);
 	}
 	res = skb ? skb->len : -1;
 	if (total)
@@ -1400,6 +1431,79 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(udp_ioctl);
 
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
+			       int noblock, int *peeked, int *off, int *err)
+{
+	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
+	struct sk_buff_head *queue;
+	struct sk_buff *last;
+	long timeo;
+	int error;
+
+	queue = &udp_sk(sk)->reader_queue;
+	flags |= noblock ? MSG_DONTWAIT : 0;
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	do {
+		struct sk_buff *skb;
+
+		error = sock_error(sk);
+		if (error)
+			break;
+
+		error = -EAGAIN;
+		*peeked = 0;
+		do {
+			int _off = *off;
+
+			spin_lock_bh(&queue->lock);
+			skb = __skb_try_recv_from_queue(sk, queue, flags,
+							udp_skb_destructor,
+							peeked, &_off, err,
+							&last);
+			if (skb) {
+				spin_unlock_bh(&queue->lock);
+				*off = _off;
+				return skb;
+			}
+
+			if (skb_queue_empty(sk_queue)) {
+				spin_unlock_bh(&queue->lock);
+				goto busy_check;
+			}
+
+			/* refill the reader queue and walk it again */
+			_off = *off;
+			spin_lock(&sk_queue->lock);
+			skb_queue_splice_tail_init(sk_queue, queue);
+			spin_unlock(&sk_queue->lock);
+
+			skb = __skb_try_recv_from_queue(sk, queue, flags,
+							udp_skb_destructor,
+							peeked, &_off, err,
+							&last);
+			spin_unlock_bh(&queue->lock);
+			if (skb) {
+				*off = _off;
+				return skb;
+			}
+
+busy_check:
+			if (!sk_can_busy_loop(sk))
+				break;
+
+			sk_busy_loop(sk, flags & MSG_DONTWAIT);
+		} while (!skb_queue_empty(sk_queue));
+
+		/* sk_queue is empty, reader_queue may contain peeked packets */
+	} while (timeo &&
+		 !__skb_wait_for_more_packets(sk, &error, &timeo,
+					      (struct sk_buff *)sk_queue));
+
+	*err = error;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__skb_recv_udp);
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
@@ -1490,7 +1594,8 @@ try_again:
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+				 udp_skb_destructor)) {
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	}
@@ -2325,6 +2430,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	unsigned int mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 
+	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+		mask |= POLLIN | POLLRDNORM;
+
 	sock_rps_record_flow(sk);
 
 	/* Check for false positives due to checksum errors */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 04862abfe4ec..f78fdf8c9f0f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -455,7 +455,8 @@ try_again:
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+				 udp_skb_destructor)) {
 		if (is_udp4) {
 			UDP_INC_STATS(sock_net(sk),
 				      UDP_MIB_CSUMERRORS, is_udplite);
-- 
cgit v1.2.3


From 218af599fa635b107cfe10acf3249c4dfe5e4123 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 May 2017 04:24:36 -0700
Subject: tcp: internal implementation for pacing

BBR congestion control depends on pacing, and pacing is
currently handled by sch_fq packet scheduler for performance reasons,
and also because implemening pacing with FQ was convenient to truly
avoid bursts.

However there are many cases where this packet scheduler constraint
is not practical.
- Many linux hosts are not focusing on handling thousands of TCP
  flows in the most efficient way.
- Some routers use fq_codel or other AQM, but still would like
  to use BBR for the few TCP flows they initiate/terminate.

This patch implements an automatic fallback to internal pacing.

Pacing is requested either by BBR or use of SO_MAX_PACING_RATE option.

If sch_fq happens to be in the egress path, pacing is delegated to
the qdisc, otherwise pacing is done by TCP itself.

One advantage of pacing from TCP stack is to get more precise rtt
estimations, and less work done from TX completion, since TCP Small
queue limits are not generally hit. Setups with single TX queue but
many cpus might even benefit from this.

Note that unlike sch_fq, we do not take into account header sizes.
Taking care of these headers would add additional complexity for
no practical differences in behavior.

Some performance numbers using 800 TCP_STREAM flows rate limited to
~48 Mbit per second on 40Gbit NIC.

If MQ+pfifo_fast is used on the NIC :

$ sar -n DEV 1 5 | grep eth
14:48:44         eth0 725743.00 2932134.00  46776.76 4335184.68      0.00      0.00      1.00
14:48:45         eth0 725349.00 2932112.00  46751.86 4335158.90      0.00      0.00      0.00
14:48:46         eth0 725101.00 2931153.00  46735.07 4333748.63      0.00      0.00      0.00
14:48:47         eth0 725099.00 2931161.00  46735.11 4333760.44      0.00      0.00      1.00
14:48:48         eth0 725160.00 2931731.00  46738.88 4334606.07      0.00      0.00      0.00
Average:         eth0 725290.40 2931658.20  46747.54 4334491.74      0.00      0.00      0.40
$ vmstat 1 5
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 4  0      0 259825920  45644 2708324    0    0    21     2  247   98  0  0 100  0  0
 4  0      0 259823744  45644 2708356    0    0     0     0 2400825 159843  0 19 81  0  0
 0  0      0 259824208  45644 2708072    0    0     0     0 2407351 159929  0 19 81  0  0
 1  0      0 259824592  45644 2708128    0    0     0     0 2405183 160386  0 19 80  0  0
 1  0      0 259824272  45644 2707868    0    0     0    32 2396361 158037  0 19 81  0  0

Now use MQ+FQ :

lpaa23:~# echo fq >/proc/sys/net/core/default_qdisc
lpaa23:~# tc qdisc replace dev eth0 root mq

$ sar -n DEV 1 5 | grep eth
14:49:57         eth0 678614.00 2727930.00  43739.13 4033279.14      0.00      0.00      0.00
14:49:58         eth0 677620.00 2723971.00  43674.69 4027429.62      0.00      0.00      1.00
14:49:59         eth0 676396.00 2719050.00  43596.83 4020125.02      0.00      0.00      0.00
14:50:00         eth0 675197.00 2714173.00  43518.62 4012938.90      0.00      0.00      1.00
14:50:01         eth0 676388.00 2719063.00  43595.47 4020171.64      0.00      0.00      0.00
Average:         eth0 676843.00 2720837.40  43624.95 4022788.86      0.00      0.00      0.40
$ vmstat 1 5
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 2  0      0 259832240  46008 2710912    0    0    21     2  223  192  0  1 99  0  0
 1  0      0 259832896  46008 2710744    0    0     0     0 1702206 198078  0 17 82  0  0
 0  0      0 259830272  46008 2710596    0    0     0     0 1696340 197756  1 17 83  0  0
 4  0      0 259829168  46024 2710584    0    0    16     0 1688472 197158  1 17 82  0  0
 3  0      0 259830224  46024 2710408    0    0     0     0 1692450 197212  0 18 82  0  0

As expected, number of interrupts per second is very different.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Van Jacobson <vanj@google.com>
Cc: Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  2 ++
 include/net/sock.h    |  9 +++++-
 include/net/tcp.h     |  3 ++
 net/core/sock.c       |  4 +++
 net/ipv4/tcp_bbr.c    |  9 +++---
 net/ipv4/tcp_output.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_timer.c  |  3 ++
 net/sched/sch_fq.c    |  8 ++++++
 8 files changed, 113 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b6d5adcee8fc..22854f028434 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -293,6 +293,8 @@ struct tcp_sock {
 	u32	sacked_out;	/* SACK'd packets			*/
 	u32	fackets_out;	/* FACK'd packets			*/
 
+	struct hrtimer	pacing_timer;
+
 	/* from STCP, retrans queue hinting */
 	struct sk_buff* lost_skb_hint;
 	struct sk_buff *retransmit_skb_hint;
diff --git a/include/net/sock.h b/include/net/sock.h
index 42264035dec0..3467d9e89e7d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -253,6 +253,7 @@ struct sock_common {
   *	@sk_ll_usec: usecs to busypoll when there is no data
   *	@sk_allocation: allocation mode
   *	@sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
+  *	@sk_pacing_status: Pacing status (requested, handled by sch_fq)
   *	@sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_padding: unused element for alignment
@@ -396,7 +397,7 @@ struct sock {
 	__s32			sk_peek_off;
 	int			sk_write_pending;
 	__u32			sk_dst_pending_confirm;
-	/* Note: 32bit hole on 64bit arches */
+	u32			sk_pacing_status; /* see enum sk_pacing */
 	long			sk_sndtimeo;
 	struct timer_list	sk_timer;
 	__u32			sk_priority;
@@ -475,6 +476,12 @@ struct sock {
 	struct rcu_head		sk_rcu;
 };
 
+enum sk_pacing {
+	SK_PACING_NONE		= 0,
+	SK_PACING_NEEDED	= 1,
+	SK_PACING_FQ		= 2,
+};
+
 #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
 
 #define rcu_dereference_sk_user_data(sk)	rcu_dereference(__sk_user_data((sk)))
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 38a7427ae902..b4dc93dae98c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -574,6 +574,7 @@ void tcp_fin(struct sock *sk);
 void tcp_init_xmit_timers(struct sock *);
 static inline void tcp_clear_xmit_timers(struct sock *sk)
 {
+	hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
 	inet_csk_clear_xmit_timers(sk);
 }
 
@@ -1945,4 +1946,6 @@ static inline void tcp_listendrop(const struct sock *sk)
 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 }
 
+enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
+
 #endif	/* _TCP_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index e43e71d7856b..93d011e35b83 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1041,6 +1041,10 @@ set_rcvbuf:
 #endif
 
 	case SO_MAX_PACING_RATE:
+		if (val != ~0U)
+			cmpxchg(&sk->sk_pacing_status,
+				SK_PACING_NONE,
+				SK_PACING_NEEDED);
 		sk->sk_max_pacing_rate = val;
 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 					 sk->sk_max_pacing_rate);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b89bce4c721e..92b045c72163 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -52,10 +52,9 @@
  * There is a public e-mail list for discussing BBR development and testing:
  *   https://groups.google.com/forum/#!forum/bbr-dev
  *
- * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
- * since pacing is integral to the BBR design and implementation.
- * BBR without pacing would not function properly, and may incur unnecessary
- * high packet loss rates.
+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * otherwise TCP stack falls back to an internal pacing using one high
+ * resolution timer per TCP socket and may use more resources.
  */
 #include <linux/module.h>
 #include <net/tcp.h>
@@ -830,6 +829,8 @@ static void bbr_init(struct sock *sk)
 	bbr->cycle_idx = 0;
 	bbr_reset_lt_bw_sampling(sk);
 	bbr_reset_startup_mode(sk);
+
+	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
 }
 
 static u32 bbr_sndbuf_expand(struct sock *sk)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4858e190f6ac..a32172d69a03 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -904,6 +904,72 @@ out:
 	sk_free(sk);
 }
 
+/* Note: Called under hard irq.
+ * We can not call TCP stack right away.
+ */
+enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
+{
+	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
+	struct sock *sk = (struct sock *)tp;
+	unsigned long nval, oval;
+
+	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
+		struct tsq_tasklet *tsq;
+		bool empty;
+
+		if (oval & TSQF_QUEUED)
+			break;
+
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+		if (nval != oval)
+			continue;
+
+		if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+			break;
+		/* queue this socket to tasklet queue */
+		tsq = this_cpu_ptr(&tsq_tasklet);
+		empty = list_empty(&tsq->head);
+		list_add(&tp->tsq_node, &tsq->head);
+		if (empty)
+			tasklet_schedule(&tsq->tasklet);
+		break;
+	}
+	return HRTIMER_NORESTART;
+}
+
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
+static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+{
+	u64 len_ns;
+	u32 rate;
+
+	if (!tcp_needs_internal_pacing(sk))
+		return;
+	rate = sk->sk_pacing_rate;
+	if (!rate || rate == ~0U)
+		return;
+
+	/* Should account for header sizes as sch_fq does,
+	 * but lets make things simple.
+	 */
+	len_ns = (u64)skb->len * NSEC_PER_SEC;
+	do_div(len_ns, rate);
+	hrtimer_start(&tcp_sk(sk)->pacing_timer,
+		      ktime_add_ns(ktime_get(), len_ns),
+		      HRTIMER_MODE_ABS_PINNED);
+}
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -1034,6 +1100,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
+		tcp_internal_pacing(sk, skb);
 	}
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -2086,6 +2153,12 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }
 
+static bool tcp_pacing_check(const struct sock *sk)
+{
+	return tcp_needs_internal_pacing(sk) &&
+	       hrtimer_active(&tcp_sk(sk)->pacing_timer);
+}
+
 /* TCP Small Queues :
  * Control number of packets in qdisc/devices to two packets / or ~1 ms.
  * (These limits are doubled for retransmits)
@@ -2210,6 +2283,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
+		if (tcp_pacing_check(sk))
+			break;
+
 		tso_segs = tcp_init_tso_segs(skb, mss_now);
 		BUG_ON(!tso_segs);
 
@@ -2878,6 +2954,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
 		if (skb == tcp_send_head(sk))
 			break;
+
+		if (tcp_pacing_check(sk))
+			break;
+
 		/* we could do better than to assign each time */
 		if (!hole)
 			tp->retransmit_skb_hint = skb;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 14672543cf0b..86934bcf685a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -710,4 +710,7 @@ void tcp_init_xmit_timers(struct sock *sk)
 {
 	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
 				  &tcp_keepalive_timer);
+	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_ABS_PINNED);
+	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
 }
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index b488721a0059..147fde73a0f5 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -390,9 +390,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		q->stat_tcp_retrans++;
 	qdisc_qstats_backlog_inc(sch, skb);
 	if (fq_flow_is_detached(f)) {
+		struct sock *sk = skb->sk;
+
 		fq_flow_add_tail(&q->new_flows, f);
 		if (time_after(jiffies, f->age + q->flow_refill_delay))
 			f->credit = max_t(u32, f->credit, q->quantum);
+		if (sk && q->rate_enable) {
+			if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
+				     SK_PACING_FQ))
+				smp_store_release(&sk->sk_pacing_status,
+						  SK_PACING_FQ);
+		}
 		q->inactive_flows--;
 	}
 
-- 
cgit v1.2.3


From 1b86f702f80de32d555519e2c4c61385faeab710 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 16 May 2017 18:29:11 +0200
Subject: net: phy: Remove residual magic from PHY drivers

commit fa8cddaf903c ("net phylib: Remove unnecessary condition check in phy")
removed the only place where the PHY flag PHY_HAS_MAGICANEG was
checked. But it left the flag being set in the drivers. Remove the flag.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c  | 30 +++++++++++++++---------------
 drivers/net/phy/micrel.c    | 27 +++++++++++++--------------
 drivers/net/phy/microchip.c |  2 +-
 drivers/net/phy/smsc.c      | 12 ++++++------
 include/linux/phy.h         |  3 +--
 5 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index a32dc5d11e89..1e9ad30a35c8 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -540,7 +540,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5411",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -551,7 +551,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5421",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -562,7 +562,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM54210E",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -573,7 +573,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5461",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -584,7 +584,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM54612E",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -595,7 +595,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM54616S",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -606,7 +606,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5464",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -617,7 +617,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5481",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= bcm5481_config_aneg,
 	.read_status	= genphy_read_status,
@@ -628,7 +628,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM54810",
 	.features       = PHY_GBIT_FEATURES,
-	.flags          = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags          = PHY_HAS_INTERRUPT,
 	.config_init    = bcm54xx_config_init,
 	.config_aneg    = bcm5481_config_aneg,
 	.read_status    = genphy_read_status,
@@ -639,7 +639,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5482",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm5482_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= bcm5482_read_status,
@@ -650,7 +650,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM50610",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -661,7 +661,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM50610M",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -672,7 +672,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM57780",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -683,7 +683,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCMAC131",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= brcm_fet_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -694,7 +694,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5241",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= brcm_fet_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6a5fd18f062c..4cfd54182da2 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -779,7 +779,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KS8737",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ks8737_type,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
@@ -793,7 +793,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= 0x00ffffff,
 	.name		= "Micrel KSZ8021 or KSZ8031",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8021_type,
 	.probe		= kszphy_probe,
 	.config_init	= kszphy_config_init,
@@ -811,7 +811,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= 0x00ffffff,
 	.name		= "Micrel KSZ8031",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8021_type,
 	.probe		= kszphy_probe,
 	.config_init	= kszphy_config_init,
@@ -829,7 +829,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8041",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8041_type,
 	.probe		= kszphy_probe,
 	.config_init	= ksz8041_config_init,
@@ -847,7 +847,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8041RNLI",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8041_type,
 	.probe		= kszphy_probe,
 	.config_init	= kszphy_config_init,
@@ -865,7 +865,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8051",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8051_type,
 	.probe		= kszphy_probe,
 	.config_init	= kszphy_config_init,
@@ -883,7 +883,7 @@ static struct phy_driver ksphy_driver[] = {
 	.name		= "Micrel KSZ8001 or KS8721",
 	.phy_id_mask	= 0x00fffffc,
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8041_type,
 	.probe		= kszphy_probe,
 	.config_init	= kszphy_config_init,
@@ -901,7 +901,7 @@ static struct phy_driver ksphy_driver[] = {
 	.name		= "Micrel KSZ8081 or KSZ8091",
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz8081_type,
 	.probe		= kszphy_probe,
 	.config_init	= kszphy_config_init,
@@ -919,7 +919,7 @@ static struct phy_driver ksphy_driver[] = {
 	.name		= "Micrel KSZ8061",
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -932,7 +932,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= 0x000ffffe,
 	.name		= "Micrel KSZ9021 Gigabit PHY",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
 	.probe		= kszphy_probe,
 	.config_init	= ksz9021_config_init,
@@ -952,7 +952,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ9031 Gigabit PHY",
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
 	.probe		= kszphy_probe,
 	.config_init	= ksz9031_config_init,
@@ -969,7 +969,6 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id		= PHY_ID_KSZ8873MLL,
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8873MLL Switch",
-	.flags		= PHY_HAS_MAGICANEG,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
@@ -980,7 +979,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ886X Switch",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -991,7 +990,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8795",
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index 2b2f543cf9f0..37ee856c7680 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -146,7 +146,7 @@ static struct phy_driver microchip_phy_driver[] = {
 	.name		= "Microchip LAN88xx",
 
 	.features	= PHY_GBIT_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= lan88xx_probe,
 	.remove		= lan88xx_remove,
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index cef6967b0396..67c9f2b26c8e 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -170,7 +170,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.name		= "SMSC LAN83C185",
 
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= smsc_phy_probe,
 
@@ -192,7 +192,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.name		= "SMSC LAN8187",
 
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= smsc_phy_probe,
 
@@ -214,7 +214,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.name		= "SMSC LAN8700",
 
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= smsc_phy_probe,
 
@@ -236,7 +236,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.name		= "SMSC LAN911x Internal PHY",
 
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= smsc_phy_probe,
 
@@ -257,7 +257,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.name		= "SMSC LAN8710/LAN8720",
 
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= smsc_phy_probe,
 
@@ -279,7 +279,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.name		= "SMSC LAN8740",
 
 	.features	= PHY_BASIC_FEATURES,
-	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+	.flags		= PHY_HAS_INTERRUPT,
 
 	.probe		= smsc_phy_probe,
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e76e4adbc7c7..54ef45823fc1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -58,8 +58,7 @@
 #define PHY_IGNORE_INTERRUPT	-2
 
 #define PHY_HAS_INTERRUPT	0x00000001
-#define PHY_HAS_MAGICANEG	0x00000002
-#define PHY_IS_INTERNAL		0x00000004
+#define PHY_IS_INTERNAL		0x00000002
 #define MDIO_DEVICE_IS_PHY	0x80000000
 
 /* Interface Mode definitions */
-- 
cgit v1.2.3


From 9512a16b0e1217bbef73d276a67c28b5fbb46512 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 16 May 2017 15:57:42 -0400
Subject: nfsd: Revert "nfsd: check for oversized NFSv2/v3 arguments"

This reverts commit 51f567777799 "nfsd: check for oversized NFSv2/v3
arguments", which breaks support for NFSv3 ACLs.

That patch was actually an earlier draft of a fix for the problem that
was eventually fixed by e6838a29ecb "nfsd: check for oversized NFSv2/v3
arguments".  But somehow I accidentally left this earlier draft in the
branch that was part of my 2.12 pull request.

Reported-by: Eryu Guan <eguan@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3xdr.c          | 23 ++++++-----------------
 fs/nfsd/nfsxdr.c           | 13 +++----------
 include/linux/sunrpc/svc.h |  3 ++-
 3 files changed, 11 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 12feac6ee2fd..452334694a5d 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -334,11 +334,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->offset);
-	args->count = ntohl(*p++);
-
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 
+	args->count = ntohl(*p++);
 	len = min(args->count, max_blocksize);
 
 	/* set up the kvec */
@@ -352,7 +349,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 		v++;
 	}
 	args->vlen = v;
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -544,11 +541,9 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -574,14 +569,10 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	args->verf   = p; p += 2;
 	args->dircount = ~0;
 	args->count  = ntohl(*p++);
-
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	args->count  = min_t(u32, args->count, PAGE_SIZE);
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -599,9 +590,6 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 	args->dircount = ntohl(*p++);
 	args->count    = ntohl(*p++);
 
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	len = args->count = min(args->count, max_blocksize);
 	while (len > 0) {
 		struct page *p = *(rqstp->rq_next_page++);
@@ -609,7 +597,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
-	return 1;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6a4947a3f4fa..de07ff625777 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,9 +257,6 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	len = args->count     = ntohl(*p++);
 	p++; /* totalcount - unused */
 
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
 
 	/* set up somewhere to store response.
@@ -275,7 +272,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 		v++;
 	}
 	args->vlen = v;
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -365,11 +362,9 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -407,11 +402,9 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	args->cookie = ntohl(*p++);
 	args->count  = ntohl(*p++);
 	args->count  = min_t(u32, args->count, PAGE_SIZE);
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 /*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 94631026f79c..11cef5a7bc87 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -336,7 +336,8 @@ xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p)
 {
 	char *cp = (char *)p;
 	struct kvec *vec = &rqstp->rq_arg.head[0];
-	return cp == (char *)vec->iov_base + vec->iov_len;
+	return cp >= (char*)vec->iov_base
+		&& cp <= (char*)vec->iov_base + vec->iov_len;
 }
 
 static inline int
-- 
cgit v1.2.3


From 4c9c3d595f1bad021cc126d20879df4016801736 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 20 Apr 2017 01:31:42 +0000
Subject: of_graph: add of_graph_get_remote_endpoint()

It should use same method to get same result.
To getting remote-endpoint node,
let's use of_graph_get_remote_endpoint()

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/of/base.c        | 18 ++++++++++++++++--
 include/linux/of_graph.h |  8 ++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 5db6aa1688e9..b169508a9b56 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -2486,6 +2486,20 @@ struct device_node *of_graph_get_endpoint_by_regs(
 }
 EXPORT_SYMBOL(of_graph_get_endpoint_by_regs);
 
+/**
+ * of_graph_get_remote_endpoint() - get remote endpoint node
+ * @node: pointer to a local endpoint device_node
+ *
+ * Return: Remote endpoint node associated with remote endpoint node linked
+ *	   to @node. Use of_node_put() on it when done.
+ */
+struct device_node *of_graph_get_remote_endpoint(const struct device_node *node)
+{
+	/* Get remote endpoint node. */
+	return of_parse_phandle(node, "remote-endpoint", 0);
+}
+EXPORT_SYMBOL(of_graph_get_remote_endpoint);
+
 /**
  * of_graph_get_remote_port_parent() - get remote port's parent node
  * @node: pointer to a local endpoint device_node
@@ -2500,7 +2514,7 @@ struct device_node *of_graph_get_remote_port_parent(
 	unsigned int depth;
 
 	/* Get remote endpoint node. */
-	np = of_parse_phandle(node, "remote-endpoint", 0);
+	np = of_graph_get_remote_endpoint(node);
 
 	/* Walk 3 levels up only if there is 'ports' node. */
 	for (depth = 3; depth && np; depth--) {
@@ -2524,7 +2538,7 @@ struct device_node *of_graph_get_remote_port(const struct device_node *node)
 	struct device_node *np;
 
 	/* Get remote endpoint node. */
-	np = of_parse_phandle(node, "remote-endpoint", 0);
+	np = of_graph_get_remote_endpoint(node);
 	if (!np)
 		return NULL;
 	return of_get_next_parent(np);
diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index abdb02eaef06..0c9473a169dd 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -48,6 +48,8 @@ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
 					struct device_node *previous);
 struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg);
+struct device_node *of_graph_get_remote_endpoint(
+					const struct device_node *node);
 struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node);
 struct device_node *of_graph_get_remote_port(const struct device_node *node);
@@ -80,6 +82,12 @@ static inline struct device_node *of_graph_get_endpoint_by_regs(
 	return NULL;
 }
 
+static inline struct device_node *of_graph_get_remote_endpoint(
+					const struct device_node *node)
+{
+	return NULL;
+}
+
 static inline struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node)
 {
-- 
cgit v1.2.3


From 0ef472a973ebbfc20f2f12769e77a8cfd3612778 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 20 Apr 2017 01:32:17 +0000
Subject: of_graph: add of_graph_get_port_parent()

Linux kernel already has of_graph_get_remote_port_parent(),
but, sometimes we want to get own port parent.
This patch adds of_graph_get_port_parent()

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/of/base.c        | 30 ++++++++++++++++++++++--------
 include/linux/of_graph.h |  7 +++++++
 2 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index b169508a9b56..4c305599a664 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -2500,6 +2500,27 @@ struct device_node *of_graph_get_remote_endpoint(const struct device_node *node)
 }
 EXPORT_SYMBOL(of_graph_get_remote_endpoint);
 
+/**
+ * of_graph_get_port_parent() - get port's parent node
+ * @node: pointer to a local endpoint device_node
+ *
+ * Return: device node associated with endpoint node linked
+ *	   to @node. Use of_node_put() on it when done.
+ */
+struct device_node *of_graph_get_port_parent(struct device_node *node)
+{
+	unsigned int depth;
+
+	/* Walk 3 levels up only if there is 'ports' node. */
+	for (depth = 3; depth && node; depth--) {
+		node = of_get_next_parent(node);
+		if (depth == 2 && of_node_cmp(node->name, "ports"))
+			break;
+	}
+	return node;
+}
+EXPORT_SYMBOL(of_graph_get_port_parent);
+
 /**
  * of_graph_get_remote_port_parent() - get remote port's parent node
  * @node: pointer to a local endpoint device_node
@@ -2511,18 +2532,11 @@ struct device_node *of_graph_get_remote_port_parent(
 			       const struct device_node *node)
 {
 	struct device_node *np;
-	unsigned int depth;
 
 	/* Get remote endpoint node. */
 	np = of_graph_get_remote_endpoint(node);
 
-	/* Walk 3 levels up only if there is 'ports' node. */
-	for (depth = 3; depth && np; depth--) {
-		np = of_get_next_parent(np);
-		if (depth == 2 && of_node_cmp(np->name, "ports"))
-			break;
-	}
-	return np;
+	return of_graph_get_port_parent(np);
 }
 EXPORT_SYMBOL(of_graph_get_remote_port_parent);
 
diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index 0c9473a169dd..9db632d16cbc 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -50,6 +50,7 @@ struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg);
 struct device_node *of_graph_get_remote_endpoint(
 					const struct device_node *node);
+struct device_node *of_graph_get_port_parent(struct device_node *node);
 struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node);
 struct device_node *of_graph_get_remote_port(const struct device_node *node);
@@ -88,6 +89,12 @@ static inline struct device_node *of_graph_get_remote_endpoint(
 	return NULL;
 }
 
+static inline struct device_node *of_graph_get_port_parent(
+	struct device_node *node)
+{
+	return NULL;
+}
+
 static inline struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node)
 {
-- 
cgit v1.2.3


From ac1e6958d3be29a28889b09e4eec1798eccc1606 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 20 Apr 2017 01:32:47 +0000
Subject: of_graph: add of_graph_get_endpoint_count()

OF graph want to count its endpoint number, same as
of_get_child_count(). This patch adds of_graph_get_endpoint_count()

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/of/base.c        | 12 ++++++++++++
 include/linux/of_graph.h |  6 ++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 4c305599a664..cb1c49ae3b88 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -2559,6 +2559,18 @@ struct device_node *of_graph_get_remote_port(const struct device_node *node)
 }
 EXPORT_SYMBOL(of_graph_get_remote_port);
 
+int of_graph_get_endpoint_count(const struct device_node *np)
+{
+	struct device_node *endpoint;
+	int num = 0;
+
+	for_each_endpoint_of_node(np, endpoint)
+		num++;
+
+	return num;
+}
+EXPORT_SYMBOL(of_graph_get_endpoint_count);
+
 /**
  * of_graph_get_remote_node() - get remote parent device_node for given port/endpoint
  * @node: pointer to parent device_node containing graph port/endpoint
diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index 9db632d16cbc..3e058f05ab04 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -43,6 +43,7 @@ struct of_endpoint {
 #ifdef CONFIG_OF
 int of_graph_parse_endpoint(const struct device_node *node,
 				struct of_endpoint *endpoint);
+int of_graph_get_endpoint_count(const struct device_node *np);
 struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id);
 struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
 					struct device_node *previous);
@@ -64,6 +65,11 @@ static inline int of_graph_parse_endpoint(const struct device_node *node,
 	return -ENOSYS;
 }
 
+static inline int of_graph_get_endpoint_count(const struct device_node *np)
+{
+	return 0;
+}
+
 static inline struct device_node *of_graph_get_port_by_id(
 					struct device_node *node, u32 id)
 {
-- 
cgit v1.2.3


From 3ffad468cf1d9825b425733941bdad0d8d20e795 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Tue, 16 May 2017 11:43:43 -0700
Subject: regulator: Allow for asymmetric settling times

Some regulators have different settling times for voltage increases and
decreases. To avoid a time penalty on the faster transition allow for
different settings for up- and downward transitions.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c          |  6 ++++++
 drivers/regulator/of_regulator.c  | 19 +++++++++++++++++++
 include/linux/regulator/machine.h |  6 ++++++
 3 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index c0d9ae8d0860..919b7f178209 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2767,6 +2767,12 @@ static int _regulator_set_voltage_time(struct regulator_dev *rdev,
 		ramp_delay = rdev->desc->ramp_delay;
 	else if (rdev->constraints->settling_time)
 		return rdev->constraints->settling_time;
+	else if (rdev->constraints->settling_time_up &&
+		 (new_uV > old_uV))
+		return rdev->constraints->settling_time_up;
+	else if (rdev->constraints->settling_time_down &&
+		 (new_uV < old_uV))
+		return rdev->constraints->settling_time_down;
 
 	if (ramp_delay == 0) {
 		rdev_dbg(rdev, "ramp_delay not set\n");
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 09d677d5d3f0..96bf75458da5 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -90,6 +90,25 @@ static void of_get_regulation_constraints(struct device_node *np,
 	if (!ret)
 		constraints->settling_time = pval;
 
+	ret = of_property_read_u32(np, "regulator-settling-time-up-us", &pval);
+	if (!ret)
+		constraints->settling_time_up = pval;
+	if (constraints->settling_time_up && constraints->settling_time) {
+		pr_warn("%s: ambiguous configuration for settling time, ignoring 'regulator-settling-time-up-us'\n",
+			np->name);
+		constraints->settling_time_up = 0;
+	}
+
+	ret = of_property_read_u32(np, "regulator-settling-time-down-us",
+				   &pval);
+	if (!ret)
+		constraints->settling_time_down = pval;
+	if (constraints->settling_time_down && constraints->settling_time) {
+		pr_warn("%s: ambiguous configuration for settling time, ignoring 'regulator-settling-time-down-us'\n",
+			np->name);
+		constraints->settling_time_down = 0;
+	}
+
 	ret = of_property_read_u32(np, "regulator-enable-ramp-delay", &pval);
 	if (!ret)
 		constraints->enable_time = pval;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 117699d1f7df..9cd4fef37203 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -110,6 +110,10 @@ struct regulator_state {
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
  * @settling_time: Time to settle down after voltage change when voltage
  *		   change is non-linear (unit: microseconds).
+ * @settling_time_up: Time to settle down after voltage increase when voltage
+ *		      change is non-linear (unit: microseconds).
+ * @settling_time_down : Time to settle down after voltage decrease when
+ *			 voltage change is non-linear (unit: microseconds).
  * @active_discharge: Enable/disable active discharge. The enum
  *		      regulator_active_discharge values are used for
  *		      initialisation.
@@ -152,6 +156,8 @@ struct regulation_constraints {
 
 	unsigned int ramp_delay;
 	unsigned int settling_time;
+	unsigned int settling_time_up;
+	unsigned int settling_time_down;
 	unsigned int enable_time;
 
 	unsigned int active_discharge;
-- 
cgit v1.2.3


From a86c309e71dc4f43c68483f7e328b1d4f9fef618 Mon Sep 17 00:00:00 2001
From: Mats Karrman <mats.dev.list@gmail.com>
Date: Tue, 25 Apr 2017 23:49:47 +0200
Subject: usb: typec: Don't prevent using constant typec_mode_desc initializers

In some situations, e.g. when registering alternate modes for local typec
ports, it may be handy to use constant mode descriptors. Allow this by
changing the mode descriptor arguments of typec_port_register_altmode()
et.al. to using const pointers.

Signed-off-by: Mats Karrman <mats.dev.list@gmail.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/typec.c | 11 ++++++-----
 include/linux/usb/typec.h |  6 +++---
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/typec.c b/drivers/usb/typec/typec.c
index 89e540bb7ff3..db5ee730ad24 100644
--- a/drivers/usb/typec/typec.c
+++ b/drivers/usb/typec/typec.c
@@ -291,7 +291,7 @@ typec_altmode_roles_show(struct device *dev, struct device_attribute *attr,
 }
 
 static void typec_init_modes(struct typec_altmode *alt,
-			     struct typec_mode_desc *desc, bool is_port)
+			     const struct typec_mode_desc *desc, bool is_port)
 {
 	int i;
 
@@ -378,7 +378,8 @@ static const struct device_type typec_altmode_dev_type = {
 };
 
 static struct typec_altmode *
-typec_register_altmode(struct device *parent, struct typec_altmode_desc *desc)
+typec_register_altmode(struct device *parent,
+		       const struct typec_altmode_desc *desc)
 {
 	struct typec_altmode *alt;
 	int ret;
@@ -495,7 +496,7 @@ EXPORT_SYMBOL_GPL(typec_partner_set_identity);
  */
 struct typec_altmode *
 typec_partner_register_altmode(struct typec_partner *partner,
-			       struct typec_altmode_desc *desc)
+			       const struct typec_altmode_desc *desc)
 {
 	return typec_register_altmode(&partner->dev, desc);
 }
@@ -590,7 +591,7 @@ static const struct device_type typec_plug_dev_type = {
  */
 struct typec_altmode *
 typec_plug_register_altmode(struct typec_plug *plug,
-			    struct typec_altmode_desc *desc)
+			    const struct typec_altmode_desc *desc)
 {
 	return typec_register_altmode(&plug->dev, desc);
 }
@@ -1159,7 +1160,7 @@ EXPORT_SYMBOL_GPL(typec_set_pwr_opmode);
  */
 struct typec_altmode *
 typec_port_register_altmode(struct typec_port *port,
-			    struct typec_altmode_desc *desc)
+			    const struct typec_altmode_desc *desc)
 {
 	return typec_register_altmode(&port->dev, desc);
 }
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index ec78204964ab..d1d2ebcf36ec 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -117,13 +117,13 @@ struct typec_altmode_desc {
 
 struct typec_altmode
 *typec_partner_register_altmode(struct typec_partner *partner,
-				struct typec_altmode_desc *desc);
+				const struct typec_altmode_desc *desc);
 struct typec_altmode
 *typec_plug_register_altmode(struct typec_plug *plug,
-			     struct typec_altmode_desc *desc);
+			     const struct typec_altmode_desc *desc);
 struct typec_altmode
 *typec_port_register_altmode(struct typec_port *port,
-			     struct typec_altmode_desc *desc);
+			     const struct typec_altmode_desc *desc);
 void typec_unregister_altmode(struct typec_altmode *altmode);
 
 struct typec_port *typec_altmode2port(struct typec_altmode *alt);
-- 
cgit v1.2.3


From 7d21114dc6a2d53babef43a84a8d8db2905d283d Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 5 May 2017 14:12:24 +0800
Subject: usb: phy: Introduce one extcon device into usb phy

Usually usb phy need register one extcon device to get the connection
notifications. It will remove some duplicate code if the extcon device
is registered using common code instead of each phy driver having its
own related extcon APIs. So we add one pointer of extcon device into
usb phy structure, and some other helper functions to register extcon.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/phy/Kconfig |  6 +++---
 drivers/usb/phy/phy.c   | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/phy.h |  7 ++++++
 3 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 3006f569c068..aff702c0eb9f 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -4,6 +4,7 @@
 menu "USB Physical Layer drivers"
 
 config USB_PHY
+	select EXTCON
 	def_bool n
 
 #
@@ -109,7 +110,7 @@ config OMAP_OTG
 
 config TAHVO_USB
 	tristate "Tahvo USB transceiver driver"
-	depends on MFD_RETU && EXTCON
+	depends on MFD_RETU
 	depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y'
 	select USB_PHY
 	help
@@ -141,7 +142,6 @@ config USB_MSM_OTG
 	depends on (USB || USB_GADGET) && (ARCH_QCOM || COMPILE_TEST)
 	depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y'
 	depends on RESET_CONTROLLER
-	depends on EXTCON
 	select USB_PHY
 	help
 	  Enable this to support the USB OTG transceiver on Qualcomm chips. It
@@ -155,7 +155,7 @@ config USB_MSM_OTG
 config USB_QCOM_8X16_PHY
 	tristate "Qualcomm APQ8016/MSM8916 on-chip USB PHY controller support"
 	depends on ARCH_QCOM || COMPILE_TEST
-	depends on RESET_CONTROLLER && EXTCON
+	depends on RESET_CONTROLLER
 	select USB_PHY
 	select USB_ULPI_VIEWPORT
 	help
diff --git a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c
index 98f75d2842b7..032f5afaad4b 100644
--- a/drivers/usb/phy/phy.c
+++ b/drivers/usb/phy/phy.c
@@ -100,6 +100,54 @@ static int devm_usb_phy_match(struct device *dev, void *res, void *match_data)
 	return *phy == match_data;
 }
 
+static int usb_add_extcon(struct usb_phy *x)
+{
+	int ret;
+
+	if (of_property_read_bool(x->dev->of_node, "extcon")) {
+		x->edev = extcon_get_edev_by_phandle(x->dev, 0);
+		if (IS_ERR(x->edev))
+			return PTR_ERR(x->edev);
+
+		x->id_edev = extcon_get_edev_by_phandle(x->dev, 1);
+		if (IS_ERR(x->id_edev)) {
+			x->id_edev = NULL;
+			dev_info(x->dev, "No separate ID extcon device\n");
+		}
+
+		if (x->vbus_nb.notifier_call) {
+			ret = devm_extcon_register_notifier(x->dev, x->edev,
+							    EXTCON_USB,
+							    &x->vbus_nb);
+			if (ret < 0) {
+				dev_err(x->dev,
+					"register VBUS notifier failed\n");
+				return ret;
+			}
+		}
+
+		if (x->id_nb.notifier_call) {
+			struct extcon_dev *id_ext;
+
+			if (x->id_edev)
+				id_ext = x->id_edev;
+			else
+				id_ext = x->edev;
+
+			ret = devm_extcon_register_notifier(x->dev, id_ext,
+							    EXTCON_USB_HOST,
+							    &x->id_nb);
+			if (ret < 0) {
+				dev_err(x->dev,
+					"register ID notifier failed\n");
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
 /**
  * devm_usb_get_phy - find the USB PHY
  * @dev - device that requests this phy
@@ -388,6 +436,10 @@ int usb_add_phy(struct usb_phy *x, enum usb_phy_type type)
 		return -EINVAL;
 	}
 
+	ret = usb_add_extcon(x);
+	if (ret)
+		return ret;
+
 	ATOMIC_INIT_NOTIFIER_HEAD(&x->notifier);
 
 	spin_lock_irqsave(&phy_lock, flags);
@@ -422,12 +474,17 @@ int usb_add_phy_dev(struct usb_phy *x)
 {
 	struct usb_phy_bind *phy_bind;
 	unsigned long flags;
+	int ret;
 
 	if (!x->dev) {
 		dev_err(x->dev, "no device provided for PHY\n");
 		return -EINVAL;
 	}
 
+	ret = usb_add_extcon(x);
+	if (ret)
+		return ret;
+
 	ATOMIC_INIT_NOTIFIER_HEAD(&x->notifier);
 
 	spin_lock_irqsave(&phy_lock, flags);
diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index 31a8068c42a5..299245105610 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -9,6 +9,7 @@
 #ifndef __LINUX_USB_PHY_H
 #define __LINUX_USB_PHY_H
 
+#include <linux/extcon.h>
 #include <linux/notifier.h>
 #include <linux/usb.h>
 
@@ -85,6 +86,12 @@ struct usb_phy {
 	struct usb_phy_io_ops	*io_ops;
 	void __iomem		*io_priv;
 
+	/* to support extcon device */
+	struct extcon_dev	*edev;
+	struct extcon_dev	*id_edev;
+	struct notifier_block	vbus_nb;
+	struct notifier_block	id_nb;
+
 	/* for notification of usb_phy_events */
 	struct atomic_notifier_head	notifier;
 
-- 
cgit v1.2.3


From 15060aba717115dc9f204c02213a7c6bf341163e Mon Sep 17 00:00:00 2001
From: CQ Tang <cq.tang@intel.com>
Date: Wed, 10 May 2017 11:39:03 -0700
Subject: iommu/vt-d: Helper function to query if a pasid has any active users

A driver would need to know if there are any active references to a
a PASID before cleaning up its resources. This function helps check
if there are any active users of a PASID before it can perform any
recovery on that device.

To: Joerg Roedel <joro@8bytes.org>
To: linux-kernel@vger.kernel.org
To: David Woodhouse <dwmw2@infradead.org>
Cc: Jean-Phillipe Brucker <jean-philippe.brucker@arm.com>
Cc: iommu@lists.linux-foundation.org

Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c | 30 ++++++++++++++++++++++++++++++
 include/linux/intel-svm.h | 20 ++++++++++++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 23c427602c55..f167c0d84ebf 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 }
 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
 
+int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+	struct intel_iommu *iommu;
+	struct intel_svm *svm;
+	int ret = -EINVAL;
+
+	mutex_lock(&pasid_mutex);
+	iommu = intel_svm_device_to_iommu(dev);
+	if (!iommu || !iommu->pasid_table)
+		goto out;
+
+	svm = idr_find(&iommu->pasid_idr, pasid);
+	if (!svm)
+		goto out;
+
+	/* init_mm is used in this case */
+	if (!svm->mm)
+		ret = 1;
+	else if (atomic_read(&svm->mm->mm_users) > 0)
+		ret = 1;
+	else
+		ret = 0;
+
+ out:
+	mutex_unlock(&pasid_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
+
 /* Page request queue descriptor */
 struct page_req_dsc {
 	u64 srr:1;
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 3c25794042f9..99bc5b3ae26e 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
  */
 extern int intel_svm_unbind_mm(struct device *dev, int pasid);
 
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev:	Device for which PASID was allocated
+ * @pasid:	PASID value to be checked
+ *
+ * This function checks if the specified pasid is still valid. A
+ * valid pasid means the backing mm is still having a valid user.
+ * For kernel callers init_mm is always valid. for other mm, if mm->mm_users
+ * is non-zero, it is valid.
+ *
+ * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid
+ * 1 if pasid is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
 #else /* CONFIG_INTEL_IOMMU_SVM */
 
 static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
 {
 	BUG();
 }
+
+static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_INTEL_IOMMU_SVM */
 
 #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
-- 
cgit v1.2.3


From 9a568de4818dea9a05af141046bd3e589245ab83 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 May 2017 14:00:14 -0700
Subject: tcp: switch TCP TS option (RFC 7323) to 1ms clock

TCP Timestamps option is defined in RFC 7323

Traditionally on linux, it has been tied to the internal
'jiffies' variable, because it had been a cheap and good enough
generator.

For TCP flows on the Internet, 1 ms resolution would be much better
than 4ms or 10ms (HZ=250 or HZ=100 respectively)

For TCP flows in the DC, Google has used usec resolution for more
than two years with great success [1]

Receive size autotuning (DRS) is indeed more precise and converges
faster to optimal window size.

This patch converts tp->tcp_mstamp to a plain u64 value storing
a 1 usec TCP clock.

This choice will allow us to upstream the 1 usec TS option as
discussed in IETF 97.

[1] https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h           | 62 +-------------------------
 include/linux/tcp.h              | 22 ++++-----
 include/net/tcp.h                | 59 ++++++++++++++++++++----
 net/ipv4/syncookies.c            |  8 ++--
 net/ipv4/tcp.c                   |  4 +-
 net/ipv4/tcp_bbr.c               | 22 ++++-----
 net/ipv4/tcp_input.c             | 96 ++++++++++++++++++++--------------------
 net/ipv4/tcp_ipv4.c              | 17 +++----
 net/ipv4/tcp_lp.c                | 12 ++---
 net/ipv4/tcp_minisocks.c         |  4 +-
 net/ipv4/tcp_output.c            | 16 +++----
 net/ipv4/tcp_rate.c              | 16 +++----
 net/ipv4/tcp_recovery.c          | 23 +++++-----
 net/ipv4/tcp_timer.c             |  8 ++--
 net/ipv6/syncookies.c            |  2 +-
 net/ipv6/tcp_ipv6.c              |  4 +-
 net/netfilter/nf_synproxy_core.c |  2 +-
 17 files changed, 178 insertions(+), 199 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bfc7892f6c33..7c0cb2ce8b01 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,66 +506,6 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
-/**
- * struct skb_mstamp - multi resolution time stamps
- * @stamp_us: timestamp in us resolution
- * @stamp_jiffies: timestamp in jiffies
- */
-struct skb_mstamp {
-	union {
-		u64		v64;
-		struct {
-			u32	stamp_us;
-			u32	stamp_jiffies;
-		};
-	};
-};
-
-/**
- * skb_mstamp_get - get current timestamp
- * @cl: place to store timestamps
- */
-static inline void skb_mstamp_get(struct skb_mstamp *cl)
-{
-	u64 val = local_clock();
-
-	do_div(val, NSEC_PER_USEC);
-	cl->stamp_us = (u32)val;
-	cl->stamp_jiffies = (u32)jiffies;
-}
-
-/**
- * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
- * @t1: pointer to newest sample
- * @t0: pointer to oldest sample
- */
-static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
-				      const struct skb_mstamp *t0)
-{
-	s32 delta_us = t1->stamp_us - t0->stamp_us;
-	u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies;
-
-	/* If delta_us is negative, this might be because interval is too big,
-	 * or local_clock() drift is too big : fallback using jiffies.
-	 */
-	if (delta_us <= 0 ||
-	    delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ)))
-
-		delta_us = jiffies_to_usecs(delta_jiffies);
-
-	return delta_us;
-}
-
-static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
-				    const struct skb_mstamp *t0)
-{
-	s32 diff = t1->stamp_jiffies - t0->stamp_jiffies;
-
-	if (!diff)
-		diff = t1->stamp_us - t0->stamp_us;
-	return diff > 0;
-}
-
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -646,7 +586,7 @@ struct sk_buff {
 
 			union {
 				ktime_t		tstamp;
-				struct skb_mstamp skb_mstamp;
+				u64		skb_mstamp;
 			};
 		};
 		struct rb_node	rbnode; /* used in netem & tcp stack */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 22854f028434..542ca1ae02c4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -123,7 +123,7 @@ struct tcp_request_sock_ops;
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 	const struct tcp_request_sock_ops *af_specific;
-	struct skb_mstamp		snt_synack; /* first SYNACK sent time */
+	u64				snt_synack; /* first SYNACK sent time */
 	bool				tfo_listener;
 	u32				txhash;
 	u32				rcv_isn;
@@ -211,7 +211,7 @@ struct tcp_sock {
 
 	/* Information of the most recently (s)acked skb */
 	struct tcp_rack {
-		struct skb_mstamp mstamp; /* (Re)sent time of the skb */
+		u64 mstamp; /* (Re)sent time of the skb */
 		u32 rtt_us;  /* Associated RTT */
 		u32 end_seq; /* Ending TCP sequence of the skb */
 		u8 advanced; /* mstamp advanced since last lost marking */
@@ -240,7 +240,7 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
-	struct skb_mstamp tcp_mstamp; /* most recent packet received/sent */
+	u64	tcp_mstamp;	/* most recent packet received/sent */
 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
 	u32	mdev_us;	/* medium deviation			*/
 	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/
@@ -280,8 +280,8 @@ struct tcp_sock {
 	u32	delivered;	/* Total data packets delivered incl. rexmits */
 	u32	lost;		/* Total data packets lost incl. rexmits */
 	u32	app_limited;	/* limited until "delivered" reaches this val */
-	struct skb_mstamp first_tx_mstamp;  /* start of window send phase */
-	struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
+	u64	first_tx_mstamp;  /* start of window send phase */
+	u64	delivered_mstamp; /* time we reached "delivered" */
 	u32	rate_delivered;    /* saved rate sample: packets delivered */
 	u32	rate_interval_us;  /* saved rate sample: time elapsed */
 
@@ -335,16 +335,16 @@ struct tcp_sock {
 
 /* Receiver side RTT estimation */
 	struct {
-		u32		rtt_us;
-		u32		seq;
-		struct skb_mstamp time;
+		u32	rtt_us;
+		u32	seq;
+		u64	time;
 	} rcv_rtt_est;
 
 /* Receiver queue space */
 	struct {
-		int		space;
-		u32		seq;
-		struct skb_mstamp time;
+		int	space;
+		u32	seq;
+		u64	time;
 	} rcvq_space;
 
 /* TCP-specific MTU probe information. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b2932b8363f..82462db97183 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -519,7 +519,7 @@ static inline u32 tcp_cookie_time(void)
 u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
 			      u16 *mssp);
 __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-__u32 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req);
 bool cookie_timestamp_decode(struct tcp_options_received *opt);
 bool cookie_ecn_ok(const struct tcp_options_received *opt,
 		   const struct net *net, const struct dst_entry *dst);
@@ -706,14 +706,55 @@ void tcp_send_window_probe(struct sock *sk);
  */
 #define tcp_jiffies32 ((u32)jiffies)
 
-/* Generator for TCP TS option (RFC 7323)
- * Currently tied to 'jiffies' but will soon be driven by 1 ms clock.
+/*
+ * Deliver a 32bit value for TCP timestamp option (RFC 7323)
+ * It is no longer tied to jiffies, but to 1 ms clock.
+ * Note: double check if you want to use tcp_jiffies32 instead of this.
+ */
+#define TCP_TS_HZ	1000
+
+static inline u64 tcp_clock_ns(void)
+{
+	return local_clock();
+}
+
+static inline u64 tcp_clock_us(void)
+{
+	return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+}
+
+/* This should only be used in contexts where tp->tcp_mstamp is up to date */
+static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
+{
+	return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+}
+
+/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
+static inline u32 tcp_time_stamp_raw(void)
+{
+	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+
+/* Refresh 1us clock of a TCP socket,
+ * ensuring monotically increasing values.
  */
-#define tcp_time_stamp		((__u32)(jiffies))
+static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+	u64 val = tcp_clock_us();
+
+	if (val > tp->tcp_mstamp)
+		tp->tcp_mstamp = val;
+}
+
+static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
+{
+	return max_t(s64, t1 - t0, 0);
+}
 
 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
-	return skb->skb_mstamp.stamp_jiffies;
+	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
 }
 
 
@@ -778,9 +819,9 @@ struct tcp_skb_cb {
 			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
 			__u32 delivered;
 			/* start of send pipeline phase */
-			struct skb_mstamp first_tx_mstamp;
+			u64 first_tx_mstamp;
 			/* when we reached the "delivered" count */
-			struct skb_mstamp delivered_mstamp;
+			u64 delivered_mstamp;
 		} tx;   /* only used for outgoing skbs */
 		union {
 			struct inet_skb_parm	h4;
@@ -896,7 +937,7 @@ struct ack_sample {
  * A sample is invalid if "delivered" or "interval_us" is negative.
  */
 struct rate_sample {
-	struct	skb_mstamp prior_mstamp; /* starting timestamp for interval */
+	u64  prior_mstamp; /* starting timestamp for interval */
 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
 	s32  delivered;		/* number of packets delivered over interval */
 	long interval_us;	/* time for tp->delivered to incr "delivered" */
@@ -1862,7 +1903,7 @@ void tcp_init(void);
 /* tcp_recovery.c */
 extern void tcp_rack_mark_lost(struct sock *sk);
 extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
-			     const struct skb_mstamp *xmit_time);
+			     u64 xmit_time);
 extern void tcp_rack_reo_timeout(struct sock *sk);
 
 /*
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0257d965f111..6426250a58ea 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -66,10 +66,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
  * Since subsequent timestamps use the normal tcp_time_stamp value, we
  * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
  */
-__u32 cookie_init_timestamp(struct request_sock *req)
+u64 cookie_init_timestamp(struct request_sock *req)
 {
 	struct inet_request_sock *ireq;
-	u32 ts, ts_now = tcp_time_stamp;
+	u32 ts, ts_now = tcp_time_stamp_raw();
 	u32 options = 0;
 
 	ireq = inet_rsk(req);
@@ -88,7 +88,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 		ts <<= TSBITS;
 		ts |= options;
 	}
-	return ts;
+	return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
 }
 
 
@@ -343,7 +343,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-	treq->snt_synack.v64	= 0;
+	treq->snt_synack	= 0;
 	treq->tfo_listener	= false;
 
 	ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 850054800526..b5d18484746d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2706,7 +2706,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (!tp->repair)
 			err = -EPERM;
 		else
-			tp->tsoffset = val - tcp_time_stamp;
+			tp->tsoffset = val - tcp_time_stamp_raw();
 		break;
 	case TCP_REPAIR_WINDOW:
 		err = tcp_repair_set_window(tp, optval, optlen);
@@ -3072,7 +3072,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_TIMESTAMP:
-		val = tcp_time_stamp + tp->tsoffset;
+		val = tcp_time_stamp_raw() + tp->tsoffset;
 		break;
 	case TCP_NOTSENT_LOWAT:
 		val = tp->notsent_lowat;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 40dc4fc5f6ac..dbcc9352a48f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -91,7 +91,7 @@ struct bbr {
 	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
 	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
 	u32     next_rtt_delivered; /* scb->tx.delivered at end of round */
-	struct skb_mstamp cycle_mstamp;  /* time of this cycle phase start */
+	u64	cycle_mstamp;	     /* time of this cycle phase start */
 	u32     mode:3,		     /* current bbr_mode in state machine */
 		prev_ca_state:3,     /* CA state on previous ACK */
 		packet_conservation:1,  /* use packet conservation? */
@@ -411,7 +411,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
 	bool is_full_length =
-		skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
 		bbr->min_rtt_us;
 	u32 inflight, bw;
 
@@ -497,7 +497,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
 
-	bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
 	bbr->lt_last_delivered = tp->delivered;
 	bbr->lt_last_lost = tp->lost;
 	bbr->lt_rtt_cnt = 0;
@@ -551,7 +551,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
 	struct bbr *bbr = inet_csk_ca(sk);
 	u32 lost, delivered;
 	u64 bw;
-	s32 t;
+	u32 t;
 
 	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
 		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
@@ -603,15 +603,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
 		return;
 
 	/* Find average delivery rate in this sampling interval. */
-	t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
-	if (t < 1)
-		return;		/* interval is less than one jiffy, so wait */
-	t = jiffies_to_usecs(t);
-	/* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
-	if (t < 1) {
+	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
+	if ((s32)t < 1)
+		return;		/* interval is less than one ms, so wait */
+	/* Check if can multiply without overflow */
+	if (t >= ~0U / USEC_PER_MSEC) {
 		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
 		return;
 	}
+	t *= USEC_PER_MSEC;
 	bw = (u64)delivered * BW_UNIT;
 	do_div(bw, t);
 	bbr_lt_bw_interval_done(sk, bw);
@@ -825,7 +825,7 @@ static void bbr_init(struct sock *sk)
 	bbr->idle_restart = 0;
 	bbr->full_bw = 0;
 	bbr->full_bw_cnt = 0;
-	bbr->cycle_mstamp.v64 = 0;
+	bbr->cycle_mstamp = 0;
 	bbr->cycle_idx = 0;
 	bbr_reset_lt_bw_sampling(sk);
 	bbr_reset_startup_mode(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 10e6775464f6..9a5a9e8eda89 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -441,7 +441,7 @@ void tcp_init_buffer_space(struct sock *sk)
 		tcp_sndbuf_expand(sk);
 
 	tp->rcvq_space.space = tp->rcv_wnd;
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	tp->rcvq_space.time = tp->tcp_mstamp;
 	tp->rcvq_space.seq = tp->copied_seq;
 
@@ -555,11 +555,11 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 {
 	u32 delta_us;
 
-	if (tp->rcv_rtt_est.time.v64 == 0)
+	if (tp->rcv_rtt_est.time == 0)
 		goto new_measure;
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
-	delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
 	tcp_rcv_rtt_update(tp, delta_us, 1);
 
 new_measure:
@@ -571,13 +571,15 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 					  const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->rx_opt.rcv_tsecr &&
 	    (TCP_SKB_CB(skb)->end_seq -
-	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
-		tcp_rcv_rtt_update(tp,
-				   jiffies_to_usecs(tcp_time_stamp -
-						    tp->rx_opt.rcv_tsecr),
-				   0);
+	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+		tcp_rcv_rtt_update(tp, delta_us, 0);
+	}
 }
 
 /*
@@ -590,7 +592,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	int time;
 	int copied;
 
-	time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
 		return;
 
@@ -1134,8 +1136,8 @@ struct tcp_sacktag_state {
 	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
 	 * but congestion control should still get an accurate delay signal.
 	 */
-	struct skb_mstamp first_sackt;
-	struct skb_mstamp last_sackt;
+	u64	first_sackt;
+	u64	last_sackt;
 	struct rate_sample *rate;
 	int	flag;
 };
@@ -1200,7 +1202,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
 			  int dup_sack, int pcount,
-			  const struct skb_mstamp *xmit_time)
+			  u64 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1242,9 +1244,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 							   state->reord);
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
-				if (state->first_sackt.v64 == 0)
-					state->first_sackt = *xmit_time;
-				state->last_sackt = *xmit_time;
+				if (state->first_sackt == 0)
+					state->first_sackt = xmit_time;
+				state->last_sackt = xmit_time;
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1304,7 +1306,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			&skb->skb_mstamp);
+			skb->skb_mstamp);
 	tcp_rate_skb_delivered(sk, skb, state->rate);
 
 	if (skb == tp->lost_skb_hint)
@@ -1356,8 +1358,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tcp_advance_highest_sack(sk, skb);
 
 	tcp_skb_collapse_tstamp(prev, skb);
-	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
-		TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
+		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
 
 	tcp_unlink_write_queue(skb, sk);
 	sk_wmem_free_skb(sk, skb);
@@ -1587,7 +1589,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						&skb->skb_mstamp);
+						skb->skb_mstamp);
 			tcp_rate_skb_delivered(sk, skb, state->rate);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
@@ -2936,9 +2938,12 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-	    flag & FLAG_ACKED)
-		seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
-							  tp->rx_opt.rcv_tsecr);
+	    flag & FLAG_ACKED) {
+		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+		seq_rtt_us = ca_rtt_us = delta_us;
+	}
 	if (seq_rtt_us < 0)
 		return false;
 
@@ -2960,12 +2965,8 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 {
 	long rtt_us = -1L;
 
-	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
-		struct skb_mstamp now;
-
-		skb_mstamp_get(&now);
-		rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
-	}
+	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
+		rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
 
 	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
 }
@@ -3003,7 +3004,7 @@ void tcp_rearm_rto(struct sock *sk)
 			struct sk_buff *skb = tcp_write_queue_head(sk);
 			const u32 rto_time_stamp =
 				tcp_skb_timestamp(skb) + rto;
-			s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
+			s32 delta = (s32)(rto_time_stamp - tcp_jiffies32);
 			/* delta may not be positive if the socket is locked
 			 * when the retrans timer fires and is rescheduled.
 			 */
@@ -3060,9 +3061,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			       struct tcp_sacktag_state *sack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct skb_mstamp first_ackt, last_ackt;
+	u64 first_ackt, last_ackt;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct skb_mstamp *now = &tp->tcp_mstamp;
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
 	bool fully_acked = true;
@@ -3075,7 +3075,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	bool rtt_update;
 	int flag = 0;
 
-	first_ackt.v64 = 0;
+	first_ackt = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3106,8 +3106,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
 			last_ackt = skb->skb_mstamp;
-			WARN_ON_ONCE(last_ackt.v64 == 0);
-			if (!first_ackt.v64)
+			WARN_ON_ONCE(last_ackt == 0);
+			if (!first_ackt)
 				first_ackt = last_ackt;
 
 			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
@@ -3122,7 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tp->delivered += acked_pcount;
 			if (!tcp_skb_spurious_retrans(tp, skb))
 				tcp_rack_advance(tp, sacked, scb->end_seq,
-						 &skb->skb_mstamp);
+						 skb->skb_mstamp);
 		}
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
@@ -3165,13 +3165,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
-		seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
-		ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
+	if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
+		seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
+		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
 	}
-	if (sack->first_sackt.v64) {
-		sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
-		ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
+	if (sack->first_sackt) {
+		sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
+		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
 	}
 	sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
 	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
@@ -3201,7 +3201,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
-		   sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
+		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3553,7 +3553,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
 
-	sack_state.first_sackt.v64 = 0;
+	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
 
 	/* We very likely will need to access write queue head. */
@@ -5356,7 +5356,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 	/*
@@ -5672,7 +5672,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
-			     tcp_time_stamp)) {
+			     tcp_time_stamp(tp))) {
 			NET_INC_STATS(sock_net(sk),
 					LINUX_MIB_PAWSACTIVEREJECTED);
 			goto reset_and_undo;
@@ -5917,7 +5917,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	case TCP_SYN_SENT:
 		tp->rx_opt.saw_tstamp = 0;
-		skb_mstamp_get(&tp->tcp_mstamp);
+		tcp_mstamp_refresh(tp);
 		queued = tcp_rcv_synsent_state_process(sk, skb, th);
 		if (queued >= 0)
 			return queued;
@@ -5929,7 +5929,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	tp->rx_opt.saw_tstamp = 0;
 	req = tp->fastopen_rsk;
 	if (req) {
@@ -6202,7 +6202,7 @@ static void tcp_openreq_init(struct request_sock *req,
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	skb_mstamp_get(&tcp_rsk(req)->snt_synack);
+	tcp_rsk(req)->snt_synack = tcp_clock_us();
 	tcp_rsk(req)->last_oow_ack_time = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d8fe25db79f2..191b2f78b19d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -376,8 +376,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	struct sock *sk;
 	struct sk_buff *skb;
 	struct request_sock *fastopen;
-	__u32 seq, snd_una;
-	__u32 remaining;
+	u32 seq, snd_una;
+	s32 remaining;
+	u32 delta_us;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
 
@@ -483,12 +484,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		skb = tcp_write_queue_head(sk);
 		BUG_ON(!skb);
 
-		skb_mstamp_get(&tp->tcp_mstamp);
+		tcp_mstamp_refresh(tp);
+		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
 		remaining = icsk->icsk_rto -
-			    min(icsk->icsk_rto,
-				tcp_time_stamp - tcp_skb_timestamp(skb));
+			    usecs_to_jiffies(delta_us);
 
-		if (remaining) {
+		if (remaining > 0) {
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  remaining, TCP_RTO_MAX);
 		} else {
@@ -812,7 +813,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	tcp_v4_send_ack(sk, skb,
 			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcp_time_stamp + tcptw->tw_ts_offset,
+			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent,
 			tw->tw_bound_dev_if,
 			tcp_twsk_md5_key(tcptw),
@@ -840,7 +841,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	tcp_v4_send_ack(sk, skb, seq,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-			tcp_time_stamp + tcp_rsk(req)->ts_off,
+			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent,
 			0,
 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index ef3122abb373..ae10ed64fe13 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -37,7 +37,7 @@
 #include <net/tcp.h>
 
 /* resolution of owd */
-#define LP_RESOL       1000
+#define LP_RESOL       TCP_TS_HZ
 
 /**
  * enum tcp_lp_state
@@ -147,9 +147,9 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
 	    tp->rx_opt.rcv_tsecr == lp->local_ref_time)
 		goto out;
 
-	m = HZ * (tp->rx_opt.rcv_tsval -
-		  lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr -
-					  lp->local_ref_time);
+	m = TCP_TS_HZ *
+	    (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
+	    (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
 	if (m < 0)
 		m = -m;
 
@@ -194,7 +194,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
 	if (lp->flag & LP_VALID_RHZ) {
 		owd =
 		    tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
-		    tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ);
+		    tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ);
 		if (owd < 0)
 			owd = -owd;
 	}
@@ -264,7 +264,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
-	u32 now = tcp_time_stamp;
+	u32 now = tcp_time_stamp(tp);
 	u32 delta;
 
 	if (sample->rtt_us > 0)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6504f1082bdf..d0642df73044 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 		newtp->tlp_high_seq = 0;
-		newtp->lsndtime = treq->snt_synack.stamp_jiffies;
+		newtp->lsndtime = tcp_jiffies32;
 		newsk->sk_txhash = treq->txhash;
 		newtp->last_oow_ack_time = 0;
 		newtp->total_retrans = req->num_retrans;
@@ -526,7 +526,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->fastopen_req = NULL;
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
-		newtp->rack.mstamp.v64 = 0;
+		newtp->rack.mstamp = 0;
 		newtp->rack.advanced = 0;
 
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 65472e931a0b..478f75baee31 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1962,7 +1962,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 
 	head = tcp_write_queue_head(sk);
 
-	age = skb_mstamp_us_delta(&tp->tcp_mstamp, &head->skb_mstamp);
+	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
 		goto send_now;
@@ -2279,7 +2279,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	}
 
 	max_segs = tcp_tso_segs(sk, mss_now);
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
@@ -3095,7 +3095,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
 			     TCPHDR_ACK | TCPHDR_RST);
-	skb_mstamp_get(&tcp_sk(sk)->tcp_mstamp);
+	tcp_mstamp_refresh(tcp_sk(sk));
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -3191,10 +3191,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-		skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
+		skb->skb_mstamp = cookie_init_timestamp(req);
 	else
 #endif
-	skb_mstamp_get(&skb->skb_mstamp);
+		skb->skb_mstamp = tcp_clock_us();
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
@@ -3453,8 +3453,8 @@ int tcp_connect(struct sock *sk)
 		return -ENOBUFS;
 
 	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
-	skb_mstamp_get(&tp->tcp_mstamp);
-	tp->retrans_stamp = tp->tcp_mstamp.stamp_jiffies;
+	tcp_mstamp_refresh(tp);
+	tp->retrans_stamp = tcp_time_stamp(tp);
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
 
@@ -3615,7 +3615,7 @@ void tcp_send_window_probe(struct sock *sk)
 {
 	if (sk->sk_state == TCP_ESTABLISHED) {
 		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
-		skb_mstamp_get(&tcp_sk(sk)->tcp_mstamp);
+		tcp_mstamp_refresh(tcp_sk(sk));
 		tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
 	}
 }
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c6a9fa894646..ad99569d4c1e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -78,7 +78,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 
-	if (!scb->tx.delivered_mstamp.v64)
+	if (!scb->tx.delivered_mstamp)
 		return;
 
 	if (!rs->prior_delivered ||
@@ -89,9 +89,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 		rs->is_retrans	     = scb->sacked & TCPCB_RETRANS;
 
 		/* Find the duration of the "send phase" of this window: */
-		rs->interval_us      = skb_mstamp_us_delta(
-						&skb->skb_mstamp,
-						&scb->tx.first_tx_mstamp);
+		rs->interval_us      = tcp_stamp_us_delta(
+						skb->skb_mstamp,
+						scb->tx.first_tx_mstamp);
 
 		/* Record send time of most recently ACKed packet: */
 		tp->first_tx_mstamp  = skb->skb_mstamp;
@@ -101,7 +101,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 	 * we don't need to reset since it'll be freed soon.
 	 */
 	if (scb->sacked & TCPCB_SACKED_ACKED)
-		scb->tx.delivered_mstamp.v64 = 0;
+		scb->tx.delivered_mstamp = 0;
 }
 
 /* Update the connection delivery information and generate a rate sample. */
@@ -125,7 +125,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	rs->acked_sacked = delivered;	/* freshly ACKed or SACKed */
 	rs->losses = lost;		/* freshly marked lost */
 	/* Return an invalid sample if no timing information is available. */
-	if (!rs->prior_mstamp.v64) {
+	if (!rs->prior_mstamp) {
 		rs->delivered = -1;
 		rs->interval_us = -1;
 		return;
@@ -138,8 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	 * longer phase.
 	 */
 	snd_us = rs->interval_us;				/* send phase */
-	ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp,
-				     &rs->prior_mstamp); /* ack phase */
+	ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+				    rs->prior_mstamp); /* ack phase */
 	rs->interval_us = max(snd_us, ack_us);
 
 	/* Normally we expect interval_us >= min-rtt.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index cd72b3d3879e..fe9a493d0208 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -17,12 +17,9 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
-static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
-				const struct skb_mstamp *t2,
-				u32 seq1, u32 seq2)
+static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
 {
-	return skb_mstamp_after(t1, t2) ||
-	       (t1->v64 == t2->v64 && after(seq1, seq2));
+	return t1 > t2 || (t1 == t2 && after(seq1, seq2));
 }
 
 /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
@@ -72,14 +69,14 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 		    scb->sacked & TCPCB_SACKED_ACKED)
 			continue;
 
-		if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+		if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
 					tp->rack.end_seq, scb->end_seq)) {
 			/* Step 3 in draft-cheng-tcpm-rack-00.txt:
 			 * A packet is lost if its elapsed time is beyond
 			 * the recent RTT plus the reordering window.
 			 */
-			u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp,
-							  &skb->skb_mstamp);
+			u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
+							 skb->skb_mstamp);
 			s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
 
 			if (remaining < 0) {
@@ -127,16 +124,16 @@ void tcp_rack_mark_lost(struct sock *sk)
  * draft-cheng-tcpm-rack-00.txt
  */
 void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
-		      const struct skb_mstamp *xmit_time)
+		      u64 xmit_time)
 {
 	u32 rtt_us;
 
-	if (tp->rack.mstamp.v64 &&
-	    !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+	if (tp->rack.mstamp &&
+	    !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
 				 end_seq, tp->rack.end_seq))
 		return;
 
-	rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time);
+	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
 	if (sacked & TCPCB_RETRANS) {
 		/* If the sacked packet was retransmitted, it's ambiguous
 		 * whether the retransmission or the original (or the prior
@@ -152,7 +149,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			return;
 	}
 	tp->rack.rtt_us = rtt_us;
-	tp->rack.mstamp = *xmit_time;
+	tp->rack.mstamp = xmit_time;
 	tp->rack.end_seq = end_seq;
 	tp->rack.advanced = 1;
 }
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6629f47aa7f0..27a667bce806 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -153,8 +153,8 @@ static bool retransmits_timed_out(struct sock *sk,
 				  unsigned int timeout,
 				  bool syn_set)
 {
-	unsigned int linear_backoff_thresh, start_ts;
 	unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
+	unsigned int linear_backoff_thresh, start_ts;
 
 	if (!inet_csk(sk)->icsk_retransmits)
 		return false;
@@ -172,7 +172,7 @@ static bool retransmits_timed_out(struct sock *sk,
 			timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
 				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
 	}
-	return (tcp_time_stamp - start_ts) >= timeout;
+	return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout);
 }
 
 /* A write timeout has occurred. Process the after effects. */
@@ -341,7 +341,7 @@ static void tcp_probe_timer(struct sock *sk)
 	if (!start_ts)
 		tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
 	else if (icsk->icsk_user_timeout &&
-		 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
 		goto abort;
 
 	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
@@ -561,7 +561,7 @@ void tcp_write_timer_handler(struct sock *sk)
 		goto out;
 	}
 
-	skb_mstamp_get(&tcp_sk(sk)->tcp_mstamp);
+	tcp_mstamp_refresh(tcp_sk(sk));
 	event = icsk->icsk_pending;
 
 	switch (event) {
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5abc3692b901..971823359f5b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -211,7 +211,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-	treq->snt_synack.v64	= 0;
+	treq->snt_synack	= 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 	treq->ts_off = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4f4310a36a04..233edfabe1db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -949,7 +949,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcp_time_stamp + tcptw->tw_ts_offset,
+			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
 
@@ -971,7 +971,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-			tcp_time_stamp + tcp_rsk(req)->ts_off,
+			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index a504e87c6ddf..49bd8bb16b18 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -152,7 +152,7 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
 				    struct synproxy_options *opts)
 {
 	opts->tsecr = opts->tsval;
-	opts->tsval = tcp_time_stamp & ~0x3f;
+	opts->tsval = tcp_time_stamp_raw() & ~0x3f;
 
 	if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
 		opts->tsval |= opts->wscale;
-- 
cgit v1.2.3


From 33c35aa4817864e056fd772230b0c6b552e36ea2 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 15 May 2017 09:34:06 -0400
Subject: cgroup: Prevent kill_css() from being called more than once

The kill_css() function may be called more than once under the condition
that the css was killed but not physically removed yet followed by the
removal of the cgroup that is hosting the css. This patch prevents any
harmm from being done when that happens.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org # v4.5+
---
 include/linux/cgroup-defs.h | 1 +
 kernel/cgroup/cgroup.c      | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 21745946cae1..ec47101cb1bf 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -48,6 +48,7 @@ enum {
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
 	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
+	CSS_DYING	= (1 << 4), /* css is dying */
 };
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c3c9a0e1b3c9..8d4e85eae42c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
+	if (css->flags & CSS_DYING)
+		return;
+
+	css->flags |= CSS_DYING;
+
 	/*
 	 * This must happen before css is disassociated with its cgroup.
 	 * See seq_css() for details.
-- 
cgit v1.2.3


From 197a5212c3dd70be267b5cd930be0fb68bb53018 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 17 May 2017 12:14:37 +0800
Subject: ptr_ring: add ptr_ring_unconsume

Applications that consume a batch of entries in one go
can benefit from ability to return some of them back
into the ring.

Add an API for that - assuming there's space. If there's no space
naturally can't do this and have to drop entries, but this implies ring
is full so we'd likely drop some anyway.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6b2e0dd88569..796b90f6d4e9 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -403,6 +403,61 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 	return 0;
 }
 
+/*
+ * Return entries into ring. Destroy entries that don't fit.
+ *
+ * Note: this is expected to be a rare slow path operation.
+ *
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
+static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
+				      void (*destroy)(void *))
+{
+	unsigned long flags;
+	int head;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	spin_lock(&r->producer_lock);
+
+	if (!r->size)
+		goto done;
+
+	/*
+	 * Clean out buffered entries (for simplicity). This way following code
+	 * can test entries for NULL and if not assume they are valid.
+	 */
+	head = r->consumer_head - 1;
+	while (likely(head >= r->consumer_tail))
+		r->queue[head--] = NULL;
+	r->consumer_tail = r->consumer_head;
+
+	/*
+	 * Go over entries in batch, start moving head back and copy entries.
+	 * Stop when we run into previously unconsumed entries.
+	 */
+	while (n) {
+		head = r->consumer_head - 1;
+		if (head < 0)
+			head = r->size - 1;
+		if (r->queue[head]) {
+			/* This batch entry will have to be destroyed. */
+			goto done;
+		}
+		r->queue[head] = batch[--n];
+		r->consumer_tail = r->consumer_head = head;
+	}
+
+done:
+	/* Destroy all entries left in the batch. */
+	while (n)
+		destroy(batch[--n]);
+	spin_unlock(&r->producer_lock);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+}
+
 static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
 					   int size, gfp_t gfp,
 					   void (*destroy)(void *))
-- 
cgit v1.2.3


From 3acb696015a222f4b25c1b5dce4e36b2d4980da6 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 17 May 2017 12:14:38 +0800
Subject: skb_array: introduce skb_array_unconsume

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skb_array.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index f4dfade428f0..79850b638bf2 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -156,6 +156,12 @@ static void __skb_array_destroy_skb(void *ptr)
 	kfree_skb(ptr);
 }
 
+static inline void skb_array_unconsume(struct skb_array *a,
+				       struct sk_buff **skbs, int n)
+{
+	ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb);
+}
+
 static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
 {
 	return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
-- 
cgit v1.2.3


From 728fc8d5532b956f9c4b48dff0577fb722251343 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 17 May 2017 12:14:39 +0800
Subject: ptr_ring: introduce batch dequeuing

This patch introduce a batched version of consuming, consumer can
dequeue more than one pointers from the ring at a time. We don't care
about the reorder of reading here so no need for compiler barrier.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 796b90f6d4e9..d8c97ec8a8e6 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -278,6 +278,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
 	return ptr;
 }
 
+static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
+					     void **array, int n)
+{
+	void *ptr;
+	int i;
+
+	for (i = 0; i < n; i++) {
+		ptr = __ptr_ring_consume(r);
+		if (!ptr)
+			break;
+		array[i] = ptr;
+	}
+
+	return i;
+}
+
 /*
  * Note: resize (below) nests producer lock within consumer lock, so if you
  * call this in interrupt or BH context, you must disable interrupts/BH when
@@ -328,6 +344,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
 	return ptr;
 }
 
+static inline int ptr_ring_consume_batched(struct ptr_ring *r,
+					   void **array, int n)
+{
+	int ret;
+
+	spin_lock(&r->consumer_lock);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock(&r->consumer_lock);
+
+	return ret;
+}
+
+static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
+					       void **array, int n)
+{
+	int ret;
+
+	spin_lock_irq(&r->consumer_lock);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock_irq(&r->consumer_lock);
+
+	return ret;
+}
+
+static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
+					       void **array, int n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+	return ret;
+}
+
+static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
+					      void **array, int n)
+{
+	int ret;
+
+	spin_lock_bh(&r->consumer_lock);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock_bh(&r->consumer_lock);
+
+	return ret;
+}
+
 /* Cast to structure type and call a function without discarding from FIFO.
  * Function must return a value.
  * Callers must take consumer_lock.
-- 
cgit v1.2.3


From 3528c1a52e7af001e0e387fcb6bac2bdb3775d3e Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 17 May 2017 12:14:40 +0800
Subject: skb_array: introduce batch dequeuing

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skb_array.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index 79850b638bf2..35226cd4efb0 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -97,21 +97,46 @@ static inline struct sk_buff *skb_array_consume(struct skb_array *a)
 	return ptr_ring_consume(&a->ring);
 }
 
+static inline int skb_array_consume_batched(struct skb_array *a,
+					    struct sk_buff **array, int n)
+{
+	return ptr_ring_consume_batched(&a->ring, (void **)array, n);
+}
+
 static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a)
 {
 	return ptr_ring_consume_irq(&a->ring);
 }
 
+static inline int skb_array_consume_batched_irq(struct skb_array *a,
+						struct sk_buff **array, int n)
+{
+	return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n);
+}
+
 static inline struct sk_buff *skb_array_consume_any(struct skb_array *a)
 {
 	return ptr_ring_consume_any(&a->ring);
 }
 
+static inline int skb_array_consume_batched_any(struct skb_array *a,
+						struct sk_buff **array, int n)
+{
+	return ptr_ring_consume_batched_any(&a->ring, (void **)array, n);
+}
+
+
 static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a)
 {
 	return ptr_ring_consume_bh(&a->ring);
 }
 
+static inline int skb_array_consume_batched_bh(struct skb_array *a,
+					       struct sk_buff **array, int n)
+{
+	return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n);
+}
+
 static inline int __skb_array_len_with_tag(struct sk_buff *skb)
 {
 	if (likely(skb)) {
-- 
cgit v1.2.3


From 83339c6b159ea6429a1db40b0d9d1083ab574733 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 17 May 2017 12:14:41 +0800
Subject: tun: export skb_array

This patch exports skb_array through tun_get_skb_array(). Caller can
then manipulate skb array directly.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 13 +++++++++++++
 include/linux/if_tun.h |  5 +++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bbd707b9ef7a..3cbfc5c707e3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2626,6 +2626,19 @@ struct socket *tun_get_socket(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
 
+struct skb_array *tun_get_skb_array(struct file *file)
+{
+	struct tun_file *tfile;
+
+	if (file->f_op != &tun_fops)
+		return ERR_PTR(-EINVAL);
+	tfile = file->private_data;
+	if (!tfile)
+		return ERR_PTR(-EBADFD);
+	return &tfile->tx_array;
+}
+EXPORT_SYMBOL_GPL(tun_get_skb_array);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index ed6da2e6df90..bf9bdf42d577 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -19,6 +19,7 @@
 
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
+struct skb_array *tun_get_skb_array(struct file *file);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -28,5 +29,9 @@ static inline struct socket *tun_get_socket(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline struct skb_array *tun_get_skb_array(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_TUN */
 #endif /* __IF_TUN_H */
-- 
cgit v1.2.3


From 49f96fd0cb3808e5ff96573f28b3dceb16eb6998 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 17 May 2017 12:14:42 +0800
Subject: tap: export skb_array

This patch exports skb_array through tap_get_skb_array(). Caller can
then manipulate skb array directly.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tap.c      | 13 +++++++++++++
 include/linux/if_tap.h |  5 +++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 4d4173d25dd0..abdaf867774d 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1193,6 +1193,19 @@ struct socket *tap_get_socket(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tap_get_socket);
 
+struct skb_array *tap_get_skb_array(struct file *file)
+{
+	struct tap_queue *q;
+
+	if (file->f_op != &tap_fops)
+		return ERR_PTR(-EINVAL);
+	q = file->private_data;
+	if (!q)
+		return ERR_PTR(-EBADFD);
+	return &q->skb_array;
+}
+EXPORT_SYMBOL_GPL(tap_get_skb_array);
+
 int tap_queue_resize(struct tap_dev *tap)
 {
 	struct net_device *dev = tap->dev;
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 3482c3c2037d..4837157da0dc 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -3,6 +3,7 @@
 
 #if IS_ENABLED(CONFIG_TAP)
 struct socket *tap_get_socket(struct file *);
+struct skb_array *tap_get_skb_array(struct file *file);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -12,6 +13,10 @@ static inline struct socket *tap_get_socket(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline struct skb_array *tap_get_skb_array(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_TAP */
 
 #include <net/sock.h>
-- 
cgit v1.2.3


From ea5244e2af3b4813bf3d90ba6a6481d1a3c33d15 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Tue, 2 May 2017 17:15:42 +0930
Subject: serial: 8250: Add flag so drivers can avoid THRE probe

The probing of THRE irq behaviour assumes the other end will be reading
bytes out of the buffer in order to probe the port at driver init. In
some cases the other end cannot be relied upon to read these bytes, so
provide a flag for them to skip this step.

Bit 19 was chosen as the flags are a int and the top bits are taken.

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 2 +-
 include/linux/serial_core.h         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 09a65a3ec7f7..d5b6cee87801 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2229,7 +2229,7 @@ int serial8250_do_startup(struct uart_port *port)
 		}
 	}
 
-	if (port->irq) {
+	if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) {
 		unsigned char iir1;
 		/*
 		 * Test for UARTs that do not reassert THRE when the
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 64d892f1e5cd..1775500294bb 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -195,6 +195,7 @@ struct uart_port {
 #define UPF_NO_TXEN_TEST	((__force upf_t) (1 << 15))
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ )
 
+#define UPF_NO_THRE_TEST	((__force upf_t) (1 << 19))
 /* Port has hardware-assisted h/w flow control */
 #define UPF_AUTO_CTS		((__force upf_t) (1 << 20))
 #define UPF_AUTO_RTS		((__force upf_t) (1 << 21))
-- 
cgit v1.2.3


From 0cd2950357e31a96be03b531b4b11fe1df812c9f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 17 May 2017 13:30:44 +0300
Subject: net: make struct net_device::tx_queue_len unsigned int

4 billion packet queue is something unthinkable so use 32-bit value
for now.

Space savings on x86_64:

	add/remove: 0/0 grow/shrink: 3/70 up/down: 16/-131 (-115)
	function                                     old     new   delta
	change_tx_queue_len                           94     108     +14
	qdisc_create                                1176    1177      +1
	alloc_netdev_mqs                            1124    1125      +1
	xenvif_alloc                                 533     532      -1
	x25_asy_setup                                167     166      -1
			...
	tun_queue_resize                             945     940      -5
	pfifo_fast_enqueue                           167     162      -5
	qfq_init_qdisc                               168     158     -10
	tap_queue_resize                             810     799     -11
	transmit                                     719     698     -21

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_raw_eth.c | 3 ++-
 include/linux/netdevice.h      | 2 +-
 net/core/net-sysfs.c           | 8 ++++++--
 net/core/rtnetlink.c           | 4 ++--
 4 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c
index 2f11836078ab..8bd3ed905813 100644
--- a/drivers/net/wan/hdlc_raw_eth.c
+++ b/drivers/net/wan/hdlc_raw_eth.c
@@ -57,7 +57,8 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 	const size_t size = sizeof(raw_hdlc_proto);
 	raw_hdlc_proto new_settings;
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	int result, old_qlen;
+	unsigned int old_qlen;
+	int result;
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27decf4..0150b2dd3031 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1824,7 +1824,7 @@ struct net_device {
 #ifdef CONFIG_NET_SCHED
 	DECLARE_HASHTABLE	(qdisc_hash, 4);
 #endif
-	unsigned long		tx_queue_len;
+	unsigned int		tx_queue_len;
 	spinlock_t		tx_global_lock;
 	int			watchdog_timeo;
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 65ea0ff4017c..58e6cc70500d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -323,7 +323,11 @@ NETDEVICE_SHOW_RW(flags, fmt_hex);
 
 static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
 {
-	int res, orig_len = dev->tx_queue_len;
+	unsigned int orig_len = dev->tx_queue_len;
+	int res;
+
+	if (new_len != (unsigned int)new_len)
+		return -ERANGE;
 
 	if (new_len != orig_len) {
 		dev->tx_queue_len = new_len;
@@ -349,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev,
 
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
-NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
 
 static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
 {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7f82c3450b1..f759f22af0af 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2046,8 +2046,8 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_TXQLEN]) {
-		unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
-		unsigned long orig_len = dev->tx_queue_len;
+		unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
+		unsigned int orig_len = dev->tx_queue_len;
 
 		if (dev->tx_queue_len ^ value) {
 			dev->tx_queue_len = value;
-- 
cgit v1.2.3


From 6bdc00d01e202ae11fa1cae0dacbef895434483d Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 28 Apr 2017 13:47:21 +0200
Subject: serdev: Restore serdev_device_write_buf for atomic context

Starting with commit 6fe729c4bdae ("serdev: Add serdev_device_write
subroutine") the function serdev_device_write_buf cannot be used in
atomic context anymore (mutex_lock is sleeping). So restore the old
behavior.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: 6fe729c4bdae ("serdev: Add serdev_device_write subroutine")
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serdev/core.c | 12 ++++++++++++
 include/linux/serdev.h    | 14 +++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 433de5ea9b02..f71b47334149 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -122,6 +122,18 @@ void serdev_device_write_wakeup(struct serdev_device *serdev)
 }
 EXPORT_SYMBOL_GPL(serdev_device_write_wakeup);
 
+int serdev_device_write_buf(struct serdev_device *serdev,
+			    const unsigned char *buf, size_t count)
+{
+	struct serdev_controller *ctrl = serdev->ctrl;
+
+	if (!ctrl || !ctrl->ops->write_buf)
+		return -EINVAL;
+
+	return ctrl->ops->write_buf(ctrl, buf, count);
+}
+EXPORT_SYMBOL_GPL(serdev_device_write_buf);
+
 int serdev_device_write(struct serdev_device *serdev,
 			const unsigned char *buf, size_t count,
 			unsigned long timeout)
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index cda76c6506ca..e2a225bf716d 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -195,6 +195,7 @@ int serdev_device_open(struct serdev_device *);
 void serdev_device_close(struct serdev_device *);
 unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
 void serdev_device_set_flow_control(struct serdev_device *, bool);
+int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
 void serdev_device_wait_until_sent(struct serdev_device *, long);
 int serdev_device_get_tiocm(struct serdev_device *);
 int serdev_device_set_tiocm(struct serdev_device *, int, int);
@@ -236,6 +237,12 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev
 	return 0;
 }
 static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
+static inline int serdev_device_write_buf(struct serdev_device *serdev,
+					  const unsigned char *buf,
+					  size_t count)
+{
+	return -ENODEV;
+}
 static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {}
 static inline int serdev_device_get_tiocm(struct serdev_device *serdev)
 {
@@ -312,11 +319,4 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port,
 static inline void serdev_tty_port_unregister(struct tty_port *port) {}
 #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
 
-static inline int serdev_device_write_buf(struct serdev_device *serdev,
-					  const unsigned char *data,
-					  size_t count)
-{
-	return serdev_device_write(serdev, data, count, 0);
-}
-
 #endif /*_LINUX_SERDEV_H */
-- 
cgit v1.2.3


From f4660cc994e12bae60d6f49895636fba662ce0a1 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 10 May 2017 10:19:18 -0400
Subject: vhost/vsock: use static minor number

Vhost-vsock is a software device so there is no probe call that causes
the driver to register its misc char device node.  This creates a
chicken and egg problem: userspace applications must open
/dev/vhost-vsock to use the driver but the file doesn't exist until the
kernel module has been loaded.

Use the devname modalias mechanism so that /dev/vhost-vsock is created
at boot.  The vhost_vsock kernel module is automatically loaded when the
first application opens /dev/host-vsock.

Note that the "reserved for local use" range in
Documentation/admin-guide/devices.txt is incorrect.  The userio driver
already occupies part of that range.  I've updated the documentation
accordingly.

Cc: device@lanana.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/devices.txt | 4 +++-
 drivers/vhost/vsock.c                 | 4 +++-
 include/linux/miscdevice.h            | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
index c9cea2e39c21..6b71852dadc2 100644
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@@ -369,8 +369,10 @@
 		237 = /dev/loop-control Loopback control device
 		238 = /dev/vhost-net	Host kernel accelerator for virtio net
 		239 = /dev/uhid		User-space I/O driver support for HID subsystem
+		240 = /dev/userio	Serio driver testing device
+		241 = /dev/vhost-vsock	Host kernel driver for virtio vsock
 
-		240-254			Reserved for local use
+		242-254			Reserved for local use
 		255			Reserved for MISC_DYNAMIC_MINOR
 
   11 char	Raw keyboard device	(Linux/SPARC only)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3acef3c5d8ed..3f63e03de8e8 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -706,7 +706,7 @@ static const struct file_operations vhost_vsock_fops = {
 };
 
 static struct miscdevice vhost_vsock_misc = {
-	.minor = MISC_DYNAMIC_MINOR,
+	.minor = VHOST_VSOCK_MINOR,
 	.name = "vhost-vsock",
 	.fops = &vhost_vsock_fops,
 };
@@ -778,3 +778,5 @@ module_exit(vhost_vsock_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Asias He");
 MODULE_DESCRIPTION("vhost transport for vsock ");
+MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
+MODULE_ALIAS("devname:vhost-vsock");
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 762b5fec3383..58751eae5f77 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -54,6 +54,7 @@
 #define VHOST_NET_MINOR		238
 #define UHID_MINOR		239
 #define USERIO_MINOR		240
+#define VHOST_VSOCK_MINOR	241
 #define MISC_DYNAMIC_MINOR	255
 
 struct device;
-- 
cgit v1.2.3


From 34cfb106d1f8a746fcccbe61c852f705dcdceaa2 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Thu, 18 May 2017 10:07:06 -0500
Subject: misc: sram-exec: Use aligned fncpy instead of memcpy

Currently the sram-exec functionality, which allows allocation of
executable memory and provides an API to move code to it, is only
selected in configs for the ARM architecture. Based on commit
5756e9dd0de6 ("ARM: 6640/1: Thumb-2: Symbol manipulation macros for
function body copying") simply copying a C function pointer address
using memcpy without consideration of alignment and Thumb is unsafe on
ARM platforms.

The aforementioned patch introduces the fncpy macro which is a safe way
to copy executable code on ARM platforms, so let's make use of that here
rather than the unsafe plain memcpy that was previously used by
sram_exec_copy. Now sram_exec_copy will move the code to "dst" and
return an address that is guaranteed to be safely callable.

In the future, architectures hoping to make use of the sram-exec
functionality must define an fncpy macro just as ARM has done to
guarantee or check for safe copying to executable memory before allowing
the arch to select CONFIG_SRAM_EXEC.

Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/sram-exec.c | 27 ++++++++++++++++++++-------
 include/linux/sram.h     |  8 ++++----
 2 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index 3d528a13b8fc..426ad912b441 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/sram.h>
 
+#include <asm/fncpy.h>
 #include <asm/set_memory.h>
 
 #include "sram.h"
@@ -58,20 +59,32 @@ int sram_add_protect_exec(struct sram_partition *part)
  * @src: Source address for the data to copy
  * @size: Size of copy to perform, which starting from dst, must reside in pool
  *
+ * Return: Address for copied data that can safely be called through function
+ *	   pointer, or NULL if problem.
+ *
  * This helper function allows sram driver to act as central control location
  * of 'protect-exec' pools which are normal sram pools but are always set
  * read-only and executable except when copying data to them, at which point
  * they are set to read-write non-executable, to make sure no memory is
  * writeable and executable at the same time. This region must be page-aligned
  * and is checked during probe, otherwise page attribute manipulation would
- * not be possible.
+ * not be possible. Care must be taken to only call the returned address as
+ * dst address is not guaranteed to be safely callable.
+ *
+ * NOTE: This function uses the fncpy macro to move code to the executable
+ * region. Some architectures have strict requirements for relocating
+ * executable code, so fncpy is a macro that must be defined by any arch
+ * making use of this functionality that guarantees a safe copy of exec
+ * data and returns a safe address that can be called as a C function
+ * pointer.
  */
-int sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
-		   size_t size)
+void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
+		     size_t size)
 {
 	struct sram_partition *part = NULL, *p;
 	unsigned long base;
 	int pages;
+	void *dst_cpy;
 
 	mutex_lock(&exec_pool_list_mutex);
 	list_for_each_entry(p, &exec_pool_list, list) {
@@ -81,10 +94,10 @@ int sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
 	mutex_unlock(&exec_pool_list_mutex);
 
 	if (!part)
-		return -EINVAL;
+		return NULL;
 
 	if (!addr_in_gen_pool(pool, (unsigned long)dst, size))
-		return -EINVAL;
+		return NULL;
 
 	base = (unsigned long)part->base;
 	pages = PAGE_ALIGN(size) / PAGE_SIZE;
@@ -94,13 +107,13 @@ int sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
 	set_memory_nx((unsigned long)base, pages);
 	set_memory_rw((unsigned long)base, pages);
 
-	memcpy(dst, src, size);
+	dst_cpy = fncpy(dst, src, size);
 
 	set_memory_ro((unsigned long)base, pages);
 	set_memory_x((unsigned long)base, pages);
 
 	mutex_unlock(&part->lock);
 
-	return 0;
+	return dst_cpy;
 }
 EXPORT_SYMBOL_GPL(sram_exec_copy);
diff --git a/include/linux/sram.h b/include/linux/sram.h
index c97dcbe8ce25..4fb405fb0480 100644
--- a/include/linux/sram.h
+++ b/include/linux/sram.h
@@ -16,12 +16,12 @@
 struct gen_pool;
 
 #ifdef CONFIG_SRAM_EXEC
-int sram_exec_copy(struct gen_pool *pool, void *dst, void *src, size_t size);
+void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, size_t size);
 #else
-static inline int sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
-				 size_t size)
+static inline void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
+				   size_t size)
 {
-	return -ENODEV;
+	return NULL;
 }
 #endif /* CONFIG_SRAM_EXEC */
 #endif /* __LINUX_SRAM_H__ */
-- 
cgit v1.2.3


From 8cde11b2baa1d02eb2eb955dfd47d9f2a12f12cf Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 18 May 2017 17:33:00 +0200
Subject: tty/serdev: add serdev registration interface

Add a new interface for registering a serdev controller and clients, and
a helper function to deregister serdev devices (or a tty device) that
were previously registered using the new interface.

Once every driver currently using the tty_port_register_device() helpers
have been vetted and converted to use the new serdev registration
interface (at least for deregistration), we can move serdev registration
to the current helpers and get rid of the serdev-specific functions.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serdev/serdev-ttyport.c |  6 ++-
 drivers/tty/tty_port.c              | 75 +++++++++++++++++++++++++++++++++++++
 include/linux/serdev.h              |  7 +++-
 include/linux/tty.h                 |  9 +++++
 4 files changed, 93 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index 013efffd2e82..d0a021c93986 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -250,16 +250,18 @@ err_reset_data:
 	return ERR_PTR(ret);
 }
 
-void serdev_tty_port_unregister(struct tty_port *port)
+int serdev_tty_port_unregister(struct tty_port *port)
 {
 	struct serdev_controller *ctrl = port->client_data;
 	struct serport *serport = serdev_controller_get_drvdata(ctrl);
 
 	if (!serport)
-		return;
+		return -ENODEV;
 
 	serdev_controller_remove(ctrl);
 	port->client_ops = NULL;
 	port->client_data = NULL;
 	serdev_controller_put(ctrl);
+
+	return 0;
 }
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 88dac3b79369..4fb3165384c4 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -16,6 +16,7 @@
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/serdev.h>
 
 static int tty_port_default_receive_buf(struct tty_port *port,
 					const unsigned char *p,
@@ -136,6 +137,80 @@ struct device *tty_port_register_device_attr(struct tty_port *port,
 }
 EXPORT_SYMBOL_GPL(tty_port_register_device_attr);
 
+/**
+ * tty_port_register_device_attr_serdev - register tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ * @device: parent if exists, otherwise NULL
+ * @drvdata: driver data for the device
+ * @attr_grp: attribute group for the device
+ *
+ * Register a serdev or tty device depending on if the parent device has any
+ * defined serdev clients or not.
+ */
+struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device, void *drvdata,
+		const struct attribute_group **attr_grp)
+{
+	struct device *dev;
+
+	tty_port_link_device(port, driver, index);
+
+	dev = serdev_tty_port_register(port, device, driver, index);
+	if (PTR_ERR(dev) != -ENODEV) {
+		/* Skip creating cdev if we registered a serdev device */
+		return dev;
+	}
+
+	return tty_register_device_attr(driver, index, device, drvdata,
+			attr_grp);
+}
+EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev);
+
+/**
+ * tty_port_register_device_serdev - register tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ * @device: parent if exists, otherwise NULL
+ *
+ * Register a serdev or tty device depending on if the parent device has any
+ * defined serdev clients or not.
+ */
+struct device *tty_port_register_device_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device)
+{
+	return tty_port_register_device_attr_serdev(port, driver, index,
+			device, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(tty_port_register_device_serdev);
+
+/**
+ * tty_port_unregister_device - deregister a tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ *
+ * If a tty or serdev device is registered with a call to
+ * tty_port_register_device_serdev() then this function must be called when
+ * the device is gone.
+ */
+void tty_port_unregister_device(struct tty_port *port,
+		struct tty_driver *driver, unsigned index)
+{
+	int ret;
+
+	ret = serdev_tty_port_unregister(port);
+	if (ret == 0)
+		return;
+
+	tty_unregister_device(driver, index);
+}
+EXPORT_SYMBOL_GPL(tty_port_unregister_device);
+
 int tty_port_alloc_xmit_buf(struct tty_port *port)
 {
 	/* We may sleep in get_zeroed_page() */
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index e2a225bf716d..e69402d4a8ae 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -308,7 +308,7 @@ struct tty_driver;
 struct device *serdev_tty_port_register(struct tty_port *port,
 					struct device *parent,
 					struct tty_driver *drv, int idx);
-void serdev_tty_port_unregister(struct tty_port *port);
+int serdev_tty_port_unregister(struct tty_port *port);
 #else
 static inline struct device *serdev_tty_port_register(struct tty_port *port,
 					   struct device *parent,
@@ -316,7 +316,10 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port,
 {
 	return ERR_PTR(-ENODEV);
 }
-static inline void serdev_tty_port_unregister(struct tty_port *port) {}
+static inline int serdev_tty_port_unregister(struct tty_port *port)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
 
 #endif /*_LINUX_SERDEV_H */
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d07cd2105a6c..eccb4ec30a8a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -558,6 +558,15 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port,
 		struct tty_driver *driver, unsigned index,
 		struct device *device, void *drvdata,
 		const struct attribute_group **attr_grp);
+extern struct device *tty_port_register_device_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device);
+extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device, void *drvdata,
+		const struct attribute_group **attr_grp);
+extern void tty_port_unregister_device(struct tty_port *port,
+		struct tty_driver *driver, unsigned index);
 extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern void tty_port_destroy(struct tty_port *port);
-- 
cgit v1.2.3


From af777cd1b83e95138e7285fde87c795ef0ae7c4d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 13 May 2017 04:51:40 -0700
Subject: doc: ReSTify credentials.txt

This updates the credentials API documentation to ReST markup and moves
it under the security subsection of kernel API documentation.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/security/00-INDEX        |   2 -
 Documentation/security/credentials.rst | 554 +++++++++++++++++++++++++++++++
 Documentation/security/credentials.txt | 581 ---------------------------------
 Documentation/security/index.rst       |   1 +
 include/linux/cred.h                   |   2 +-
 kernel/cred.c                          |   2 +-
 6 files changed, 557 insertions(+), 585 deletions(-)
 create mode 100644 Documentation/security/credentials.rst
 delete mode 100644 Documentation/security/credentials.txt

(limited to 'include/linux')

diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX
index 414235c1fcfc..c4df62a9ae5b 100644
--- a/Documentation/security/00-INDEX
+++ b/Documentation/security/00-INDEX
@@ -10,8 +10,6 @@ Yama.txt
 	- documentation on the Yama Linux Security Module.
 apparmor.txt
 	- documentation on the AppArmor security extension.
-credentials.txt
-	- documentation about credentials in Linux.
 keys-ecryptfs.txt
 	- description of the encryption keys for the ecryptfs filesystem.
 keys-request-key.txt
diff --git a/Documentation/security/credentials.rst b/Documentation/security/credentials.rst
new file mode 100644
index 000000000000..038a7e19eff9
--- /dev/null
+++ b/Documentation/security/credentials.rst
@@ -0,0 +1,554 @@
+====================
+Credentials in Linux
+====================
+
+By: David Howells <dhowells@redhat.com>
+
+.. contents:: :local:
+
+Overview
+========
+
+There are several parts to the security check performed by Linux when one
+object acts upon another:
+
+ 1. Objects.
+
+     Objects are things in the system that may be acted upon directly by
+     userspace programs.  Linux has a variety of actionable objects, including:
+
+	- Tasks
+	- Files/inodes
+	- Sockets
+	- Message queues
+	- Shared memory segments
+	- Semaphores
+	- Keys
+
+     As a part of the description of all these objects there is a set of
+     credentials.  What's in the set depends on the type of object.
+
+ 2. Object ownership.
+
+     Amongst the credentials of most objects, there will be a subset that
+     indicates the ownership of that object.  This is used for resource
+     accounting and limitation (disk quotas and task rlimits for example).
+
+     In a standard UNIX filesystem, for instance, this will be defined by the
+     UID marked on the inode.
+
+ 3. The objective context.
+
+     Also amongst the credentials of those objects, there will be a subset that
+     indicates the 'objective context' of that object.  This may or may not be
+     the same set as in (2) - in standard UNIX files, for instance, this is the
+     defined by the UID and the GID marked on the inode.
+
+     The objective context is used as part of the security calculation that is
+     carried out when an object is acted upon.
+
+ 4. Subjects.
+
+     A subject is an object that is acting upon another object.
+
+     Most of the objects in the system are inactive: they don't act on other
+     objects within the system.  Processes/tasks are the obvious exception:
+     they do stuff; they access and manipulate things.
+
+     Objects other than tasks may under some circumstances also be subjects.
+     For instance an open file may send SIGIO to a task using the UID and EUID
+     given to it by a task that called ``fcntl(F_SETOWN)`` upon it.  In this case,
+     the file struct will have a subjective context too.
+
+ 5. The subjective context.
+
+     A subject has an additional interpretation of its credentials.  A subset
+     of its credentials forms the 'subjective context'.  The subjective context
+     is used as part of the security calculation that is carried out when a
+     subject acts.
+
+     A Linux task, for example, has the FSUID, FSGID and the supplementary
+     group list for when it is acting upon a file - which are quite separate
+     from the real UID and GID that normally form the objective context of the
+     task.
+
+ 6. Actions.
+
+     Linux has a number of actions available that a subject may perform upon an
+     object.  The set of actions available depends on the nature of the subject
+     and the object.
+
+     Actions include reading, writing, creating and deleting files; forking or
+     signalling and tracing tasks.
+
+ 7. Rules, access control lists and security calculations.
+
+     When a subject acts upon an object, a security calculation is made.  This
+     involves taking the subjective context, the objective context and the
+     action, and searching one or more sets of rules to see whether the subject
+     is granted or denied permission to act in the desired manner on the
+     object, given those contexts.
+
+     There are two main sources of rules:
+
+     a. Discretionary access control (DAC):
+
+	 Sometimes the object will include sets of rules as part of its
+	 description.  This is an 'Access Control List' or 'ACL'.  A Linux
+	 file may supply more than one ACL.
+
+	 A traditional UNIX file, for example, includes a permissions mask that
+	 is an abbreviated ACL with three fixed classes of subject ('user',
+	 'group' and 'other'), each of which may be granted certain privileges
+	 ('read', 'write' and 'execute' - whatever those map to for the object
+	 in question).  UNIX file permissions do not allow the arbitrary
+	 specification of subjects, however, and so are of limited use.
+
+	 A Linux file might also sport a POSIX ACL.  This is a list of rules
+	 that grants various permissions to arbitrary subjects.
+
+     b. Mandatory access control (MAC):
+
+	 The system as a whole may have one or more sets of rules that get
+	 applied to all subjects and objects, regardless of their source.
+	 SELinux and Smack are examples of this.
+
+	 In the case of SELinux and Smack, each object is given a label as part
+	 of its credentials.  When an action is requested, they take the
+	 subject label, the object label and the action and look for a rule
+	 that says that this action is either granted or denied.
+
+
+Types of Credentials
+====================
+
+The Linux kernel supports the following types of credentials:
+
+ 1. Traditional UNIX credentials.
+
+	- Real User ID
+	- Real Group ID
+
+     The UID and GID are carried by most, if not all, Linux objects, even if in
+     some cases it has to be invented (FAT or CIFS files for example, which are
+     derived from Windows).  These (mostly) define the objective context of
+     that object, with tasks being slightly different in some cases.
+
+	- Effective, Saved and FS User ID
+	- Effective, Saved and FS Group ID
+	- Supplementary groups
+
+     These are additional credentials used by tasks only.  Usually, an
+     EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID
+     will be used as the objective.  For tasks, it should be noted that this is
+     not always true.
+
+ 2. Capabilities.
+
+	- Set of permitted capabilities
+	- Set of inheritable capabilities
+	- Set of effective capabilities
+	- Capability bounding set
+
+     These are only carried by tasks.  They indicate superior capabilities
+     granted piecemeal to a task that an ordinary task wouldn't otherwise have.
+     These are manipulated implicitly by changes to the traditional UNIX
+     credentials, but can also be manipulated directly by the ``capset()``
+     system call.
+
+     The permitted capabilities are those caps that the process might grant
+     itself to its effective or permitted sets through ``capset()``.  This
+     inheritable set might also be so constrained.
+
+     The effective capabilities are the ones that a task is actually allowed to
+     make use of itself.
+
+     The inheritable capabilities are the ones that may get passed across
+     ``execve()``.
+
+     The bounding set limits the capabilities that may be inherited across
+     ``execve()``, especially when a binary is executed that will execute as
+     UID 0.
+
+ 3. Secure management flags (securebits).
+
+     These are only carried by tasks.  These govern the way the above
+     credentials are manipulated and inherited over certain operations such as
+     execve().  They aren't used directly as objective or subjective
+     credentials.
+
+ 4. Keys and keyrings.
+
+     These are only carried by tasks.  They carry and cache security tokens
+     that don't fit into the other standard UNIX credentials.  They are for
+     making such things as network filesystem keys available to the file
+     accesses performed by processes, without the necessity of ordinary
+     programs having to know about security details involved.
+
+     Keyrings are a special type of key.  They carry sets of other keys and can
+     be searched for the desired key.  Each process may subscribe to a number
+     of keyrings:
+
+	Per-thread keying
+	Per-process keyring
+	Per-session keyring
+
+     When a process accesses a key, if not already present, it will normally be
+     cached on one of these keyrings for future accesses to find.
+
+     For more information on using keys, see Documentation/security/keys.txt.
+
+ 5. LSM
+
+     The Linux Security Module allows extra controls to be placed over the
+     operations that a task may do.  Currently Linux supports several LSM
+     options.
+
+     Some work by labelling the objects in a system and then applying sets of
+     rules (policies) that say what operations a task with one label may do to
+     an object with another label.
+
+ 6. AF_KEY
+
+     This is a socket-based approach to credential management for networking
+     stacks [RFC 2367].  It isn't discussed by this document as it doesn't
+     interact directly with task and file credentials; rather it keeps system
+     level credentials.
+
+
+When a file is opened, part of the opening task's subjective context is
+recorded in the file struct created.  This allows operations using that file
+struct to use those credentials instead of the subjective context of the task
+that issued the operation.  An example of this would be a file opened on a
+network filesystem where the credentials of the opened file should be presented
+to the server, regardless of who is actually doing a read or a write upon it.
+
+
+File Markings
+=============
+
+Files on disk or obtained over the network may have annotations that form the
+objective security context of that file.  Depending on the type of filesystem,
+this may include one or more of the following:
+
+ * UNIX UID, GID, mode;
+ * Windows user ID;
+ * Access control list;
+ * LSM security label;
+ * UNIX exec privilege escalation bits (SUID/SGID);
+ * File capabilities exec privilege escalation bits.
+
+These are compared to the task's subjective security context, and certain
+operations allowed or disallowed as a result.  In the case of execve(), the
+privilege escalation bits come into play, and may allow the resulting process
+extra privileges, based on the annotations on the executable file.
+
+
+Task Credentials
+================
+
+In Linux, all of a task's credentials are held in (uid, gid) or through
+(groups, keys, LSM security) a refcounted structure of type 'struct cred'.
+Each task points to its credentials by a pointer called 'cred' in its
+task_struct.
+
+Once a set of credentials has been prepared and committed, it may not be
+changed, barring the following exceptions:
+
+ 1. its reference count may be changed;
+
+ 2. the reference count on the group_info struct it points to may be changed;
+
+ 3. the reference count on the security data it points to may be changed;
+
+ 4. the reference count on any keyrings it points to may be changed;
+
+ 5. any keyrings it points to may be revoked, expired or have their security
+    attributes changed; and
+
+ 6. the contents of any keyrings to which it points may be changed (the whole
+    point of keyrings being a shared set of credentials, modifiable by anyone
+    with appropriate access).
+
+To alter anything in the cred struct, the copy-and-replace principle must be
+adhered to.  First take a copy, then alter the copy and then use RCU to change
+the task pointer to make it point to the new copy.  There are wrappers to aid
+with this (see below).
+
+A task may only alter its _own_ credentials; it is no longer permitted for a
+task to alter another's credentials.  This means the ``capset()`` system call
+is no longer permitted to take any PID other than the one of the current
+process. Also ``keyctl_instantiate()`` and ``keyctl_negate()`` functions no
+longer permit attachment to process-specific keyrings in the requesting
+process as the instantiating process may need to create them.
+
+
+Immutable Credentials
+---------------------
+
+Once a set of credentials has been made public (by calling ``commit_creds()``
+for example), it must be considered immutable, barring two exceptions:
+
+ 1. The reference count may be altered.
+
+ 2. Whilst the keyring subscriptions of a set of credentials may not be
+    changed, the keyrings subscribed to may have their contents altered.
+
+To catch accidental credential alteration at compile time, struct task_struct
+has _const_ pointers to its credential sets, as does struct file.  Furthermore,
+certain functions such as ``get_cred()`` and ``put_cred()`` operate on const
+pointers, thus rendering casts unnecessary, but require to temporarily ditch
+the const qualification to be able to alter the reference count.
+
+
+Accessing Task Credentials
+--------------------------
+
+A task being able to alter only its own credentials permits the current process
+to read or replace its own credentials without the need for any form of locking
+-- which simplifies things greatly.  It can just call::
+
+	const struct cred *current_cred()
+
+to get a pointer to its credentials structure, and it doesn't have to release
+it afterwards.
+
+There are convenience wrappers for retrieving specific aspects of a task's
+credentials (the value is simply returned in each case)::
+
+	uid_t current_uid(void)		Current's real UID
+	gid_t current_gid(void)		Current's real GID
+	uid_t current_euid(void)	Current's effective UID
+	gid_t current_egid(void)	Current's effective GID
+	uid_t current_fsuid(void)	Current's file access UID
+	gid_t current_fsgid(void)	Current's file access GID
+	kernel_cap_t current_cap(void)	Current's effective capabilities
+	void *current_security(void)	Current's LSM security pointer
+	struct user_struct *current_user(void)  Current's user account
+
+There are also convenience wrappers for retrieving specific associated pairs of
+a task's credentials::
+
+	void current_uid_gid(uid_t *, gid_t *);
+	void current_euid_egid(uid_t *, gid_t *);
+	void current_fsuid_fsgid(uid_t *, gid_t *);
+
+which return these pairs of values through their arguments after retrieving
+them from the current task's credentials.
+
+
+In addition, there is a function for obtaining a reference on the current
+process's current set of credentials::
+
+	const struct cred *get_current_cred(void);
+
+and functions for getting references to one of the credentials that don't
+actually live in struct cred::
+
+	struct user_struct *get_current_user(void);
+	struct group_info *get_current_groups(void);
+
+which get references to the current process's user accounting structure and
+supplementary groups list respectively.
+
+Once a reference has been obtained, it must be released with ``put_cred()``,
+``free_uid()`` or ``put_group_info()`` as appropriate.
+
+
+Accessing Another Task's Credentials
+------------------------------------
+
+Whilst a task may access its own credentials without the need for locking, the
+same is not true of a task wanting to access another task's credentials.  It
+must use the RCU read lock and ``rcu_dereference()``.
+
+The ``rcu_dereference()`` is wrapped by::
+
+	const struct cred *__task_cred(struct task_struct *task);
+
+This should be used inside the RCU read lock, as in the following example::
+
+	void foo(struct task_struct *t, struct foo_data *f)
+	{
+		const struct cred *tcred;
+		...
+		rcu_read_lock();
+		tcred = __task_cred(t);
+		f->uid = tcred->uid;
+		f->gid = tcred->gid;
+		f->groups = get_group_info(tcred->groups);
+		rcu_read_unlock();
+		...
+	}
+
+Should it be necessary to hold another task's credentials for a long period of
+time, and possibly to sleep whilst doing so, then the caller should get a
+reference on them using::
+
+	const struct cred *get_task_cred(struct task_struct *task);
+
+This does all the RCU magic inside of it.  The caller must call put_cred() on
+the credentials so obtained when they're finished with.
+
+.. note::
+   The result of ``__task_cred()`` should not be passed directly to
+   ``get_cred()`` as this may race with ``commit_cred()``.
+
+There are a couple of convenience functions to access bits of another task's
+credentials, hiding the RCU magic from the caller::
+
+	uid_t task_uid(task)		Task's real UID
+	uid_t task_euid(task)		Task's effective UID
+
+If the caller is holding the RCU read lock at the time anyway, then::
+
+	__task_cred(task)->uid
+	__task_cred(task)->euid
+
+should be used instead.  Similarly, if multiple aspects of a task's credentials
+need to be accessed, RCU read lock should be used, ``__task_cred()`` called,
+the result stored in a temporary pointer and then the credential aspects called
+from that before dropping the lock.  This prevents the potentially expensive
+RCU magic from being invoked multiple times.
+
+Should some other single aspect of another task's credentials need to be
+accessed, then this can be used::
+
+	task_cred_xxx(task, member)
+
+where 'member' is a non-pointer member of the cred struct.  For instance::
+
+	uid_t task_cred_xxx(task, suid);
+
+will retrieve 'struct cred::suid' from the task, doing the appropriate RCU
+magic.  This may not be used for pointer members as what they point to may
+disappear the moment the RCU read lock is dropped.
+
+
+Altering Credentials
+--------------------
+
+As previously mentioned, a task may only alter its own credentials, and may not
+alter those of another task.  This means that it doesn't need to use any
+locking to alter its own credentials.
+
+To alter the current process's credentials, a function should first prepare a
+new set of credentials by calling::
+
+	struct cred *prepare_creds(void);
+
+this locks current->cred_replace_mutex and then allocates and constructs a
+duplicate of the current process's credentials, returning with the mutex still
+held if successful.  It returns NULL if not successful (out of memory).
+
+The mutex prevents ``ptrace()`` from altering the ptrace state of a process
+whilst security checks on credentials construction and changing is taking place
+as the ptrace state may alter the outcome, particularly in the case of
+``execve()``.
+
+The new credentials set should be altered appropriately, and any security
+checks and hooks done.  Both the current and the proposed sets of credentials
+are available for this purpose as current_cred() will return the current set
+still at this point.
+
+
+When the credential set is ready, it should be committed to the current process
+by calling::
+
+	int commit_creds(struct cred *new);
+
+This will alter various aspects of the credentials and the process, giving the
+LSM a chance to do likewise, then it will use ``rcu_assign_pointer()`` to
+actually commit the new credentials to ``current->cred``, it will release
+``current->cred_replace_mutex`` to allow ``ptrace()`` to take place, and it
+will notify the scheduler and others of the changes.
+
+This function is guaranteed to return 0, so that it can be tail-called at the
+end of such functions as ``sys_setresuid()``.
+
+Note that this function consumes the caller's reference to the new credentials.
+The caller should _not_ call ``put_cred()`` on the new credentials afterwards.
+
+Furthermore, once this function has been called on a new set of credentials,
+those credentials may _not_ be changed further.
+
+
+Should the security checks fail or some other error occur after
+``prepare_creds()`` has been called, then the following function should be
+invoked::
+
+	void abort_creds(struct cred *new);
+
+This releases the lock on ``current->cred_replace_mutex`` that
+``prepare_creds()`` got and then releases the new credentials.
+
+
+A typical credentials alteration function would look something like this::
+
+	int alter_suid(uid_t suid)
+	{
+		struct cred *new;
+		int ret;
+
+		new = prepare_creds();
+		if (!new)
+			return -ENOMEM;
+
+		new->suid = suid;
+		ret = security_alter_suid(new);
+		if (ret < 0) {
+			abort_creds(new);
+			return ret;
+		}
+
+		return commit_creds(new);
+	}
+
+
+Managing Credentials
+--------------------
+
+There are some functions to help manage credentials:
+
+ - ``void put_cred(const struct cred *cred);``
+
+     This releases a reference to the given set of credentials.  If the
+     reference count reaches zero, the credentials will be scheduled for
+     destruction by the RCU system.
+
+ - ``const struct cred *get_cred(const struct cred *cred);``
+
+     This gets a reference on a live set of credentials, returning a pointer to
+     that set of credentials.
+
+ - ``struct cred *get_new_cred(struct cred *cred);``
+
+     This gets a reference on a set of credentials that is under construction
+     and is thus still mutable, returning a pointer to that set of credentials.
+
+
+Open File Credentials
+=====================
+
+When a new file is opened, a reference is obtained on the opening task's
+credentials and this is attached to the file struct as ``f_cred`` in place of
+``f_uid`` and ``f_gid``.  Code that used to access ``file->f_uid`` and
+``file->f_gid`` should now access ``file->f_cred->fsuid`` and
+``file->f_cred->fsgid``.
+
+It is safe to access ``f_cred`` without the use of RCU or locking because the
+pointer will not change over the lifetime of the file struct, and nor will the
+contents of the cred struct pointed to, barring the exceptions listed above
+(see the Task Credentials section).
+
+
+Overriding the VFS's Use of Credentials
+=======================================
+
+Under some circumstances it is desirable to override the credentials used by
+the VFS, and that can be done by calling into such as ``vfs_mkdir()`` with a
+different set of credentials.  This is done in the following places:
+
+ * ``sys_faccessat()``.
+ * ``do_coredump()``.
+ * nfs4recover.c.
diff --git a/Documentation/security/credentials.txt b/Documentation/security/credentials.txt
deleted file mode 100644
index 86257052e31a..000000000000
--- a/Documentation/security/credentials.txt
+++ /dev/null
@@ -1,581 +0,0 @@
-			     ====================
-			     CREDENTIALS IN LINUX
-			     ====================
-
-By: David Howells <dhowells@redhat.com>
-
-Contents:
-
- (*) Overview.
-
- (*) Types of credentials.
-
- (*) File markings.
-
- (*) Task credentials.
-
-     - Immutable credentials.
-     - Accessing task credentials.
-     - Accessing another task's credentials.
-     - Altering credentials.
-     - Managing credentials.
-
- (*) Open file credentials.
-
- (*) Overriding the VFS's use of credentials.
-
-
-========
-OVERVIEW
-========
-
-There are several parts to the security check performed by Linux when one
-object acts upon another:
-
- (1) Objects.
-
-     Objects are things in the system that may be acted upon directly by
-     userspace programs.  Linux has a variety of actionable objects, including:
-
-	- Tasks
-	- Files/inodes
-	- Sockets
-	- Message queues
-	- Shared memory segments
-	- Semaphores
-	- Keys
-
-     As a part of the description of all these objects there is a set of
-     credentials.  What's in the set depends on the type of object.
-
- (2) Object ownership.
-
-     Amongst the credentials of most objects, there will be a subset that
-     indicates the ownership of that object.  This is used for resource
-     accounting and limitation (disk quotas and task rlimits for example).
-
-     In a standard UNIX filesystem, for instance, this will be defined by the
-     UID marked on the inode.
-
- (3) The objective context.
-
-     Also amongst the credentials of those objects, there will be a subset that
-     indicates the 'objective context' of that object.  This may or may not be
-     the same set as in (2) - in standard UNIX files, for instance, this is the
-     defined by the UID and the GID marked on the inode.
-
-     The objective context is used as part of the security calculation that is
-     carried out when an object is acted upon.
-
- (4) Subjects.
-
-     A subject is an object that is acting upon another object.
-
-     Most of the objects in the system are inactive: they don't act on other
-     objects within the system.  Processes/tasks are the obvious exception:
-     they do stuff; they access and manipulate things.
-
-     Objects other than tasks may under some circumstances also be subjects.
-     For instance an open file may send SIGIO to a task using the UID and EUID
-     given to it by a task that called fcntl(F_SETOWN) upon it.  In this case,
-     the file struct will have a subjective context too.
-
- (5) The subjective context.
-
-     A subject has an additional interpretation of its credentials.  A subset
-     of its credentials forms the 'subjective context'.  The subjective context
-     is used as part of the security calculation that is carried out when a
-     subject acts.
-
-     A Linux task, for example, has the FSUID, FSGID and the supplementary
-     group list for when it is acting upon a file - which are quite separate
-     from the real UID and GID that normally form the objective context of the
-     task.
-
- (6) Actions.
-
-     Linux has a number of actions available that a subject may perform upon an
-     object.  The set of actions available depends on the nature of the subject
-     and the object.
-
-     Actions include reading, writing, creating and deleting files; forking or
-     signalling and tracing tasks.
-
- (7) Rules, access control lists and security calculations.
-
-     When a subject acts upon an object, a security calculation is made.  This
-     involves taking the subjective context, the objective context and the
-     action, and searching one or more sets of rules to see whether the subject
-     is granted or denied permission to act in the desired manner on the
-     object, given those contexts.
-
-     There are two main sources of rules:
-
-     (a) Discretionary access control (DAC):
-
-	 Sometimes the object will include sets of rules as part of its
-	 description.  This is an 'Access Control List' or 'ACL'.  A Linux
-	 file may supply more than one ACL.
-
-	 A traditional UNIX file, for example, includes a permissions mask that
-	 is an abbreviated ACL with three fixed classes of subject ('user',
-	 'group' and 'other'), each of which may be granted certain privileges
-	 ('read', 'write' and 'execute' - whatever those map to for the object
-	 in question).  UNIX file permissions do not allow the arbitrary
-	 specification of subjects, however, and so are of limited use.
-
-	 A Linux file might also sport a POSIX ACL.  This is a list of rules
-	 that grants various permissions to arbitrary subjects.
-
-     (b) Mandatory access control (MAC):
-
-	 The system as a whole may have one or more sets of rules that get
-	 applied to all subjects and objects, regardless of their source.
-	 SELinux and Smack are examples of this.
-
-	 In the case of SELinux and Smack, each object is given a label as part
-	 of its credentials.  When an action is requested, they take the
-	 subject label, the object label and the action and look for a rule
-	 that says that this action is either granted or denied.
-
-
-====================
-TYPES OF CREDENTIALS
-====================
-
-The Linux kernel supports the following types of credentials:
-
- (1) Traditional UNIX credentials.
-
-	Real User ID
-	Real Group ID
-
-     The UID and GID are carried by most, if not all, Linux objects, even if in
-     some cases it has to be invented (FAT or CIFS files for example, which are
-     derived from Windows).  These (mostly) define the objective context of
-     that object, with tasks being slightly different in some cases.
-
-	Effective, Saved and FS User ID
-	Effective, Saved and FS Group ID
-	Supplementary groups
-
-     These are additional credentials used by tasks only.  Usually, an
-     EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID
-     will be used as the objective.  For tasks, it should be noted that this is
-     not always true.
-
- (2) Capabilities.
-
-	Set of permitted capabilities
-	Set of inheritable capabilities
-	Set of effective capabilities
-	Capability bounding set
-
-     These are only carried by tasks.  They indicate superior capabilities
-     granted piecemeal to a task that an ordinary task wouldn't otherwise have.
-     These are manipulated implicitly by changes to the traditional UNIX
-     credentials, but can also be manipulated directly by the capset() system
-     call.
-
-     The permitted capabilities are those caps that the process might grant
-     itself to its effective or permitted sets through capset().  This
-     inheritable set might also be so constrained.
-
-     The effective capabilities are the ones that a task is actually allowed to
-     make use of itself.
-
-     The inheritable capabilities are the ones that may get passed across
-     execve().
-
-     The bounding set limits the capabilities that may be inherited across
-     execve(), especially when a binary is executed that will execute as UID 0.
-
- (3) Secure management flags (securebits).
-
-     These are only carried by tasks.  These govern the way the above
-     credentials are manipulated and inherited over certain operations such as
-     execve().  They aren't used directly as objective or subjective
-     credentials.
-
- (4) Keys and keyrings.
-
-     These are only carried by tasks.  They carry and cache security tokens
-     that don't fit into the other standard UNIX credentials.  They are for
-     making such things as network filesystem keys available to the file
-     accesses performed by processes, without the necessity of ordinary
-     programs having to know about security details involved.
-
-     Keyrings are a special type of key.  They carry sets of other keys and can
-     be searched for the desired key.  Each process may subscribe to a number
-     of keyrings:
-
-	Per-thread keying
-	Per-process keyring
-	Per-session keyring
-
-     When a process accesses a key, if not already present, it will normally be
-     cached on one of these keyrings for future accesses to find.
-
-     For more information on using keys, see Documentation/security/keys.txt.
-
- (5) LSM
-
-     The Linux Security Module allows extra controls to be placed over the
-     operations that a task may do.  Currently Linux supports several LSM
-     options.
-
-     Some work by labelling the objects in a system and then applying sets of
-     rules (policies) that say what operations a task with one label may do to
-     an object with another label.
-
- (6) AF_KEY
-
-     This is a socket-based approach to credential management for networking
-     stacks [RFC 2367].  It isn't discussed by this document as it doesn't
-     interact directly with task and file credentials; rather it keeps system
-     level credentials.
-
-
-When a file is opened, part of the opening task's subjective context is
-recorded in the file struct created.  This allows operations using that file
-struct to use those credentials instead of the subjective context of the task
-that issued the operation.  An example of this would be a file opened on a
-network filesystem where the credentials of the opened file should be presented
-to the server, regardless of who is actually doing a read or a write upon it.
-
-
-=============
-FILE MARKINGS
-=============
-
-Files on disk or obtained over the network may have annotations that form the
-objective security context of that file.  Depending on the type of filesystem,
-this may include one or more of the following:
-
- (*) UNIX UID, GID, mode;
-
- (*) Windows user ID;
-
- (*) Access control list;
-
- (*) LSM security label;
-
- (*) UNIX exec privilege escalation bits (SUID/SGID);
-
- (*) File capabilities exec privilege escalation bits.
-
-These are compared to the task's subjective security context, and certain
-operations allowed or disallowed as a result.  In the case of execve(), the
-privilege escalation bits come into play, and may allow the resulting process
-extra privileges, based on the annotations on the executable file.
-
-
-================
-TASK CREDENTIALS
-================
-
-In Linux, all of a task's credentials are held in (uid, gid) or through
-(groups, keys, LSM security) a refcounted structure of type 'struct cred'.
-Each task points to its credentials by a pointer called 'cred' in its
-task_struct.
-
-Once a set of credentials has been prepared and committed, it may not be
-changed, barring the following exceptions:
-
- (1) its reference count may be changed;
-
- (2) the reference count on the group_info struct it points to may be changed;
-
- (3) the reference count on the security data it points to may be changed;
-
- (4) the reference count on any keyrings it points to may be changed;
-
- (5) any keyrings it points to may be revoked, expired or have their security
-     attributes changed; and
-
- (6) the contents of any keyrings to which it points may be changed (the whole
-     point of keyrings being a shared set of credentials, modifiable by anyone
-     with appropriate access).
-
-To alter anything in the cred struct, the copy-and-replace principle must be
-adhered to.  First take a copy, then alter the copy and then use RCU to change
-the task pointer to make it point to the new copy.  There are wrappers to aid
-with this (see below).
-
-A task may only alter its _own_ credentials; it is no longer permitted for a
-task to alter another's credentials.  This means the capset() system call is no
-longer permitted to take any PID other than the one of the current process.
-Also keyctl_instantiate() and keyctl_negate() functions no longer permit
-attachment to process-specific keyrings in the requesting process as the
-instantiating process may need to create them.
-
-
-IMMUTABLE CREDENTIALS
----------------------
-
-Once a set of credentials has been made public (by calling commit_creds() for
-example), it must be considered immutable, barring two exceptions:
-
- (1) The reference count may be altered.
-
- (2) Whilst the keyring subscriptions of a set of credentials may not be
-     changed, the keyrings subscribed to may have their contents altered.
-
-To catch accidental credential alteration at compile time, struct task_struct
-has _const_ pointers to its credential sets, as does struct file.  Furthermore,
-certain functions such as get_cred() and put_cred() operate on const pointers,
-thus rendering casts unnecessary, but require to temporarily ditch the const
-qualification to be able to alter the reference count.
-
-
-ACCESSING TASK CREDENTIALS
---------------------------
-
-A task being able to alter only its own credentials permits the current process
-to read or replace its own credentials without the need for any form of locking
-- which simplifies things greatly.  It can just call:
-
-	const struct cred *current_cred()
-
-to get a pointer to its credentials structure, and it doesn't have to release
-it afterwards.
-
-There are convenience wrappers for retrieving specific aspects of a task's
-credentials (the value is simply returned in each case):
-
-	uid_t current_uid(void)		Current's real UID
-	gid_t current_gid(void)		Current's real GID
-	uid_t current_euid(void)	Current's effective UID
-	gid_t current_egid(void)	Current's effective GID
-	uid_t current_fsuid(void)	Current's file access UID
-	gid_t current_fsgid(void)	Current's file access GID
-	kernel_cap_t current_cap(void)	Current's effective capabilities
-	void *current_security(void)	Current's LSM security pointer
-	struct user_struct *current_user(void)  Current's user account
-
-There are also convenience wrappers for retrieving specific associated pairs of
-a task's credentials:
-
-	void current_uid_gid(uid_t *, gid_t *);
-	void current_euid_egid(uid_t *, gid_t *);
-	void current_fsuid_fsgid(uid_t *, gid_t *);
-
-which return these pairs of values through their arguments after retrieving
-them from the current task's credentials.
-
-
-In addition, there is a function for obtaining a reference on the current
-process's current set of credentials:
-
-	const struct cred *get_current_cred(void);
-
-and functions for getting references to one of the credentials that don't
-actually live in struct cred:
-
-	struct user_struct *get_current_user(void);
-	struct group_info *get_current_groups(void);
-
-which get references to the current process's user accounting structure and
-supplementary groups list respectively.
-
-Once a reference has been obtained, it must be released with put_cred(),
-free_uid() or put_group_info() as appropriate.
-
-
-ACCESSING ANOTHER TASK'S CREDENTIALS
-------------------------------------
-
-Whilst a task may access its own credentials without the need for locking, the
-same is not true of a task wanting to access another task's credentials.  It
-must use the RCU read lock and rcu_dereference().
-
-The rcu_dereference() is wrapped by:
-
-	const struct cred *__task_cred(struct task_struct *task);
-
-This should be used inside the RCU read lock, as in the following example:
-
-	void foo(struct task_struct *t, struct foo_data *f)
-	{
-		const struct cred *tcred;
-		...
-		rcu_read_lock();
-		tcred = __task_cred(t);
-		f->uid = tcred->uid;
-		f->gid = tcred->gid;
-		f->groups = get_group_info(tcred->groups);
-		rcu_read_unlock();
-		...
-	}
-
-Should it be necessary to hold another task's credentials for a long period of
-time, and possibly to sleep whilst doing so, then the caller should get a
-reference on them using:
-
-	const struct cred *get_task_cred(struct task_struct *task);
-
-This does all the RCU magic inside of it.  The caller must call put_cred() on
-the credentials so obtained when they're finished with.
-
- [*] Note: The result of __task_cred() should not be passed directly to
-     get_cred() as this may race with commit_cred().
-
-There are a couple of convenience functions to access bits of another task's
-credentials, hiding the RCU magic from the caller:
-
-	uid_t task_uid(task)		Task's real UID
-	uid_t task_euid(task)		Task's effective UID
-
-If the caller is holding the RCU read lock at the time anyway, then:
-
-	__task_cred(task)->uid
-	__task_cred(task)->euid
-
-should be used instead.  Similarly, if multiple aspects of a task's credentials
-need to be accessed, RCU read lock should be used, __task_cred() called, the
-result stored in a temporary pointer and then the credential aspects called
-from that before dropping the lock.  This prevents the potentially expensive
-RCU magic from being invoked multiple times.
-
-Should some other single aspect of another task's credentials need to be
-accessed, then this can be used:
-
-	task_cred_xxx(task, member)
-
-where 'member' is a non-pointer member of the cred struct.  For instance:
-
-	uid_t task_cred_xxx(task, suid);
-
-will retrieve 'struct cred::suid' from the task, doing the appropriate RCU
-magic.  This may not be used for pointer members as what they point to may
-disappear the moment the RCU read lock is dropped.
-
-
-ALTERING CREDENTIALS
---------------------
-
-As previously mentioned, a task may only alter its own credentials, and may not
-alter those of another task.  This means that it doesn't need to use any
-locking to alter its own credentials.
-
-To alter the current process's credentials, a function should first prepare a
-new set of credentials by calling:
-
-	struct cred *prepare_creds(void);
-
-this locks current->cred_replace_mutex and then allocates and constructs a
-duplicate of the current process's credentials, returning with the mutex still
-held if successful.  It returns NULL if not successful (out of memory).
-
-The mutex prevents ptrace() from altering the ptrace state of a process whilst
-security checks on credentials construction and changing is taking place as
-the ptrace state may alter the outcome, particularly in the case of execve().
-
-The new credentials set should be altered appropriately, and any security
-checks and hooks done.  Both the current and the proposed sets of credentials
-are available for this purpose as current_cred() will return the current set
-still at this point.
-
-
-When the credential set is ready, it should be committed to the current process
-by calling:
-
-	int commit_creds(struct cred *new);
-
-This will alter various aspects of the credentials and the process, giving the
-LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually
-commit the new credentials to current->cred, it will release
-current->cred_replace_mutex to allow ptrace() to take place, and it will notify
-the scheduler and others of the changes.
-
-This function is guaranteed to return 0, so that it can be tail-called at the
-end of such functions as sys_setresuid().
-
-Note that this function consumes the caller's reference to the new credentials.
-The caller should _not_ call put_cred() on the new credentials afterwards.
-
-Furthermore, once this function has been called on a new set of credentials,
-those credentials may _not_ be changed further.
-
-
-Should the security checks fail or some other error occur after prepare_creds()
-has been called, then the following function should be invoked:
-
-	void abort_creds(struct cred *new);
-
-This releases the lock on current->cred_replace_mutex that prepare_creds() got
-and then releases the new credentials.
-
-
-A typical credentials alteration function would look something like this:
-
-	int alter_suid(uid_t suid)
-	{
-		struct cred *new;
-		int ret;
-
-		new = prepare_creds();
-		if (!new)
-			return -ENOMEM;
-
-		new->suid = suid;
-		ret = security_alter_suid(new);
-		if (ret < 0) {
-			abort_creds(new);
-			return ret;
-		}
-
-		return commit_creds(new);
-	}
-
-
-MANAGING CREDENTIALS
---------------------
-
-There are some functions to help manage credentials:
-
- (*) void put_cred(const struct cred *cred);
-
-     This releases a reference to the given set of credentials.  If the
-     reference count reaches zero, the credentials will be scheduled for
-     destruction by the RCU system.
-
- (*) const struct cred *get_cred(const struct cred *cred);
-
-     This gets a reference on a live set of credentials, returning a pointer to
-     that set of credentials.
-
- (*) struct cred *get_new_cred(struct cred *cred);
-
-     This gets a reference on a set of credentials that is under construction
-     and is thus still mutable, returning a pointer to that set of credentials.
-
-
-=====================
-OPEN FILE CREDENTIALS
-=====================
-
-When a new file is opened, a reference is obtained on the opening task's
-credentials and this is attached to the file struct as 'f_cred' in place of
-'f_uid' and 'f_gid'.  Code that used to access file->f_uid and file->f_gid
-should now access file->f_cred->fsuid and file->f_cred->fsgid.
-
-It is safe to access f_cred without the use of RCU or locking because the
-pointer will not change over the lifetime of the file struct, and nor will the
-contents of the cred struct pointed to, barring the exceptions listed above
-(see the Task Credentials section).
-
-
-=======================================
-OVERRIDING THE VFS'S USE OF CREDENTIALS
-=======================================
-
-Under some circumstances it is desirable to override the credentials used by
-the VFS, and that can be done by calling into such as vfs_mkdir() with a
-different set of credentials.  This is done in the following places:
-
- (*) sys_faccessat().
-
- (*) do_coredump().
-
- (*) nfs4recover.c.
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 07335659ce8d..415be8e0b013 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -5,5 +5,6 @@ Security Documentation
 .. toctree::
    :maxdepth: 1
 
+   credentials
    IMA-templates
    tpm/index
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b03e7d049a64..c728d515e5e2 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -1,4 +1,4 @@
-/* Credentials management - see Documentation/security/credentials.txt
+/* Credentials management - see Documentation/security/credentials.rst
  *
  * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
diff --git a/kernel/cred.c b/kernel/cred.c
index 2bc66075740f..ecf03657e71c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -1,4 +1,4 @@
-/* Task credentials management - see Documentation/security/credentials.txt
+/* Task credentials management - see Documentation/security/credentials.rst
  *
  * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
-- 
cgit v1.2.3


From f00f85a8b2e0ac344f8dbaa3441b31bc283ce400 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 13 May 2017 04:51:42 -0700
Subject: doc: security: minor cleanups to build kernel-doc

These fixes were needed to parse lsm_hooks.h kernel-doc. More work is
needed, but this is the first step.

Acked-by: James Morris <james.l.morris@oracle.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/lsm_hooks.h | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 080f34e66017..a1eeaf603d2f 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -29,6 +29,8 @@
 #include <linux/rculist.h>
 
 /**
+ * union security_list_options - Linux Security Module hook function list
+ *
  * Security hooks for program execution operations.
  *
  * @bprm_set_creds:
@@ -193,8 +195,8 @@
  *	@value will be set to the allocated attribute value.
  *	@len will be set to the length of the value.
  *	Returns 0 if @name and @value have been successfully set,
- *		-EOPNOTSUPP if no security attribute is needed, or
- *		-ENOMEM on memory allocation failure.
+ *	-EOPNOTSUPP if no security attribute is needed, or
+ *	-ENOMEM on memory allocation failure.
  * @inode_create:
  *	Check permission to create a regular file.
  *	@dir contains inode structure of the parent of the new file.
@@ -510,8 +512,7 @@
  *	process @tsk.  Note that this hook is sometimes called from interrupt.
  *	Note that the fown_struct, @fown, is never outside the context of a
  *	struct file, so the file structure (and associated security information)
- *	can always be obtained:
- *		container_of(fown, struct file, f_owner)
+ *	can always be obtained: container_of(fown, struct file, f_owner)
  *	@tsk contains the structure of task receiving signal.
  *	@fown contains the file owner information.
  *	@sig is the signal that will be sent.  When 0, kernel sends SIGIO.
@@ -521,7 +522,7 @@
  *	to receive an open file descriptor via socket IPC.
  *	@file contains the file structure being received.
  *	Return 0 if permission is granted.
- * @file_open
+ * @file_open:
  *	Save open-time permission checking state for later use upon
  *	file_permission, and recheck access if anything has changed
  *	since inode_permission.
@@ -1143,7 +1144,7 @@
  *	@sma contains the semaphore structure.  May be NULL.
  *	@cmd contains the operation to be performed.
  *	Return 0 if permission is granted.
- * @sem_semop
+ * @sem_semop:
  *	Check permissions before performing operations on members of the
  *	semaphore set @sma.  If the @alter flag is nonzero, the semaphore set
  *	may be modified.
@@ -1153,20 +1154,20 @@
  *	@alter contains the flag indicating whether changes are to be made.
  *	Return 0 if permission is granted.
  *
- * @binder_set_context_mgr
+ * @binder_set_context_mgr:
  *	Check whether @mgr is allowed to be the binder context manager.
  *	@mgr contains the task_struct for the task being registered.
  *	Return 0 if permission is granted.
- * @binder_transaction
+ * @binder_transaction:
  *	Check whether @from is allowed to invoke a binder transaction call
  *	to @to.
  *	@from contains the task_struct for the sending task.
  *	@to contains the task_struct for the receiving task.
- * @binder_transfer_binder
+ * @binder_transfer_binder:
  *	Check whether @from is allowed to transfer a binder reference to @to.
  *	@from contains the task_struct for the sending task.
  *	@to contains the task_struct for the receiving task.
- * @binder_transfer_file
+ * @binder_transfer_file:
  *	Check whether @from is allowed to transfer @file to @to.
  *	@from contains the task_struct for the sending task.
  *	@file contains the struct file being transferred.
@@ -1214,7 +1215,7 @@
  *	@cred contains the credentials to use.
  *	@ns contains the user namespace we want the capability in
  *	@cap contains the capability <include/linux/capability.h>.
- *	@audit: Whether to write an audit message or not
+ *	@audit contains whether to write an audit message or not
  *	Return 0 if the capability is granted for @tsk.
  * @syslog:
  *	Check permission before accessing the kernel message ring or changing
@@ -1336,9 +1337,7 @@
  *	@inode we wish to get the security context of.
  *	@ctx is a pointer in which to place the allocated security context.
  *	@ctxlen points to the place to put the length of @ctx.
- * This is the main security structure.
  */
-
 union security_list_options {
 	int (*binder_set_context_mgr)(struct task_struct *mgr);
 	int (*binder_transaction)(struct task_struct *from,
-- 
cgit v1.2.3


From b68101a1e8f0263dbc7b8375d2a7c57c6216fb76 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 13 May 2017 04:51:50 -0700
Subject: doc: ReSTify keys.txt

This creates a new section in the security development index for kernel
keys, and adjusts for ReST markup.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/crypto/asymmetric-keys.txt |    2 +-
 Documentation/security/00-INDEX          |    2 -
 Documentation/security/index.rst         |    1 +
 Documentation/security/keys.txt          | 1562 ------------------------------
 Documentation/security/keys/core.rst     | 1550 +++++++++++++++++++++++++++++
 Documentation/security/keys/index.rst    |    8 +
 MAINTAINERS                              |    2 +-
 include/linux/key.h                      |    2 +-
 8 files changed, 1562 insertions(+), 1567 deletions(-)
 delete mode 100644 Documentation/security/keys.txt
 create mode 100644 Documentation/security/keys/core.rst
 create mode 100644 Documentation/security/keys/index.rst

(limited to 'include/linux')

diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
index 5ad6480e3fb9..b82b6ad48488 100644
--- a/Documentation/crypto/asymmetric-keys.txt
+++ b/Documentation/crypto/asymmetric-keys.txt
@@ -265,7 +265,7 @@ mandatory:
 
      The caller passes a pointer to the following struct with all of the fields
      cleared, except for data, datalen and quotalen [see
-     Documentation/security/keys.txt].
+     Documentation/security/keys/core.rst].
 
 	struct key_preparsed_payload {
 		char		*description;
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX
index cdb2294ec047..a840095bb11c 100644
--- a/Documentation/security/00-INDEX
+++ b/Documentation/security/00-INDEX
@@ -6,5 +6,3 @@ keys-request-key.txt
 	- description of the kernel key request service.
 keys-trusted-encrypted.txt
 	- info on the Trusted and Encrypted keys in the kernel key ring service.
-keys.txt
-	- description of the kernel key retention service.
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 94ba1cfc01c5..298a94a33f05 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -7,6 +7,7 @@ Security Documentation
 
    credentials
    IMA-templates
+   keys/index
    LSM
    self-protection
    tpm/index
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
deleted file mode 100644
index cd5019934d7f..000000000000
--- a/Documentation/security/keys.txt
+++ /dev/null
@@ -1,1562 +0,0 @@
-			 ============================
-			 KERNEL KEY RETENTION SERVICE
-			 ============================
-
-This service allows cryptographic keys, authentication tokens, cross-domain
-user mappings, and similar to be cached in the kernel for the use of
-filesystems and other kernel services.
-
-Keyrings are permitted; these are a special type of key that can hold links to
-other keys. Processes each have three standard keyring subscriptions that a
-kernel service can search for relevant keys.
-
-The key service can be configured on by enabling:
-
-	"Security options"/"Enable access key retention support" (CONFIG_KEYS)
-
-This document has the following sections:
-
-	- Key overview
-	- Key service overview
-	- Key access permissions
-	- SELinux support
-	- New procfs files
-	- Userspace system call interface
-	- Kernel services
-	- Notes on accessing payload contents
-	- Defining a key type
-	- Request-key callback service
-	- Garbage collection
-
-
-============
-KEY OVERVIEW
-============
-
-In this context, keys represent units of cryptographic data, authentication
-tokens, keyrings, etc.. These are represented in the kernel by struct key.
-
-Each key has a number of attributes:
-
-	- A serial number.
-	- A type.
-	- A description (for matching a key in a search).
-	- Access control information.
-	- An expiry time.
-	- A payload.
-	- State.
-
-
- (*) Each key is issued a serial number of type key_serial_t that is unique for
-     the lifetime of that key. All serial numbers are positive non-zero 32-bit
-     integers.
-
-     Userspace programs can use a key's serial numbers as a way to gain access
-     to it, subject to permission checking.
-
- (*) Each key is of a defined "type". Types must be registered inside the
-     kernel by a kernel service (such as a filesystem) before keys of that type
-     can be added or used. Userspace programs cannot define new types directly.
-
-     Key types are represented in the kernel by struct key_type. This defines a
-     number of operations that can be performed on a key of that type.
-
-     Should a type be removed from the system, all the keys of that type will
-     be invalidated.
-
- (*) Each key has a description. This should be a printable string. The key
-     type provides an operation to perform a match between the description on a
-     key and a criterion string.
-
- (*) Each key has an owner user ID, a group ID and a permissions mask. These
-     are used to control what a process may do to a key from userspace, and
-     whether a kernel service will be able to find the key.
-
- (*) Each key can be set to expire at a specific time by the key type's
-     instantiation function. Keys can also be immortal.
-
- (*) Each key can have a payload. This is a quantity of data that represent the
-     actual "key". In the case of a keyring, this is a list of keys to which
-     the keyring links; in the case of a user-defined key, it's an arbitrary
-     blob of data.
-
-     Having a payload is not required; and the payload can, in fact, just be a
-     value stored in the struct key itself.
-
-     When a key is instantiated, the key type's instantiation function is
-     called with a blob of data, and that then creates the key's payload in
-     some way.
-
-     Similarly, when userspace wants to read back the contents of the key, if
-     permitted, another key type operation will be called to convert the key's
-     attached payload back into a blob of data.
-
- (*) Each key can be in one of a number of basic states:
-
-     (*) Uninstantiated. The key exists, but does not have any data attached.
-     	 Keys being requested from userspace will be in this state.
-
-     (*) Instantiated. This is the normal state. The key is fully formed, and
-	 has data attached.
-
-     (*) Negative. This is a relatively short-lived state. The key acts as a
-	 note saying that a previous call out to userspace failed, and acts as
-	 a throttle on key lookups. A negative key can be updated to a normal
-	 state.
-
-     (*) Expired. Keys can have lifetimes set. If their lifetime is exceeded,
-	 they traverse to this state. An expired key can be updated back to a
-	 normal state.
-
-     (*) Revoked. A key is put in this state by userspace action. It can't be
-	 found or operated upon (apart from by unlinking it).
-
-     (*) Dead. The key's type was unregistered, and so the key is now useless.
-
-Keys in the last three states are subject to garbage collection.  See the
-section on "Garbage collection".
-
-
-====================
-KEY SERVICE OVERVIEW
-====================
-
-The key service provides a number of features besides keys:
-
- (*) The key service defines three special key types:
-
-     (+) "keyring"
-
-	 Keyrings are special keys that contain a list of other keys. Keyring
-	 lists can be modified using various system calls. Keyrings should not
-	 be given a payload when created.
-
-     (+) "user"
-
-	 A key of this type has a description and a payload that are arbitrary
-	 blobs of data. These can be created, updated and read by userspace,
-	 and aren't intended for use by kernel services.
-
-     (+) "logon"
-
-	 Like a "user" key, a "logon" key has a payload that is an arbitrary
-	 blob of data. It is intended as a place to store secrets which are
-	 accessible to the kernel but not to userspace programs.
-
-	 The description can be arbitrary, but must be prefixed with a non-zero
-	 length string that describes the key "subclass". The subclass is
-	 separated from the rest of the description by a ':'. "logon" keys can
-	 be created and updated from userspace, but the payload is only
-	 readable from kernel space.
-
- (*) Each process subscribes to three keyrings: a thread-specific keyring, a
-     process-specific keyring, and a session-specific keyring.
-
-     The thread-specific keyring is discarded from the child when any sort of
-     clone, fork, vfork or execve occurs. A new keyring is created only when
-     required.
-
-     The process-specific keyring is replaced with an empty one in the child on
-     clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is
-     shared. execve also discards the process's process keyring and creates a
-     new one.
-
-     The session-specific keyring is persistent across clone, fork, vfork and
-     execve, even when the latter executes a set-UID or set-GID binary. A
-     process can, however, replace its current session keyring with a new one
-     by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous
-     new one, or to attempt to create or join one of a specific name.
-
-     The ownership of the thread keyring changes when the real UID and GID of
-     the thread changes.
-
- (*) Each user ID resident in the system holds two special keyrings: a user
-     specific keyring and a default user session keyring. The default session
-     keyring is initialised with a link to the user-specific keyring.
-
-     When a process changes its real UID, if it used to have no session key, it
-     will be subscribed to the default session key for the new UID.
-
-     If a process attempts to access its session key when it doesn't have one,
-     it will be subscribed to the default for its current UID.
-
- (*) Each user has two quotas against which the keys they own are tracked. One
-     limits the total number of keys and keyrings, the other limits the total
-     amount of description and payload space that can be consumed.
-
-     The user can view information on this and other statistics through procfs
-     files.  The root user may also alter the quota limits through sysctl files
-     (see the section "New procfs files").
-
-     Process-specific and thread-specific keyrings are not counted towards a
-     user's quota.
-
-     If a system call that modifies a key or keyring in some way would put the
-     user over quota, the operation is refused and error EDQUOT is returned.
-
- (*) There's a system call interface by which userspace programs can create and
-     manipulate keys and keyrings.
-
- (*) There's a kernel interface by which services can register types and search
-     for keys.
-
- (*) There's a way for the a search done from the kernel to call back to
-     userspace to request a key that can't be found in a process's keyrings.
-
- (*) An optional filesystem is available through which the key database can be
-     viewed and manipulated.
-
-
-======================
-KEY ACCESS PERMISSIONS
-======================
-
-Keys have an owner user ID, a group access ID, and a permissions mask. The mask
-has up to eight bits each for possessor, user, group and other access. Only
-six of each set of eight bits are defined. These permissions granted are:
-
- (*) View
-
-     This permits a key or keyring's attributes to be viewed - including key
-     type and description.
-
- (*) Read
-
-     This permits a key's payload to be viewed or a keyring's list of linked
-     keys.
-
- (*) Write
-
-     This permits a key's payload to be instantiated or updated, or it allows a
-     link to be added to or removed from a keyring.
-
- (*) Search
-
-     This permits keyrings to be searched and keys to be found. Searches can
-     only recurse into nested keyrings that have search permission set.
-
- (*) Link
-
-     This permits a key or keyring to be linked to. To create a link from a
-     keyring to a key, a process must have Write permission on the keyring and
-     Link permission on the key.
-
- (*) Set Attribute
-
-     This permits a key's UID, GID and permissions mask to be changed.
-
-For changing the ownership, group ID or permissions mask, being the owner of
-the key or having the sysadmin capability is sufficient.
-
-
-===============
-SELINUX SUPPORT
-===============
-
-The security class "key" has been added to SELinux so that mandatory access
-controls can be applied to keys created within various contexts.  This support
-is preliminary, and is likely to change quite significantly in the near future.
-Currently, all of the basic permissions explained above are provided in SELinux
-as well; SELinux is simply invoked after all basic permission checks have been
-performed.
-
-The value of the file /proc/self/attr/keycreate influences the labeling of
-newly-created keys.  If the contents of that file correspond to an SELinux
-security context, then the key will be assigned that context.  Otherwise, the
-key will be assigned the current context of the task that invoked the key
-creation request.  Tasks must be granted explicit permission to assign a
-particular context to newly-created keys, using the "create" permission in the
-key security class.
-
-The default keyrings associated with users will be labeled with the default
-context of the user if and only if the login programs have been instrumented to
-properly initialize keycreate during the login process.  Otherwise, they will
-be labeled with the context of the login program itself.
-
-Note, however, that the default keyrings associated with the root user are
-labeled with the default kernel context, since they are created early in the
-boot process, before root has a chance to log in.
-
-The keyrings associated with new threads are each labeled with the context of
-their associated thread, and both session and process keyrings are handled
-similarly.
-
-
-================
-NEW PROCFS FILES
-================
-
-Two files have been added to procfs by which an administrator can find out
-about the status of the key service:
-
- (*) /proc/keys
-
-     This lists the keys that are currently viewable by the task reading the
-     file, giving information about their type, description and permissions.
-     It is not possible to view the payload of the key this way, though some
-     information about it may be given.
-
-     The only keys included in the list are those that grant View permission to
-     the reading process whether or not it possesses them.  Note that LSM
-     security checks are still performed, and may further filter out keys that
-     the current process is not authorised to view.
-
-     The contents of the file look like this:
-
-	SERIAL   FLAGS  USAGE EXPY PERM     UID   GID   TYPE      DESCRIPTION: SUMMARY
-	00000001 I-----    39 perm 1f3f0000     0     0 keyring   _uid_ses.0: 1/4
-	00000002 I-----     2 perm 1f3f0000     0     0 keyring   _uid.0: empty
-	00000007 I-----     1 perm 1f3f0000     0     0 keyring   _pid.1: empty
-	0000018d I-----     1 perm 1f3f0000     0     0 keyring   _pid.412: empty
-	000004d2 I--Q--     1 perm 1f3f0000    32    -1 keyring   _uid.32: 1/4
-	000004d3 I--Q--     3 perm 1f3f0000    32    -1 keyring   _uid_ses.32: empty
-	00000892 I--QU-     1 perm 1f000000     0     0 user      metal:copper: 0
-	00000893 I--Q-N     1  35s 1f3f0000     0     0 user      metal:silver: 0
-	00000894 I--Q--     1  10h 003f0000     0     0 user      metal:gold: 0
-
-     The flags are:
-
-	I	Instantiated
-	R	Revoked
-	D	Dead
-	Q	Contributes to user's quota
-	U	Under construction by callback to userspace
-	N	Negative key
-
-
- (*) /proc/key-users
-
-     This file lists the tracking data for each user that has at least one key
-     on the system.  Such data includes quota information and statistics:
-
-	[root@andromeda root]# cat /proc/key-users
-	0:     46 45/45 1/100 13/10000
-	29:     2 2/2 2/100 40/10000
-	32:     2 2/2 2/100 40/10000
-	38:     2 2/2 2/100 40/10000
-
-     The format of each line is
-	<UID>:			User ID to which this applies
-	<usage>			Structure refcount
-	<inst>/<keys>		Total number of keys and number instantiated
-	<keys>/<max>		Key count quota
-	<bytes>/<max>		Key size quota
-
-
-Four new sysctl files have been added also for the purpose of controlling the
-quota limits on keys:
-
- (*) /proc/sys/kernel/keys/root_maxkeys
-     /proc/sys/kernel/keys/root_maxbytes
-
-     These files hold the maximum number of keys that root may have and the
-     maximum total number of bytes of data that root may have stored in those
-     keys.
-
- (*) /proc/sys/kernel/keys/maxkeys
-     /proc/sys/kernel/keys/maxbytes
-
-     These files hold the maximum number of keys that each non-root user may
-     have and the maximum total number of bytes of data that each of those
-     users may have stored in their keys.
-
-Root may alter these by writing each new limit as a decimal number string to
-the appropriate file.
-
-
-===============================
-USERSPACE SYSTEM CALL INTERFACE
-===============================
-
-Userspace can manipulate keys directly through three new syscalls: add_key,
-request_key and keyctl. The latter provides a number of functions for
-manipulating keys.
-
-When referring to a key directly, userspace programs should use the key's
-serial number (a positive 32-bit integer). However, there are some special
-values available for referring to special keys and keyrings that relate to the
-process making the call:
-
-	CONSTANT			VALUE	KEY REFERENCED
-	==============================	======	===========================
-	KEY_SPEC_THREAD_KEYRING		-1	thread-specific keyring
-	KEY_SPEC_PROCESS_KEYRING	-2	process-specific keyring
-	KEY_SPEC_SESSION_KEYRING	-3	session-specific keyring
-	KEY_SPEC_USER_KEYRING		-4	UID-specific keyring
-	KEY_SPEC_USER_SESSION_KEYRING	-5	UID-session keyring
-	KEY_SPEC_GROUP_KEYRING		-6	GID-specific keyring
-	KEY_SPEC_REQKEY_AUTH_KEY	-7	assumed request_key()
-						  authorisation key
-
-
-The main syscalls are:
-
- (*) Create a new key of given type, description and payload and add it to the
-     nominated keyring:
-
-	key_serial_t add_key(const char *type, const char *desc,
-			     const void *payload, size_t plen,
-			     key_serial_t keyring);
-
-     If a key of the same type and description as that proposed already exists
-     in the keyring, this will try to update it with the given payload, or it
-     will return error EEXIST if that function is not supported by the key
-     type. The process must also have permission to write to the key to be able
-     to update it. The new key will have all user permissions granted and no
-     group or third party permissions.
-
-     Otherwise, this will attempt to create a new key of the specified type and
-     description, and to instantiate it with the supplied payload and attach it
-     to the keyring. In this case, an error will be generated if the process
-     does not have permission to write to the keyring.
-
-     If the key type supports it, if the description is NULL or an empty
-     string, the key type will try and generate a description from the content
-     of the payload.
-
-     The payload is optional, and the pointer can be NULL if not required by
-     the type. The payload is plen in size, and plen can be zero for an empty
-     payload.
-
-     A new keyring can be generated by setting type "keyring", the keyring name
-     as the description (or NULL) and setting the payload to NULL.
-
-     User defined keys can be created by specifying type "user". It is
-     recommended that a user defined key's description by prefixed with a type
-     ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting
-     ticket.
-
-     Any other type must have been registered with the kernel in advance by a
-     kernel service such as a filesystem.
-
-     The ID of the new or updated key is returned if successful.
-
-
- (*) Search the process's keyrings for a key, potentially calling out to
-     userspace to create it.
-
-	key_serial_t request_key(const char *type, const char *description,
-				 const char *callout_info,
-				 key_serial_t dest_keyring);
-
-     This function searches all the process's keyrings in the order thread,
-     process, session for a matching key. This works very much like
-     KEYCTL_SEARCH, including the optional attachment of the discovered key to
-     a keyring.
-
-     If a key cannot be found, and if callout_info is not NULL, then
-     /sbin/request-key will be invoked in an attempt to obtain a key. The
-     callout_info string will be passed as an argument to the program.
-
-     See also Documentation/security/keys-request-key.txt.
-
-
-The keyctl syscall functions are:
-
- (*) Map a special key ID to a real key ID for this process:
-
-	key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id,
-			    int create);
-
-     The special key specified by "id" is looked up (with the key being created
-     if necessary) and the ID of the key or keyring thus found is returned if
-     it exists.
-
-     If the key does not yet exist, the key will be created if "create" is
-     non-zero; and the error ENOKEY will be returned if "create" is zero.
-
-
- (*) Replace the session keyring this process subscribes to with a new one:
-
-	key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name);
-
-     If name is NULL, an anonymous keyring is created attached to the process
-     as its session keyring, displacing the old session keyring.
-
-     If name is not NULL, if a keyring of that name exists, the process
-     attempts to attach it as the session keyring, returning an error if that
-     is not permitted; otherwise a new keyring of that name is created and
-     attached as the session keyring.
-
-     To attach to a named keyring, the keyring must have search permission for
-     the process's ownership.
-
-     The ID of the new session keyring is returned if successful.
-
-
- (*) Update the specified key:
-
-	long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload,
-		    size_t plen);
-
-     This will try to update the specified key with the given payload, or it
-     will return error EOPNOTSUPP if that function is not supported by the key
-     type. The process must also have permission to write to the key to be able
-     to update it.
-
-     The payload is of length plen, and may be absent or empty as for
-     add_key().
-
-
- (*) Revoke a key:
-
-	long keyctl(KEYCTL_REVOKE, key_serial_t key);
-
-     This makes a key unavailable for further operations. Further attempts to
-     use the key will be met with error EKEYREVOKED, and the key will no longer
-     be findable.
-
-
- (*) Change the ownership of a key:
-
-	long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid);
-
-     This function permits a key's owner and group ID to be changed. Either one
-     of uid or gid can be set to -1 to suppress that change.
-
-     Only the superuser can change a key's owner to something other than the
-     key's current owner. Similarly, only the superuser can change a key's
-     group ID to something other than the calling process's group ID or one of
-     its group list members.
-
-
- (*) Change the permissions mask on a key:
-
-	long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm);
-
-     This function permits the owner of a key or the superuser to change the
-     permissions mask on a key.
-
-     Only bits the available bits are permitted; if any other bits are set,
-     error EINVAL will be returned.
-
-
- (*) Describe a key:
-
-	long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer,
-		    size_t buflen);
-
-     This function returns a summary of the key's attributes (but not its
-     payload data) as a string in the buffer provided.
-
-     Unless there's an error, it always returns the amount of data it could
-     produce, even if that's too big for the buffer, but it won't copy more
-     than requested to userspace. If the buffer pointer is NULL then no copy
-     will take place.
-
-     A process must have view permission on the key for this function to be
-     successful.
-
-     If successful, a string is placed in the buffer in the following format:
-
-	<type>;<uid>;<gid>;<perm>;<description>
-
-     Where type and description are strings, uid and gid are decimal, and perm
-     is hexadecimal. A NUL character is included at the end of the string if
-     the buffer is sufficiently big.
-
-     This can be parsed with
-
-	sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc);
-
-
- (*) Clear out a keyring:
-
-	long keyctl(KEYCTL_CLEAR, key_serial_t keyring);
-
-     This function clears the list of keys attached to a keyring. The calling
-     process must have write permission on the keyring, and it must be a
-     keyring (or else error ENOTDIR will result).
-
-     This function can also be used to clear special kernel keyrings if they
-     are appropriately marked if the user has CAP_SYS_ADMIN capability.  The
-     DNS resolver cache keyring is an example of this.
-
-
- (*) Link a key into a keyring:
-
-	long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key);
-
-     This function creates a link from the keyring to the key. The process must
-     have write permission on the keyring and must have link permission on the
-     key.
-
-     Should the keyring not be a keyring, error ENOTDIR will result; and if the
-     keyring is full, error ENFILE will result.
-
-     The link procedure checks the nesting of the keyrings, returning ELOOP if
-     it appears too deep or EDEADLK if the link would introduce a cycle.
-
-     Any links within the keyring to keys that match the new key in terms of
-     type and description will be discarded from the keyring as the new one is
-     added.
-
-
- (*) Unlink a key or keyring from another keyring:
-
-	long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key);
-
-     This function looks through the keyring for the first link to the
-     specified key, and removes it if found. Subsequent links to that key are
-     ignored. The process must have write permission on the keyring.
-
-     If the keyring is not a keyring, error ENOTDIR will result; and if the key
-     is not present, error ENOENT will be the result.
-
-
- (*) Search a keyring tree for a key:
-
-	key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring,
-			    const char *type, const char *description,
-			    key_serial_t dest_keyring);
-
-     This searches the keyring tree headed by the specified keyring until a key
-     is found that matches the type and description criteria. Each keyring is
-     checked for keys before recursion into its children occurs.
-
-     The process must have search permission on the top level keyring, or else
-     error EACCES will result. Only keyrings that the process has search
-     permission on will be recursed into, and only keys and keyrings for which
-     a process has search permission can be matched. If the specified keyring
-     is not a keyring, ENOTDIR will result.
-
-     If the search succeeds, the function will attempt to link the found key
-     into the destination keyring if one is supplied (non-zero ID). All the
-     constraints applicable to KEYCTL_LINK apply in this case too.
-
-     Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search
-     fails. On success, the resulting key ID will be returned.
-
-
- (*) Read the payload data from a key:
-
-	long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer,
-		    size_t buflen);
-
-     This function attempts to read the payload data from the specified key
-     into the buffer. The process must have read permission on the key to
-     succeed.
-
-     The returned data will be processed for presentation by the key type. For
-     instance, a keyring will return an array of key_serial_t entries
-     representing the IDs of all the keys to which it is subscribed. The user
-     defined key type will return its data as is. If a key type does not
-     implement this function, error EOPNOTSUPP will result.
-
-     As much of the data as can be fitted into the buffer will be copied to
-     userspace if the buffer pointer is not NULL.
-
-     On a successful return, the function will always return the amount of data
-     available rather than the amount copied.
-
-
- (*) Instantiate a partially constructed key.
-
-	long keyctl(KEYCTL_INSTANTIATE, key_serial_t key,
-		    const void *payload, size_t plen,
-		    key_serial_t keyring);
-	long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key,
-		    const struct iovec *payload_iov, unsigned ioc,
-		    key_serial_t keyring);
-
-     If the kernel calls back to userspace to complete the instantiation of a
-     key, userspace should use this call to supply data for the key before the
-     invoked process returns, or else the key will be marked negative
-     automatically.
-
-     The process must have write access on the key to be able to instantiate
-     it, and the key must be uninstantiated.
-
-     If a keyring is specified (non-zero), the key will also be linked into
-     that keyring, however all the constraints applying in KEYCTL_LINK apply in
-     this case too.
-
-     The payload and plen arguments describe the payload data as for add_key().
-
-     The payload_iov and ioc arguments describe the payload data in an iovec
-     array instead of a single buffer.
-
-
- (*) Negatively instantiate a partially constructed key.
-
-	long keyctl(KEYCTL_NEGATE, key_serial_t key,
-		    unsigned timeout, key_serial_t keyring);
-	long keyctl(KEYCTL_REJECT, key_serial_t key,
-		    unsigned timeout, unsigned error, key_serial_t keyring);
-
-     If the kernel calls back to userspace to complete the instantiation of a
-     key, userspace should use this call mark the key as negative before the
-     invoked process returns if it is unable to fulfill the request.
-
-     The process must have write access on the key to be able to instantiate
-     it, and the key must be uninstantiated.
-
-     If a keyring is specified (non-zero), the key will also be linked into
-     that keyring, however all the constraints applying in KEYCTL_LINK apply in
-     this case too.
-
-     If the key is rejected, future searches for it will return the specified
-     error code until the rejected key expires.  Negating the key is the same
-     as rejecting the key with ENOKEY as the error code.
-
-
- (*) Set the default request-key destination keyring.
-
-	long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl);
-
-     This sets the default keyring to which implicitly requested keys will be
-     attached for this thread. reqkey_defl should be one of these constants:
-
-	CONSTANT				VALUE	NEW DEFAULT KEYRING
-	======================================	======	=======================
-	KEY_REQKEY_DEFL_NO_CHANGE		-1	No change
-	KEY_REQKEY_DEFL_DEFAULT			0	Default[1]
-	KEY_REQKEY_DEFL_THREAD_KEYRING		1	Thread keyring
-	KEY_REQKEY_DEFL_PROCESS_KEYRING		2	Process keyring
-	KEY_REQKEY_DEFL_SESSION_KEYRING		3	Session keyring
-	KEY_REQKEY_DEFL_USER_KEYRING		4	User keyring
-	KEY_REQKEY_DEFL_USER_SESSION_KEYRING	5	User session keyring
-	KEY_REQKEY_DEFL_GROUP_KEYRING		6	Group keyring
-
-     The old default will be returned if successful and error EINVAL will be
-     returned if reqkey_defl is not one of the above values.
-
-     The default keyring can be overridden by the keyring indicated to the
-     request_key() system call.
-
-     Note that this setting is inherited across fork/exec.
-
-     [1] The default is: the thread keyring if there is one, otherwise
-     the process keyring if there is one, otherwise the session keyring if
-     there is one, otherwise the user default session keyring.
-
-
- (*) Set the timeout on a key.
-
-	long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout);
-
-     This sets or clears the timeout on a key. The timeout can be 0 to clear
-     the timeout or a number of seconds to set the expiry time that far into
-     the future.
-
-     The process must have attribute modification access on a key to set its
-     timeout. Timeouts may not be set with this function on negative, revoked
-     or expired keys.
-
-
- (*) Assume the authority granted to instantiate a key
-
-	long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key);
-
-     This assumes or divests the authority required to instantiate the
-     specified key. Authority can only be assumed if the thread has the
-     authorisation key associated with the specified key in its keyrings
-     somewhere.
-
-     Once authority is assumed, searches for keys will also search the
-     requester's keyrings using the requester's security label, UID, GID and
-     groups.
-
-     If the requested authority is unavailable, error EPERM will be returned,
-     likewise if the authority has been revoked because the target key is
-     already instantiated.
-
-     If the specified key is 0, then any assumed authority will be divested.
-
-     The assumed authoritative key is inherited across fork and exec.
-
-
- (*) Get the LSM security context attached to a key.
-
-	long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer,
-		    size_t buflen)
-
-     This function returns a string that represents the LSM security context
-     attached to a key in the buffer provided.
-
-     Unless there's an error, it always returns the amount of data it could
-     produce, even if that's too big for the buffer, but it won't copy more
-     than requested to userspace. If the buffer pointer is NULL then no copy
-     will take place.
-
-     A NUL character is included at the end of the string if the buffer is
-     sufficiently big.  This is included in the returned count.  If no LSM is
-     in force then an empty string will be returned.
-
-     A process must have view permission on the key for this function to be
-     successful.
-
-
- (*) Install the calling process's session keyring on its parent.
-
-	long keyctl(KEYCTL_SESSION_TO_PARENT);
-
-     This functions attempts to install the calling process's session keyring
-     on to the calling process's parent, replacing the parent's current session
-     keyring.
-
-     The calling process must have the same ownership as its parent, the
-     keyring must have the same ownership as the calling process, the calling
-     process must have LINK permission on the keyring and the active LSM module
-     mustn't deny permission, otherwise error EPERM will be returned.
-
-     Error ENOMEM will be returned if there was insufficient memory to complete
-     the operation, otherwise 0 will be returned to indicate success.
-
-     The keyring will be replaced next time the parent process leaves the
-     kernel and resumes executing userspace.
-
-
- (*) Invalidate a key.
-
-	long keyctl(KEYCTL_INVALIDATE, key_serial_t key);
-
-     This function marks a key as being invalidated and then wakes up the
-     garbage collector.  The garbage collector immediately removes invalidated
-     keys from all keyrings and deletes the key when its reference count
-     reaches zero.
-
-     Keys that are marked invalidated become invisible to normal key operations
-     immediately, though they are still visible in /proc/keys until deleted
-     (they're marked with an 'i' flag).
-
-     A process must have search permission on the key for this function to be
-     successful.
-
- (*) Compute a Diffie-Hellman shared secret or public key
-
-       long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params,
-		   char *buffer, size_t buflen,
-		   struct keyctl_kdf_params *kdf);
-
-     The params struct contains serial numbers for three keys:
-
-	 - The prime, p, known to both parties
-	 - The local private key
-	 - The base integer, which is either a shared generator or the
-	   remote public key
-
-     The value computed is:
-
-	result = base ^ private (mod prime)
-
-     If the base is the shared generator, the result is the local
-     public key.  If the base is the remote public key, the result is
-     the shared secret.
-
-     If the parameter kdf is NULL, the following applies:
-
-	 - The buffer length must be at least the length of the prime, or zero.
-
-	 - If the buffer length is nonzero, the length of the result is
-	   returned when it is successfully calculated and copied in to the
-	   buffer. When the buffer length is zero, the minimum required
-	   buffer length is returned.
-
-     The kdf parameter allows the caller to apply a key derivation function
-     (KDF) on the Diffie-Hellman computation where only the result
-     of the KDF is returned to the caller. The KDF is characterized with
-     struct keyctl_kdf_params as follows:
-
-	 - char *hashname specifies the NUL terminated string identifying
-	   the hash used from the kernel crypto API and applied for the KDF
-	   operation. The KDF implemenation complies with SP800-56A as well
-	   as with SP800-108 (the counter KDF).
-
-	 - char *otherinfo specifies the OtherInfo data as documented in
-	   SP800-56A section 5.8.1.2. The length of the buffer is given with
-	   otherinfolen. The format of OtherInfo is defined by the caller.
-	   The otherinfo pointer may be NULL if no OtherInfo shall be used.
-
-     This function will return error EOPNOTSUPP if the key type is not
-     supported, error ENOKEY if the key could not be found, or error
-     EACCES if the key is not readable by the caller. In addition, the
-     function will return EMSGSIZE when the parameter kdf is non-NULL
-     and either the buffer length or the OtherInfo length exceeds the
-     allowed length.
-
- (*) Restrict keyring linkage
-
-       long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring,
-		   const char *type, const char *restriction);
-
-     An existing keyring can restrict linkage of additional keys by evaluating
-     the contents of the key according to a restriction scheme.
-
-     "keyring" is the key ID for an existing keyring to apply a restriction
-     to. It may be empty or may already have keys linked. Existing linked keys
-     will remain in the keyring even if the new restriction would reject them.
-
-     "type" is a registered key type.
-
-     "restriction" is a string describing how key linkage is to be restricted.
-     The format varies depending on the key type, and the string is passed to
-     the lookup_restriction() function for the requested type.  It may specify
-     a method and relevant data for the restriction such as signature
-     verification or constraints on key payload. If the requested key type is
-     later unregistered, no keys may be added to the keyring after the key type
-     is removed.
-
-     To apply a keyring restriction the process must have Set Attribute
-     permission and the keyring must not be previously restricted.
-
-===============
-KERNEL SERVICES
-===============
-
-The kernel services for key management are fairly simple to deal with. They can
-be broken down into two areas: keys and key types.
-
-Dealing with keys is fairly straightforward. Firstly, the kernel service
-registers its type, then it searches for a key of that type. It should retain
-the key as long as it has need of it, and then it should release it. For a
-filesystem or device file, a search would probably be performed during the open
-call, and the key released upon close. How to deal with conflicting keys due to
-two different users opening the same file is left to the filesystem author to
-solve.
-
-To access the key manager, the following header must be #included:
-
-	<linux/key.h>
-
-Specific key types should have a header file under include/keys/ that should be
-used to access that type.  For keys of type "user", for example, that would be:
-
-	<keys/user-type.h>
-
-Note that there are two different types of pointers to keys that may be
-encountered:
-
- (*) struct key *
-
-     This simply points to the key structure itself. Key structures will be at
-     least four-byte aligned.
-
- (*) key_ref_t
-
-     This is equivalent to a struct key *, but the least significant bit is set
-     if the caller "possesses" the key. By "possession" it is meant that the
-     calling processes has a searchable link to the key from one of its
-     keyrings. There are three functions for dealing with these:
-
-	key_ref_t make_key_ref(const struct key *key, bool possession);
-
-	struct key *key_ref_to_ptr(const key_ref_t key_ref);
-
-	bool is_key_possessed(const key_ref_t key_ref);
-
-     The first function constructs a key reference from a key pointer and
-     possession information (which must be true or false).
-
-     The second function retrieves the key pointer from a reference and the
-     third retrieves the possession flag.
-
-When accessing a key's payload contents, certain precautions must be taken to
-prevent access vs modification races. See the section "Notes on accessing
-payload contents" for more information.
-
-(*) To search for a key, call:
-
-	struct key *request_key(const struct key_type *type,
-				const char *description,
-				const char *callout_info);
-
-    This is used to request a key or keyring with a description that matches
-    the description specified according to the key type's match_preparse()
-    method. This permits approximate matching to occur. If callout_string is
-    not NULL, then /sbin/request-key will be invoked in an attempt to obtain
-    the key from userspace. In that case, callout_string will be passed as an
-    argument to the program.
-
-    Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be
-    returned.
-
-    If successful, the key will have been attached to the default keyring for
-    implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
-
-    See also Documentation/security/keys-request-key.txt.
-
-
-(*) To search for a key, passing auxiliary data to the upcaller, call:
-
-	struct key *request_key_with_auxdata(const struct key_type *type,
-					     const char *description,
-					     const void *callout_info,
-					     size_t callout_len,
-					     void *aux);
-
-    This is identical to request_key(), except that the auxiliary data is
-    passed to the key_type->request_key() op if it exists, and the callout_info
-    is a blob of length callout_len, if given (the length may be 0).
-
-
-(*) A key can be requested asynchronously by calling one of:
-
-	struct key *request_key_async(const struct key_type *type,
-				      const char *description,
-				      const void *callout_info,
-				      size_t callout_len);
-
-    or:
-
-	struct key *request_key_async_with_auxdata(const struct key_type *type,
-						   const char *description,
-						   const char *callout_info,
-					     	   size_t callout_len,
-					     	   void *aux);
-
-    which are asynchronous equivalents of request_key() and
-    request_key_with_auxdata() respectively.
-
-    These two functions return with the key potentially still under
-    construction.  To wait for construction completion, the following should be
-    called:
-
-	int wait_for_key_construction(struct key *key, bool intr);
-
-    The function will wait for the key to finish being constructed and then
-    invokes key_validate() to return an appropriate value to indicate the state
-    of the key (0 indicates the key is usable).
-
-    If intr is true, then the wait can be interrupted by a signal, in which
-    case error ERESTARTSYS will be returned.
-
-
-(*) When it is no longer required, the key should be released using:
-
-	void key_put(struct key *key);
-
-    Or:
-
-	void key_ref_put(key_ref_t key_ref);
-
-    These can be called from interrupt context. If CONFIG_KEYS is not set then
-    the argument will not be parsed.
-
-
-(*) Extra references can be made to a key by calling one of the following
-    functions:
-
-	struct key *__key_get(struct key *key);
-	struct key *key_get(struct key *key);
-
-    Keys so references will need to be disposed of by calling key_put() when
-    they've been finished with.  The key pointer passed in will be returned.
-
-    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
-    then the key will not be dereferenced and no increment will take place.
-
-
-(*) A key's serial number can be obtained by calling:
-
-	key_serial_t key_serial(struct key *key);
-
-    If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the
-    latter case without parsing the argument).
-
-
-(*) If a keyring was found in the search, this can be further searched by:
-
-	key_ref_t keyring_search(key_ref_t keyring_ref,
-				 const struct key_type *type,
-				 const char *description)
-
-    This searches the keyring tree specified for a matching key. Error ENOKEY
-    is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful,
-    the returned key will need to be released.
-
-    The possession attribute from the keyring reference is used to control
-    access through the permissions mask and is propagated to the returned key
-    reference pointer if successful.
-
-
-(*) A keyring can be created by:
-
-	struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
-				  const struct cred *cred,
-				  key_perm_t perm,
-				  struct key_restriction *restrict_link,
-				  unsigned long flags,
-				  struct key *dest);
-
-    This creates a keyring with the given attributes and returns it.  If dest
-    is not NULL, the new keyring will be linked into the keyring to which it
-    points.  No permission checks are made upon the destination keyring.
-
-    Error EDQUOT can be returned if the keyring would overload the quota (pass
-    KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted
-    towards the user's quota).  Error ENOMEM can also be returned.
-
-    If restrict_link is not NULL, it should point to a structure that contains
-    the function that will be called each time an attempt is made to link a
-    key into the new keyring.  The structure may also contain a key pointer
-    and an associated key type.  The function is called to check whether a key
-    may be added into the keyring or not.  The key type is used by the garbage
-    collector to clean up function or data pointers in this structure if the
-    given key type is unregistered.  Callers of key_create_or_update() within
-    the kernel can pass KEY_ALLOC_BYPASS_RESTRICTION to suppress the check.
-    An example of using this is to manage rings of cryptographic keys that are
-    set up when the kernel boots where userspace is also permitted to add keys
-    - provided they can be verified by a key the kernel already has.
-
-    When called, the restriction function will be passed the keyring being
-    added to, the key type, the payload of the key being added, and data to be
-    used in the restriction check.  Note that when a new key is being created,
-    this is called between payload preparsing and actual key creation.  The
-    function should return 0 to allow the link or an error to reject it.
-
-    A convenience function, restrict_link_reject, exists to always return
-    -EPERM to in this case.
-
-
-(*) To check the validity of a key, this function can be called:
-
-	int validate_key(struct key *key);
-
-    This checks that the key in question hasn't expired or and hasn't been
-    revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will
-    be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be
-    returned (in the latter case without parsing the argument).
-
-
-(*) To register a key type, the following function should be called:
-
-	int register_key_type(struct key_type *type);
-
-    This will return error EEXIST if a type of the same name is already
-    present.
-
-
-(*) To unregister a key type, call:
-
-	void unregister_key_type(struct key_type *type);
-
-
-Under some circumstances, it may be desirable to deal with a bundle of keys.
-The facility provides access to the keyring type for managing such a bundle:
-
-	struct key_type key_type_keyring;
-
-This can be used with a function such as request_key() to find a specific
-keyring in a process's keyrings.  A keyring thus found can then be searched
-with keyring_search().  Note that it is not possible to use request_key() to
-search a specific keyring, so using keyrings in this way is of limited utility.
-
-
-===================================
-NOTES ON ACCESSING PAYLOAD CONTENTS
-===================================
-
-The simplest payload is just data stored in key->payload directly.  In this
-case, there's no need to indulge in RCU or locking when accessing the payload.
-
-More complex payload contents must be allocated and pointers to them set in the
-key->payload.data[] array.  One of the following ways must be selected to
-access the data:
-
- (1) Unmodifiable key type.
-
-     If the key type does not have a modify method, then the key's payload can
-     be accessed without any form of locking, provided that it's known to be
-     instantiated (uninstantiated keys cannot be "found").
-
- (2) The key's semaphore.
-
-     The semaphore could be used to govern access to the payload and to control
-     the payload pointer. It must be write-locked for modifications and would
-     have to be read-locked for general access. The disadvantage of doing this
-     is that the accessor may be required to sleep.
-
- (3) RCU.
-
-     RCU must be used when the semaphore isn't already held; if the semaphore
-     is held then the contents can't change under you unexpectedly as the
-     semaphore must still be used to serialise modifications to the key. The
-     key management code takes care of this for the key type.
-
-     However, this means using:
-
-	rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock()
-
-     to read the pointer, and:
-
-	rcu_dereference() ... rcu_assign_pointer() ... call_rcu()
-
-     to set the pointer and dispose of the old contents after a grace period.
-     Note that only the key type should ever modify a key's payload.
-
-     Furthermore, an RCU controlled payload must hold a struct rcu_head for the
-     use of call_rcu() and, if the payload is of variable size, the length of
-     the payload. key->datalen cannot be relied upon to be consistent with the
-     payload just dereferenced if the key's semaphore is not held.
-
-     Note that key->payload.data[0] has a shadow that is marked for __rcu
-     usage.  This is called key->payload.rcu_data0.  The following accessors
-     wrap the RCU calls to this element:
-
-     (a) Set or change the first payload pointer:
-
-		rcu_assign_keypointer(struct key *key, void *data);
-
-     (b) Read the first payload pointer with the key semaphore held:
-
-		[const] void *dereference_key_locked([const] struct key *key);
-
-	 Note that the return value will inherit its constness from the key
-	 parameter.  Static analysis will give an error if it things the lock
-	 isn't held.
-
-     (c) Read the first payload pointer with the RCU read lock held:
-
-		const void *dereference_key_rcu(const struct key *key);
-
-
-===================
-DEFINING A KEY TYPE
-===================
-
-A kernel service may want to define its own key type. For instance, an AFS
-filesystem might want to define a Kerberos 5 ticket key type. To do this, it
-author fills in a key_type struct and registers it with the system.
-
-Source files that implement key types should include the following header file:
-
-	<linux/key-type.h>
-
-The structure has a number of fields, some of which are mandatory:
-
- (*) const char *name
-
-     The name of the key type. This is used to translate a key type name
-     supplied by userspace into a pointer to the structure.
-
-
- (*) size_t def_datalen
-
-     This is optional - it supplies the default payload data length as
-     contributed to the quota. If the key type's payload is always or almost
-     always the same size, then this is a more efficient way to do things.
-
-     The data length (and quota) on a particular key can always be changed
-     during instantiation or update by calling:
-
-	int key_payload_reserve(struct key *key, size_t datalen);
-
-     With the revised data length. Error EDQUOT will be returned if this is not
-     viable.
-
-
- (*) int (*vet_description)(const char *description);
-
-     This optional method is called to vet a key description.  If the key type
-     doesn't approve of the key description, it may return an error, otherwise
-     it should return 0.
-
-
- (*) int (*preparse)(struct key_preparsed_payload *prep);
-
-     This optional method permits the key type to attempt to parse payload
-     before a key is created (add key) or the key semaphore is taken (update or
-     instantiate key).  The structure pointed to by prep looks like:
-
-	struct key_preparsed_payload {
-		char		*description;
-		union key_payload payload;
-		const void	*data;
-		size_t		datalen;
-		size_t		quotalen;
-		time_t		expiry;
-	};
-
-     Before calling the method, the caller will fill in data and datalen with
-     the payload blob parameters; quotalen will be filled in with the default
-     quota size from the key type; expiry will be set to TIME_T_MAX and the
-     rest will be cleared.
-
-     If a description can be proposed from the payload contents, that should be
-     attached as a string to the description field.  This will be used for the
-     key description if the caller of add_key() passes NULL or "".
-
-     The method can attach anything it likes to payload.  This is merely passed
-     along to the instantiate() or update() operations.  If set, the expiry
-     time will be applied to the key if it is instantiated from this data.
-
-     The method should return 0 if successful or a negative error code
-     otherwise.
-
-
- (*) void (*free_preparse)(struct key_preparsed_payload *prep);
-
-     This method is only required if the preparse() method is provided,
-     otherwise it is unused.  It cleans up anything attached to the description
-     and payload fields of the key_preparsed_payload struct as filled in by the
-     preparse() method.  It will always be called after preparse() returns
-     successfully, even if instantiate() or update() succeed.
-
-
- (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
-
-     This method is called to attach a payload to a key during construction.
-     The payload attached need not bear any relation to the data passed to this
-     function.
-
-     The prep->data and prep->datalen fields will define the original payload
-     blob.  If preparse() was supplied then other fields may be filled in also.
-
-     If the amount of data attached to the key differs from the size in
-     keytype->def_datalen, then key_payload_reserve() should be called.
-
-     This method does not have to lock the key in order to attach a payload.
-     The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents
-     anything else from gaining access to the key.
-
-     It is safe to sleep in this method.
-
-     generic_key_instantiate() is provided to simply copy the data from
-     prep->payload.data[] to key->payload.data[], with RCU-safe assignment on
-     the first element.  It will then clear prep->payload.data[] so that the
-     free_preparse method doesn't release the data.
-
-
- (*) int (*update)(struct key *key, const void *data, size_t datalen);
-
-     If this type of key can be updated, then this method should be provided.
-     It is called to update a key's payload from the blob of data provided.
-
-     The prep->data and prep->datalen fields will define the original payload
-     blob.  If preparse() was supplied then other fields may be filled in also.
-
-     key_payload_reserve() should be called if the data length might change
-     before any changes are actually made. Note that if this succeeds, the type
-     is committed to changing the key because it's already been altered, so all
-     memory allocation must be done first.
-
-     The key will have its semaphore write-locked before this method is called,
-     but this only deters other writers; any changes to the key's payload must
-     be made under RCU conditions, and call_rcu() must be used to dispose of
-     the old payload.
-
-     key_payload_reserve() should be called before the changes are made, but
-     after all allocations and other potentially failing function calls are
-     made.
-
-     It is safe to sleep in this method.
-
-
- (*) int (*match_preparse)(struct key_match_data *match_data);
-
-     This method is optional.  It is called when a key search is about to be
-     performed.  It is given the following structure:
-
-	struct key_match_data {
-		bool (*cmp)(const struct key *key,
-			    const struct key_match_data *match_data);
-		const void	*raw_data;
-		void		*preparsed;
-		unsigned	lookup_type;
-	};
-
-     On entry, raw_data will be pointing to the criteria to be used in matching
-     a key by the caller and should not be modified.  (*cmp)() will be pointing
-     to the default matcher function (which does an exact description match
-     against raw_data) and lookup_type will be set to indicate a direct lookup.
-
-     The following lookup_type values are available:
-
-      [*] KEYRING_SEARCH_LOOKUP_DIRECT - A direct lookup hashes the type and
-      	  description to narrow down the search to a small number of keys.
-
-      [*] KEYRING_SEARCH_LOOKUP_ITERATE - An iterative lookup walks all the
-      	  keys in the keyring until one is matched.  This must be used for any
-      	  search that's not doing a simple direct match on the key description.
-
-     The method may set cmp to point to a function of its choice that does some
-     other form of match, may set lookup_type to KEYRING_SEARCH_LOOKUP_ITERATE
-     and may attach something to the preparsed pointer for use by (*cmp)().
-     (*cmp)() should return true if a key matches and false otherwise.
-
-     If preparsed is set, it may be necessary to use the match_free() method to
-     clean it up.
-
-     The method should return 0 if successful or a negative error code
-     otherwise.
-
-     It is permitted to sleep in this method, but (*cmp)() may not sleep as
-     locks will be held over it.
-
-     If match_preparse() is not provided, keys of this type will be matched
-     exactly by their description.
-
-
- (*) void (*match_free)(struct key_match_data *match_data);
-
-     This method is optional.  If given, it called to clean up
-     match_data->preparsed after a successful call to match_preparse().
-
-
- (*) void (*revoke)(struct key *key);
-
-     This method is optional.  It is called to discard part of the payload
-     data upon a key being revoked.  The caller will have the key semaphore
-     write-locked.
-
-     It is safe to sleep in this method, though care should be taken to avoid
-     a deadlock against the key semaphore.
-
-
- (*) void (*destroy)(struct key *key);
-
-     This method is optional. It is called to discard the payload data on a key
-     when it is being destroyed.
-
-     This method does not need to lock the key to access the payload; it can
-     consider the key as being inaccessible at this time. Note that the key's
-     type may have been changed before this function is called.
-
-     It is not safe to sleep in this method; the caller may hold spinlocks.
-
-
- (*) void (*describe)(const struct key *key, struct seq_file *p);
-
-     This method is optional. It is called during /proc/keys reading to
-     summarise a key's description and payload in text form.
-
-     This method will be called with the RCU read lock held. rcu_dereference()
-     should be used to read the payload pointer if the payload is to be
-     accessed. key->datalen cannot be trusted to stay consistent with the
-     contents of the payload.
-
-     The description will not change, though the key's state may.
-
-     It is not safe to sleep in this method; the RCU read lock is held by the
-     caller.
-
-
- (*) long (*read)(const struct key *key, char __user *buffer, size_t buflen);
-
-     This method is optional. It is called by KEYCTL_READ to translate the
-     key's payload into something a blob of data for userspace to deal with.
-     Ideally, the blob should be in the same format as that passed in to the
-     instantiate and update methods.
-
-     If successful, the blob size that could be produced should be returned
-     rather than the size copied.
-
-     This method will be called with the key's semaphore read-locked. This will
-     prevent the key's payload changing. It is not necessary to use RCU locking
-     when accessing the key's payload. It is safe to sleep in this method, such
-     as might happen when the userspace buffer is accessed.
-
-
- (*) int (*request_key)(struct key_construction *cons, const char *op,
-			void *aux);
-
-     This method is optional.  If provided, request_key() and friends will
-     invoke this function rather than upcalling to /sbin/request-key to operate
-     upon a key of this type.
-
-     The aux parameter is as passed to request_key_async_with_auxdata() and
-     similar or is NULL otherwise.  Also passed are the construction record for
-     the key to be operated upon and the operation type (currently only
-     "create").
-
-     This method is permitted to return before the upcall is complete, but the
-     following function must be called under all circumstances to complete the
-     instantiation process, whether or not it succeeds, whether or not there's
-     an error:
-
-	void complete_request_key(struct key_construction *cons, int error);
-
-     The error parameter should be 0 on success, -ve on error.  The
-     construction record is destroyed by this action and the authorisation key
-     will be revoked.  If an error is indicated, the key under construction
-     will be negatively instantiated if it wasn't already instantiated.
-
-     If this method returns an error, that error will be returned to the
-     caller of request_key*().  complete_request_key() must be called prior to
-     returning.
-
-     The key under construction and the authorisation key can be found in the
-     key_construction struct pointed to by cons:
-
-     (*) struct key *key;
-
-     	 The key under construction.
-
-     (*) struct key *authkey;
-
-     	 The authorisation key.
-
-
- (*) struct key_restriction *(*lookup_restriction)(const char *params);
-
-     This optional method is used to enable userspace configuration of keyring
-     restrictions. The restriction parameter string (not including the key type
-     name) is passed in, and this method returns a pointer to a key_restriction
-     structure containing the relevant functions and data to evaluate each
-     attempted key link operation. If there is no match, -EINVAL is returned.
-
-
-============================
-REQUEST-KEY CALLBACK SERVICE
-============================
-
-To create a new key, the kernel will attempt to execute the following command
-line:
-
-	/sbin/request-key create <key> <uid> <gid> \
-		<threadring> <processring> <sessionring> <callout_info>
-
-<key> is the key being constructed, and the three keyrings are the process
-keyrings from the process that caused the search to be issued. These are
-included for two reasons:
-
-  (1) There may be an authentication token in one of the keyrings that is
-      required to obtain the key, eg: a Kerberos Ticket-Granting Ticket.
-
-  (2) The new key should probably be cached in one of these rings.
-
-This program should set it UID and GID to those specified before attempting to
-access any more keys. It may then look around for a user specific process to
-hand the request off to (perhaps a path held in placed in another key by, for
-example, the KDE desktop manager).
-
-The program (or whatever it calls) should finish construction of the key by
-calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to
-cache the key in one of the keyrings (probably the session ring) before
-returning.  Alternatively, the key can be marked as negative with KEYCTL_NEGATE
-or KEYCTL_REJECT; this also permits the key to be cached in one of the
-keyrings.
-
-If it returns with the key remaining in the unconstructed state, the key will
-be marked as being negative, it will be added to the session keyring, and an
-error will be returned to the key requestor.
-
-Supplementary information may be provided from whoever or whatever invoked this
-service. This will be passed as the <callout_info> parameter. If no such
-information was made available, then "-" will be passed as this parameter
-instead.
-
-
-Similarly, the kernel may attempt to update an expired or a soon to expire key
-by executing:
-
-	/sbin/request-key update <key> <uid> <gid> \
-		<threadring> <processring> <sessionring>
-
-In this case, the program isn't required to actually attach the key to a ring;
-the rings are provided for reference.
-
-
-==================
-GARBAGE COLLECTION
-==================
-
-Dead keys (for which the type has been removed) will be automatically unlinked
-from those keyrings that point to them and deleted as soon as possible by a
-background garbage collector.
-
-Similarly, revoked and expired keys will be garbage collected, but only after a
-certain amount of time has passed.  This time is set as a number of seconds in:
-
-	/proc/sys/kernel/keys/gc_delay
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
new file mode 100644
index 000000000000..0d831a7afe4f
--- /dev/null
+++ b/Documentation/security/keys/core.rst
@@ -0,0 +1,1550 @@
+============================
+Kernel Key Retention Service
+============================
+
+This service allows cryptographic keys, authentication tokens, cross-domain
+user mappings, and similar to be cached in the kernel for the use of
+filesystems and other kernel services.
+
+Keyrings are permitted; these are a special type of key that can hold links to
+other keys. Processes each have three standard keyring subscriptions that a
+kernel service can search for relevant keys.
+
+The key service can be configured on by enabling:
+
+	"Security options"/"Enable access key retention support" (CONFIG_KEYS)
+
+This document has the following sections:
+
+	- Key overview
+	- Key service overview
+	- Key access permissions
+	- SELinux support
+	- New procfs files
+	- Userspace system call interface
+	- Kernel services
+	- Notes on accessing payload contents
+	- Defining a key type
+	- Request-key callback service
+	- Garbage collection
+
+
+Key Overview
+============
+
+In this context, keys represent units of cryptographic data, authentication
+tokens, keyrings, etc.. These are represented in the kernel by struct key.
+
+Each key has a number of attributes:
+
+	- A serial number.
+	- A type.
+	- A description (for matching a key in a search).
+	- Access control information.
+	- An expiry time.
+	- A payload.
+	- State.
+
+
+  *  Each key is issued a serial number of type key_serial_t that is unique for
+     the lifetime of that key. All serial numbers are positive non-zero 32-bit
+     integers.
+
+     Userspace programs can use a key's serial numbers as a way to gain access
+     to it, subject to permission checking.
+
+  *  Each key is of a defined "type". Types must be registered inside the
+     kernel by a kernel service (such as a filesystem) before keys of that type
+     can be added or used. Userspace programs cannot define new types directly.
+
+     Key types are represented in the kernel by struct key_type. This defines a
+     number of operations that can be performed on a key of that type.
+
+     Should a type be removed from the system, all the keys of that type will
+     be invalidated.
+
+  *  Each key has a description. This should be a printable string. The key
+     type provides an operation to perform a match between the description on a
+     key and a criterion string.
+
+  *  Each key has an owner user ID, a group ID and a permissions mask. These
+     are used to control what a process may do to a key from userspace, and
+     whether a kernel service will be able to find the key.
+
+  *  Each key can be set to expire at a specific time by the key type's
+     instantiation function. Keys can also be immortal.
+
+  *  Each key can have a payload. This is a quantity of data that represent the
+     actual "key". In the case of a keyring, this is a list of keys to which
+     the keyring links; in the case of a user-defined key, it's an arbitrary
+     blob of data.
+
+     Having a payload is not required; and the payload can, in fact, just be a
+     value stored in the struct key itself.
+
+     When a key is instantiated, the key type's instantiation function is
+     called with a blob of data, and that then creates the key's payload in
+     some way.
+
+     Similarly, when userspace wants to read back the contents of the key, if
+     permitted, another key type operation will be called to convert the key's
+     attached payload back into a blob of data.
+
+  *  Each key can be in one of a number of basic states:
+
+      *  Uninstantiated. The key exists, but does not have any data attached.
+     	 Keys being requested from userspace will be in this state.
+
+      *  Instantiated. This is the normal state. The key is fully formed, and
+	 has data attached.
+
+      *  Negative. This is a relatively short-lived state. The key acts as a
+	 note saying that a previous call out to userspace failed, and acts as
+	 a throttle on key lookups. A negative key can be updated to a normal
+	 state.
+
+      *  Expired. Keys can have lifetimes set. If their lifetime is exceeded,
+	 they traverse to this state. An expired key can be updated back to a
+	 normal state.
+
+      *  Revoked. A key is put in this state by userspace action. It can't be
+	 found or operated upon (apart from by unlinking it).
+
+      *  Dead. The key's type was unregistered, and so the key is now useless.
+
+Keys in the last three states are subject to garbage collection.  See the
+section on "Garbage collection".
+
+
+Key Service Overview
+====================
+
+The key service provides a number of features besides keys:
+
+  *  The key service defines three special key types:
+
+     (+) "keyring"
+
+	 Keyrings are special keys that contain a list of other keys. Keyring
+	 lists can be modified using various system calls. Keyrings should not
+	 be given a payload when created.
+
+     (+) "user"
+
+	 A key of this type has a description and a payload that are arbitrary
+	 blobs of data. These can be created, updated and read by userspace,
+	 and aren't intended for use by kernel services.
+
+     (+) "logon"
+
+	 Like a "user" key, a "logon" key has a payload that is an arbitrary
+	 blob of data. It is intended as a place to store secrets which are
+	 accessible to the kernel but not to userspace programs.
+
+	 The description can be arbitrary, but must be prefixed with a non-zero
+	 length string that describes the key "subclass". The subclass is
+	 separated from the rest of the description by a ':'. "logon" keys can
+	 be created and updated from userspace, but the payload is only
+	 readable from kernel space.
+
+  *  Each process subscribes to three keyrings: a thread-specific keyring, a
+     process-specific keyring, and a session-specific keyring.
+
+     The thread-specific keyring is discarded from the child when any sort of
+     clone, fork, vfork or execve occurs. A new keyring is created only when
+     required.
+
+     The process-specific keyring is replaced with an empty one in the child on
+     clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is
+     shared. execve also discards the process's process keyring and creates a
+     new one.
+
+     The session-specific keyring is persistent across clone, fork, vfork and
+     execve, even when the latter executes a set-UID or set-GID binary. A
+     process can, however, replace its current session keyring with a new one
+     by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous
+     new one, or to attempt to create or join one of a specific name.
+
+     The ownership of the thread keyring changes when the real UID and GID of
+     the thread changes.
+
+  *  Each user ID resident in the system holds two special keyrings: a user
+     specific keyring and a default user session keyring. The default session
+     keyring is initialised with a link to the user-specific keyring.
+
+     When a process changes its real UID, if it used to have no session key, it
+     will be subscribed to the default session key for the new UID.
+
+     If a process attempts to access its session key when it doesn't have one,
+     it will be subscribed to the default for its current UID.
+
+  *  Each user has two quotas against which the keys they own are tracked. One
+     limits the total number of keys and keyrings, the other limits the total
+     amount of description and payload space that can be consumed.
+
+     The user can view information on this and other statistics through procfs
+     files.  The root user may also alter the quota limits through sysctl files
+     (see the section "New procfs files").
+
+     Process-specific and thread-specific keyrings are not counted towards a
+     user's quota.
+
+     If a system call that modifies a key or keyring in some way would put the
+     user over quota, the operation is refused and error EDQUOT is returned.
+
+  *  There's a system call interface by which userspace programs can create and
+     manipulate keys and keyrings.
+
+  *  There's a kernel interface by which services can register types and search
+     for keys.
+
+  *  There's a way for the a search done from the kernel to call back to
+     userspace to request a key that can't be found in a process's keyrings.
+
+  *  An optional filesystem is available through which the key database can be
+     viewed and manipulated.
+
+
+Key Access Permissions
+======================
+
+Keys have an owner user ID, a group access ID, and a permissions mask. The mask
+has up to eight bits each for possessor, user, group and other access. Only
+six of each set of eight bits are defined. These permissions granted are:
+
+  *  View
+
+     This permits a key or keyring's attributes to be viewed - including key
+     type and description.
+
+  *  Read
+
+     This permits a key's payload to be viewed or a keyring's list of linked
+     keys.
+
+  *  Write
+
+     This permits a key's payload to be instantiated or updated, or it allows a
+     link to be added to or removed from a keyring.
+
+  *  Search
+
+     This permits keyrings to be searched and keys to be found. Searches can
+     only recurse into nested keyrings that have search permission set.
+
+  *  Link
+
+     This permits a key or keyring to be linked to. To create a link from a
+     keyring to a key, a process must have Write permission on the keyring and
+     Link permission on the key.
+
+  *  Set Attribute
+
+     This permits a key's UID, GID and permissions mask to be changed.
+
+For changing the ownership, group ID or permissions mask, being the owner of
+the key or having the sysadmin capability is sufficient.
+
+
+SELinux Support
+===============
+
+The security class "key" has been added to SELinux so that mandatory access
+controls can be applied to keys created within various contexts.  This support
+is preliminary, and is likely to change quite significantly in the near future.
+Currently, all of the basic permissions explained above are provided in SELinux
+as well; SELinux is simply invoked after all basic permission checks have been
+performed.
+
+The value of the file /proc/self/attr/keycreate influences the labeling of
+newly-created keys.  If the contents of that file correspond to an SELinux
+security context, then the key will be assigned that context.  Otherwise, the
+key will be assigned the current context of the task that invoked the key
+creation request.  Tasks must be granted explicit permission to assign a
+particular context to newly-created keys, using the "create" permission in the
+key security class.
+
+The default keyrings associated with users will be labeled with the default
+context of the user if and only if the login programs have been instrumented to
+properly initialize keycreate during the login process.  Otherwise, they will
+be labeled with the context of the login program itself.
+
+Note, however, that the default keyrings associated with the root user are
+labeled with the default kernel context, since they are created early in the
+boot process, before root has a chance to log in.
+
+The keyrings associated with new threads are each labeled with the context of
+their associated thread, and both session and process keyrings are handled
+similarly.
+
+
+New ProcFS Files
+================
+
+Two files have been added to procfs by which an administrator can find out
+about the status of the key service:
+
+  *  /proc/keys
+
+     This lists the keys that are currently viewable by the task reading the
+     file, giving information about their type, description and permissions.
+     It is not possible to view the payload of the key this way, though some
+     information about it may be given.
+
+     The only keys included in the list are those that grant View permission to
+     the reading process whether or not it possesses them.  Note that LSM
+     security checks are still performed, and may further filter out keys that
+     the current process is not authorised to view.
+
+     The contents of the file look like this::
+
+	SERIAL   FLAGS  USAGE EXPY PERM     UID   GID   TYPE      DESCRIPTION: SUMMARY
+	00000001 I-----    39 perm 1f3f0000     0     0 keyring   _uid_ses.0: 1/4
+	00000002 I-----     2 perm 1f3f0000     0     0 keyring   _uid.0: empty
+	00000007 I-----     1 perm 1f3f0000     0     0 keyring   _pid.1: empty
+	0000018d I-----     1 perm 1f3f0000     0     0 keyring   _pid.412: empty
+	000004d2 I--Q--     1 perm 1f3f0000    32    -1 keyring   _uid.32: 1/4
+	000004d3 I--Q--     3 perm 1f3f0000    32    -1 keyring   _uid_ses.32: empty
+	00000892 I--QU-     1 perm 1f000000     0     0 user      metal:copper: 0
+	00000893 I--Q-N     1  35s 1f3f0000     0     0 user      metal:silver: 0
+	00000894 I--Q--     1  10h 003f0000     0     0 user      metal:gold: 0
+
+     The flags are::
+
+	I	Instantiated
+	R	Revoked
+	D	Dead
+	Q	Contributes to user's quota
+	U	Under construction by callback to userspace
+	N	Negative key
+
+
+  *  /proc/key-users
+
+     This file lists the tracking data for each user that has at least one key
+     on the system.  Such data includes quota information and statistics::
+
+	[root@andromeda root]# cat /proc/key-users
+	0:     46 45/45 1/100 13/10000
+	29:     2 2/2 2/100 40/10000
+	32:     2 2/2 2/100 40/10000
+	38:     2 2/2 2/100 40/10000
+
+     The format of each line is::
+
+	<UID>:			User ID to which this applies
+	<usage>			Structure refcount
+	<inst>/<keys>		Total number of keys and number instantiated
+	<keys>/<max>		Key count quota
+	<bytes>/<max>		Key size quota
+
+
+Four new sysctl files have been added also for the purpose of controlling the
+quota limits on keys:
+
+  *  /proc/sys/kernel/keys/root_maxkeys
+     /proc/sys/kernel/keys/root_maxbytes
+
+     These files hold the maximum number of keys that root may have and the
+     maximum total number of bytes of data that root may have stored in those
+     keys.
+
+  *  /proc/sys/kernel/keys/maxkeys
+     /proc/sys/kernel/keys/maxbytes
+
+     These files hold the maximum number of keys that each non-root user may
+     have and the maximum total number of bytes of data that each of those
+     users may have stored in their keys.
+
+Root may alter these by writing each new limit as a decimal number string to
+the appropriate file.
+
+
+Userspace System Call Interface
+===============================
+
+Userspace can manipulate keys directly through three new syscalls: add_key,
+request_key and keyctl. The latter provides a number of functions for
+manipulating keys.
+
+When referring to a key directly, userspace programs should use the key's
+serial number (a positive 32-bit integer). However, there are some special
+values available for referring to special keys and keyrings that relate to the
+process making the call::
+
+	CONSTANT			VALUE	KEY REFERENCED
+	==============================	======	===========================
+	KEY_SPEC_THREAD_KEYRING		-1	thread-specific keyring
+	KEY_SPEC_PROCESS_KEYRING	-2	process-specific keyring
+	KEY_SPEC_SESSION_KEYRING	-3	session-specific keyring
+	KEY_SPEC_USER_KEYRING		-4	UID-specific keyring
+	KEY_SPEC_USER_SESSION_KEYRING	-5	UID-session keyring
+	KEY_SPEC_GROUP_KEYRING		-6	GID-specific keyring
+	KEY_SPEC_REQKEY_AUTH_KEY	-7	assumed request_key()
+						  authorisation key
+
+
+The main syscalls are:
+
+  *  Create a new key of given type, description and payload and add it to the
+     nominated keyring::
+
+	key_serial_t add_key(const char *type, const char *desc,
+			     const void *payload, size_t plen,
+			     key_serial_t keyring);
+
+     If a key of the same type and description as that proposed already exists
+     in the keyring, this will try to update it with the given payload, or it
+     will return error EEXIST if that function is not supported by the key
+     type. The process must also have permission to write to the key to be able
+     to update it. The new key will have all user permissions granted and no
+     group or third party permissions.
+
+     Otherwise, this will attempt to create a new key of the specified type and
+     description, and to instantiate it with the supplied payload and attach it
+     to the keyring. In this case, an error will be generated if the process
+     does not have permission to write to the keyring.
+
+     If the key type supports it, if the description is NULL or an empty
+     string, the key type will try and generate a description from the content
+     of the payload.
+
+     The payload is optional, and the pointer can be NULL if not required by
+     the type. The payload is plen in size, and plen can be zero for an empty
+     payload.
+
+     A new keyring can be generated by setting type "keyring", the keyring name
+     as the description (or NULL) and setting the payload to NULL.
+
+     User defined keys can be created by specifying type "user". It is
+     recommended that a user defined key's description by prefixed with a type
+     ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting
+     ticket.
+
+     Any other type must have been registered with the kernel in advance by a
+     kernel service such as a filesystem.
+
+     The ID of the new or updated key is returned if successful.
+
+
+  *  Search the process's keyrings for a key, potentially calling out to
+     userspace to create it::
+
+	key_serial_t request_key(const char *type, const char *description,
+				 const char *callout_info,
+				 key_serial_t dest_keyring);
+
+     This function searches all the process's keyrings in the order thread,
+     process, session for a matching key. This works very much like
+     KEYCTL_SEARCH, including the optional attachment of the discovered key to
+     a keyring.
+
+     If a key cannot be found, and if callout_info is not NULL, then
+     /sbin/request-key will be invoked in an attempt to obtain a key. The
+     callout_info string will be passed as an argument to the program.
+
+     See also Documentation/security/keys-request-key.txt.
+
+
+The keyctl syscall functions are:
+
+  *  Map a special key ID to a real key ID for this process::
+
+	key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id,
+			    int create);
+
+     The special key specified by "id" is looked up (with the key being created
+     if necessary) and the ID of the key or keyring thus found is returned if
+     it exists.
+
+     If the key does not yet exist, the key will be created if "create" is
+     non-zero; and the error ENOKEY will be returned if "create" is zero.
+
+
+  *  Replace the session keyring this process subscribes to with a new one::
+
+	key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name);
+
+     If name is NULL, an anonymous keyring is created attached to the process
+     as its session keyring, displacing the old session keyring.
+
+     If name is not NULL, if a keyring of that name exists, the process
+     attempts to attach it as the session keyring, returning an error if that
+     is not permitted; otherwise a new keyring of that name is created and
+     attached as the session keyring.
+
+     To attach to a named keyring, the keyring must have search permission for
+     the process's ownership.
+
+     The ID of the new session keyring is returned if successful.
+
+
+  *  Update the specified key::
+
+	long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload,
+		    size_t plen);
+
+     This will try to update the specified key with the given payload, or it
+     will return error EOPNOTSUPP if that function is not supported by the key
+     type. The process must also have permission to write to the key to be able
+     to update it.
+
+     The payload is of length plen, and may be absent or empty as for
+     add_key().
+
+
+  *  Revoke a key::
+
+	long keyctl(KEYCTL_REVOKE, key_serial_t key);
+
+     This makes a key unavailable for further operations. Further attempts to
+     use the key will be met with error EKEYREVOKED, and the key will no longer
+     be findable.
+
+
+  *  Change the ownership of a key::
+
+	long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid);
+
+     This function permits a key's owner and group ID to be changed. Either one
+     of uid or gid can be set to -1 to suppress that change.
+
+     Only the superuser can change a key's owner to something other than the
+     key's current owner. Similarly, only the superuser can change a key's
+     group ID to something other than the calling process's group ID or one of
+     its group list members.
+
+
+  *  Change the permissions mask on a key::
+
+	long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm);
+
+     This function permits the owner of a key or the superuser to change the
+     permissions mask on a key.
+
+     Only bits the available bits are permitted; if any other bits are set,
+     error EINVAL will be returned.
+
+
+  *  Describe a key::
+
+	long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer,
+		    size_t buflen);
+
+     This function returns a summary of the key's attributes (but not its
+     payload data) as a string in the buffer provided.
+
+     Unless there's an error, it always returns the amount of data it could
+     produce, even if that's too big for the buffer, but it won't copy more
+     than requested to userspace. If the buffer pointer is NULL then no copy
+     will take place.
+
+     A process must have view permission on the key for this function to be
+     successful.
+
+     If successful, a string is placed in the buffer in the following format::
+
+	<type>;<uid>;<gid>;<perm>;<description>
+
+     Where type and description are strings, uid and gid are decimal, and perm
+     is hexadecimal. A NUL character is included at the end of the string if
+     the buffer is sufficiently big.
+
+     This can be parsed with::
+
+	sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc);
+
+
+  *  Clear out a keyring::
+
+	long keyctl(KEYCTL_CLEAR, key_serial_t keyring);
+
+     This function clears the list of keys attached to a keyring. The calling
+     process must have write permission on the keyring, and it must be a
+     keyring (or else error ENOTDIR will result).
+
+     This function can also be used to clear special kernel keyrings if they
+     are appropriately marked if the user has CAP_SYS_ADMIN capability.  The
+     DNS resolver cache keyring is an example of this.
+
+
+  *  Link a key into a keyring::
+
+	long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key);
+
+     This function creates a link from the keyring to the key. The process must
+     have write permission on the keyring and must have link permission on the
+     key.
+
+     Should the keyring not be a keyring, error ENOTDIR will result; and if the
+     keyring is full, error ENFILE will result.
+
+     The link procedure checks the nesting of the keyrings, returning ELOOP if
+     it appears too deep or EDEADLK if the link would introduce a cycle.
+
+     Any links within the keyring to keys that match the new key in terms of
+     type and description will be discarded from the keyring as the new one is
+     added.
+
+
+  *  Unlink a key or keyring from another keyring::
+
+	long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key);
+
+     This function looks through the keyring for the first link to the
+     specified key, and removes it if found. Subsequent links to that key are
+     ignored. The process must have write permission on the keyring.
+
+     If the keyring is not a keyring, error ENOTDIR will result; and if the key
+     is not present, error ENOENT will be the result.
+
+
+  *  Search a keyring tree for a key::
+
+	key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring,
+			    const char *type, const char *description,
+			    key_serial_t dest_keyring);
+
+     This searches the keyring tree headed by the specified keyring until a key
+     is found that matches the type and description criteria. Each keyring is
+     checked for keys before recursion into its children occurs.
+
+     The process must have search permission on the top level keyring, or else
+     error EACCES will result. Only keyrings that the process has search
+     permission on will be recursed into, and only keys and keyrings for which
+     a process has search permission can be matched. If the specified keyring
+     is not a keyring, ENOTDIR will result.
+
+     If the search succeeds, the function will attempt to link the found key
+     into the destination keyring if one is supplied (non-zero ID). All the
+     constraints applicable to KEYCTL_LINK apply in this case too.
+
+     Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search
+     fails. On success, the resulting key ID will be returned.
+
+
+  *  Read the payload data from a key::
+
+	long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer,
+		    size_t buflen);
+
+     This function attempts to read the payload data from the specified key
+     into the buffer. The process must have read permission on the key to
+     succeed.
+
+     The returned data will be processed for presentation by the key type. For
+     instance, a keyring will return an array of key_serial_t entries
+     representing the IDs of all the keys to which it is subscribed. The user
+     defined key type will return its data as is. If a key type does not
+     implement this function, error EOPNOTSUPP will result.
+
+     As much of the data as can be fitted into the buffer will be copied to
+     userspace if the buffer pointer is not NULL.
+
+     On a successful return, the function will always return the amount of data
+     available rather than the amount copied.
+
+
+  *  Instantiate a partially constructed key::
+
+	long keyctl(KEYCTL_INSTANTIATE, key_serial_t key,
+		    const void *payload, size_t plen,
+		    key_serial_t keyring);
+	long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key,
+		    const struct iovec *payload_iov, unsigned ioc,
+		    key_serial_t keyring);
+
+     If the kernel calls back to userspace to complete the instantiation of a
+     key, userspace should use this call to supply data for the key before the
+     invoked process returns, or else the key will be marked negative
+     automatically.
+
+     The process must have write access on the key to be able to instantiate
+     it, and the key must be uninstantiated.
+
+     If a keyring is specified (non-zero), the key will also be linked into
+     that keyring, however all the constraints applying in KEYCTL_LINK apply in
+     this case too.
+
+     The payload and plen arguments describe the payload data as for add_key().
+
+     The payload_iov and ioc arguments describe the payload data in an iovec
+     array instead of a single buffer.
+
+
+  *  Negatively instantiate a partially constructed key::
+
+	long keyctl(KEYCTL_NEGATE, key_serial_t key,
+		    unsigned timeout, key_serial_t keyring);
+	long keyctl(KEYCTL_REJECT, key_serial_t key,
+		    unsigned timeout, unsigned error, key_serial_t keyring);
+
+     If the kernel calls back to userspace to complete the instantiation of a
+     key, userspace should use this call mark the key as negative before the
+     invoked process returns if it is unable to fulfill the request.
+
+     The process must have write access on the key to be able to instantiate
+     it, and the key must be uninstantiated.
+
+     If a keyring is specified (non-zero), the key will also be linked into
+     that keyring, however all the constraints applying in KEYCTL_LINK apply in
+     this case too.
+
+     If the key is rejected, future searches for it will return the specified
+     error code until the rejected key expires.  Negating the key is the same
+     as rejecting the key with ENOKEY as the error code.
+
+
+  *  Set the default request-key destination keyring::
+
+	long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl);
+
+     This sets the default keyring to which implicitly requested keys will be
+     attached for this thread. reqkey_defl should be one of these constants::
+
+	CONSTANT				VALUE	NEW DEFAULT KEYRING
+	======================================	======	=======================
+	KEY_REQKEY_DEFL_NO_CHANGE		-1	No change
+	KEY_REQKEY_DEFL_DEFAULT			0	Default[1]
+	KEY_REQKEY_DEFL_THREAD_KEYRING		1	Thread keyring
+	KEY_REQKEY_DEFL_PROCESS_KEYRING		2	Process keyring
+	KEY_REQKEY_DEFL_SESSION_KEYRING		3	Session keyring
+	KEY_REQKEY_DEFL_USER_KEYRING		4	User keyring
+	KEY_REQKEY_DEFL_USER_SESSION_KEYRING	5	User session keyring
+	KEY_REQKEY_DEFL_GROUP_KEYRING		6	Group keyring
+
+     The old default will be returned if successful and error EINVAL will be
+     returned if reqkey_defl is not one of the above values.
+
+     The default keyring can be overridden by the keyring indicated to the
+     request_key() system call.
+
+     Note that this setting is inherited across fork/exec.
+
+     [1] The default is: the thread keyring if there is one, otherwise
+     the process keyring if there is one, otherwise the session keyring if
+     there is one, otherwise the user default session keyring.
+
+
+  *  Set the timeout on a key::
+
+	long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout);
+
+     This sets or clears the timeout on a key. The timeout can be 0 to clear
+     the timeout or a number of seconds to set the expiry time that far into
+     the future.
+
+     The process must have attribute modification access on a key to set its
+     timeout. Timeouts may not be set with this function on negative, revoked
+     or expired keys.
+
+
+  *  Assume the authority granted to instantiate a key::
+
+	long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key);
+
+     This assumes or divests the authority required to instantiate the
+     specified key. Authority can only be assumed if the thread has the
+     authorisation key associated with the specified key in its keyrings
+     somewhere.
+
+     Once authority is assumed, searches for keys will also search the
+     requester's keyrings using the requester's security label, UID, GID and
+     groups.
+
+     If the requested authority is unavailable, error EPERM will be returned,
+     likewise if the authority has been revoked because the target key is
+     already instantiated.
+
+     If the specified key is 0, then any assumed authority will be divested.
+
+     The assumed authoritative key is inherited across fork and exec.
+
+
+  *  Get the LSM security context attached to a key::
+
+	long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer,
+		    size_t buflen)
+
+     This function returns a string that represents the LSM security context
+     attached to a key in the buffer provided.
+
+     Unless there's an error, it always returns the amount of data it could
+     produce, even if that's too big for the buffer, but it won't copy more
+     than requested to userspace. If the buffer pointer is NULL then no copy
+     will take place.
+
+     A NUL character is included at the end of the string if the buffer is
+     sufficiently big.  This is included in the returned count.  If no LSM is
+     in force then an empty string will be returned.
+
+     A process must have view permission on the key for this function to be
+     successful.
+
+
+  *  Install the calling process's session keyring on its parent::
+
+	long keyctl(KEYCTL_SESSION_TO_PARENT);
+
+     This functions attempts to install the calling process's session keyring
+     on to the calling process's parent, replacing the parent's current session
+     keyring.
+
+     The calling process must have the same ownership as its parent, the
+     keyring must have the same ownership as the calling process, the calling
+     process must have LINK permission on the keyring and the active LSM module
+     mustn't deny permission, otherwise error EPERM will be returned.
+
+     Error ENOMEM will be returned if there was insufficient memory to complete
+     the operation, otherwise 0 will be returned to indicate success.
+
+     The keyring will be replaced next time the parent process leaves the
+     kernel and resumes executing userspace.
+
+
+  *  Invalidate a key::
+
+	long keyctl(KEYCTL_INVALIDATE, key_serial_t key);
+
+     This function marks a key as being invalidated and then wakes up the
+     garbage collector.  The garbage collector immediately removes invalidated
+     keys from all keyrings and deletes the key when its reference count
+     reaches zero.
+
+     Keys that are marked invalidated become invisible to normal key operations
+     immediately, though they are still visible in /proc/keys until deleted
+     (they're marked with an 'i' flag).
+
+     A process must have search permission on the key for this function to be
+     successful.
+
+  *  Compute a Diffie-Hellman shared secret or public key::
+
+	long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params,
+		    char *buffer, size_t buflen, struct keyctl_kdf_params *kdf);
+
+     The params struct contains serial numbers for three keys::
+
+	 - The prime, p, known to both parties
+	 - The local private key
+	 - The base integer, which is either a shared generator or the
+	   remote public key
+
+     The value computed is::
+
+	result = base ^ private (mod prime)
+
+     If the base is the shared generator, the result is the local
+     public key.  If the base is the remote public key, the result is
+     the shared secret.
+
+     If the parameter kdf is NULL, the following applies:
+
+	 - The buffer length must be at least the length of the prime, or zero.
+
+	 - If the buffer length is nonzero, the length of the result is
+	   returned when it is successfully calculated and copied in to the
+	   buffer. When the buffer length is zero, the minimum required
+	   buffer length is returned.
+
+     The kdf parameter allows the caller to apply a key derivation function
+     (KDF) on the Diffie-Hellman computation where only the result
+     of the KDF is returned to the caller. The KDF is characterized with
+     struct keyctl_kdf_params as follows:
+
+	 - ``char *hashname`` specifies the NUL terminated string identifying
+	   the hash used from the kernel crypto API and applied for the KDF
+	   operation. The KDF implemenation complies with SP800-56A as well
+	   as with SP800-108 (the counter KDF).
+
+	 - ``char *otherinfo`` specifies the OtherInfo data as documented in
+	   SP800-56A section 5.8.1.2. The length of the buffer is given with
+	   otherinfolen. The format of OtherInfo is defined by the caller.
+	   The otherinfo pointer may be NULL if no OtherInfo shall be used.
+
+     This function will return error EOPNOTSUPP if the key type is not
+     supported, error ENOKEY if the key could not be found, or error
+     EACCES if the key is not readable by the caller. In addition, the
+     function will return EMSGSIZE when the parameter kdf is non-NULL
+     and either the buffer length or the OtherInfo length exceeds the
+     allowed length.
+
+  *  Restrict keyring linkage::
+
+	long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring,
+		    const char *type, const char *restriction);
+
+     An existing keyring can restrict linkage of additional keys by evaluating
+     the contents of the key according to a restriction scheme.
+
+     "keyring" is the key ID for an existing keyring to apply a restriction
+     to. It may be empty or may already have keys linked. Existing linked keys
+     will remain in the keyring even if the new restriction would reject them.
+
+     "type" is a registered key type.
+
+     "restriction" is a string describing how key linkage is to be restricted.
+     The format varies depending on the key type, and the string is passed to
+     the lookup_restriction() function for the requested type.  It may specify
+     a method and relevant data for the restriction such as signature
+     verification or constraints on key payload. If the requested key type is
+     later unregistered, no keys may be added to the keyring after the key type
+     is removed.
+
+     To apply a keyring restriction the process must have Set Attribute
+     permission and the keyring must not be previously restricted.
+
+Kernel Services
+===============
+
+The kernel services for key management are fairly simple to deal with. They can
+be broken down into two areas: keys and key types.
+
+Dealing with keys is fairly straightforward. Firstly, the kernel service
+registers its type, then it searches for a key of that type. It should retain
+the key as long as it has need of it, and then it should release it. For a
+filesystem or device file, a search would probably be performed during the open
+call, and the key released upon close. How to deal with conflicting keys due to
+two different users opening the same file is left to the filesystem author to
+solve.
+
+To access the key manager, the following header must be #included::
+
+	<linux/key.h>
+
+Specific key types should have a header file under include/keys/ that should be
+used to access that type.  For keys of type "user", for example, that would be::
+
+	<keys/user-type.h>
+
+Note that there are two different types of pointers to keys that may be
+encountered:
+
+  *  struct key *
+
+     This simply points to the key structure itself. Key structures will be at
+     least four-byte aligned.
+
+  *  key_ref_t
+
+     This is equivalent to a ``struct key *``, but the least significant bit is set
+     if the caller "possesses" the key. By "possession" it is meant that the
+     calling processes has a searchable link to the key from one of its
+     keyrings. There are three functions for dealing with these::
+
+	key_ref_t make_key_ref(const struct key *key, bool possession);
+
+	struct key *key_ref_to_ptr(const key_ref_t key_ref);
+
+	bool is_key_possessed(const key_ref_t key_ref);
+
+     The first function constructs a key reference from a key pointer and
+     possession information (which must be true or false).
+
+     The second function retrieves the key pointer from a reference and the
+     third retrieves the possession flag.
+
+When accessing a key's payload contents, certain precautions must be taken to
+prevent access vs modification races. See the section "Notes on accessing
+payload contents" for more information.
+
+ *  To search for a key, call::
+
+	struct key *request_key(const struct key_type *type,
+				const char *description,
+				const char *callout_info);
+
+    This is used to request a key or keyring with a description that matches
+    the description specified according to the key type's match_preparse()
+    method. This permits approximate matching to occur. If callout_string is
+    not NULL, then /sbin/request-key will be invoked in an attempt to obtain
+    the key from userspace. In that case, callout_string will be passed as an
+    argument to the program.
+
+    Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be
+    returned.
+
+    If successful, the key will have been attached to the default keyring for
+    implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
+
+    See also Documentation/security/keys-request-key.txt.
+
+
+ *  To search for a key, passing auxiliary data to the upcaller, call::
+
+	struct key *request_key_with_auxdata(const struct key_type *type,
+					     const char *description,
+					     const void *callout_info,
+					     size_t callout_len,
+					     void *aux);
+
+    This is identical to request_key(), except that the auxiliary data is
+    passed to the key_type->request_key() op if it exists, and the callout_info
+    is a blob of length callout_len, if given (the length may be 0).
+
+
+ *  A key can be requested asynchronously by calling one of::
+
+	struct key *request_key_async(const struct key_type *type,
+				      const char *description,
+				      const void *callout_info,
+				      size_t callout_len);
+
+    or::
+
+	struct key *request_key_async_with_auxdata(const struct key_type *type,
+						   const char *description,
+						   const char *callout_info,
+					     	   size_t callout_len,
+					     	   void *aux);
+
+    which are asynchronous equivalents of request_key() and
+    request_key_with_auxdata() respectively.
+
+    These two functions return with the key potentially still under
+    construction.  To wait for construction completion, the following should be
+    called::
+
+	int wait_for_key_construction(struct key *key, bool intr);
+
+    The function will wait for the key to finish being constructed and then
+    invokes key_validate() to return an appropriate value to indicate the state
+    of the key (0 indicates the key is usable).
+
+    If intr is true, then the wait can be interrupted by a signal, in which
+    case error ERESTARTSYS will be returned.
+
+
+ *  When it is no longer required, the key should be released using::
+
+	void key_put(struct key *key);
+
+    Or::
+
+	void key_ref_put(key_ref_t key_ref);
+
+    These can be called from interrupt context. If CONFIG_KEYS is not set then
+    the argument will not be parsed.
+
+
+ *  Extra references can be made to a key by calling one of the following
+    functions::
+
+	struct key *__key_get(struct key *key);
+	struct key *key_get(struct key *key);
+
+    Keys so references will need to be disposed of by calling key_put() when
+    they've been finished with.  The key pointer passed in will be returned.
+
+    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+    then the key will not be dereferenced and no increment will take place.
+
+
+ *  A key's serial number can be obtained by calling::
+
+	key_serial_t key_serial(struct key *key);
+
+    If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the
+    latter case without parsing the argument).
+
+
+ *  If a keyring was found in the search, this can be further searched by::
+
+	key_ref_t keyring_search(key_ref_t keyring_ref,
+				 const struct key_type *type,
+				 const char *description)
+
+    This searches the keyring tree specified for a matching key. Error ENOKEY
+    is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful,
+    the returned key will need to be released.
+
+    The possession attribute from the keyring reference is used to control
+    access through the permissions mask and is propagated to the returned key
+    reference pointer if successful.
+
+
+ *  A keyring can be created by::
+
+	struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
+				  const struct cred *cred,
+				  key_perm_t perm,
+				  struct key_restriction *restrict_link,
+				  unsigned long flags,
+				  struct key *dest);
+
+    This creates a keyring with the given attributes and returns it.  If dest
+    is not NULL, the new keyring will be linked into the keyring to which it
+    points.  No permission checks are made upon the destination keyring.
+
+    Error EDQUOT can be returned if the keyring would overload the quota (pass
+    KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted
+    towards the user's quota).  Error ENOMEM can also be returned.
+
+    If restrict_link is not NULL, it should point to a structure that contains
+    the function that will be called each time an attempt is made to link a
+    key into the new keyring.  The structure may also contain a key pointer
+    and an associated key type.  The function is called to check whether a key
+    may be added into the keyring or not.  The key type is used by the garbage
+    collector to clean up function or data pointers in this structure if the
+    given key type is unregistered.  Callers of key_create_or_update() within
+    the kernel can pass KEY_ALLOC_BYPASS_RESTRICTION to suppress the check.
+    An example of using this is to manage rings of cryptographic keys that are
+    set up when the kernel boots where userspace is also permitted to add keys
+    - provided they can be verified by a key the kernel already has.
+
+    When called, the restriction function will be passed the keyring being
+    added to, the key type, the payload of the key being added, and data to be
+    used in the restriction check.  Note that when a new key is being created,
+    this is called between payload preparsing and actual key creation.  The
+    function should return 0 to allow the link or an error to reject it.
+
+    A convenience function, restrict_link_reject, exists to always return
+    -EPERM to in this case.
+
+
+ *  To check the validity of a key, this function can be called::
+
+	int validate_key(struct key *key);
+
+    This checks that the key in question hasn't expired or and hasn't been
+    revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will
+    be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be
+    returned (in the latter case without parsing the argument).
+
+
+ *  To register a key type, the following function should be called::
+
+	int register_key_type(struct key_type *type);
+
+    This will return error EEXIST if a type of the same name is already
+    present.
+
+
+ *  To unregister a key type, call::
+
+	void unregister_key_type(struct key_type *type);
+
+
+Under some circumstances, it may be desirable to deal with a bundle of keys.
+The facility provides access to the keyring type for managing such a bundle::
+
+	struct key_type key_type_keyring;
+
+This can be used with a function such as request_key() to find a specific
+keyring in a process's keyrings.  A keyring thus found can then be searched
+with keyring_search().  Note that it is not possible to use request_key() to
+search a specific keyring, so using keyrings in this way is of limited utility.
+
+
+Notes On Accessing Payload Contents
+===================================
+
+The simplest payload is just data stored in key->payload directly.  In this
+case, there's no need to indulge in RCU or locking when accessing the payload.
+
+More complex payload contents must be allocated and pointers to them set in the
+key->payload.data[] array.  One of the following ways must be selected to
+access the data:
+
+  1) Unmodifiable key type.
+
+     If the key type does not have a modify method, then the key's payload can
+     be accessed without any form of locking, provided that it's known to be
+     instantiated (uninstantiated keys cannot be "found").
+
+  2) The key's semaphore.
+
+     The semaphore could be used to govern access to the payload and to control
+     the payload pointer. It must be write-locked for modifications and would
+     have to be read-locked for general access. The disadvantage of doing this
+     is that the accessor may be required to sleep.
+
+  3) RCU.
+
+     RCU must be used when the semaphore isn't already held; if the semaphore
+     is held then the contents can't change under you unexpectedly as the
+     semaphore must still be used to serialise modifications to the key. The
+     key management code takes care of this for the key type.
+
+     However, this means using::
+
+	rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock()
+
+     to read the pointer, and::
+
+	rcu_dereference() ... rcu_assign_pointer() ... call_rcu()
+
+     to set the pointer and dispose of the old contents after a grace period.
+     Note that only the key type should ever modify a key's payload.
+
+     Furthermore, an RCU controlled payload must hold a struct rcu_head for the
+     use of call_rcu() and, if the payload is of variable size, the length of
+     the payload. key->datalen cannot be relied upon to be consistent with the
+     payload just dereferenced if the key's semaphore is not held.
+
+     Note that key->payload.data[0] has a shadow that is marked for __rcu
+     usage.  This is called key->payload.rcu_data0.  The following accessors
+     wrap the RCU calls to this element:
+
+     a) Set or change the first payload pointer::
+
+		rcu_assign_keypointer(struct key *key, void *data);
+
+     b) Read the first payload pointer with the key semaphore held::
+
+		[const] void *dereference_key_locked([const] struct key *key);
+
+	 Note that the return value will inherit its constness from the key
+	 parameter.  Static analysis will give an error if it things the lock
+	 isn't held.
+
+     c) Read the first payload pointer with the RCU read lock held::
+
+		const void *dereference_key_rcu(const struct key *key);
+
+
+Defining a Key Type
+===================
+
+A kernel service may want to define its own key type. For instance, an AFS
+filesystem might want to define a Kerberos 5 ticket key type. To do this, it
+author fills in a key_type struct and registers it with the system.
+
+Source files that implement key types should include the following header file::
+
+	<linux/key-type.h>
+
+The structure has a number of fields, some of which are mandatory:
+
+  *  ``const char *name``
+
+     The name of the key type. This is used to translate a key type name
+     supplied by userspace into a pointer to the structure.
+
+
+  *  ``size_t def_datalen``
+
+     This is optional - it supplies the default payload data length as
+     contributed to the quota. If the key type's payload is always or almost
+     always the same size, then this is a more efficient way to do things.
+
+     The data length (and quota) on a particular key can always be changed
+     during instantiation or update by calling::
+
+	int key_payload_reserve(struct key *key, size_t datalen);
+
+     With the revised data length. Error EDQUOT will be returned if this is not
+     viable.
+
+
+  *  ``int (*vet_description)(const char *description);``
+
+     This optional method is called to vet a key description.  If the key type
+     doesn't approve of the key description, it may return an error, otherwise
+     it should return 0.
+
+
+  *  ``int (*preparse)(struct key_preparsed_payload *prep);``
+
+     This optional method permits the key type to attempt to parse payload
+     before a key is created (add key) or the key semaphore is taken (update or
+     instantiate key).  The structure pointed to by prep looks like::
+
+	struct key_preparsed_payload {
+		char		*description;
+		union key_payload payload;
+		const void	*data;
+		size_t		datalen;
+		size_t		quotalen;
+		time_t		expiry;
+	};
+
+     Before calling the method, the caller will fill in data and datalen with
+     the payload blob parameters; quotalen will be filled in with the default
+     quota size from the key type; expiry will be set to TIME_T_MAX and the
+     rest will be cleared.
+
+     If a description can be proposed from the payload contents, that should be
+     attached as a string to the description field.  This will be used for the
+     key description if the caller of add_key() passes NULL or "".
+
+     The method can attach anything it likes to payload.  This is merely passed
+     along to the instantiate() or update() operations.  If set, the expiry
+     time will be applied to the key if it is instantiated from this data.
+
+     The method should return 0 if successful or a negative error code
+     otherwise.
+
+
+  *  ``void (*free_preparse)(struct key_preparsed_payload *prep);``
+
+     This method is only required if the preparse() method is provided,
+     otherwise it is unused.  It cleans up anything attached to the description
+     and payload fields of the key_preparsed_payload struct as filled in by the
+     preparse() method.  It will always be called after preparse() returns
+     successfully, even if instantiate() or update() succeed.
+
+
+  *  ``int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);``
+
+     This method is called to attach a payload to a key during construction.
+     The payload attached need not bear any relation to the data passed to this
+     function.
+
+     The prep->data and prep->datalen fields will define the original payload
+     blob.  If preparse() was supplied then other fields may be filled in also.
+
+     If the amount of data attached to the key differs from the size in
+     keytype->def_datalen, then key_payload_reserve() should be called.
+
+     This method does not have to lock the key in order to attach a payload.
+     The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents
+     anything else from gaining access to the key.
+
+     It is safe to sleep in this method.
+
+     generic_key_instantiate() is provided to simply copy the data from
+     prep->payload.data[] to key->payload.data[], with RCU-safe assignment on
+     the first element.  It will then clear prep->payload.data[] so that the
+     free_preparse method doesn't release the data.
+
+
+  *  ``int (*update)(struct key *key, const void *data, size_t datalen);``
+
+     If this type of key can be updated, then this method should be provided.
+     It is called to update a key's payload from the blob of data provided.
+
+     The prep->data and prep->datalen fields will define the original payload
+     blob.  If preparse() was supplied then other fields may be filled in also.
+
+     key_payload_reserve() should be called if the data length might change
+     before any changes are actually made. Note that if this succeeds, the type
+     is committed to changing the key because it's already been altered, so all
+     memory allocation must be done first.
+
+     The key will have its semaphore write-locked before this method is called,
+     but this only deters other writers; any changes to the key's payload must
+     be made under RCU conditions, and call_rcu() must be used to dispose of
+     the old payload.
+
+     key_payload_reserve() should be called before the changes are made, but
+     after all allocations and other potentially failing function calls are
+     made.
+
+     It is safe to sleep in this method.
+
+
+  *  ``int (*match_preparse)(struct key_match_data *match_data);``
+
+     This method is optional.  It is called when a key search is about to be
+     performed.  It is given the following structure::
+
+	struct key_match_data {
+		bool (*cmp)(const struct key *key,
+			    const struct key_match_data *match_data);
+		const void	*raw_data;
+		void		*preparsed;
+		unsigned	lookup_type;
+	};
+
+     On entry, raw_data will be pointing to the criteria to be used in matching
+     a key by the caller and should not be modified.  ``(*cmp)()`` will be pointing
+     to the default matcher function (which does an exact description match
+     against raw_data) and lookup_type will be set to indicate a direct lookup.
+
+     The following lookup_type values are available:
+
+       *  KEYRING_SEARCH_LOOKUP_DIRECT - A direct lookup hashes the type and
+      	  description to narrow down the search to a small number of keys.
+
+       *  KEYRING_SEARCH_LOOKUP_ITERATE - An iterative lookup walks all the
+      	  keys in the keyring until one is matched.  This must be used for any
+      	  search that's not doing a simple direct match on the key description.
+
+     The method may set cmp to point to a function of its choice that does some
+     other form of match, may set lookup_type to KEYRING_SEARCH_LOOKUP_ITERATE
+     and may attach something to the preparsed pointer for use by ``(*cmp)()``.
+     ``(*cmp)()`` should return true if a key matches and false otherwise.
+
+     If preparsed is set, it may be necessary to use the match_free() method to
+     clean it up.
+
+     The method should return 0 if successful or a negative error code
+     otherwise.
+
+     It is permitted to sleep in this method, but ``(*cmp)()`` may not sleep as
+     locks will be held over it.
+
+     If match_preparse() is not provided, keys of this type will be matched
+     exactly by their description.
+
+
+  *  ``void (*match_free)(struct key_match_data *match_data);``
+
+     This method is optional.  If given, it called to clean up
+     match_data->preparsed after a successful call to match_preparse().
+
+
+  *  ``void (*revoke)(struct key *key);``
+
+     This method is optional.  It is called to discard part of the payload
+     data upon a key being revoked.  The caller will have the key semaphore
+     write-locked.
+
+     It is safe to sleep in this method, though care should be taken to avoid
+     a deadlock against the key semaphore.
+
+
+  *  ``void (*destroy)(struct key *key);``
+
+     This method is optional. It is called to discard the payload data on a key
+     when it is being destroyed.
+
+     This method does not need to lock the key to access the payload; it can
+     consider the key as being inaccessible at this time. Note that the key's
+     type may have been changed before this function is called.
+
+     It is not safe to sleep in this method; the caller may hold spinlocks.
+
+
+  *  ``void (*describe)(const struct key *key, struct seq_file *p);``
+
+     This method is optional. It is called during /proc/keys reading to
+     summarise a key's description and payload in text form.
+
+     This method will be called with the RCU read lock held. rcu_dereference()
+     should be used to read the payload pointer if the payload is to be
+     accessed. key->datalen cannot be trusted to stay consistent with the
+     contents of the payload.
+
+     The description will not change, though the key's state may.
+
+     It is not safe to sleep in this method; the RCU read lock is held by the
+     caller.
+
+
+  *  ``long (*read)(const struct key *key, char __user *buffer, size_t buflen);``
+
+     This method is optional. It is called by KEYCTL_READ to translate the
+     key's payload into something a blob of data for userspace to deal with.
+     Ideally, the blob should be in the same format as that passed in to the
+     instantiate and update methods.
+
+     If successful, the blob size that could be produced should be returned
+     rather than the size copied.
+
+     This method will be called with the key's semaphore read-locked. This will
+     prevent the key's payload changing. It is not necessary to use RCU locking
+     when accessing the key's payload. It is safe to sleep in this method, such
+     as might happen when the userspace buffer is accessed.
+
+
+  *  ``int (*request_key)(struct key_construction *cons, const char *op, void *aux);``
+
+     This method is optional.  If provided, request_key() and friends will
+     invoke this function rather than upcalling to /sbin/request-key to operate
+     upon a key of this type.
+
+     The aux parameter is as passed to request_key_async_with_auxdata() and
+     similar or is NULL otherwise.  Also passed are the construction record for
+     the key to be operated upon and the operation type (currently only
+     "create").
+
+     This method is permitted to return before the upcall is complete, but the
+     following function must be called under all circumstances to complete the
+     instantiation process, whether or not it succeeds, whether or not there's
+     an error::
+
+	void complete_request_key(struct key_construction *cons, int error);
+
+     The error parameter should be 0 on success, -ve on error.  The
+     construction record is destroyed by this action and the authorisation key
+     will be revoked.  If an error is indicated, the key under construction
+     will be negatively instantiated if it wasn't already instantiated.
+
+     If this method returns an error, that error will be returned to the
+     caller of request_key*().  complete_request_key() must be called prior to
+     returning.
+
+     The key under construction and the authorisation key can be found in the
+     key_construction struct pointed to by cons:
+
+      *  ``struct key *key;``
+
+     	 The key under construction.
+
+      *  ``struct key *authkey;``
+
+     	 The authorisation key.
+
+
+  *  ``struct key_restriction *(*lookup_restriction)(const char *params);``
+
+     This optional method is used to enable userspace configuration of keyring
+     restrictions. The restriction parameter string (not including the key type
+     name) is passed in, and this method returns a pointer to a key_restriction
+     structure containing the relevant functions and data to evaluate each
+     attempted key link operation. If there is no match, -EINVAL is returned.
+
+
+Request-Key Callback Service
+============================
+
+To create a new key, the kernel will attempt to execute the following command
+line::
+
+	/sbin/request-key create <key> <uid> <gid> \
+		<threadring> <processring> <sessionring> <callout_info>
+
+<key> is the key being constructed, and the three keyrings are the process
+keyrings from the process that caused the search to be issued. These are
+included for two reasons:
+
+   1  There may be an authentication token in one of the keyrings that is
+      required to obtain the key, eg: a Kerberos Ticket-Granting Ticket.
+
+   2  The new key should probably be cached in one of these rings.
+
+This program should set it UID and GID to those specified before attempting to
+access any more keys. It may then look around for a user specific process to
+hand the request off to (perhaps a path held in placed in another key by, for
+example, the KDE desktop manager).
+
+The program (or whatever it calls) should finish construction of the key by
+calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to
+cache the key in one of the keyrings (probably the session ring) before
+returning.  Alternatively, the key can be marked as negative with KEYCTL_NEGATE
+or KEYCTL_REJECT; this also permits the key to be cached in one of the
+keyrings.
+
+If it returns with the key remaining in the unconstructed state, the key will
+be marked as being negative, it will be added to the session keyring, and an
+error will be returned to the key requestor.
+
+Supplementary information may be provided from whoever or whatever invoked this
+service. This will be passed as the <callout_info> parameter. If no such
+information was made available, then "-" will be passed as this parameter
+instead.
+
+
+Similarly, the kernel may attempt to update an expired or a soon to expire key
+by executing::
+
+	/sbin/request-key update <key> <uid> <gid> \
+		<threadring> <processring> <sessionring>
+
+In this case, the program isn't required to actually attach the key to a ring;
+the rings are provided for reference.
+
+
+Garbage Collection
+==================
+
+Dead keys (for which the type has been removed) will be automatically unlinked
+from those keyrings that point to them and deleted as soon as possible by a
+background garbage collector.
+
+Similarly, revoked and expired keys will be garbage collected, but only after a
+certain amount of time has passed.  This time is set as a number of seconds in::
+
+	/proc/sys/kernel/keys/gc_delay
diff --git a/Documentation/security/keys/index.rst b/Documentation/security/keys/index.rst
new file mode 100644
index 000000000000..ddfe7e4726e6
--- /dev/null
+++ b/Documentation/security/keys/index.rst
@@ -0,0 +1,8 @@
+===========
+Kernel Keys
+===========
+
+.. toctree::
+   :maxdepth: 1
+
+   core
diff --git a/MAINTAINERS b/MAINTAINERS
index ce6744ee83e2..9f3b8b0cae5a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7342,7 +7342,7 @@ KEYS/KEYRINGS:
 M:	David Howells <dhowells@redhat.com>
 L:	keyrings@vger.kernel.org
 S:	Maintained
-F:	Documentation/security/keys.txt
+F:	Documentation/security/keys/core.rst
 F:	include/linux/key.h
 F:	include/linux/key-type.h
 F:	include/linux/keyctl.h
diff --git a/include/linux/key.h b/include/linux/key.h
index 0c9b93b0d1f7..24dfe6c1f8cb 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  *
  *
- * See Documentation/security/keys.txt for information on keys/keyrings.
+ * See Documentation/security/keys/core.rst for information on keys/keyrings.
  */
 
 #ifndef _LINUX_KEY_H
-- 
cgit v1.2.3


From 7b6859fbdcc4a590c8ef03bcc00d770b42d41c42 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Thu, 18 May 2017 19:41:04 +0300
Subject: qed: Utilize FW 8.20.0.0

This pushes qed [and as result, all qed* drivers] into using 8.20.0.0
firmware. The changes are mostly contained in qed with minor changes
to qedi due to some HSI changes.

Content-wise, the firmware contains fixes to various issues exposed
since the release of the previous firmware, including:
 - Corrects iSCSI fast retransmit when data digest is enabled.
 - Stop draining packets when receiving several consecutive PFCs.
 - Prevent possible assertion when consecutively opening/closing
   many connections.
 - Prevent possible assertion due to too long BDQ fetch time.

In addition, the new firmware would allow us to later add iWARP support
in qed and qedr.

Changes from previous version
-----------------------------
 - V2: Fix warning in qed_debug.c

Signed-off-by: Chad Dupuis <Chad.Dupuis@cavium.com>
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h              |    2 +-
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c         |   11 +-
 drivers/net/ethernet/qlogic/qed/qed_debug.c        | 3332 +++++++++--------
 drivers/net/ethernet/qlogic/qed/qed_debug.h        |    3 +
 drivers/net/ethernet/qlogic/qed/qed_hsi.h          | 3739 +++++++++++++-------
 .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c    |  267 +-
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c        |    3 -
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h     |  186 +-
 drivers/net/ethernet/qlogic/qed/qed_roce.c         |    4 -
 drivers/net/ethernet/qlogic/qed/qed_sp_commands.c  |   23 +-
 drivers/scsi/qedi/qedi_fw.c                        |   20 +-
 drivers/scsi/qedi/qedi_fw_api.c                    |    3 +-
 drivers/scsi/qedi/qedi_iscsi.c                     |    3 -
 include/linux/qed/common_hsi.h                     |  209 +-
 include/linux/qed/eth_common.h                     |    3 +-
 include/linux/qed/fcoe_common.h                    |    1 -
 include/linux/qed/iscsi_common.h                   |   91 +-
 include/linux/qed/rdma_common.h                    |    2 +-
 include/linux/qed/roce_common.h                    |    2 +
 include/linux/qed/tcp_common.h                     |    5 +-
 20 files changed, 4841 insertions(+), 3068 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 2ab1aab7c3fe..162cd7ff9a69 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -54,7 +54,7 @@ extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_MAJOR_VERSION               8
 #define QED_MINOR_VERSION               10
-#define QED_REVISION_VERSION            10
+#define QED_REVISION_VERSION            11
 #define QED_ENGINEERING_VERSION 21
 
 #define QED_VERSION						 \
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index d883ad5bec6d..b7ca0e2181c4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -944,17 +944,18 @@ void qed_dcbx_set_pf_update_params(struct qed_dcbx_results *p_src,
 	p_dest->pf_id = p_src->pf_id;
 
 	update_flag = p_src->arr[DCBX_PROTOCOL_FCOE].update;
-	p_dest->update_fcoe_dcb_data_flag = update_flag;
+	p_dest->update_fcoe_dcb_data_mode = update_flag;
 
 	update_flag = p_src->arr[DCBX_PROTOCOL_ROCE].update;
-	p_dest->update_roce_dcb_data_flag = update_flag;
+	p_dest->update_roce_dcb_data_mode = update_flag;
+
 	update_flag = p_src->arr[DCBX_PROTOCOL_ROCE_V2].update;
-	p_dest->update_roce_dcb_data_flag = update_flag;
+	p_dest->update_rroce_dcb_data_mode = update_flag;
 
 	update_flag = p_src->arr[DCBX_PROTOCOL_ISCSI].update;
-	p_dest->update_iscsi_dcb_data_flag = update_flag;
+	p_dest->update_iscsi_dcb_data_mode = update_flag;
 	update_flag = p_src->arr[DCBX_PROTOCOL_ETH].update;
-	p_dest->update_eth_dcb_data_flag = update_flag;
+	p_dest->update_eth_dcb_data_mode = update_flag;
 
 	p_dcb_data = &p_dest->fcoe_dcb_data;
 	qed_dcbx_update_protocol_data(p_dcb_data, p_src, DCBX_PROTOCOL_FCOE);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 483241b4b05d..87a1389fb4a8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -15,13 +15,6 @@
 #include "qed_mcp.h"
 #include "qed_reg_addr.h"
 
-/* Chip IDs enum */
-enum chip_ids {
-	CHIP_BB_B0,
-	CHIP_K2,
-	MAX_CHIP_IDS
-};
-
 /* Memory groups enum */
 enum mem_groups {
 	MEM_GROUP_PXP_MEM,
@@ -33,7 +26,6 @@ enum mem_groups {
 	MEM_GROUP_BRB_MEM,
 	MEM_GROUP_PRS_MEM,
 	MEM_GROUP_SDM_MEM,
-	MEM_GROUP_PBUF,
 	MEM_GROUP_IOR,
 	MEM_GROUP_RAM,
 	MEM_GROUP_BTB_RAM,
@@ -45,6 +37,7 @@ enum mem_groups {
 	MEM_GROUP_CAU_PI,
 	MEM_GROUP_CAU_MEM,
 	MEM_GROUP_PXP_ILT,
+	MEM_GROUP_PBUF,
 	MEM_GROUP_MULD_MEM,
 	MEM_GROUP_BTB_MEM,
 	MEM_GROUP_IGU_MEM,
@@ -66,7 +59,6 @@ static const char * const s_mem_group_names[] = {
 	"BRB_MEM",
 	"PRS_MEM",
 	"SDM_MEM",
-	"PBUF",
 	"IOR",
 	"RAM",
 	"BTB_RAM",
@@ -78,6 +70,7 @@ static const char * const s_mem_group_names[] = {
 	"CAU_PI",
 	"CAU_MEM",
 	"PXP_ILT",
+	"PBUF",
 	"MULD_MEM",
 	"BTB_MEM",
 	"IGU_MEM",
@@ -88,48 +81,59 @@ static const char * const s_mem_group_names[] = {
 };
 
 /* Idle check conditions */
-static u32 cond4(const u32 *r, const u32 *imm)
+
+static u32 cond5(const u32 *r, const u32 *imm)
 {
 	return ((r[0] & imm[0]) != imm[1]) && ((r[1] & imm[2]) != imm[3]);
 }
 
-static u32 cond6(const u32 *r, const u32 *imm)
+static u32 cond7(const u32 *r, const u32 *imm)
 {
 	return ((r[0] >> imm[0]) & imm[1]) != imm[2];
 }
 
-static u32 cond5(const u32 *r, const u32 *imm)
+static u32 cond14(const u32 *r, const u32 *imm)
+{
+	return (r[0] != imm[0]) && (((r[1] >> imm[1]) & imm[2]) == imm[3]);
+}
+
+static u32 cond6(const u32 *r, const u32 *imm)
 {
 	return (r[0] & imm[0]) != imm[1];
 }
 
-static u32 cond8(const u32 *r, const u32 *imm)
+static u32 cond9(const u32 *r, const u32 *imm)
 {
 	return ((r[0] & imm[0]) >> imm[1]) !=
 	    (((r[0] & imm[2]) >> imm[3]) | ((r[1] & imm[4]) << imm[5]));
 }
 
-static u32 cond9(const u32 *r, const u32 *imm)
+static u32 cond10(const u32 *r, const u32 *imm)
 {
 	return ((r[0] & imm[0]) >> imm[1]) != (r[0] & imm[2]);
 }
 
-static u32 cond1(const u32 *r, const u32 *imm)
+static u32 cond4(const u32 *r, const u32 *imm)
 {
 	return (r[0] & ~imm[0]) != imm[1];
 }
 
 static u32 cond0(const u32 *r, const u32 *imm)
+{
+	return (r[0] & ~r[1]) != imm[0];
+}
+
+static u32 cond1(const u32 *r, const u32 *imm)
 {
 	return r[0] != imm[0];
 }
 
-static u32 cond10(const u32 *r, const u32 *imm)
+static u32 cond11(const u32 *r, const u32 *imm)
 {
 	return r[0] != r[1] && r[2] == imm[0];
 }
 
-static u32 cond11(const u32 *r, const u32 *imm)
+static u32 cond12(const u32 *r, const u32 *imm)
 {
 	return r[0] != r[1] && r[2] > imm[0];
 }
@@ -139,12 +143,12 @@ static u32 cond3(const u32 *r, const u32 *imm)
 	return r[0] != r[1];
 }
 
-static u32 cond12(const u32 *r, const u32 *imm)
+static u32 cond13(const u32 *r, const u32 *imm)
 {
 	return r[0] & imm[0];
 }
 
-static u32 cond7(const u32 *r, const u32 *imm)
+static u32 cond8(const u32 *r, const u32 *imm)
 {
 	return r[0] < (r[1] - imm[0]);
 }
@@ -169,6 +173,8 @@ static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = {
 	cond10,
 	cond11,
 	cond12,
+	cond13,
+	cond14,
 };
 
 /******************************* Data Types **********************************/
@@ -181,11 +187,6 @@ enum platform_ids {
 	MAX_PLATFORM_IDS
 };
 
-struct dbg_array {
-	const u32 *ptr;
-	u32 size_in_dwords;
-};
-
 struct chip_platform_defs {
 	u8 num_ports;
 	u8 num_pfs;
@@ -204,7 +205,9 @@ struct platform_defs {
 	u32 delay_factor;
 };
 
-/* Storm constant definitions */
+/* Storm constant definitions.
+ * Addresses are in bytes, sizes are in quad-regs.
+ */
 struct storm_defs {
 	char letter;
 	enum block_id block_id;
@@ -218,13 +221,13 @@ struct storm_defs {
 	u32 sem_sync_dbg_empty_addr;
 	u32 sem_slow_dbg_empty_addr;
 	u32 cm_ctx_wr_addr;
-	u32 cm_conn_ag_ctx_lid_size; /* In quad-regs */
+	u32 cm_conn_ag_ctx_lid_size;
 	u32 cm_conn_ag_ctx_rd_addr;
-	u32 cm_conn_st_ctx_lid_size; /* In quad-regs */
+	u32 cm_conn_st_ctx_lid_size;
 	u32 cm_conn_st_ctx_rd_addr;
-	u32 cm_task_ag_ctx_lid_size; /* In quad-regs */
+	u32 cm_task_ag_ctx_lid_size;
 	u32 cm_task_ag_ctx_rd_addr;
-	u32 cm_task_st_ctx_lid_size; /* In quad-regs */
+	u32 cm_task_st_ctx_lid_size;
 	u32 cm_task_st_ctx_rd_addr;
 };
 
@@ -233,17 +236,23 @@ struct block_defs {
 	const char *name;
 	bool has_dbg_bus[MAX_CHIP_IDS];
 	bool associated_to_storm;
-	u32 storm_id; /* Valid only if associated_to_storm is true */
+
+	/* Valid only if associated_to_storm is true */
+	u32 storm_id;
 	enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS];
 	u32 dbg_select_addr;
-	u32 dbg_cycle_enable_addr;
+	u32 dbg_enable_addr;
 	u32 dbg_shift_addr;
 	u32 dbg_force_valid_addr;
 	u32 dbg_force_frame_addr;
 	bool has_reset_bit;
-	bool unreset; /* If true, the block is taken out of reset before dump */
+
+	/* If true, block is taken out of reset before dump */
+	bool unreset;
 	enum dbg_reset_regs reset_reg;
-	u8 reset_bit_offset; /* Bit offset in reset register */
+
+	/* Bit offset in reset register */
+	u8 reset_bit_offset;
 };
 
 /* Reset register definitions */
@@ -262,12 +271,13 @@ struct grc_param_defs {
 	u32 crash_preset_val;
 };
 
+/* Address is in 128b units. Width is in bits. */
 struct rss_mem_defs {
 	const char *mem_name;
 	const char *type_name;
-	u32 addr; /* In 128b units */
+	u32 addr;
 	u32 num_entries[MAX_CHIP_IDS];
-	u32 entry_width[MAX_CHIP_IDS]; /* In bits */
+	u32 entry_width[MAX_CHIP_IDS];
 };
 
 struct vfc_ram_defs {
@@ -289,10 +299,20 @@ struct big_ram_defs {
 
 struct phy_defs {
 	const char *phy_name;
+
+	/* PHY base GRC address */
 	u32 base_addr;
+
+	/* Relative address of indirect TBUS address register (bits 0..7) */
 	u32 tbus_addr_lo_addr;
+
+	/* Relative address of indirect TBUS address register (bits 8..10) */
 	u32 tbus_addr_hi_addr;
+
+	/* Relative address of indirect TBUS data register (bits 0..7) */
 	u32 tbus_data_lo_addr;
+
+	/* Relative address of indirect TBUS data register (bits 8..11) */
 	u32 tbus_data_hi_addr;
 };
 
@@ -300,9 +320,11 @@ struct phy_defs {
 
 #define MAX_LCIDS			320
 #define MAX_LTIDS			320
+
 #define NUM_IOR_SETS			2
 #define IORS_PER_SET			176
 #define IOR_SET_OFFSET(set_id)		((set_id) * 256)
+
 #define BYTES_IN_DWORD			sizeof(u32)
 
 /* In the macros below, size and offset are specified in bits */
@@ -315,6 +337,7 @@ struct phy_defs {
 #define FIELD_BIT_MASK(type, field) \
 	(((1 << FIELD_BIT_SIZE(type, field)) - 1) << \
 	 FIELD_DWORD_SHIFT(type, field))
+
 #define SET_VAR_FIELD(var, type, field, val) \
 	do { \
 		var[FIELD_DWORD_OFFSET(type, field)] &=	\
@@ -322,31 +345,51 @@ struct phy_defs {
 		var[FIELD_DWORD_OFFSET(type, field)] |= \
 		(val) << FIELD_DWORD_SHIFT(type, field); \
 	} while (0)
+
 #define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \
 	do { \
 		for (i = 0; i < (arr_size); i++) \
 			qed_wr(dev, ptt, addr,	(arr)[i]); \
 	} while (0)
+
 #define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \
 	do { \
 		for (i = 0; i < (arr_size); i++) \
 			(arr)[i] = qed_rd(dev, ptt, addr); \
 	} while (0)
 
+#ifndef DWORDS_TO_BYTES
 #define DWORDS_TO_BYTES(dwords)		((dwords) * BYTES_IN_DWORD)
+#endif
+#ifndef BYTES_TO_DWORDS
 #define BYTES_TO_DWORDS(bytes)		((bytes) / BYTES_IN_DWORD)
+#endif
+
+/* extra lines include a signature line + optional latency events line */
+#ifndef NUM_DBG_LINES
+#define NUM_EXTRA_DBG_LINES(block_desc) \
+	(1 + ((block_desc)->has_latency_events ? 1 : 0))
+#define NUM_DBG_LINES(block_desc) \
+	((block_desc)->num_of_lines + NUM_EXTRA_DBG_LINES(block_desc))
+#endif
+
 #define RAM_LINES_TO_DWORDS(lines)	((lines) * 2)
 #define RAM_LINES_TO_BYTES(lines) \
 	DWORDS_TO_BYTES(RAM_LINES_TO_DWORDS(lines))
+
 #define REG_DUMP_LEN_SHIFT		24
 #define MEM_DUMP_ENTRY_SIZE_DWORDS \
 	BYTES_TO_DWORDS(sizeof(struct dbg_dump_mem))
+
 #define IDLE_CHK_RULE_SIZE_DWORDS \
 	BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_rule))
+
 #define IDLE_CHK_RESULT_HDR_DWORDS \
 	BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_hdr))
+
 #define IDLE_CHK_RESULT_REG_HDR_DWORDS \
 	BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr))
+
 #define IDLE_CHK_MAX_ENTRIES_SIZE	32
 
 /* The sizes and offsets below are specified in bits */
@@ -363,62 +406,92 @@ struct phy_defs {
 #define VFC_RAM_ADDR_ROW_OFFSET		2
 #define VFC_RAM_ADDR_ROW_SIZE		10
 #define VFC_RAM_RESP_STRUCT_SIZE	256
+
 #define VFC_CAM_CMD_DWORDS		CEIL_DWORDS(VFC_CAM_CMD_STRUCT_SIZE)
 #define VFC_CAM_ADDR_DWORDS		CEIL_DWORDS(VFC_CAM_ADDR_STRUCT_SIZE)
 #define VFC_CAM_RESP_DWORDS		CEIL_DWORDS(VFC_CAM_RESP_STRUCT_SIZE)
 #define VFC_RAM_CMD_DWORDS		VFC_CAM_CMD_DWORDS
 #define VFC_RAM_ADDR_DWORDS		CEIL_DWORDS(VFC_RAM_ADDR_STRUCT_SIZE)
 #define VFC_RAM_RESP_DWORDS		CEIL_DWORDS(VFC_RAM_RESP_STRUCT_SIZE)
+
 #define NUM_VFC_RAM_TYPES		4
+
 #define VFC_CAM_NUM_ROWS		512
+
 #define VFC_OPCODE_CAM_RD		14
 #define VFC_OPCODE_RAM_RD		0
+
 #define NUM_RSS_MEM_TYPES		5
+
 #define NUM_BIG_RAM_TYPES		3
 #define BIG_RAM_BLOCK_SIZE_BYTES	128
 #define BIG_RAM_BLOCK_SIZE_DWORDS \
 	BYTES_TO_DWORDS(BIG_RAM_BLOCK_SIZE_BYTES)
+
 #define NUM_PHY_TBUS_ADDRESSES		2048
 #define PHY_DUMP_SIZE_DWORDS		(NUM_PHY_TBUS_ADDRESSES / 2)
+
 #define RESET_REG_UNRESET_OFFSET	4
+
 #define STALL_DELAY_MS			500
+
 #define STATIC_DEBUG_LINE_DWORDS	9
-#define NUM_DBG_BUS_LINES		256
+
 #define NUM_COMMON_GLOBAL_PARAMS	8
+
 #define FW_IMG_MAIN			1
-#define REG_FIFO_DEPTH_ELEMENTS		32
+
+#ifndef REG_FIFO_ELEMENT_DWORDS
 #define REG_FIFO_ELEMENT_DWORDS		2
+#endif
+#define REG_FIFO_DEPTH_ELEMENTS		32
 #define REG_FIFO_DEPTH_DWORDS \
 	(REG_FIFO_ELEMENT_DWORDS * REG_FIFO_DEPTH_ELEMENTS)
-#define IGU_FIFO_DEPTH_ELEMENTS		64
+
+#ifndef IGU_FIFO_ELEMENT_DWORDS
 #define IGU_FIFO_ELEMENT_DWORDS		4
+#endif
+#define IGU_FIFO_DEPTH_ELEMENTS		64
 #define IGU_FIFO_DEPTH_DWORDS \
 	(IGU_FIFO_ELEMENT_DWORDS * IGU_FIFO_DEPTH_ELEMENTS)
-#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS	20
+
+#ifndef PROTECTION_OVERRIDE_ELEMENT_DWORDS
 #define PROTECTION_OVERRIDE_ELEMENT_DWORDS	2
+#endif
+#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS	20
 #define PROTECTION_OVERRIDE_DEPTH_DWORDS \
 	(PROTECTION_OVERRIDE_DEPTH_ELEMENTS * \
 	 PROTECTION_OVERRIDE_ELEMENT_DWORDS)
+
 #define MCP_SPAD_TRACE_OFFSIZE_ADDR \
 	(MCP_REG_SCRATCH + \
 	 offsetof(struct static_init, sections[SPAD_SECTION_TRACE]))
-#define MCP_TRACE_META_IMAGE_SIGNATURE  0x669955aa
+
 #define EMPTY_FW_VERSION_STR		"???_???_???_???"
 #define EMPTY_FW_IMAGE_STR		"???????????????"
 
 /***************************** Constant Arrays *******************************/
 
+struct dbg_array {
+	const u32 *ptr;
+	u32 size_in_dwords;
+};
+
 /* Debug arrays */
-static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} };
+static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} };
 
 /* Chip constant definitions array */
 static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = {
-	{ "bb_b0",
-	  { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB, MAX_NUM_VFS_BB}, {0, 0, 0},
-	    {0, 0, 0}, {0, 0, 0} } },
-	{ "k2",
-	  { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2, MAX_NUM_VFS_K2}, {0, 0, 0},
-	    {0, 0, 0}, {0, 0, 0} } }
+	{ "bb",
+	  {{MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB, MAX_NUM_VFS_BB},
+	   {0, 0, 0},
+	   {0, 0, 0},
+	   {0, 0, 0} } },
+	{ "ah",
+	  {{MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2, MAX_NUM_VFS_K2},
+	   {0, 0, 0},
+	   {0, 0, 0},
+	   {0, 0, 0} } }
 };
 
 /* Storm constant definitions array */
@@ -427,69 +500,74 @@ static struct storm_defs s_storm_defs[] = {
 	{'T', BLOCK_TSEM,
 	 {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, true,
 	 TSEM_REG_FAST_MEMORY,
-	 TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE,
-	 TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG,
-	 TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY,
+	 TSEM_REG_DBG_FRAME_MODE_BB_K2, TSEM_REG_SLOW_DBG_ACTIVE_BB_K2,
+	 TSEM_REG_SLOW_DBG_MODE_BB_K2, TSEM_REG_DBG_MODE1_CFG_BB_K2,
+	 TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY_BB_K2,
 	 TCM_REG_CTX_RBC_ACCS,
 	 4, TCM_REG_AGG_CON_CTX,
 	 16, TCM_REG_SM_CON_CTX,
 	 2, TCM_REG_AGG_TASK_CTX,
 	 4, TCM_REG_SM_TASK_CTX},
+
 	/* Mstorm */
 	{'M', BLOCK_MSEM,
 	 {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, false,
 	 MSEM_REG_FAST_MEMORY,
-	 MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE,
-	 MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG,
-	 MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY,
+	 MSEM_REG_DBG_FRAME_MODE_BB_K2, MSEM_REG_SLOW_DBG_ACTIVE_BB_K2,
+	 MSEM_REG_SLOW_DBG_MODE_BB_K2, MSEM_REG_DBG_MODE1_CFG_BB_K2,
+	 MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY_BB_K2,
 	 MCM_REG_CTX_RBC_ACCS,
 	 1, MCM_REG_AGG_CON_CTX,
 	 10, MCM_REG_SM_CON_CTX,
 	 2, MCM_REG_AGG_TASK_CTX,
 	 7, MCM_REG_SM_TASK_CTX},
+
 	/* Ustorm */
 	{'U', BLOCK_USEM,
 	 {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, false,
 	 USEM_REG_FAST_MEMORY,
-	 USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE,
-	 USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG,
-	 USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY,
+	 USEM_REG_DBG_FRAME_MODE_BB_K2, USEM_REG_SLOW_DBG_ACTIVE_BB_K2,
+	 USEM_REG_SLOW_DBG_MODE_BB_K2, USEM_REG_DBG_MODE1_CFG_BB_K2,
+	 USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY_BB_K2,
 	 UCM_REG_CTX_RBC_ACCS,
 	 2, UCM_REG_AGG_CON_CTX,
 	 13, UCM_REG_SM_CON_CTX,
 	 3, UCM_REG_AGG_TASK_CTX,
 	 3, UCM_REG_SM_TASK_CTX},
+
 	/* Xstorm */
 	{'X', BLOCK_XSEM,
 	 {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, false,
 	 XSEM_REG_FAST_MEMORY,
-	 XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE,
-	 XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG,
-	 XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY,
+	 XSEM_REG_DBG_FRAME_MODE_BB_K2, XSEM_REG_SLOW_DBG_ACTIVE_BB_K2,
+	 XSEM_REG_SLOW_DBG_MODE_BB_K2, XSEM_REG_DBG_MODE1_CFG_BB_K2,
+	 XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY_BB_K2,
 	 XCM_REG_CTX_RBC_ACCS,
 	 9, XCM_REG_AGG_CON_CTX,
 	 15, XCM_REG_SM_CON_CTX,
 	 0, 0,
 	 0, 0},
+
 	/* Ystorm */
 	{'Y', BLOCK_YSEM,
 	 {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, false,
 	 YSEM_REG_FAST_MEMORY,
-	 YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE,
-	 YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG,
-	 YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY,
+	 YSEM_REG_DBG_FRAME_MODE_BB_K2, YSEM_REG_SLOW_DBG_ACTIVE_BB_K2,
+	 YSEM_REG_SLOW_DBG_MODE_BB_K2, YSEM_REG_DBG_MODE1_CFG_BB_K2,
+	 YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY_BB_K2,
 	 YCM_REG_CTX_RBC_ACCS,
 	 2, YCM_REG_AGG_CON_CTX,
 	 3, YCM_REG_SM_CON_CTX,
 	 2, YCM_REG_AGG_TASK_CTX,
 	 12, YCM_REG_SM_TASK_CTX},
+
 	/* Pstorm */
 	{'P', BLOCK_PSEM,
 	 {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, true,
 	 PSEM_REG_FAST_MEMORY,
-	 PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE,
-	 PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG,
-	 PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY,
+	 PSEM_REG_DBG_FRAME_MODE_BB_K2, PSEM_REG_SLOW_DBG_ACTIVE_BB_K2,
+	 PSEM_REG_SLOW_DBG_MODE_BB_K2, PSEM_REG_DBG_MODE1_CFG_BB_K2,
+	 PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY_BB_K2,
 	 PCM_REG_CTX_RBC_ACCS,
 	 0, 0,
 	 10, PCM_REG_SM_CON_CTX,
@@ -498,6 +576,7 @@ static struct storm_defs s_storm_defs[] = {
 };
 
 /* Block definitions array */
+
 static struct block_defs block_grc_defs = {
 	"grc",
 	{true, true}, false, 0,
@@ -587,9 +666,11 @@ static struct block_defs block_pcie_defs = {
 	"pcie",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
-	PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
-	PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
-	PCIE_REG_DBG_COMMON_FORCE_FRAME,
+	PCIE_REG_DBG_COMMON_SELECT_K2,
+	PCIE_REG_DBG_COMMON_DWORD_ENABLE_K2,
+	PCIE_REG_DBG_COMMON_SHIFT_K2,
+	PCIE_REG_DBG_COMMON_FORCE_VALID_K2,
+	PCIE_REG_DBG_COMMON_FORCE_FRAME_K2,
 	false, false, MAX_DBG_RESET_REGS, 0
 };
 
@@ -691,9 +772,9 @@ static struct block_defs block_pglcs_defs = {
 	"pglcs",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
-	PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE,
-	PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID,
-	PGLCS_REG_DBG_FORCE_FRAME,
+	PGLCS_REG_DBG_SELECT_K2, PGLCS_REG_DBG_DWORD_ENABLE_K2,
+	PGLCS_REG_DBG_SHIFT_K2, PGLCS_REG_DBG_FORCE_VALID_K2,
+	PGLCS_REG_DBG_FORCE_FRAME_K2,
 	true, false, DBG_RESET_REG_MISCS_PL_HV, 2
 };
 
@@ -991,10 +1072,11 @@ static struct block_defs block_yuld_defs = {
 	"yuld",
 	{true, true}, false, 0,
 	{DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
-	YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE,
-	YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID,
-	YULD_REG_DBG_FORCE_FRAME,
-	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 15
+	YULD_REG_DBG_SELECT_BB_K2, YULD_REG_DBG_DWORD_ENABLE_BB_K2,
+	YULD_REG_DBG_SHIFT_BB_K2, YULD_REG_DBG_FORCE_VALID_BB_K2,
+	YULD_REG_DBG_FORCE_FRAME_BB_K2,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2,
+	15
 };
 
 static struct block_defs block_xyld_defs = {
@@ -1143,9 +1225,9 @@ static struct block_defs block_umac_defs = {
 	"umac",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
-	UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE,
-	UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID,
-	UMAC_REG_DBG_FORCE_FRAME,
+	UMAC_REG_DBG_SELECT_K2, UMAC_REG_DBG_DWORD_ENABLE_K2,
+	UMAC_REG_DBG_SHIFT_K2, UMAC_REG_DBG_FORCE_VALID_K2,
+	UMAC_REG_DBG_FORCE_FRAME_K2,
 	true, false, DBG_RESET_REG_MISCS_PL_HV, 6
 };
 
@@ -1177,9 +1259,9 @@ static struct block_defs block_wol_defs = {
 	"wol",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
-	WOL_REG_DBG_SELECT, WOL_REG_DBG_DWORD_ENABLE,
-	WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID,
-	WOL_REG_DBG_FORCE_FRAME,
+	WOL_REG_DBG_SELECT_K2, WOL_REG_DBG_DWORD_ENABLE_K2,
+	WOL_REG_DBG_SHIFT_K2, WOL_REG_DBG_FORCE_VALID_K2,
+	WOL_REG_DBG_FORCE_FRAME_K2,
 	true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7
 };
 
@@ -1187,9 +1269,9 @@ static struct block_defs block_bmbn_defs = {
 	"bmbn",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB},
-	BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE,
-	BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID,
-	BMBN_REG_DBG_FORCE_FRAME,
+	BMBN_REG_DBG_SELECT_K2, BMBN_REG_DBG_DWORD_ENABLE_K2,
+	BMBN_REG_DBG_SHIFT_K2, BMBN_REG_DBG_FORCE_VALID_K2,
+	BMBN_REG_DBG_FORCE_FRAME_K2,
 	false, false, MAX_DBG_RESET_REGS, 0
 };
 
@@ -1204,9 +1286,9 @@ static struct block_defs block_nwm_defs = {
 	"nwm",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
-	NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE,
-	NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID,
-	NWM_REG_DBG_FORCE_FRAME,
+	NWM_REG_DBG_SELECT_K2, NWM_REG_DBG_DWORD_ENABLE_K2,
+	NWM_REG_DBG_SHIFT_K2, NWM_REG_DBG_FORCE_VALID_K2,
+	NWM_REG_DBG_FORCE_FRAME_K2,
 	true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0
 };
 
@@ -1214,9 +1296,9 @@ static struct block_defs block_nws_defs = {
 	"nws",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
-	NWS_REG_DBG_SELECT, NWS_REG_DBG_DWORD_ENABLE,
-	NWS_REG_DBG_SHIFT, NWS_REG_DBG_FORCE_VALID,
-	NWS_REG_DBG_FORCE_FRAME,
+	NWS_REG_DBG_SELECT_K2, NWS_REG_DBG_DWORD_ENABLE_K2,
+	NWS_REG_DBG_SHIFT_K2, NWS_REG_DBG_FORCE_VALID_K2,
+	NWS_REG_DBG_FORCE_FRAME_K2,
 	true, false, DBG_RESET_REG_MISCS_PL_HV, 12
 };
 
@@ -1224,9 +1306,9 @@ static struct block_defs block_ms_defs = {
 	"ms",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
-	MS_REG_DBG_SELECT, MS_REG_DBG_DWORD_ENABLE,
-	MS_REG_DBG_SHIFT, MS_REG_DBG_FORCE_VALID,
-	MS_REG_DBG_FORCE_FRAME,
+	MS_REG_DBG_SELECT_K2, MS_REG_DBG_DWORD_ENABLE_K2,
+	MS_REG_DBG_SHIFT_K2, MS_REG_DBG_FORCE_VALID_K2,
+	MS_REG_DBG_FORCE_FRAME_K2,
 	true, false, DBG_RESET_REG_MISCS_PL_HV, 13
 };
 
@@ -1234,9 +1316,11 @@ static struct block_defs block_phy_pcie_defs = {
 	"phy_pcie",
 	{false, true}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
-	PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
-	PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
-	PCIE_REG_DBG_COMMON_FORCE_FRAME,
+	PCIE_REG_DBG_COMMON_SELECT_K2,
+	PCIE_REG_DBG_COMMON_DWORD_ENABLE_K2,
+	PCIE_REG_DBG_COMMON_SHIFT_K2,
+	PCIE_REG_DBG_COMMON_FORCE_VALID_K2,
+	PCIE_REG_DBG_COMMON_FORCE_FRAME_K2,
 	false, false, MAX_DBG_RESET_REGS, 0
 };
 
@@ -1261,6 +1345,13 @@ static struct block_defs block_rgfs_defs = {
 	false, false, MAX_DBG_RESET_REGS, 0
 };
 
+static struct block_defs block_rgsrc_defs = {
+	"rgsrc", {false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
 static struct block_defs block_tgfs_defs = {
 	"tgfs", {false, false}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
@@ -1268,6 +1359,13 @@ static struct block_defs block_tgfs_defs = {
 	false, false, MAX_DBG_RESET_REGS, 0
 };
 
+static struct block_defs block_tgsrc_defs = {
+	"tgsrc", {false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
 static struct block_defs block_ptld_defs = {
 	"ptld", {false, false}, false, 0,
 	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
@@ -1350,6 +1448,8 @@ static struct block_defs *s_block_defs[MAX_BLOCK_ID] = {
 	&block_muld_defs,
 	&block_yuld_defs,
 	&block_xyld_defs,
+	&block_ptld_defs,
+	&block_ypld_defs,
 	&block_prm_defs,
 	&block_pbf_pb1_defs,
 	&block_pbf_pb2_defs,
@@ -1363,6 +1463,10 @@ static struct block_defs *s_block_defs[MAX_BLOCK_ID] = {
 	&block_tcfc_defs,
 	&block_igu_defs,
 	&block_cau_defs,
+	&block_rgfs_defs,
+	&block_rgsrc_defs,
+	&block_tgfs_defs,
+	&block_tgsrc_defs,
 	&block_umac_defs,
 	&block_xmac_defs,
 	&block_dbg_defs,
@@ -1376,10 +1480,6 @@ static struct block_defs *s_block_defs[MAX_BLOCK_ID] = {
 	&block_phy_pcie_defs,
 	&block_led_defs,
 	&block_avs_wrap_defs,
-	&block_rgfs_defs,
-	&block_tgfs_defs,
-	&block_ptld_defs,
-	&block_ypld_defs,
 	&block_misc_aeu_defs,
 	&block_bar0_map_defs,
 };
@@ -1392,66 +1492,151 @@ static struct platform_defs s_platform_defs[] = {
 };
 
 static struct grc_param_defs s_grc_param_defs[] = {
-	{{1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_TSTORM */
-	{{1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_MSTORM */
-	{{1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_USTORM */
-	{{1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_XSTORM */
-	{{1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_YSTORM */
-	{{1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_PSTORM */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_REGS */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_RAM */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_PBUF */
-	{{0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_IOR */
-	{{0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_VFC */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CM_CTX */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_ILT */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_RSS */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CAU */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_QM */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_MCP */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_RESERVED */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CFC */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_IGU */
-	{{0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_BRB */
-	{{0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_BTB */
-	{{0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_BMB */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_NIG */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_MULD */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_PRS */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_DMAE */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_TM */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_SDM */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_DIF */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_STATIC */
-	{{0, 0}, 0, 1, false, 0, 0},	/* DBG_GRC_PARAM_UNSTALL */
+	/* DBG_GRC_PARAM_DUMP_TSTORM */
+	{{1, 1}, 0, 1, false, 1, 1},
+
+	/* DBG_GRC_PARAM_DUMP_MSTORM */
+	{{1, 1}, 0, 1, false, 1, 1},
+
+	/* DBG_GRC_PARAM_DUMP_USTORM */
+	{{1, 1}, 0, 1, false, 1, 1},
+
+	/* DBG_GRC_PARAM_DUMP_XSTORM */
+	{{1, 1}, 0, 1, false, 1, 1},
+
+	/* DBG_GRC_PARAM_DUMP_YSTORM */
+	{{1, 1}, 0, 1, false, 1, 1},
+
+	/* DBG_GRC_PARAM_DUMP_PSTORM */
+	{{1, 1}, 0, 1, false, 1, 1},
+
+	/* DBG_GRC_PARAM_DUMP_REGS */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_RAM */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_PBUF */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_IOR */
+	{{0, 0}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_VFC */
+	{{0, 0}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_CM_CTX */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_ILT */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_RSS */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_CAU */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_QM */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_MCP */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_RESERVED */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_CFC */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_IGU */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_BRB */
+	{{0, 0}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_BTB */
+	{{0, 0}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_BMB */
+	{{0, 0}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_NIG */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_MULD */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_PRS */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_DMAE */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_TM */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_SDM */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_DIF */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_STATIC */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_UNSTALL */
+	{{0, 0}, 0, 1, false, 0, 0},
+
+	/* DBG_GRC_PARAM_NUM_LCIDS */
 	{{MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS,
-	 MAX_LCIDS},			/* DBG_GRC_PARAM_NUM_LCIDS */
+	 MAX_LCIDS},
+
+	/* DBG_GRC_PARAM_NUM_LTIDS */
 	{{MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS,
-	 MAX_LTIDS},			/* DBG_GRC_PARAM_NUM_LTIDS */
-	{{0, 0}, 0, 1, true, 0, 0},	/* DBG_GRC_PARAM_EXCLUDE_ALL */
-	{{0, 0}, 0, 1, true, 0, 0},	/* DBG_GRC_PARAM_CRASH */
-	{{0, 0}, 0, 1, false, 1, 0},	/* DBG_GRC_PARAM_PARITY_SAFE */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CM */
-	{{1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_PHY */
-	{{0, 0}, 0, 1, false, 0, 0},	/* DBG_GRC_PARAM_NO_MCP */
-	{{0, 0}, 0, 1, false, 0, 0}	/* DBG_GRC_PARAM_NO_FW_VER */
+	 MAX_LTIDS},
+
+	/* DBG_GRC_PARAM_EXCLUDE_ALL */
+	{{0, 0}, 0, 1, true, 0, 0},
+
+	/* DBG_GRC_PARAM_CRASH */
+	{{0, 0}, 0, 1, true, 0, 0},
+
+	/* DBG_GRC_PARAM_PARITY_SAFE */
+	{{0, 0}, 0, 1, false, 1, 0},
+
+	/* DBG_GRC_PARAM_DUMP_CM */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_DUMP_PHY */
+	{{1, 1}, 0, 1, false, 0, 1},
+
+	/* DBG_GRC_PARAM_NO_MCP */
+	{{0, 0}, 0, 1, false, 0, 0},
+
+	/* DBG_GRC_PARAM_NO_FW_VER */
+	{{0, 0}, 0, 1, false, 0, 0}
 };
 
 static struct rss_mem_defs s_rss_mem_defs[] = {
 	{ "rss_mem_cid", "rss_cid", 0,
 	  {256, 320},
 	  {32, 32} },
+
 	{ "rss_mem_key_msb", "rss_key", 1024,
 	  {128, 208},
 	  {256, 256} },
+
 	{ "rss_mem_key_lsb", "rss_key", 2048,
 	  {128, 208},
 	  {64, 64} },
+
 	{ "rss_mem_info", "rss_info", 3072,
 	  {128, 208},
 	  {16, 16} },
+
 	{ "rss_mem_ind", "rss_ind", 4096,
-	  {(128 * 128), (128 * 208)},
+	  {16384, 26624},
 	  {16, 16} }
 };
 
@@ -1466,50 +1651,71 @@ static struct big_ram_defs s_big_ram_defs[] = {
 	{ "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB,
 	  BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA,
 	  {4800, 5632} },
+
 	{ "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB,
 	  BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA,
 	  {2880, 3680} },
+
 	{ "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB,
 	  BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA,
 	  {1152, 1152} }
 };
 
 static struct reset_reg_defs s_reset_regs_defs[] = {
+	/* DBG_RESET_REG_MISCS_PL_UA */
 	{ MISCS_REG_RESET_PL_UA, 0x0,
-	  {true, true} },		/* DBG_RESET_REG_MISCS_PL_UA */
+	  {true, true} },
+
+	/* DBG_RESET_REG_MISCS_PL_HV */
 	{ MISCS_REG_RESET_PL_HV, 0x0,
-	  {true, true} },		/* DBG_RESET_REG_MISCS_PL_HV */
-	{ MISCS_REG_RESET_PL_HV_2, 0x0,
-	  {false, true} },	/* DBG_RESET_REG_MISCS_PL_HV_2 */
+	  {true, true} },
+
+	/* DBG_RESET_REG_MISCS_PL_HV_2 */
+	{ MISCS_REG_RESET_PL_HV_2_K2, 0x0,
+	  {false, true} },
+
+	/* DBG_RESET_REG_MISC_PL_UA */
 	{ MISC_REG_RESET_PL_UA, 0x0,
-	  {true, true} },		/* DBG_RESET_REG_MISC_PL_UA */
+	  {true, true} },
+
+	/* DBG_RESET_REG_MISC_PL_HV */
 	{ MISC_REG_RESET_PL_HV, 0x0,
-	  {true, true} },		/* DBG_RESET_REG_MISC_PL_HV */
+	  {true, true} },
+
+	/* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */
 	{ MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040,
-	  {true, true} },		/* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */
+	  {true, true} },
+
+	/* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */
 	{ MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007,
-	  {true, true} },		/* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */
+	  {true, true} },
+
+	/* DBG_RESET_REG_MISC_PL_PDA_VAUX */
 	{ MISC_REG_RESET_PL_PDA_VAUX, 0x2,
-	  {true, true} },		/* DBG_RESET_REG_MISC_PL_PDA_VAUX */
+	  {true, true} },
 };
 
 static struct phy_defs s_phy_defs[] = {
-	{"nw_phy", NWS_REG_NWS_CMU, PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0,
-	 PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8,
-	 PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0,
-	 PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8},
-	{"sgmii_phy", MS_REG_MS_CMU, PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132,
-	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133,
-	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130,
-	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131},
-	{"pcie_phy0", PHY_PCIE_REG_PHY0, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132,
-	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133,
-	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130,
-	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131},
-	{"pcie_phy1", PHY_PCIE_REG_PHY1, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132,
-	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133,
-	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130,
-	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131},
+	{"nw_phy", NWS_REG_NWS_CMU_K2,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0_K2,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8_K2,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0_K2,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8_K2},
+	{"sgmii_phy", MS_REG_MS_CMU_K2,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132_K2,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133_K2,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130_K2,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131_K2},
+	{"pcie_phy0", PHY_PCIE_REG_PHY0_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131_K2},
+	{"pcie_phy1", PHY_PCIE_REG_PHY1_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130_K2,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131_K2},
 };
 
 /**************************** Private Functions ******************************/
@@ -1556,7 +1762,7 @@ static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn,
 		dev_data->chip_id = CHIP_K2;
 		dev_data->mode_enable[MODE_K2] = 1;
 	} else if (QED_IS_BB_B0(p_hwfn->cdev)) {
-		dev_data->chip_id = CHIP_BB_B0;
+		dev_data->chip_id = CHIP_BB;
 		dev_data->mode_enable[MODE_BB] = 1;
 	} else {
 		return DBG_STATUS_UNKNOWN_CHIP;
@@ -1569,9 +1775,20 @@ static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn,
 	qed_dbg_grc_init_params(p_hwfn);
 
 	dev_data->initialized = true;
+
 	return DBG_STATUS_OK;
 }
 
+static struct dbg_bus_block *get_dbg_bus_block_desc(struct qed_hwfn *p_hwfn,
+						    enum block_id block_id)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+	return (struct dbg_bus_block *)&dbg_bus_blocks[block_id *
+						       MAX_CHIP_IDS +
+						       dev_data->chip_id];
+}
+
 /* Reads the FW info structure for the specified Storm from the chip,
  * and writes it to the specified fw_info pointer.
  */
@@ -1579,25 +1796,28 @@ static void qed_read_fw_info(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt,
 			     u8 storm_id, struct fw_info *fw_info)
 {
-	/* Read first the address that points to fw_info location.
-	 * The address is located in the last line of the Storm RAM.
-	 */
-	u32 addr = s_storm_defs[storm_id].sem_fast_mem_addr +
-		   SEM_FAST_REG_INT_RAM +
-		   DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) -
-		   sizeof(struct fw_info_location);
+	struct storm_defs *storm = &s_storm_defs[storm_id];
 	struct fw_info_location fw_info_location;
-	u32 *dest = (u32 *)&fw_info_location;
-	u32 i;
+	u32 addr, i, *dest;
 
 	memset(&fw_info_location, 0, sizeof(fw_info_location));
 	memset(fw_info, 0, sizeof(*fw_info));
+
+	/* Read first the address that points to fw_info location.
+	 * The address is located in the last line of the Storm RAM.
+	 */
+	addr = storm->sem_fast_mem_addr + SEM_FAST_REG_INT_RAM +
+	       DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) -
+	       sizeof(fw_info_location);
+	dest = (u32 *)&fw_info_location;
+
 	for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location));
 	     i++, addr += BYTES_IN_DWORD)
 		dest[i] = qed_rd(p_hwfn, p_ptt, addr);
+
+	/* Read FW version info from Storm RAM */
 	if (fw_info_location.size > 0 && fw_info_location.size <=
 	    sizeof(*fw_info)) {
-		/* Read FW version info from Storm RAM */
 		addr = fw_info_location.grc_addr;
 		dest = (u32 *)fw_info;
 		for (i = 0; i < BYTES_TO_DWORDS(fw_info_location.size);
@@ -1606,27 +1826,30 @@ static void qed_read_fw_info(struct qed_hwfn *p_hwfn,
 	}
 }
 
-/* Dumps the specified string to the specified buffer. Returns the dumped size
- * in bytes (actual length + 1 for the null character termination).
+/* Dumps the specified string to the specified buffer.
+ * Returns the dumped size in bytes.
  */
 static u32 qed_dump_str(char *dump_buf, bool dump, const char *str)
 {
 	if (dump)
 		strcpy(dump_buf, str);
+
 	return (u32)strlen(str) + 1;
 }
 
-/* Dumps zeros to align the specified buffer to dwords. Returns the dumped size
- * in bytes.
+/* Dumps zeros to align the specified buffer to dwords.
+ * Returns the dumped size in bytes.
  */
 static u32 qed_dump_align(char *dump_buf, bool dump, u32 byte_offset)
 {
-	u8 offset_in_dword = (u8)(byte_offset & 0x3), align_size;
+	u8 offset_in_dword, align_size;
 
+	offset_in_dword = (u8)(byte_offset & 0x3);
 	align_size = offset_in_dword ? BYTES_IN_DWORD - offset_in_dword : 0;
 
 	if (dump && align_size)
 		memset(dump_buf, 0, align_size);
+
 	return align_size;
 }
 
@@ -1653,6 +1876,7 @@ static u32 qed_dump_str_param(u32 *dump_buf,
 
 	/* Align buffer to next dword */
 	offset += qed_dump_align(char_buf + offset, dump, offset);
+
 	return BYTES_TO_DWORDS(offset);
 }
 
@@ -1681,6 +1905,7 @@ static u32 qed_dump_num_param(u32 *dump_buf,
 	if (dump)
 		*(dump_buf + offset) = param_val;
 	offset++;
+
 	return offset;
 }
 
@@ -1695,7 +1920,6 @@ static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn,
 	char fw_ver_str[16] = EMPTY_FW_VERSION_STR;
 	char fw_img_str[16] = EMPTY_FW_IMAGE_STR;
 	struct fw_info fw_info = { {0}, {0} };
-	int printed_chars;
 	u32 offset = 0;
 
 	if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_FW_VER)) {
@@ -1705,37 +1929,32 @@ static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn,
 
 		for (storm_id = 0; storm_id < MAX_DBG_STORMS && !found;
 		     storm_id++) {
-			/* Read FW version/image  */
-			if (!dev_data->block_in_reset
-			    [s_storm_defs[storm_id].block_id]) {
-				/* read FW info for the current Storm */
-				qed_read_fw_info(p_hwfn,
-						 p_ptt, storm_id, &fw_info);
-
-				/* Create FW version/image strings */
-				printed_chars =
-				    snprintf(fw_ver_str,
-					     sizeof(fw_ver_str),
-					     "%d_%d_%d_%d",
-					     fw_info.ver.num.major,
-					     fw_info.ver.num.minor,
-					     fw_info.ver.num.rev,
-					     fw_info.ver.num.eng);
-				if (printed_chars < 0 || printed_chars >=
-				    sizeof(fw_ver_str))
-					DP_NOTICE(p_hwfn,
-						  "Unexpected debug error: invalid FW version string\n");
-				switch (fw_info.ver.image_id) {
-				case FW_IMG_MAIN:
-					strcpy(fw_img_str, "main");
-					break;
-				default:
-					strcpy(fw_img_str, "unknown");
-					break;
-				}
+			struct storm_defs *storm = &s_storm_defs[storm_id];
+
+			/* Read FW version/image */
+			if (dev_data->block_in_reset[storm->block_id])
+				continue;
 
-				found = true;
+			/* Read FW info for the current Storm */
+			qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info);
+
+			/* Create FW version/image strings */
+			if (snprintf(fw_ver_str, sizeof(fw_ver_str),
+				     "%d_%d_%d_%d", fw_info.ver.num.major,
+				     fw_info.ver.num.minor, fw_info.ver.num.rev,
+				     fw_info.ver.num.eng) < 0)
+				DP_NOTICE(p_hwfn,
+					  "Unexpected debug error: invalid FW version string\n");
+			switch (fw_info.ver.image_id) {
+			case FW_IMG_MAIN:
+				strcpy(fw_img_str, "main");
+				break;
+			default:
+				strcpy(fw_img_str, "unknown");
+				break;
 			}
+
+			found = true;
 		}
 	}
 
@@ -1747,6 +1966,7 @@ static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_num_param(dump_buf + offset,
 				     dump,
 				     "fw-timestamp", fw_info.ver.timestamp);
+
 	return offset;
 }
 
@@ -1759,17 +1979,18 @@ static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn,
 {
 	char mfw_ver_str[16] = EMPTY_FW_VERSION_STR;
 
-	if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_FW_VER)) {
+	if (dump &&
+	    !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_FW_VER)) {
 		u32 global_section_offsize, global_section_addr, mfw_ver;
 		u32 public_data_addr, global_section_offsize_addr;
-		int printed_chars;
 
-		/* Find MCP public data GRC address.
-		 * Needs to be ORed with MCP_REG_SCRATCH due to a HW bug.
+		/* Find MCP public data GRC address. Needs to be ORed with
+		 * MCP_REG_SCRATCH due to a HW bug.
 		 */
-		public_data_addr = qed_rd(p_hwfn, p_ptt,
+		public_data_addr = qed_rd(p_hwfn,
+					  p_ptt,
 					  MISC_REG_SHARED_MEM_ADDR) |
-					  MCP_REG_SCRATCH;
+				   MCP_REG_SCRATCH;
 
 		/* Find MCP public global section offset */
 		global_section_offsize_addr = public_data_addr +
@@ -1778,9 +1999,9 @@ static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn,
 					      sizeof(offsize_t) * PUBLIC_GLOBAL;
 		global_section_offsize = qed_rd(p_hwfn, p_ptt,
 						global_section_offsize_addr);
-		global_section_addr = MCP_REG_SCRATCH +
-				      (global_section_offsize &
-				       OFFSIZE_OFFSET_MASK) * 4;
+		global_section_addr =
+			MCP_REG_SCRATCH +
+			(global_section_offsize & OFFSIZE_OFFSET_MASK) * 4;
 
 		/* Read MFW version from MCP public global section */
 		mfw_ver = qed_rd(p_hwfn, p_ptt,
@@ -1788,13 +2009,9 @@ static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn,
 				 offsetof(struct public_global, mfw_ver));
 
 		/* Dump MFW version param */
-		printed_chars = snprintf(mfw_ver_str, sizeof(mfw_ver_str),
-					 "%d_%d_%d_%d",
-					 (u8) (mfw_ver >> 24),
-					 (u8) (mfw_ver >> 16),
-					 (u8) (mfw_ver >> 8),
-					 (u8) mfw_ver);
-		if (printed_chars < 0 || printed_chars >= sizeof(mfw_ver_str))
+		if (snprintf(mfw_ver_str, sizeof(mfw_ver_str), "%d_%d_%d_%d",
+			     (u8)(mfw_ver >> 24), (u8)(mfw_ver >> 16),
+			     (u8)(mfw_ver >> 8), (u8)mfw_ver) < 0)
 			DP_NOTICE(p_hwfn,
 				  "Unexpected debug error: invalid MFW version string\n");
 	}
@@ -1820,11 +2037,12 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn,
 					 bool dump,
 					 u8 num_specific_global_params)
 {
-	u8 num_params = NUM_COMMON_GLOBAL_PARAMS + num_specific_global_params;
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
 	u32 offset = 0;
+	u8 num_params;
 
-	/* Find platform string and dump global params section header */
+	/* Dump global params section header */
+	num_params = NUM_COMMON_GLOBAL_PARAMS + num_specific_global_params;
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "global_params", num_params);
 
@@ -1846,25 +2064,29 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn,
 	offset +=
 	    qed_dump_num_param(dump_buf + offset, dump, "pci-func",
 			       p_hwfn->abs_pf_id);
+
 	return offset;
 }
 
-/* Writes the last section to the specified buffer at the given offset.
- * Returns the dumped size in dwords.
+/* Writes the "last" section (including CRC) to the specified buffer at the
+ * given offset. Returns the dumped size in dwords.
  */
-static u32 qed_dump_last_section(u32 *dump_buf, u32 offset, bool dump)
+static u32 qed_dump_last_section(struct qed_hwfn *p_hwfn,
+				 u32 *dump_buf, u32 offset, bool dump)
 {
-	u32 start_offset = offset, crc = ~0;
+	u32 start_offset = offset;
 
 	/* Dump CRC section header */
 	offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0);
 
-	/* Calculate CRC32 and add it to the dword following the "last" section.
-	 */
+	/* Calculate CRC32 and add it to the dword after the "last" section */
 	if (dump)
-		*(dump_buf + offset) = ~crc32(crc, (u8 *)dump_buf,
+		*(dump_buf + offset) = ~crc32(0xffffffff,
+					      (u8 *)dump_buf,
 					      DWORDS_TO_BYTES(offset));
+
 	offset++;
+
 	return offset - start_offset;
 }
 
@@ -1883,11 +2105,12 @@ static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn,
 					    p_ptt, s_reset_regs_defs[i].addr);
 
 	/* Check if blocks are in reset */
-	for (i = 0; i < MAX_BLOCK_ID; i++)
-		dev_data->block_in_reset[i] =
-		    s_block_defs[i]->has_reset_bit &&
-		    !(reg_val[s_block_defs[i]->reset_reg] &
-		      BIT(s_block_defs[i]->reset_bit_offset));
+	for (i = 0; i < MAX_BLOCK_ID; i++) {
+		struct block_defs *block = s_block_defs[i];
+
+		dev_data->block_in_reset[i] = block->has_reset_bit &&
+		    !(reg_val[block->reset_reg] & BIT(block->reset_bit_offset));
+	}
 }
 
 /* Enable / disable the Debug block */
@@ -1902,12 +2125,12 @@ static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn,
 				    struct qed_ptt *p_ptt)
 {
 	u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val;
+	struct block_defs *dbg_block = s_block_defs[BLOCK_DBG];
 
-	dbg_reset_reg_addr =
-		s_reset_regs_defs[s_block_defs[BLOCK_DBG]->reset_reg].addr;
+	dbg_reset_reg_addr = s_reset_regs_defs[dbg_block->reset_reg].addr;
 	old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr);
-	new_reset_reg_val = old_reset_reg_val &
-			    ~BIT(s_block_defs[BLOCK_DBG]->reset_bit_offset);
+	new_reset_reg_val =
+	    old_reset_reg_val & ~BIT(dbg_block->reset_bit_offset);
 
 	qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val);
 	qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val);
@@ -1920,8 +2143,8 @@ static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn,
 	qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode);
 }
 
-/* Enable / disable Debug Bus clients according to the specified mask.
- * (1 = enable, 0 = disable)
+/* Enable / disable Debug Bus clients according to the specified mask
+ * (1 = enable, 0 = disable).
  */
 static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn,
 				   struct qed_ptt *p_ptt, u32 client_mask)
@@ -1931,10 +2154,14 @@ static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn,
 
 static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset)
 {
-	const u32 *ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr;
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-	u8 tree_val = ((u8 *)ptr)[(*modes_buf_offset)++];
 	bool arg1, arg2;
+	const u32 *ptr;
+	u8 tree_val;
+
+	/* Get next element from modes tree buffer */
+	ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr;
+	tree_val = ((u8 *)ptr)[(*modes_buf_offset)++];
 
 	switch (tree_val) {
 	case INIT_MODE_OP_NOT:
@@ -1974,75 +2201,81 @@ static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn,
 static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn,
 				    enum block_id block_id, u8 mem_group_id)
 {
+	struct block_defs *block = s_block_defs[block_id];
 	u8 i;
 
 	/* Check Storm match */
-	if (s_block_defs[block_id]->associated_to_storm &&
+	if (block->associated_to_storm &&
 	    !qed_grc_is_storm_included(p_hwfn,
-			(enum dbg_storms)s_block_defs[block_id]->storm_id))
+				       (enum dbg_storms)block->storm_id))
 		return false;
 
-	for (i = 0; i < NUM_BIG_RAM_TYPES; i++)
-		if (mem_group_id == s_big_ram_defs[i].mem_group_id ||
-		    mem_group_id == s_big_ram_defs[i].ram_mem_group_id)
-			return qed_grc_is_included(p_hwfn,
-						   s_big_ram_defs[i].grc_param);
-	if (mem_group_id == MEM_GROUP_PXP_ILT || mem_group_id ==
-	    MEM_GROUP_PXP_MEM)
+	for (i = 0; i < NUM_BIG_RAM_TYPES; i++) {
+		struct big_ram_defs *big_ram = &s_big_ram_defs[i];
+
+		if (mem_group_id == big_ram->mem_group_id ||
+		    mem_group_id == big_ram->ram_mem_group_id)
+			return qed_grc_is_included(p_hwfn, big_ram->grc_param);
+	}
+
+	switch (mem_group_id) {
+	case MEM_GROUP_PXP_ILT:
+	case MEM_GROUP_PXP_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PXP);
-	if (mem_group_id == MEM_GROUP_RAM)
+	case MEM_GROUP_RAM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RAM);
-	if (mem_group_id == MEM_GROUP_PBUF)
+	case MEM_GROUP_PBUF:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PBUF);
-	if (mem_group_id == MEM_GROUP_CAU_MEM ||
-	    mem_group_id == MEM_GROUP_CAU_SB ||
-	    mem_group_id == MEM_GROUP_CAU_PI)
+	case MEM_GROUP_CAU_MEM:
+	case MEM_GROUP_CAU_SB:
+	case MEM_GROUP_CAU_PI:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU);
-	if (mem_group_id == MEM_GROUP_QM_MEM)
+	case MEM_GROUP_QM_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM);
-	if (mem_group_id == MEM_GROUP_CONN_CFC_MEM ||
-	    mem_group_id == MEM_GROUP_TASK_CFC_MEM)
+	case MEM_GROUP_CFC_MEM:
+	case MEM_GROUP_CONN_CFC_MEM:
+	case MEM_GROUP_TASK_CFC_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC);
-	if (mem_group_id == MEM_GROUP_IGU_MEM || mem_group_id ==
-	    MEM_GROUP_IGU_MSIX)
+	case MEM_GROUP_IGU_MEM:
+	case MEM_GROUP_IGU_MSIX:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU);
-	if (mem_group_id == MEM_GROUP_MULD_MEM)
+	case MEM_GROUP_MULD_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MULD);
-	if (mem_group_id == MEM_GROUP_PRS_MEM)
+	case MEM_GROUP_PRS_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PRS);
-	if (mem_group_id == MEM_GROUP_DMAE_MEM)
+	case MEM_GROUP_DMAE_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DMAE);
-	if (mem_group_id == MEM_GROUP_TM_MEM)
+	case MEM_GROUP_TM_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_TM);
-	if (mem_group_id == MEM_GROUP_SDM_MEM)
+	case MEM_GROUP_SDM_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_SDM);
-	if (mem_group_id == MEM_GROUP_TDIF_CTX || mem_group_id ==
-	    MEM_GROUP_RDIF_CTX)
+	case MEM_GROUP_TDIF_CTX:
+	case MEM_GROUP_RDIF_CTX:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DIF);
-	if (mem_group_id == MEM_GROUP_CM_MEM)
+	case MEM_GROUP_CM_MEM:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM);
-	if (mem_group_id == MEM_GROUP_IOR)
+	case MEM_GROUP_IOR:
 		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR);
-
-	return true;
+	default:
+		return true;
+	}
 }
 
 /* Stalls all Storms */
 static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt, bool stall)
 {
-	u8 reg_val = stall ? 1 : 0;
+	u32 reg_addr;
 	u8 storm_id;
 
 	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
-		if (qed_grc_is_storm_included(p_hwfn,
-					      (enum dbg_storms)storm_id)) {
-			u32 reg_addr =
-			    s_storm_defs[storm_id].sem_fast_mem_addr +
-			    SEM_FAST_REG_STALL_0;
+		if (!qed_grc_is_storm_included(p_hwfn,
+					       (enum dbg_storms)storm_id))
+			continue;
 
-			qed_wr(p_hwfn, p_ptt, reg_addr, reg_val);
-		}
+		reg_addr = s_storm_defs[storm_id].sem_fast_mem_addr +
+		    SEM_FAST_REG_STALL_0_BB_K2;
+		qed_wr(p_hwfn, p_ptt, reg_addr, stall ? 1 : 0);
 	}
 
 	msleep(STALL_DELAY_MS);
@@ -2054,24 +2287,29 @@ static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn,
 {
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
 	u32 reg_val[MAX_DBG_RESET_REGS] = { 0 };
-	u32 i;
+	u32 block_id, i;
 
 	/* Fill reset regs values */
-	for (i = 0; i < MAX_BLOCK_ID; i++)
-		if (s_block_defs[i]->has_reset_bit && s_block_defs[i]->unreset)
-			reg_val[s_block_defs[i]->reset_reg] |=
-			    BIT(s_block_defs[i]->reset_bit_offset);
+	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+		struct block_defs *block = s_block_defs[block_id];
+
+		if (block->has_reset_bit && block->unreset)
+			reg_val[block->reset_reg] |=
+			    BIT(block->reset_bit_offset);
+	}
 
 	/* Write reset registers */
 	for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
-		if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
-			reg_val[i] |= s_reset_regs_defs[i].unreset_val;
-			if (reg_val[i])
-				qed_wr(p_hwfn,
-				       p_ptt,
-				       s_reset_regs_defs[i].addr +
-				       RESET_REG_UNRESET_OFFSET, reg_val[i]);
-		}
+		if (!s_reset_regs_defs[i].exists[dev_data->chip_id])
+			continue;
+
+		reg_val[i] |= s_reset_regs_defs[i].unreset_val;
+
+		if (reg_val[i])
+			qed_wr(p_hwfn,
+			       p_ptt,
+			       s_reset_regs_defs[i].addr +
+			       RESET_REG_UNRESET_OFFSET, reg_val[i]);
 	}
 }
 
@@ -2095,6 +2333,7 @@ qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type,
 		qed_get_block_attn_data(block_id, attn_type);
 
 	*num_attn_regs = block_type_data->num_regs;
+
 	return &((const struct dbg_attn_reg *)
 		 s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data->
 							  regs_offset];
@@ -2105,34 +2344,34 @@ static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn,
 				   struct qed_ptt *p_ptt)
 {
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	const struct dbg_attn_reg *attn_reg_arr;
 	u8 reg_idx, num_attn_regs;
 	u32 block_id;
 
 	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
-		const struct dbg_attn_reg *attn_reg_arr;
-
 		if (dev_data->block_in_reset[block_id])
 			continue;
 
 		attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id,
 						       ATTN_TYPE_PARITY,
 						       &num_attn_regs);
+
 		for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) {
 			const struct dbg_attn_reg *reg_data =
 				&attn_reg_arr[reg_idx];
+			u16 modes_buf_offset;
+			bool eval_mode;
 
 			/* Check mode */
-			bool eval_mode = GET_FIELD(reg_data->mode.data,
-						   DBG_MODE_HDR_EVAL_MODE) > 0;
-			u16 modes_buf_offset =
+			eval_mode = GET_FIELD(reg_data->mode.data,
+					      DBG_MODE_HDR_EVAL_MODE) > 0;
+			modes_buf_offset =
 				GET_FIELD(reg_data->mode.data,
 					  DBG_MODE_HDR_MODES_BUF_OFFSET);
 
+			/* If Mode match: clear parity status */
 			if (!eval_mode ||
 			    qed_is_mode_match(p_hwfn, &modes_buf_offset))
-				/* Mode match - read parity status read-clear
-				 * register.
-				 */
 				qed_rd(p_hwfn, p_ptt,
 				       DWORDS_TO_BYTES(reg_data->
 						       sts_clr_address));
@@ -2142,11 +2381,11 @@ static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn,
 
 /* Dumps GRC registers section header. Returns the dumped size in dwords.
  * The following parameters are dumped:
- * - 'count' = num_dumped_entries
- * - 'split' = split_type
- * - 'id' = split_id (dumped only if split_id >= 0)
- * - 'param_name' = param_val (user param, dumped only if param_name != NULL and
- *	param_val != NULL)
+ * - count:	 no. of dumped entries
+ * - split:	 split type
+ * - id:	 split ID (dumped only if split_id >= 0)
+ * - param_name: user parameter value (dumped only if param_name != NULL
+ *		 and param_val != NULL).
  */
 static u32 qed_grc_dump_regs_hdr(u32 *dump_buf,
 				 bool dump,
@@ -2170,84 +2409,100 @@ static u32 qed_grc_dump_regs_hdr(u32 *dump_buf,
 	if (param_name && param_val)
 		offset += qed_dump_str_param(dump_buf + offset,
 					     dump, param_name, param_val);
+
 	return offset;
 }
 
 /* Dumps the GRC registers in the specified address range.
  * Returns the dumped size in dwords.
+ * The addr and len arguments are specified in dwords.
  */
 static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn,
-				   struct qed_ptt *p_ptt, u32 *dump_buf,
-				   bool dump, u32 addr, u32 len)
+				   struct qed_ptt *p_ptt,
+				   u32 *dump_buf,
+				   bool dump, u32 addr, u32 len, bool wide_bus)
 {
 	u32 byte_addr = DWORDS_TO_BYTES(addr), offset = 0, i;
 
-	if (dump)
-		for (i = 0; i < len; i++, byte_addr += BYTES_IN_DWORD, offset++)
-			*(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr);
-	else
-		offset += len;
+	if (!dump)
+		return len;
+
+	for (i = 0; i < len; i++, byte_addr += BYTES_IN_DWORD, offset++)
+		*(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr);
+
 	return offset;
 }
 
-/* Dumps GRC registers sequence header. Returns the dumped size in dwords. */
-static u32 qed_grc_dump_reg_entry_hdr(u32 *dump_buf, bool dump, u32 addr,
-				      u32 len)
+/* Dumps GRC registers sequence header. Returns the dumped size in dwords.
+ * The addr and len arguments are specified in dwords.
+ */
+static u32 qed_grc_dump_reg_entry_hdr(u32 *dump_buf,
+				      bool dump, u32 addr, u32 len)
 {
 	if (dump)
 		*dump_buf = addr | (len << REG_DUMP_LEN_SHIFT);
+
 	return 1;
 }
 
-/* Dumps GRC registers sequence. Returns the dumped size in dwords. */
+/* Dumps GRC registers sequence. Returns the dumped size in dwords.
+ * The addr and len arguments are specified in dwords.
+ */
 static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn,
-				  struct qed_ptt *p_ptt, u32 *dump_buf,
-				  bool dump, u32 addr, u32 len)
+				  struct qed_ptt *p_ptt,
+				  u32 *dump_buf,
+				  bool dump, u32 addr, u32 len, bool wide_bus)
 {
 	u32 offset = 0;
 
 	offset += qed_grc_dump_reg_entry_hdr(dump_buf, dump, addr, len);
 	offset += qed_grc_dump_addr_range(p_hwfn,
 					  p_ptt,
-					  dump_buf + offset, dump, addr, len);
+					  dump_buf + offset,
+					  dump, addr, len, wide_bus);
+
 	return offset;
 }
 
 /* Dumps GRC registers sequence with skip cycle.
  * Returns the dumped size in dwords.
+ * - addr:	start GRC address in dwords
+ * - total_len:	total no. of dwords to dump
+ * - read_len:	no. consecutive dwords to read
+ * - skip_len:	no. of dwords to skip (and fill with zeros)
  */
 static u32 qed_grc_dump_reg_entry_skip(struct qed_hwfn *p_hwfn,
-				       struct qed_ptt *p_ptt, u32 *dump_buf,
-				       bool dump, u32 addr, u32 total_len,
+				       struct qed_ptt *p_ptt,
+				       u32 *dump_buf,
+				       bool dump,
+				       u32 addr,
+				       u32 total_len,
 				       u32 read_len, u32 skip_len)
 {
 	u32 offset = 0, reg_offset = 0;
 
 	offset += qed_grc_dump_reg_entry_hdr(dump_buf, dump, addr, total_len);
-	if (dump) {
-		while (reg_offset < total_len) {
-			u32 curr_len = min_t(u32,
-					     read_len,
-					     total_len - reg_offset);
-			offset += qed_grc_dump_addr_range(p_hwfn,
-							  p_ptt,
-							  dump_buf + offset,
-							  dump, addr, curr_len);
+
+	if (!dump)
+		return offset + total_len;
+
+	while (reg_offset < total_len) {
+		u32 curr_len = min_t(u32, read_len, total_len - reg_offset);
+
+		offset += qed_grc_dump_addr_range(p_hwfn,
+						  p_ptt,
+						  dump_buf + offset,
+						  dump, addr, curr_len, false);
+		reg_offset += curr_len;
+		addr += curr_len;
+
+		if (reg_offset < total_len) {
+			curr_len = min_t(u32, skip_len, total_len - skip_len);
+			memset(dump_buf + offset, 0, DWORDS_TO_BYTES(curr_len));
+			offset += curr_len;
 			reg_offset += curr_len;
 			addr += curr_len;
-			if (reg_offset < total_len) {
-				curr_len = min_t(u32,
-						 skip_len,
-						 total_len - skip_len);
-				memset(dump_buf + offset, 0,
-				       DWORDS_TO_BYTES(curr_len));
-				offset += curr_len;
-				reg_offset += curr_len;
-				addr += curr_len;
-			}
 		}
-	} else {
-		offset += total_len;
 	}
 
 	return offset;
@@ -2266,43 +2521,48 @@ static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn,
 	bool mode_match = true;
 
 	*num_dumped_reg_entries = 0;
+
 	while (input_offset < input_regs_arr.size_in_dwords) {
 		const struct dbg_dump_cond_hdr *cond_hdr =
 		    (const struct dbg_dump_cond_hdr *)
 		    &input_regs_arr.ptr[input_offset++];
-		bool eval_mode = GET_FIELD(cond_hdr->mode.data,
-					   DBG_MODE_HDR_EVAL_MODE) > 0;
+		u16 modes_buf_offset;
+		bool eval_mode;
 
 		/* Check mode/block */
+		eval_mode = GET_FIELD(cond_hdr->mode.data,
+				      DBG_MODE_HDR_EVAL_MODE) > 0;
 		if (eval_mode) {
-			u16 modes_buf_offset =
+			modes_buf_offset =
 				GET_FIELD(cond_hdr->mode.data,
 					  DBG_MODE_HDR_MODES_BUF_OFFSET);
 			mode_match = qed_is_mode_match(p_hwfn,
 						       &modes_buf_offset);
 		}
 
-		if (mode_match && block_enable[cond_hdr->block_id]) {
-			for (i = 0; i < cond_hdr->data_size;
-			     i++, input_offset++) {
-				const struct dbg_dump_reg *reg =
-				    (const struct dbg_dump_reg *)
-				    &input_regs_arr.ptr[input_offset];
-				u32 addr, len;
-
-				addr = GET_FIELD(reg->data,
-						 DBG_DUMP_REG_ADDRESS);
-				len = GET_FIELD(reg->data, DBG_DUMP_REG_LENGTH);
-				offset +=
-				    qed_grc_dump_reg_entry(p_hwfn, p_ptt,
-							   dump_buf + offset,
-							   dump,
-							   addr,
-							   len);
-				(*num_dumped_reg_entries)++;
-			}
-		} else {
+		if (!mode_match || !block_enable[cond_hdr->block_id]) {
 			input_offset += cond_hdr->data_size;
+			continue;
+		}
+
+		for (i = 0; i < cond_hdr->data_size; i++, input_offset++) {
+			const struct dbg_dump_reg *reg =
+			    (const struct dbg_dump_reg *)
+			    &input_regs_arr.ptr[input_offset];
+			u32 addr, len;
+			bool wide_bus;
+
+			addr = GET_FIELD(reg->data, DBG_DUMP_REG_ADDRESS);
+			len = GET_FIELD(reg->data, DBG_DUMP_REG_LENGTH);
+			wide_bus = GET_FIELD(reg->data, DBG_DUMP_REG_WIDE_BUS);
+			offset += qed_grc_dump_reg_entry(p_hwfn,
+							 p_ptt,
+							 dump_buf + offset,
+							 dump,
+							 addr,
+							 len,
+							 wide_bus);
+			(*num_dumped_reg_entries)++;
 		}
 	}
 
@@ -2350,8 +2610,8 @@ static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn,
 	return num_dumped_reg_entries > 0 ? offset : 0;
 }
 
-/* Dumps registers according to the input registers array.
- * Returns the dumped size in dwords.
+/* Dumps registers according to the input registers array. Returns the dumped
+ * size in dwords.
  */
 static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 				  struct qed_ptt *p_ptt,
@@ -2361,29 +2621,37 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 				  const char *param_name, const char *param_val)
 {
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-	struct chip_platform_defs *p_platform_defs;
+	struct chip_platform_defs *chip_platform;
 	u32 offset = 0, input_offset = 0;
-	struct chip_defs *p_chip_defs;
+	struct chip_defs *chip;
 	u8 port_id, pf_id, vf_id;
 	u16 fid;
 
-	p_chip_defs = &s_chip_defs[dev_data->chip_id];
-	p_platform_defs = &p_chip_defs->per_platform[dev_data->platform_id];
+	chip = &s_chip_defs[dev_data->chip_id];
+	chip_platform = &chip->per_platform[dev_data->platform_id];
 
 	if (dump)
 		DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n");
+
 	while (input_offset <
 	       s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) {
-		const struct dbg_dump_split_hdr *split_hdr =
+		const struct dbg_dump_split_hdr *split_hdr;
+		struct dbg_array curr_input_regs_arr;
+		u32 split_data_size;
+		u8 split_type_id;
+
+		split_hdr =
 			(const struct dbg_dump_split_hdr *)
 			&s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++];
-		u8 split_type_id = GET_FIELD(split_hdr->hdr,
-					     DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
-		u32 split_data_size = GET_FIELD(split_hdr->hdr,
-						DBG_DUMP_SPLIT_HDR_DATA_SIZE);
-		struct dbg_array curr_input_regs_arr = {
-			&s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset],
-			split_data_size};
+		split_type_id =
+			GET_FIELD(split_hdr->hdr,
+				  DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
+		split_data_size =
+			GET_FIELD(split_hdr->hdr,
+				  DBG_DUMP_SPLIT_HDR_DATA_SIZE);
+		curr_input_regs_arr.ptr =
+			&s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset];
+		curr_input_regs_arr.size_in_dwords = split_data_size;
 
 		switch (split_type_id) {
 		case SPLIT_TYPE_NONE:
@@ -2398,8 +2666,9 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 							  param_name,
 							  param_val);
 			break;
+
 		case SPLIT_TYPE_PORT:
-			for (port_id = 0; port_id < p_platform_defs->num_ports;
+			for (port_id = 0; port_id < chip_platform->num_ports;
 			     port_id++) {
 				if (dump)
 					qed_port_pretend(p_hwfn, p_ptt,
@@ -2414,9 +2683,10 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 							    param_val);
 			}
 			break;
+
 		case SPLIT_TYPE_PF:
 		case SPLIT_TYPE_PORT_PF:
-			for (pf_id = 0; pf_id < p_platform_defs->num_pfs;
+			for (pf_id = 0; pf_id < chip_platform->num_pfs;
 			     pf_id++) {
 				u8 pfid_shift =
 					PXP_PRETEND_CONCRETE_FID_PFID_SHIFT;
@@ -2427,17 +2697,21 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 				}
 
 				offset +=
-				    qed_grc_dump_split_data(p_hwfn, p_ptt,
+				    qed_grc_dump_split_data(p_hwfn,
+							    p_ptt,
 							    curr_input_regs_arr,
 							    dump_buf + offset,
-							    dump, block_enable,
-							    "pf", pf_id,
+							    dump,
+							    block_enable,
+							    "pf",
+							    pf_id,
 							    param_name,
 							    param_val);
 			}
 			break;
+
 		case SPLIT_TYPE_VF:
-			for (vf_id = 0; vf_id < p_platform_defs->num_vfs;
+			for (vf_id = 0; vf_id < chip_platform->num_vfs;
 			     vf_id++) {
 				u8 vfvalid_shift =
 					PXP_PRETEND_CONCRETE_FID_VFVALID_SHIFT;
@@ -2460,6 +2734,7 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
 							    param_val);
 			}
 			break;
+
 		default:
 			break;
 		}
@@ -2490,35 +2765,37 @@ static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn,
 
 	/* Write reset registers */
 	for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
-		if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
-			u32 addr = BYTES_TO_DWORDS(s_reset_regs_defs[i].addr);
+		if (!s_reset_regs_defs[i].exists[dev_data->chip_id])
+			continue;
 
-			offset += qed_grc_dump_reg_entry(p_hwfn,
-							 p_ptt,
-							 dump_buf + offset,
-							 dump,
-							 addr,
-							 1);
-			num_regs++;
-		}
+		offset += qed_grc_dump_reg_entry(p_hwfn,
+						 p_ptt,
+						 dump_buf + offset,
+						 dump,
+						 BYTES_TO_DWORDS
+						 (s_reset_regs_defs[i].addr), 1,
+						 false);
+		num_regs++;
 	}
 
 	/* Write header */
 	if (dump)
 		qed_grc_dump_regs_hdr(dump_buf,
 				      true, num_regs, "eng", -1, NULL, NULL);
+
 	return offset;
 }
 
-/* Dump registers that are modified during GRC Dump and therefore must be dumped
- * first. Returns the dumped size in dwords.
+/* Dump registers that are modified during GRC Dump and therefore must be
+ * dumped first. Returns the dumped size in dwords.
  */
 static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
 				      struct qed_ptt *p_ptt,
 				      u32 *dump_buf, bool dump)
 {
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-	u32 offset = 0, num_reg_entries = 0, block_id;
+	u32 block_id, offset = 0, num_reg_entries = 0;
+	const struct dbg_attn_reg *attn_reg_arr;
 	u8 storm_id, reg_idx, num_attn_regs;
 
 	/* Calculate header size */
@@ -2527,14 +2804,13 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
 
 	/* Write parity registers */
 	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
-		const struct dbg_attn_reg *attn_reg_arr;
-
 		if (dev_data->block_in_reset[block_id] && dump)
 			continue;
 
 		attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id,
 						       ATTN_TYPE_PARITY,
 						       &num_attn_regs);
+
 		for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) {
 			const struct dbg_attn_reg *reg_data =
 				&attn_reg_arr[reg_idx];
@@ -2548,37 +2824,36 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
 			modes_buf_offset =
 				GET_FIELD(reg_data->mode.data,
 					  DBG_MODE_HDR_MODES_BUF_OFFSET);
-			if (!eval_mode ||
-			    qed_is_mode_match(p_hwfn, &modes_buf_offset)) {
-				/* Mode match - read and dump registers */
-				addr = reg_data->mask_address;
-				offset +=
-				    qed_grc_dump_reg_entry(p_hwfn,
-							   p_ptt,
-							   dump_buf + offset,
-							   dump,
-							   addr,
-							   1);
-				addr = GET_FIELD(reg_data->data,
-						 DBG_ATTN_REG_STS_ADDRESS);
-				offset +=
-				    qed_grc_dump_reg_entry(p_hwfn,
-							   p_ptt,
-							   dump_buf + offset,
-							   dump,
-							   addr,
-							   1);
-				num_reg_entries += 2;
-			}
+			if (eval_mode &&
+			    !qed_is_mode_match(p_hwfn, &modes_buf_offset))
+				continue;
+
+			/* Mode match: read & dump registers */
+			addr = reg_data->mask_address;
+			offset += qed_grc_dump_reg_entry(p_hwfn,
+							 p_ptt,
+							 dump_buf + offset,
+							 dump,
+							 addr,
+							 1, false);
+			addr = GET_FIELD(reg_data->data,
+					 DBG_ATTN_REG_STS_ADDRESS);
+			offset += qed_grc_dump_reg_entry(p_hwfn,
+							 p_ptt,
+							 dump_buf + offset,
+							 dump,
+							 addr,
+							 1, false);
+			num_reg_entries += 2;
 		}
 	}
 
-	/* Write storm stall status registers */
+	/* Write Storm stall status registers */
 	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		struct storm_defs *storm = &s_storm_defs[storm_id];
 		u32 addr;
 
-		if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] &&
-		    dump)
+		if (dev_data->block_in_reset[storm->block_id] && dump)
 			continue;
 
 		addr =
@@ -2589,7 +2864,8 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
 						 dump_buf + offset,
 						 dump,
 						 addr,
-						 1);
+						 1,
+						 false);
 		num_reg_entries++;
 	}
 
@@ -2598,6 +2874,7 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
 		qed_grc_dump_regs_hdr(dump_buf,
 				      true,
 				      num_reg_entries, "eng", -1, NULL, NULL);
+
 	return offset;
 }
 
@@ -2637,17 +2914,17 @@ static u32 qed_grc_dump_special_regs(struct qed_hwfn *p_hwfn,
 	return offset;
 }
 
-/* Dumps a GRC memory header (section and params).
- * The following parameters are dumped:
- * name - name is dumped only if it's not NULL.
- * addr - addr is dumped only if name is NULL.
- * len - len is always dumped.
- * width - bit_width is dumped if it's not zero.
- * packed - packed=1 is dumped if it's not false.
- * mem_group - mem_group is always dumped.
- * is_storm - true only if the memory is related to a Storm.
- * storm_letter - storm letter (valid only if is_storm is true).
- * Returns the dumped size in dwords.
+/* Dumps a GRC memory header (section and params). Returns the dumped size in
+ * dwords. The following parameters are dumped:
+ * - name:	   dumped only if it's not NULL.
+ * - addr:	   in dwords, dumped only if name is NULL.
+ * - len:	   in dwords, always dumped.
+ * - width:	   dumped if it's not zero.
+ * - packed:	   dumped only if it's not false.
+ * - mem_group:	   always dumped.
+ * - is_storm:	   true only if the memory is related to a Storm.
+ * - storm_letter: valid only if is_storm is true.
+ *
  */
 static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
 				u32 *dump_buf,
@@ -2667,6 +2944,7 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
 	if (!len)
 		DP_NOTICE(p_hwfn,
 			  "Unexpected GRC Dump error: dumped memory size must be non-zero\n");
+
 	if (bit_width)
 		num_params++;
 	if (packed)
@@ -2675,6 +2953,7 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
 	/* Dump section header */
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "grc_mem", num_params);
+
 	if (name) {
 		/* Dump name */
 		if (is_storm) {
@@ -2694,14 +2973,15 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
 				   len, buf);
 	} else {
 		/* Dump address */
+		u32 addr_in_bytes = DWORDS_TO_BYTES(addr);
+
 		offset += qed_dump_num_param(dump_buf + offset,
-					     dump, "addr",
-					     DWORDS_TO_BYTES(addr));
+					     dump, "addr", addr_in_bytes);
 		if (dump && len > 64)
 			DP_VERBOSE(p_hwfn,
 				   QED_MSG_DEBUG,
 				   "Dumping %d registers from address 0x%x...\n",
-				   len, (u32)DWORDS_TO_BYTES(addr));
+				   len, addr_in_bytes);
 	}
 
 	/* Dump len */
@@ -2727,11 +3007,13 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
 	}
 
 	offset += qed_dump_str_param(dump_buf + offset, dump, "type", buf);
+
 	return offset;
 }
 
 /* Dumps a single GRC memory. If name is NULL, the memory is stored by address.
  * Returns the dumped size in dwords.
+ * The addr and len arguments are specified in dwords.
  */
 static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt,
@@ -2740,6 +3022,7 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
 			    const char *name,
 			    u32 addr,
 			    u32 len,
+			    bool wide_bus,
 			    u32 bit_width,
 			    bool packed,
 			    const char *mem_group,
@@ -2758,7 +3041,9 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
 				       mem_group, is_storm, storm_letter);
 	offset += qed_grc_dump_addr_range(p_hwfn,
 					  p_ptt,
-					  dump_buf + offset, dump, addr, len);
+					  dump_buf + offset,
+					  dump, addr, len, wide_bus);
+
 	return offset;
 }
 
@@ -2773,20 +3058,21 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn,
 
 	while (input_offset < input_mems_arr.size_in_dwords) {
 		const struct dbg_dump_cond_hdr *cond_hdr;
+		u16 modes_buf_offset;
 		u32 num_entries;
 		bool eval_mode;
 
 		cond_hdr = (const struct dbg_dump_cond_hdr *)
 			   &input_mems_arr.ptr[input_offset++];
-		eval_mode = GET_FIELD(cond_hdr->mode.data,
-				      DBG_MODE_HDR_EVAL_MODE) > 0;
+		num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS;
 
 		/* Check required mode */
+		eval_mode = GET_FIELD(cond_hdr->mode.data,
+				      DBG_MODE_HDR_EVAL_MODE) > 0;
 		if (eval_mode) {
-			u16 modes_buf_offset =
+			modes_buf_offset =
 				GET_FIELD(cond_hdr->mode.data,
 					  DBG_MODE_HDR_MODES_BUF_OFFSET);
-
 			mode_match = qed_is_mode_match(p_hwfn,
 						       &modes_buf_offset);
 		}
@@ -2796,81 +3082,87 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn,
 			continue;
 		}
 
-		num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS;
 		for (i = 0; i < num_entries;
 		     i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) {
 			const struct dbg_dump_mem *mem =
 				(const struct dbg_dump_mem *)
 				&input_mems_arr.ptr[input_offset];
-			u8 mem_group_id;
+			u8 mem_group_id = GET_FIELD(mem->dword0,
+						    DBG_DUMP_MEM_MEM_GROUP_ID);
+			bool is_storm = false, mem_wide_bus;
+			enum dbg_grc_params grc_param;
+			char storm_letter = 'a';
+			enum block_id block_id;
+			u32 mem_addr, mem_len;
 
-			mem_group_id = GET_FIELD(mem->dword0,
-						 DBG_DUMP_MEM_MEM_GROUP_ID);
 			if (mem_group_id >= MEM_GROUPS_NUM) {
 				DP_NOTICE(p_hwfn, "Invalid mem_group_id\n");
 				return 0;
 			}
 
-			if (qed_grc_is_mem_included(p_hwfn,
-					(enum block_id)cond_hdr->block_id,
-					mem_group_id)) {
-				u32 mem_addr = GET_FIELD(mem->dword0,
-							 DBG_DUMP_MEM_ADDRESS);
-				u32 mem_len = GET_FIELD(mem->dword1,
-							DBG_DUMP_MEM_LENGTH);
-				enum dbg_grc_params grc_param;
-				char storm_letter = 'a';
-				bool is_storm = false;
-
-				/* Update memory length for CCFC/TCFC memories
-				 * according to number of LCIDs/LTIDs.
-				 */
-				if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) {
-					if (mem_len % MAX_LCIDS != 0) {
-						DP_NOTICE(p_hwfn,
-							  "Invalid CCFC connection memory size\n");
-						return 0;
-					}
-
-					grc_param = DBG_GRC_PARAM_NUM_LCIDS;
-					mem_len = qed_grc_get_param(p_hwfn,
-								    grc_param) *
-						  (mem_len / MAX_LCIDS);
-				} else if (mem_group_id ==
-					   MEM_GROUP_TASK_CFC_MEM) {
-					if (mem_len % MAX_LTIDS != 0) {
-						DP_NOTICE(p_hwfn,
-							  "Invalid TCFC task memory size\n");
-						return 0;
-					}
-
-					grc_param = DBG_GRC_PARAM_NUM_LTIDS;
-					mem_len = qed_grc_get_param(p_hwfn,
-								    grc_param) *
-						  (mem_len / MAX_LTIDS);
+			block_id = (enum block_id)cond_hdr->block_id;
+			if (!qed_grc_is_mem_included(p_hwfn,
+						     block_id,
+						     mem_group_id))
+				continue;
+
+			mem_addr = GET_FIELD(mem->dword0, DBG_DUMP_MEM_ADDRESS);
+			mem_len = GET_FIELD(mem->dword1, DBG_DUMP_MEM_LENGTH);
+			mem_wide_bus = GET_FIELD(mem->dword1,
+						 DBG_DUMP_MEM_WIDE_BUS);
+
+			/* Update memory length for CCFC/TCFC memories
+			 * according to number of LCIDs/LTIDs.
+			 */
+			if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) {
+				if (mem_len % MAX_LCIDS) {
+					DP_NOTICE(p_hwfn,
+						  "Invalid CCFC connection memory size\n");
+					return 0;
 				}
 
-				/* If memory is associated with Storm, update
-				 * Storm details.
-				 */
-				if (s_block_defs[cond_hdr->block_id]->
-							associated_to_storm) {
-					is_storm = true;
-					storm_letter =
-						s_storm_defs[s_block_defs[
-						cond_hdr->block_id]->
-						storm_id].letter;
+				grc_param = DBG_GRC_PARAM_NUM_LCIDS;
+				mem_len = qed_grc_get_param(p_hwfn, grc_param) *
+					  (mem_len / MAX_LCIDS);
+			} else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM) {
+				if (mem_len % MAX_LTIDS) {
+					DP_NOTICE(p_hwfn,
+						  "Invalid TCFC task memory size\n");
+					return 0;
 				}
 
-				/* Dump memory */
-				offset += qed_grc_dump_mem(p_hwfn, p_ptt,
-						dump_buf + offset, dump, NULL,
-						mem_addr, mem_len, 0,
+				grc_param = DBG_GRC_PARAM_NUM_LTIDS;
+				mem_len = qed_grc_get_param(p_hwfn, grc_param) *
+					  (mem_len / MAX_LTIDS);
+			}
+
+			/* If memory is associated with Storm, update Storm
+			 * details.
+			 */
+			if (s_block_defs
+			    [cond_hdr->block_id]->associated_to_storm) {
+				is_storm = true;
+				storm_letter =
+				    s_storm_defs[s_block_defs
+						 [cond_hdr->block_id]->
+						 storm_id].letter;
+			}
+
+			/* Dump memory */
+			offset += qed_grc_dump_mem(p_hwfn,
+						p_ptt,
+						dump_buf + offset,
+						dump,
+						NULL,
+						mem_addr,
+						mem_len,
+						mem_wide_bus,
+						0,
 						false,
 						s_mem_group_names[mem_group_id],
-						is_storm, storm_letter);
-				}
-			}
+						is_storm,
+						storm_letter);
+		}
 	}
 
 	return offset;
@@ -2887,16 +3179,22 @@ static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn,
 
 	while (input_offset <
 	       s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) {
-		const struct dbg_dump_split_hdr *split_hdr =
-			(const struct dbg_dump_split_hdr *)
+		const struct dbg_dump_split_hdr *split_hdr;
+		struct dbg_array curr_input_mems_arr;
+		u32 split_data_size;
+		u8 split_type_id;
+
+		split_hdr = (const struct dbg_dump_split_hdr *)
 			&s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++];
-		u8 split_type_id = GET_FIELD(split_hdr->hdr,
-					     DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
-		u32 split_data_size = GET_FIELD(split_hdr->hdr,
-						DBG_DUMP_SPLIT_HDR_DATA_SIZE);
-		struct dbg_array curr_input_mems_arr = {
-			&s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset],
-			split_data_size};
+		split_type_id =
+			GET_FIELD(split_hdr->hdr,
+				  DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
+		split_data_size =
+			GET_FIELD(split_hdr->hdr,
+				  DBG_DUMP_SPLIT_HDR_DATA_SIZE);
+		curr_input_mems_arr.ptr =
+			&s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset];
+		curr_input_mems_arr.size_in_dwords = split_data_size;
 
 		switch (split_type_id) {
 		case SPLIT_TYPE_NONE:
@@ -2906,6 +3204,7 @@ static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn,
 							   dump_buf + offset,
 							   dump);
 			break;
+
 		default:
 			DP_NOTICE(p_hwfn,
 				  "Dumping split memories is currently not supported\n");
@@ -2920,6 +3219,7 @@ static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn,
 
 /* Dumps GRC context data for the specified Storm.
  * Returns the dumped size in dwords.
+ * The lid_size argument is specified in quad-regs.
  */
 static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt,
@@ -2931,13 +3231,15 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
 				 u32 rd_reg_addr,
 				 u8 storm_id)
 {
-	u32 i, lid, total_size;
-	u32 offset = 0;
+	struct storm_defs *storm = &s_storm_defs[storm_id];
+	u32 i, lid, total_size, offset = 0;
 
 	if (!lid_size)
 		return 0;
+
 	lid_size *= BYTES_IN_DWORD;
 	total_size = num_lids * lid_size;
+
 	offset += qed_grc_dump_mem_hdr(p_hwfn,
 				       dump_buf + offset,
 				       dump,
@@ -2945,25 +3247,19 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
 				       0,
 				       total_size,
 				       lid_size * 32,
-				       false,
-				       name,
-				       true, s_storm_defs[storm_id].letter);
+				       false, name, true, storm->letter);
+
+	if (!dump)
+		return offset + total_size;
 
 	/* Dump context data */
-	if (dump) {
-		for (lid = 0; lid < num_lids; lid++) {
-			for (i = 0; i < lid_size; i++, offset++) {
-				qed_wr(p_hwfn,
-				       p_ptt,
-				       s_storm_defs[storm_id].cm_ctx_wr_addr,
-				       BIT(9) | lid);
-				*(dump_buf + offset) = qed_rd(p_hwfn,
-							      p_ptt,
-							      rd_reg_addr);
-			}
+	for (lid = 0; lid < num_lids; lid++) {
+		for (i = 0; i < lid_size; i++, offset++) {
+			qed_wr(p_hwfn,
+			       p_ptt, storm->cm_ctx_wr_addr, (i << 9) | lid);
+			*(dump_buf + offset) = qed_rd(p_hwfn,
+						      p_ptt, rd_reg_addr);
 		}
-	} else {
-		offset += total_size;
 	}
 
 	return offset;
@@ -2973,15 +3269,19 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
 static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
 {
+	enum dbg_grc_params grc_param;
 	u32 offset = 0;
 	u8 storm_id;
 
 	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		struct storm_defs *storm = &s_storm_defs[storm_id];
+
 		if (!qed_grc_is_storm_included(p_hwfn,
 					       (enum dbg_storms)storm_id))
 			continue;
 
 		/* Dump Conn AG context size */
+		grc_param = DBG_GRC_PARAM_NUM_LCIDS;
 		offset +=
 			qed_grc_dump_ctx_data(p_hwfn,
 					      p_ptt,
@@ -2989,14 +3289,13 @@ static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
 					      dump,
 					      "CONN_AG_CTX",
 					      qed_grc_get_param(p_hwfn,
-						    DBG_GRC_PARAM_NUM_LCIDS),
-					      s_storm_defs[storm_id].
-						    cm_conn_ag_ctx_lid_size,
-					      s_storm_defs[storm_id].
-						    cm_conn_ag_ctx_rd_addr,
+								grc_param),
+					      storm->cm_conn_ag_ctx_lid_size,
+					      storm->cm_conn_ag_ctx_rd_addr,
 					      storm_id);
 
 		/* Dump Conn ST context size */
+		grc_param = DBG_GRC_PARAM_NUM_LCIDS;
 		offset +=
 			qed_grc_dump_ctx_data(p_hwfn,
 					      p_ptt,
@@ -3004,14 +3303,13 @@ static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
 					      dump,
 					      "CONN_ST_CTX",
 					      qed_grc_get_param(p_hwfn,
-						    DBG_GRC_PARAM_NUM_LCIDS),
-					      s_storm_defs[storm_id].
-						    cm_conn_st_ctx_lid_size,
-					      s_storm_defs[storm_id].
-						    cm_conn_st_ctx_rd_addr,
+								grc_param),
+					      storm->cm_conn_st_ctx_lid_size,
+					      storm->cm_conn_st_ctx_rd_addr,
 					      storm_id);
 
 		/* Dump Task AG context size */
+		grc_param = DBG_GRC_PARAM_NUM_LTIDS;
 		offset +=
 			qed_grc_dump_ctx_data(p_hwfn,
 					      p_ptt,
@@ -3019,14 +3317,13 @@ static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
 					      dump,
 					      "TASK_AG_CTX",
 					      qed_grc_get_param(p_hwfn,
-						    DBG_GRC_PARAM_NUM_LTIDS),
-					      s_storm_defs[storm_id].
-						    cm_task_ag_ctx_lid_size,
-					      s_storm_defs[storm_id].
-						    cm_task_ag_ctx_rd_addr,
+								grc_param),
+					      storm->cm_task_ag_ctx_lid_size,
+					      storm->cm_task_ag_ctx_rd_addr,
 					      storm_id);
 
 		/* Dump Task ST context size */
+		grc_param = DBG_GRC_PARAM_NUM_LTIDS;
 		offset +=
 			qed_grc_dump_ctx_data(p_hwfn,
 					      p_ptt,
@@ -3034,11 +3331,9 @@ static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
 					      dump,
 					      "TASK_ST_CTX",
 					      qed_grc_get_param(p_hwfn,
-						    DBG_GRC_PARAM_NUM_LTIDS),
-					      s_storm_defs[storm_id].
-						    cm_task_st_ctx_lid_size,
-					      s_storm_defs[storm_id].
-						    cm_task_st_ctx_rd_addr,
+								grc_param),
+					      storm->cm_task_st_ctx_lid_size,
+					      storm->cm_task_st_ctx_rd_addr,
 					      storm_id);
 	}
 
@@ -3050,8 +3345,8 @@ static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
 {
 	char buf[10] = "IOR_SET_?";
+	u32 addr, offset = 0;
 	u8 storm_id, set_id;
-	u32 offset = 0;
 
 	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
 		struct storm_defs *storm = &s_storm_defs[storm_id];
@@ -3061,11 +3356,9 @@ static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
 			continue;
 
 		for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) {
-			u32 dwords, addr;
-
-			dwords = storm->sem_fast_mem_addr +
-				 SEM_FAST_REG_STORM_REG_FILE;
-			addr = BYTES_TO_DWORDS(dwords) + IOR_SET_OFFSET(set_id);
+			addr = BYTES_TO_DWORDS(storm->sem_fast_mem_addr +
+					       SEM_FAST_REG_STORM_REG_FILE) +
+			       IOR_SET_OFFSET(set_id);
 			buf[strlen(buf) - 1] = '0' + set_id;
 			offset += qed_grc_dump_mem(p_hwfn,
 						   p_ptt,
@@ -3074,6 +3367,7 @@ static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
 						   buf,
 						   addr,
 						   IORS_PER_SET,
+						   false,
 						   32,
 						   false,
 						   "ior",
@@ -3091,10 +3385,10 @@ static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn,
 				u32 *dump_buf, bool dump, u8 storm_id)
 {
 	u32 total_size = VFC_CAM_NUM_ROWS * VFC_CAM_RESP_DWORDS;
+	struct storm_defs *storm = &s_storm_defs[storm_id];
 	u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 };
 	u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 };
-	u32 offset = 0;
-	u32 row, i;
+	u32 row, i, offset = 0;
 
 	offset += qed_grc_dump_mem_hdr(p_hwfn,
 				       dump_buf + offset,
@@ -3103,38 +3397,34 @@ static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn,
 				       0,
 				       total_size,
 				       256,
-				       false,
-				       "vfc_cam",
-				       true, s_storm_defs[storm_id].letter);
-	if (dump) {
-		/* Prepare CAM address */
-		SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD);
-		for (row = 0; row < VFC_CAM_NUM_ROWS;
-		     row++, offset += VFC_CAM_RESP_DWORDS) {
-			/* Write VFC CAM command */
-			SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row);
-			ARR_REG_WR(p_hwfn,
-				   p_ptt,
-				   s_storm_defs[storm_id].sem_fast_mem_addr +
-				   SEM_FAST_REG_VFC_DATA_WR,
-				   cam_cmd, VFC_CAM_CMD_DWORDS);
+				       false, "vfc_cam", true, storm->letter);
 
-			/* Write VFC CAM address */
-			ARR_REG_WR(p_hwfn,
-				   p_ptt,
-				   s_storm_defs[storm_id].sem_fast_mem_addr +
-				   SEM_FAST_REG_VFC_ADDR,
-				   cam_addr, VFC_CAM_ADDR_DWORDS);
+	if (!dump)
+		return offset + total_size;
 
-			/* Read VFC CAM read response */
-			ARR_REG_RD(p_hwfn,
-				   p_ptt,
-				   s_storm_defs[storm_id].sem_fast_mem_addr +
-				   SEM_FAST_REG_VFC_DATA_RD,
-				   dump_buf + offset, VFC_CAM_RESP_DWORDS);
-		}
-	} else {
-		offset += total_size;
+	/* Prepare CAM address */
+	SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD);
+
+	for (row = 0; row < VFC_CAM_NUM_ROWS;
+	     row++, offset += VFC_CAM_RESP_DWORDS) {
+		/* Write VFC CAM command */
+		SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row);
+		ARR_REG_WR(p_hwfn,
+			   p_ptt,
+			   storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_WR,
+			   cam_cmd, VFC_CAM_CMD_DWORDS);
+
+		/* Write VFC CAM address */
+		ARR_REG_WR(p_hwfn,
+			   p_ptt,
+			   storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_ADDR,
+			   cam_addr, VFC_CAM_ADDR_DWORDS);
+
+		/* Read VFC CAM read response */
+		ARR_REG_RD(p_hwfn,
+			   p_ptt,
+			   storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_RD,
+			   dump_buf + offset, VFC_CAM_RESP_DWORDS);
 	}
 
 	return offset;
@@ -3148,10 +3438,10 @@ static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn,
 				u8 storm_id, struct vfc_ram_defs *ram_defs)
 {
 	u32 total_size = ram_defs->num_rows * VFC_RAM_RESP_DWORDS;
+	struct storm_defs *storm = &s_storm_defs[storm_id];
 	u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 };
 	u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 };
-	u32 offset = 0;
-	u32 row, i;
+	u32 row, i, offset = 0;
 
 	offset += qed_grc_dump_mem_hdr(p_hwfn,
 				       dump_buf + offset,
@@ -3162,7 +3452,7 @@ static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn,
 				       256,
 				       false,
 				       ram_defs->type_name,
-				       true, s_storm_defs[storm_id].letter);
+				       true, storm->letter);
 
 	/* Prepare RAM address */
 	SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD);
@@ -3176,23 +3466,20 @@ static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn,
 		/* Write VFC RAM command */
 		ARR_REG_WR(p_hwfn,
 			   p_ptt,
-			   s_storm_defs[storm_id].sem_fast_mem_addr +
-			   SEM_FAST_REG_VFC_DATA_WR,
+			   storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_WR,
 			   ram_cmd, VFC_RAM_CMD_DWORDS);
 
 		/* Write VFC RAM address */
 		SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row);
 		ARR_REG_WR(p_hwfn,
 			   p_ptt,
-			   s_storm_defs[storm_id].sem_fast_mem_addr +
-			   SEM_FAST_REG_VFC_ADDR,
+			   storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_ADDR,
 			   ram_addr, VFC_RAM_ADDR_DWORDS);
 
 		/* Read VFC RAM read response */
 		ARR_REG_RD(p_hwfn,
 			   p_ptt,
-			   s_storm_defs[storm_id].sem_fast_mem_addr +
-			   SEM_FAST_REG_VFC_DATA_RD,
+			   storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_RD,
 			   dump_buf + offset, VFC_RAM_RESP_DWORDS);
 	}
 
@@ -3208,28 +3495,27 @@ static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn,
 	u32 offset = 0;
 
 	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
-		if (qed_grc_is_storm_included(p_hwfn,
-					      (enum dbg_storms)storm_id) &&
-		    s_storm_defs[storm_id].has_vfc &&
-		    (storm_id != DBG_PSTORM_ID ||
-		     dev_data->platform_id == PLATFORM_ASIC)) {
-			/* Read CAM */
-			offset += qed_grc_dump_vfc_cam(p_hwfn,
+		if (!qed_grc_is_storm_included(p_hwfn,
+					       (enum dbg_storms)storm_id) ||
+		    !s_storm_defs[storm_id].has_vfc ||
+		    (storm_id == DBG_PSTORM_ID && dev_data->platform_id !=
+		     PLATFORM_ASIC))
+			continue;
+
+		/* Read CAM */
+		offset += qed_grc_dump_vfc_cam(p_hwfn,
+					       p_ptt,
+					       dump_buf + offset,
+					       dump, storm_id);
+
+		/* Read RAM */
+		for (i = 0; i < NUM_VFC_RAM_TYPES; i++)
+			offset += qed_grc_dump_vfc_ram(p_hwfn,
 						       p_ptt,
 						       dump_buf + offset,
-						       dump, storm_id);
-
-			/* Read RAM */
-			for (i = 0; i < NUM_VFC_RAM_TYPES; i++)
-				offset += qed_grc_dump_vfc_ram(p_hwfn,
-							       p_ptt,
-							       dump_buf +
-							       offset,
-							       dump,
-							       storm_id,
-							       &s_vfc_ram_defs
-							       [i]);
-		}
+						       dump,
+						       storm_id,
+						       &s_vfc_ram_defs[i]);
 	}
 
 	return offset;
@@ -3244,14 +3530,17 @@ static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn,
 	u8 rss_mem_id;
 
 	for (rss_mem_id = 0; rss_mem_id < NUM_RSS_MEM_TYPES; rss_mem_id++) {
-		struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id];
-		u32 num_entries = rss_defs->num_entries[dev_data->chip_id];
-		u32 entry_width = rss_defs->entry_width[dev_data->chip_id];
-		u32 total_dwords = (num_entries * entry_width) / 32;
-		u32 size = RSS_REG_RSS_RAM_DATA_SIZE;
-		bool packed = (entry_width == 16);
-		u32 rss_addr = rss_defs->addr;
-		u32 i, addr;
+		u32 rss_addr, num_entries, entry_width, total_dwords, i;
+		struct rss_mem_defs *rss_defs;
+		u32 addr, size;
+		bool packed;
+
+		rss_defs = &s_rss_mem_defs[rss_mem_id];
+		rss_addr = rss_defs->addr;
+		num_entries = rss_defs->num_entries[dev_data->chip_id];
+		entry_width = rss_defs->entry_width[dev_data->chip_id];
+		total_dwords = (num_entries * entry_width) / 32;
+		packed = (entry_width == 16);
 
 		offset += qed_grc_dump_mem_hdr(p_hwfn,
 					       dump_buf + offset,
@@ -3263,23 +3552,23 @@ static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn,
 					       packed,
 					       rss_defs->type_name, false, 0);
 
+		/* Dump RSS data */
 		if (!dump) {
 			offset += total_dwords;
 			continue;
 		}
 
-		/* Dump RSS data */
-		for (i = 0; i < total_dwords;
-		     i += RSS_REG_RSS_RAM_DATA_SIZE, rss_addr++) {
-			addr = BYTES_TO_DWORDS(RSS_REG_RSS_RAM_DATA);
+		addr = BYTES_TO_DWORDS(RSS_REG_RSS_RAM_DATA);
+		size = RSS_REG_RSS_RAM_DATA_SIZE;
+		for (i = 0; i < total_dwords; i += size, rss_addr++) {
 			qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, rss_addr);
-				offset += qed_grc_dump_addr_range(p_hwfn,
-								  p_ptt,
-								  dump_buf +
-								  offset,
-								  dump,
-								  addr,
-								  size);
+			offset += qed_grc_dump_addr_range(p_hwfn,
+							  p_ptt,
+							  dump_buf + offset,
+							  dump,
+							  addr,
+							  size,
+							  false);
 		}
 	}
 
@@ -3316,10 +3605,11 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
 				       BIG_RAM_BLOCK_SIZE_BYTES * 8,
 				       false, type_name, false, 0);
 
+	/* Read and dump Big RAM data */
 	if (!dump)
 		return offset + ram_size;
 
-	/* Read and dump Big RAM data */
+	/* Dump Big RAM */
 	for (i = 0; i < total_blocks / 2; i++) {
 		u32 addr, len;
 
@@ -3331,7 +3621,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
 						  dump_buf + offset,
 						  dump,
 						  addr,
-						  len);
+						  len,
+						  false);
 	}
 
 	return offset;
@@ -3359,7 +3650,7 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
 				   NULL,
 				   BYTES_TO_DWORDS(MCP_REG_SCRATCH),
 				   MCP_REG_SCRATCH_SIZE,
-				   0, false, "MCP", false, 0);
+				   false, 0, false, "MCP", false, 0);
 
 	/* Dump MCP cpu_reg_file */
 	offset += qed_grc_dump_mem(p_hwfn,
@@ -3369,7 +3660,7 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
 				   NULL,
 				   BYTES_TO_DWORDS(MCP_REG_CPU_REG_FILE),
 				   MCP_REG_CPU_REG_FILE_SIZE,
-				   0, false, "MCP", false, 0);
+				   false, 0, false, "MCP", false, 0);
 
 	/* Dump MCP registers */
 	block_enable[BLOCK_MCP] = true;
@@ -3387,11 +3678,13 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
 					 dump_buf + offset,
 					 dump,
 					 addr,
-					 1);
+					 1,
+					 false);
 
 	/* Release MCP */
 	if (halted && qed_mcp_resume(p_hwfn, p_ptt))
 		DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n");
+
 	return offset;
 }
 
@@ -3404,14 +3697,26 @@ static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn,
 	u8 phy_id;
 
 	for (phy_id = 0; phy_id < ARRAY_SIZE(s_phy_defs); phy_id++) {
-		struct phy_defs *phy_defs = &s_phy_defs[phy_id];
-		int printed_chars;
-
-		printed_chars = snprintf(mem_name, sizeof(mem_name), "tbus_%s",
-					 phy_defs->phy_name);
-		if (printed_chars < 0 || printed_chars >= sizeof(mem_name))
+		u32 addr_lo_addr, addr_hi_addr, data_lo_addr, data_hi_addr;
+		struct phy_defs *phy_defs;
+		u8 *bytes_buf;
+
+		phy_defs = &s_phy_defs[phy_id];
+		addr_lo_addr = phy_defs->base_addr +
+			       phy_defs->tbus_addr_lo_addr;
+		addr_hi_addr = phy_defs->base_addr +
+			       phy_defs->tbus_addr_hi_addr;
+		data_lo_addr = phy_defs->base_addr +
+			       phy_defs->tbus_data_lo_addr;
+		data_hi_addr = phy_defs->base_addr +
+			       phy_defs->tbus_data_hi_addr;
+		bytes_buf = (u8 *)(dump_buf + offset);
+
+		if (snprintf(mem_name, sizeof(mem_name), "tbus_%s",
+			     phy_defs->phy_name) < 0)
 			DP_NOTICE(p_hwfn,
 				  "Unexpected debug error: invalid PHY memory name\n");
+
 		offset += qed_grc_dump_mem_hdr(p_hwfn,
 					       dump_buf + offset,
 					       dump,
@@ -3419,34 +3724,26 @@ static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn,
 					       0,
 					       PHY_DUMP_SIZE_DWORDS,
 					       16, true, mem_name, false, 0);
-		if (dump) {
-			u32 addr_lo_addr = phy_defs->base_addr +
-					   phy_defs->tbus_addr_lo_addr;
-			u32 addr_hi_addr = phy_defs->base_addr +
-					   phy_defs->tbus_addr_hi_addr;
-			u32 data_lo_addr = phy_defs->base_addr +
-					   phy_defs->tbus_data_lo_addr;
-			u32 data_hi_addr = phy_defs->base_addr +
-					   phy_defs->tbus_data_hi_addr;
-			u8 *bytes_buf = (u8 *)(dump_buf + offset);
-
-			for (tbus_hi_offset = 0;
-			     tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8);
-			     tbus_hi_offset++) {
+
+		if (!dump) {
+			offset += PHY_DUMP_SIZE_DWORDS;
+			continue;
+		}
+
+		for (tbus_hi_offset = 0;
+		     tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8);
+		     tbus_hi_offset++) {
+			qed_wr(p_hwfn, p_ptt, addr_hi_addr, tbus_hi_offset);
+			for (tbus_lo_offset = 0; tbus_lo_offset < 256;
+			     tbus_lo_offset++) {
 				qed_wr(p_hwfn,
-				       p_ptt, addr_hi_addr, tbus_hi_offset);
-				for (tbus_lo_offset = 0; tbus_lo_offset < 256;
-				     tbus_lo_offset++) {
-					qed_wr(p_hwfn,
-					       p_ptt,
-					       addr_lo_addr, tbus_lo_offset);
-					*(bytes_buf++) =
-						(u8)qed_rd(p_hwfn, p_ptt,
-							   data_lo_addr);
-					*(bytes_buf++) =
-						(u8)qed_rd(p_hwfn, p_ptt,
-							   data_hi_addr);
-				}
+				       p_ptt, addr_lo_addr, tbus_lo_offset);
+				*(bytes_buf++) = (u8)qed_rd(p_hwfn,
+							    p_ptt,
+							    data_lo_addr);
+				*(bytes_buf++) = (u8)qed_rd(p_hwfn,
+							    p_ptt,
+							    data_hi_addr);
 			}
 		}
 
@@ -3460,16 +3757,17 @@ static void qed_config_dbg_line(struct qed_hwfn *p_hwfn,
 				struct qed_ptt *p_ptt,
 				enum block_id block_id,
 				u8 line_id,
-				u8 cycle_en,
-				u8 right_shift, u8 force_valid, u8 force_frame)
+				u8 enable_mask,
+				u8 right_shift,
+				u8 force_valid_mask, u8 force_frame_mask)
 {
-	struct block_defs *p_block_defs = s_block_defs[block_id];
+	struct block_defs *block = s_block_defs[block_id];
 
-	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_select_addr, line_id);
-	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_cycle_enable_addr, cycle_en);
-	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_shift_addr, right_shift);
-	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_valid_addr, force_valid);
-	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_frame_addr, force_frame);
+	qed_wr(p_hwfn, p_ptt, block->dbg_select_addr, line_id);
+	qed_wr(p_hwfn, p_ptt, block->dbg_enable_addr, enable_mask);
+	qed_wr(p_hwfn, p_ptt, block->dbg_shift_addr, right_shift);
+	qed_wr(p_hwfn, p_ptt, block->dbg_force_valid_addr, force_valid_mask);
+	qed_wr(p_hwfn, p_ptt, block->dbg_force_frame_addr, force_frame_mask);
 }
 
 /* Dumps Static Debug data. Returns the dumped size in dwords. */
@@ -3477,10 +3775,12 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
 				     struct qed_ptt *p_ptt,
 				     u32 *dump_buf, bool dump)
 {
-	u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS;
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-	u32 offset = 0, block_id, line_id;
-	struct block_defs *p_block_defs;
+	u32 block_id, line_id, offset = 0;
+
+	/* Skip static debug if a debug bus recording is in progress */
+	if (qed_rd(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON))
+		return 0;
 
 	if (dump) {
 		DP_VERBOSE(p_hwfn,
@@ -3488,11 +3788,11 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
 
 		/* Disable all blocks debug output */
 		for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
-			p_block_defs = s_block_defs[block_id];
+			struct block_defs *block = s_block_defs[block_id];
 
-			if (p_block_defs->has_dbg_bus[dev_data->chip_id])
-				qed_wr(p_hwfn, p_ptt,
-				       p_block_defs->dbg_cycle_enable_addr, 0);
+			if (block->has_dbg_bus[dev_data->chip_id])
+				qed_wr(p_hwfn, p_ptt, block->dbg_enable_addr,
+				       0);
 		}
 
 		qed_bus_reset_dbg_block(p_hwfn, p_ptt);
@@ -3506,59 +3806,71 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
 
 	/* Dump all static debug lines for each relevant block */
 	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
-		p_block_defs = s_block_defs[block_id];
+		struct block_defs *block = s_block_defs[block_id];
+		struct dbg_bus_block *block_desc;
+		u32 block_dwords, addr, len;
+		u8 dbg_client_id;
 
-		if (!p_block_defs->has_dbg_bus[dev_data->chip_id])
+		if (!block->has_dbg_bus[dev_data->chip_id])
 			continue;
 
+		block_desc =
+			get_dbg_bus_block_desc(p_hwfn,
+					       (enum block_id)block_id);
+		block_dwords = NUM_DBG_LINES(block_desc) *
+			       STATIC_DEBUG_LINE_DWORDS;
+
 		/* Dump static section params */
 		offset += qed_grc_dump_mem_hdr(p_hwfn,
 					       dump_buf + offset,
 					       dump,
-					       p_block_defs->name, 0,
-					       block_dwords, 32, false,
-					       "STATIC", false, 0);
-
-		if (dump && !dev_data->block_in_reset[block_id]) {
-			u8 dbg_client_id =
-				p_block_defs->dbg_client_id[dev_data->chip_id];
-			u32 addr = BYTES_TO_DWORDS(DBG_REG_CALENDAR_OUT_DATA);
-			u32 len = STATIC_DEBUG_LINE_DWORDS;
-
-			/* Enable block's client */
-			qed_bus_enable_clients(p_hwfn, p_ptt,
-					       BIT(dbg_client_id));
-
-			for (line_id = 0; line_id < NUM_DBG_BUS_LINES;
-			     line_id++) {
-				/* Configure debug line ID */
-				qed_config_dbg_line(p_hwfn,
-						    p_ptt,
-						    (enum block_id)block_id,
-						    (u8)line_id,
-						    0xf, 0, 0, 0);
+					       block->name,
+					       0,
+					       block_dwords,
+					       32, false, "STATIC", false, 0);
 
-				/* Read debug line info */
-				offset +=
-				    qed_grc_dump_addr_range(p_hwfn,
-							    p_ptt,
-							    dump_buf + offset,
-							    dump,
-							    addr,
-							    len);
-			}
+		if (!dump) {
+			offset += block_dwords;
+			continue;
+		}
 
-			/* Disable block's client and debug output */
-			qed_bus_enable_clients(p_hwfn, p_ptt, 0);
-			qed_wr(p_hwfn, p_ptt,
-			       p_block_defs->dbg_cycle_enable_addr, 0);
-		} else {
-			/* All lines are invalid - dump zeros */
-			if (dump)
-				memset(dump_buf + offset, 0,
-				       DWORDS_TO_BYTES(block_dwords));
+		/* If all lines are invalid - dump zeros */
+		if (dev_data->block_in_reset[block_id]) {
+			memset(dump_buf + offset, 0,
+			       DWORDS_TO_BYTES(block_dwords));
 			offset += block_dwords;
+			continue;
+		}
+
+		/* Enable block's client */
+		dbg_client_id = block->dbg_client_id[dev_data->chip_id];
+		qed_bus_enable_clients(p_hwfn,
+				       p_ptt,
+				       BIT(dbg_client_id));
+
+		addr = BYTES_TO_DWORDS(DBG_REG_CALENDAR_OUT_DATA);
+		len = STATIC_DEBUG_LINE_DWORDS;
+		for (line_id = 0; line_id < (u32)NUM_DBG_LINES(block_desc);
+		     line_id++) {
+			/* Configure debug line ID */
+			qed_config_dbg_line(p_hwfn,
+					    p_ptt,
+					    (enum block_id)block_id,
+					    (u8)line_id, 0xf, 0, 0, 0);
+
+			/* Read debug line info */
+			offset += qed_grc_dump_addr_range(p_hwfn,
+							  p_ptt,
+							  dump_buf + offset,
+							  dump,
+							  addr,
+							  len,
+							  true);
 		}
+
+		/* Disable block's client and debug output */
+		qed_bus_enable_clients(p_hwfn, p_ptt, 0);
+		qed_wr(p_hwfn, p_ptt, block->dbg_enable_addr, 0);
 	}
 
 	if (dump) {
@@ -3584,8 +3896,8 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
 
 	*num_dumped_dwords = 0;
 
-	/* Find port mode */
 	if (dump) {
+		/* Find port mode */
 		switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) {
 		case 0:
 			port_mode = 1;
@@ -3597,11 +3909,10 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
 			port_mode = 4;
 			break;
 		}
-	}
 
-	/* Update reset state */
-	if (dump)
+		/* Update reset state */
 		qed_update_blocks_reset_state(p_hwfn, p_ptt);
+	}
 
 	/* Dump global params */
 	offset += qed_dump_common_global_params(p_hwfn,
@@ -3635,7 +3946,8 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
 	}
 
 	/* Disable all parities using MFW command */
-	if (dump && !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP)) {
+	if (dump &&
+	    !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP)) {
 		parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1);
 		if (!parities_masked) {
 			DP_NOTICE(p_hwfn,
@@ -3661,9 +3973,9 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
 
 	/* Dump all regs  */
 	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) {
-		/* Dump all blocks except MCP */
 		bool block_enable[MAX_BLOCK_ID];
 
+		/* Dump all blocks except MCP */
 		for (i = 0; i < MAX_BLOCK_ID; i++)
 			block_enable[i] = true;
 		block_enable[BLOCK_MCP] = false;
@@ -3732,7 +4044,8 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
 						    dump_buf + offset, dump);
 
 	/* Dump last section */
-	offset += qed_dump_last_section(dump_buf, offset, dump);
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
+
 	if (dump) {
 		/* Unstall storms */
 		if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_UNSTALL))
@@ -3763,19 +4076,20 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
 				     const struct dbg_idle_chk_rule *rule,
 				     u16 fail_entry_id, u32 *cond_reg_values)
 {
-	const union dbg_idle_chk_reg *regs = &((const union dbg_idle_chk_reg *)
-					       s_dbg_arrays
-					       [BIN_BUF_DBG_IDLE_CHK_REGS].
-					       ptr)[rule->reg_offset];
-	const struct dbg_idle_chk_cond_reg *cond_regs = &regs[0].cond_reg;
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
-	struct dbg_idle_chk_result_hdr *hdr =
-		(struct dbg_idle_chk_result_hdr *)dump_buf;
-	const struct dbg_idle_chk_info_reg *info_regs =
-		&regs[rule->num_cond_regs].info_reg;
-	u32 next_reg_offset = 0, i, offset = 0;
+	const struct dbg_idle_chk_cond_reg *cond_regs;
+	const struct dbg_idle_chk_info_reg *info_regs;
+	u32 i, next_reg_offset = 0, offset = 0;
+	struct dbg_idle_chk_result_hdr *hdr;
+	const union dbg_idle_chk_reg *regs;
 	u8 reg_id;
 
+	hdr = (struct dbg_idle_chk_result_hdr *)dump_buf;
+	regs = &((const union dbg_idle_chk_reg *)
+		 s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr)[rule->reg_offset];
+	cond_regs = &regs[0].cond_reg;
+	info_regs = &regs[rule->num_cond_regs].info_reg;
+
 	/* Dump rule data */
 	if (dump) {
 		memset(hdr, 0, sizeof(*hdr));
@@ -3790,33 +4104,31 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
 	/* Dump condition register values */
 	for (reg_id = 0; reg_id < rule->num_cond_regs; reg_id++) {
 		const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id];
+		struct dbg_idle_chk_result_reg_hdr *reg_hdr;
 
-		/* Write register header */
-		if (dump) {
-			struct dbg_idle_chk_result_reg_hdr *reg_hdr =
-			    (struct dbg_idle_chk_result_reg_hdr *)(dump_buf
-								   + offset);
-			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
-			memset(reg_hdr, 0,
-			       sizeof(struct dbg_idle_chk_result_reg_hdr));
-			reg_hdr->start_entry = reg->start_entry;
-			reg_hdr->size = reg->entry_size;
-			SET_FIELD(reg_hdr->data,
-				  DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM,
-				  reg->num_entries > 1 || reg->start_entry > 0
-				  ? 1 : 0);
-			SET_FIELD(reg_hdr->data,
-				  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id);
+		reg_hdr = (struct dbg_idle_chk_result_reg_hdr *)
+			  (dump_buf + offset);
 
-			/* Write register values */
-			for (i = 0; i < reg_hdr->size;
-			     i++, next_reg_offset++, offset++)
-				dump_buf[offset] =
-				    cond_reg_values[next_reg_offset];
-		} else {
+		/* Write register header */
+		if (!dump) {
 			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS +
 			    reg->entry_size;
+			continue;
 		}
+
+		offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
+		memset(reg_hdr, 0, sizeof(*reg_hdr));
+		reg_hdr->start_entry = reg->start_entry;
+		reg_hdr->size = reg->entry_size;
+		SET_FIELD(reg_hdr->data,
+			  DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM,
+			  reg->num_entries > 1 || reg->start_entry > 0 ? 1 : 0);
+		SET_FIELD(reg_hdr->data,
+			  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id);
+
+		/* Write register values */
+		for (i = 0; i < reg_hdr->size; i++, next_reg_offset++, offset++)
+			dump_buf[offset] = cond_reg_values[next_reg_offset];
 	}
 
 	/* Dump info register values */
@@ -3824,12 +4136,12 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
 		const struct dbg_idle_chk_info_reg *reg = &info_regs[reg_id];
 		u32 block_id;
 
+		/* Check if register's block is in reset */
 		if (!dump) {
 			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + reg->size;
 			continue;
 		}
 
-		/* Check if register's block is in reset */
 		block_id = GET_FIELD(reg->data, DBG_IDLE_CHK_INFO_REG_BLOCK_ID);
 		if (block_id >= MAX_BLOCK_ID) {
 			DP_NOTICE(p_hwfn, "Invalid block_id\n");
@@ -3837,47 +4149,50 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
 		}
 
 		if (!dev_data->block_in_reset[block_id]) {
-			bool eval_mode = GET_FIELD(reg->mode.data,
-						   DBG_MODE_HDR_EVAL_MODE) > 0;
-			bool mode_match = true;
+			struct dbg_idle_chk_result_reg_hdr *reg_hdr;
+			bool wide_bus, eval_mode, mode_match = true;
+			u16 modes_buf_offset;
+			u32 addr;
+
+			reg_hdr = (struct dbg_idle_chk_result_reg_hdr *)
+				  (dump_buf + offset);
 
 			/* Check mode */
+			eval_mode = GET_FIELD(reg->mode.data,
+					      DBG_MODE_HDR_EVAL_MODE) > 0;
 			if (eval_mode) {
-				u16 modes_buf_offset =
-					GET_FIELD(reg->mode.data,
-						DBG_MODE_HDR_MODES_BUF_OFFSET);
+				modes_buf_offset =
+				    GET_FIELD(reg->mode.data,
+					      DBG_MODE_HDR_MODES_BUF_OFFSET);
 				mode_match =
 					qed_is_mode_match(p_hwfn,
 							  &modes_buf_offset);
 			}
 
-			if (mode_match) {
-				u32 addr =
-				    GET_FIELD(reg->data,
-					      DBG_IDLE_CHK_INFO_REG_ADDRESS);
-
-				/* Write register header */
-				struct dbg_idle_chk_result_reg_hdr *reg_hdr =
-					(struct dbg_idle_chk_result_reg_hdr *)
-					(dump_buf + offset);
-
-				offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
-				hdr->num_dumped_info_regs++;
-				memset(reg_hdr, 0, sizeof(*reg_hdr));
-				reg_hdr->size = reg->size;
-				SET_FIELD(reg_hdr->data,
-					  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID,
-					  rule->num_cond_regs + reg_id);
-
-				/* Write register values */
-				offset +=
-				    qed_grc_dump_addr_range(p_hwfn,
-							    p_ptt,
-							    dump_buf + offset,
-							    dump,
-							    addr,
-							    reg->size);
-			}
+			if (!mode_match)
+				continue;
+
+			addr = GET_FIELD(reg->data,
+					 DBG_IDLE_CHK_INFO_REG_ADDRESS);
+			wide_bus = GET_FIELD(reg->data,
+					     DBG_IDLE_CHK_INFO_REG_WIDE_BUS);
+
+			/* Write register header */
+			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
+			hdr->num_dumped_info_regs++;
+			memset(reg_hdr, 0, sizeof(*reg_hdr));
+			reg_hdr->size = reg->size;
+			SET_FIELD(reg_hdr->data,
+				  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID,
+				  rule->num_cond_regs + reg_id);
+
+			/* Write register values */
+			offset += qed_grc_dump_addr_range(p_hwfn,
+							  p_ptt,
+							  dump_buf + offset,
+							  dump,
+							  addr,
+							  reg->size, wide_bus);
 		}
 	}
 
@@ -3898,6 +4213,7 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	u8 reg_id;
 
 	*num_failing_rules = 0;
+
 	for (i = 0; i < num_input_rules; i++) {
 		const struct dbg_idle_chk_cond_reg *cond_regs;
 		const struct dbg_idle_chk_rule *rule;
@@ -3920,8 +4236,9 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		 */
 		for (reg_id = 0; reg_id < rule->num_cond_regs && check_rule;
 		     reg_id++) {
-			u32 block_id = GET_FIELD(cond_regs[reg_id].data,
-						DBG_IDLE_CHK_COND_REG_BLOCK_ID);
+			u32 block_id =
+				GET_FIELD(cond_regs[reg_id].data,
+					  DBG_IDLE_CHK_COND_REG_BLOCK_ID);
 
 			if (block_id >= MAX_BLOCK_ID) {
 				DP_NOTICE(p_hwfn, "Invalid block_id\n");
@@ -3936,48 +4253,47 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		if (!check_rule && dump)
 			continue;
 
-		if (!dump) {
-			u32 entry_dump_size =
-				qed_idle_chk_dump_failure(p_hwfn,
-							  p_ptt,
-							  dump_buf + offset,
-							  false,
-							  rule->rule_id,
-							  rule,
-							  0,
-							  NULL);
-
-			offset += num_reg_entries * entry_dump_size;
-			(*num_failing_rules) += num_reg_entries;
-			continue;
-		}
-
 		/* Go over all register entries (number of entries is the same
 		 * for all condition registers).
 		 */
 		for (entry_id = 0; entry_id < num_reg_entries; entry_id++) {
-			/* Read current entry of all condition registers */
 			u32 next_reg_offset = 0;
 
+			if (!dump) {
+				offset += qed_idle_chk_dump_failure(p_hwfn,
+							p_ptt,
+							dump_buf + offset,
+							false,
+							rule->rule_id,
+							rule,
+							entry_id,
+							NULL);
+				(*num_failing_rules)++;
+				break;
+			}
+
+			/* Read current entry of all condition registers */
 			for (reg_id = 0; reg_id < rule->num_cond_regs;
 			     reg_id++) {
 				const struct dbg_idle_chk_cond_reg *reg =
-					&cond_regs[reg_id];
+				    &cond_regs[reg_id];
+				u32 padded_entry_size, addr;
+				bool wide_bus;
 
-				/* Find GRC address (if it's a memory,the
+				/* Find GRC address (if it's a memory, the
 				 * address of the specific entry is calculated).
 				 */
-				u32 addr =
+				addr = GET_FIELD(reg->data,
+						 DBG_IDLE_CHK_COND_REG_ADDRESS);
+				wide_bus =
 				    GET_FIELD(reg->data,
-					      DBG_IDLE_CHK_COND_REG_ADDRESS);
-
+					      DBG_IDLE_CHK_COND_REG_WIDE_BUS);
 				if (reg->num_entries > 1 ||
 				    reg->start_entry > 0) {
-					u32 padded_entry_size =
-					   reg->entry_size > 1 ?
-					   roundup_pow_of_two(reg->entry_size) :
-					   1;
-
+					padded_entry_size =
+					    reg->entry_size > 1 ?
+					    roundup_pow_of_two(reg->entry_size)
+					    : 1;
 					addr += (reg->start_entry + entry_id) *
 						padded_entry_size;
 				}
@@ -3991,28 +4307,27 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 				}
 
 				next_reg_offset +=
-				    qed_grc_dump_addr_range(p_hwfn,
-							    p_ptt,
+				    qed_grc_dump_addr_range(p_hwfn, p_ptt,
 							    cond_reg_values +
 							    next_reg_offset,
 							    dump, addr,
-							    reg->entry_size);
+							    reg->entry_size,
+							    wide_bus);
 			}
 
-			/* Call rule's condition function - a return value of
-			 * true indicates failure.
+			/* Call rule condition function.
+			 * If returns true, it's a failure.
 			 */
-			if ((*cond_arr[rule->cond_id])(cond_reg_values,
-						       imm_values)) {
-				offset +=
-				    qed_idle_chk_dump_failure(p_hwfn,
-							      p_ptt,
-							      dump_buf + offset,
-							      dump,
-							      rule->rule_id,
-							      rule,
-							      entry_id,
-							      cond_reg_values);
+			if ((*cond_arr[rule->cond_id]) (cond_reg_values,
+							imm_values)) {
+				offset += qed_idle_chk_dump_failure(p_hwfn,
+							p_ptt,
+							dump_buf + offset,
+							dump,
+							rule->rule_id,
+							rule,
+							entry_id,
+							cond_reg_values);
 				(*num_failing_rules)++;
 				break;
 			}
@@ -4028,8 +4343,8 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
 {
-	u32 offset = 0, input_offset = 0, num_failing_rules = 0;
-	u32 num_failing_rules_offset;
+	u32 num_failing_rules_offset, offset = 0, input_offset = 0;
+	u32 num_failing_rules = 0;
 
 	/* Dump global params */
 	offset += qed_dump_common_global_params(p_hwfn,
@@ -4042,29 +4357,29 @@ static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_section_hdr(dump_buf + offset, dump, "idle_chk", 1);
 	num_failing_rules_offset = offset;
 	offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0);
+
 	while (input_offset <
 	       s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) {
 		const struct dbg_idle_chk_cond_hdr *cond_hdr =
 			(const struct dbg_idle_chk_cond_hdr *)
 			&s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr
 			[input_offset++];
-		bool eval_mode = GET_FIELD(cond_hdr->mode.data,
-					   DBG_MODE_HDR_EVAL_MODE) > 0;
-		bool mode_match = true;
+		bool eval_mode, mode_match = true;
+		u32 curr_failing_rules;
+		u16 modes_buf_offset;
 
 		/* Check mode */
+		eval_mode = GET_FIELD(cond_hdr->mode.data,
+				      DBG_MODE_HDR_EVAL_MODE) > 0;
 		if (eval_mode) {
-			u16 modes_buf_offset =
+			modes_buf_offset =
 				GET_FIELD(cond_hdr->mode.data,
 					  DBG_MODE_HDR_MODES_BUF_OFFSET);
-
 			mode_match = qed_is_mode_match(p_hwfn,
 						       &modes_buf_offset);
 		}
 
 		if (mode_match) {
-			u32 curr_failing_rules;
-
 			offset +=
 			    qed_idle_chk_dump_rule_entries(p_hwfn,
 				p_ptt,
@@ -4086,10 +4401,13 @@ static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn,
 		qed_dump_num_param(dump_buf + num_failing_rules_offset,
 				   dump, "num_rules", num_failing_rules);
 
+	/* Dump last section */
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
+
 	return offset;
 }
 
-/* Finds the meta data image in NVRAM. */
+/* Finds the meta data image in NVRAM */
 static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
 					    struct qed_ptt *p_ptt,
 					    u32 image_type,
@@ -4098,16 +4416,16 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
 {
 	u32 ret_mcp_resp, ret_mcp_param, ret_txn_size;
 	struct mcp_file_att file_att;
+	int nvm_result;
 
 	/* Call NVRAM get file command */
-	int nvm_result = qed_mcp_nvm_rd_cmd(p_hwfn,
-					    p_ptt,
-					    DRV_MSG_CODE_NVM_GET_FILE_ATT,
-					    image_type,
-					    &ret_mcp_resp,
-					    &ret_mcp_param,
-					    &ret_txn_size,
-					    (u32 *)&file_att);
+	nvm_result = qed_mcp_nvm_rd_cmd(p_hwfn,
+					p_ptt,
+					DRV_MSG_CODE_NVM_GET_FILE_ATT,
+					image_type,
+					&ret_mcp_resp,
+					&ret_mcp_param,
+					&ret_txn_size, (u32 *)&file_att);
 
 	/* Check response */
 	if (nvm_result ||
@@ -4117,6 +4435,7 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
 	/* Update return values */
 	*nvram_offset_bytes = file_att.nvm_start_addr;
 	*nvram_size_bytes = file_att.len;
+
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_DEBUG,
 		   "find_nvram_image: found NVRAM image of type %d in NVRAM offset %d bytes with size %d bytes\n",
@@ -4125,22 +4444,25 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
 	/* Check alignment */
 	if (*nvram_size_bytes & 0x3)
 		return DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE;
+
 	return DBG_STATUS_OK;
 }
 
+/* Reads data from NVRAM */
 static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
 				      struct qed_ptt *p_ptt,
 				      u32 nvram_offset_bytes,
 				      u32 nvram_size_bytes, u32 *ret_buf)
 {
-	u32 ret_mcp_resp, ret_mcp_param, ret_read_size;
-	u32 bytes_to_copy, read_offset = 0;
+	u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy;
 	s32 bytes_left = nvram_size_bytes;
+	u32 read_offset = 0;
 
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_DEBUG,
 		   "nvram_read: reading image of size %d bytes from NVRAM\n",
 		   nvram_size_bytes);
+
 	do {
 		bytes_to_copy =
 		    (bytes_left >
@@ -4155,8 +4477,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
 					DRV_MB_PARAM_NVM_LEN_SHIFT),
 				       &ret_mcp_resp, &ret_mcp_param,
 				       &ret_read_size,
-				       (u32 *)((u8 *)ret_buf +
-					       read_offset)) != 0)
+				       (u32 *)((u8 *)ret_buf + read_offset)))
 			return DBG_STATUS_NVRAM_READ_FAILED;
 
 		/* Check response */
@@ -4172,24 +4493,20 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
 }
 
 /* Get info on the MCP Trace data in the scratchpad:
- * - trace_data_grc_addr - the GRC address of the trace data
- * - trace_data_size_bytes - the size in bytes of the MCP Trace data (without
- *	the header)
+ * - trace_data_grc_addr (OUT): trace data GRC address in bytes
+ * - trace_data_size (OUT): trace data size in bytes (without the header)
  */
 static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *trace_data_grc_addr,
-						   u32 *trace_data_size_bytes)
+						   u32 *trace_data_size)
 {
-	/* Read MCP trace section offsize structure from MCP scratchpad */
-	u32 spad_trace_offsize = qed_rd(p_hwfn,
-					p_ptt,
-					MCP_SPAD_TRACE_OFFSIZE_ADDR);
-	u32 signature;
+	u32 spad_trace_offsize, signature;
 
-	/* Extract MCP trace section GRC address from offsize structure (within
-	 * scratchpad).
-	 */
+	/* Read trace section offsize structure from MCP scratchpad */
+	spad_trace_offsize = qed_rd(p_hwfn, p_ptt, MCP_SPAD_TRACE_OFFSIZE_ADDR);
+
+	/* Extract trace section address from offsize (in scratchpad) */
 	*trace_data_grc_addr =
 		MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize);
 
@@ -4197,42 +4514,41 @@ static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn,
 	signature = qed_rd(p_hwfn, p_ptt,
 			   *trace_data_grc_addr +
 			   offsetof(struct mcp_trace, signature));
+
 	if (signature != MFW_TRACE_SIGNATURE)
 		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
 
 	/* Read trace size from MCP trace section */
-	*trace_data_size_bytes = qed_rd(p_hwfn,
-					p_ptt,
-					*trace_data_grc_addr +
-					offsetof(struct mcp_trace, size));
+	*trace_data_size = qed_rd(p_hwfn,
+				  p_ptt,
+				  *trace_data_grc_addr +
+				  offsetof(struct mcp_trace, size));
+
 	return DBG_STATUS_OK;
 }
 
-/* Reads MCP trace meta data image from NVRAM.
- * - running_bundle_id (OUT) - the running bundle ID (invalid when loaded from
- *	file)
- * - trace_meta_offset_bytes (OUT) - the NVRAM offset in bytes in which the MCP
- *	Trace meta data starts (invalid when loaded from file)
- * - trace_meta_size_bytes (OUT) - the size in bytes of the MCP Trace meta data
+/* Reads MCP trace meta data image from NVRAM
+ * - running_bundle_id (OUT): running bundle ID (invalid when loaded from file)
+ * - trace_meta_offset (OUT): trace meta offset in NVRAM in bytes (invalid when
+ *			      loaded from file).
+ * - trace_meta_size (OUT):   size in bytes of the trace meta data.
  */
 static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 trace_data_size_bytes,
 						   u32 *running_bundle_id,
-						   u32 *trace_meta_offset_bytes,
-						   u32 *trace_meta_size_bytes)
+						   u32 *trace_meta_offset,
+						   u32 *trace_meta_size)
 {
+	u32 spad_trace_offsize, nvram_image_type, running_mfw_addr;
+
 	/* Read MCP trace section offsize structure from MCP scratchpad */
-	u32 spad_trace_offsize = qed_rd(p_hwfn,
-					p_ptt,
-					MCP_SPAD_TRACE_OFFSIZE_ADDR);
+	spad_trace_offsize = qed_rd(p_hwfn, p_ptt, MCP_SPAD_TRACE_OFFSIZE_ADDR);
 
 	/* Find running bundle ID */
-	u32 running_mfw_addr =
+	running_mfw_addr =
 		MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize) +
 		QED_SECTION_SIZE(spad_trace_offsize) + trace_data_size_bytes;
-	u32 nvram_image_type;
-
 	*running_bundle_id = qed_rd(p_hwfn, p_ptt, running_mfw_addr);
 	if (*running_bundle_id > 1)
 		return DBG_STATUS_INVALID_NVRAM_BUNDLE;
@@ -4241,40 +4557,33 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
 	nvram_image_type =
 	    (*running_bundle_id ==
 	     DIR_ID_1) ? NVM_TYPE_MFW_TRACE1 : NVM_TYPE_MFW_TRACE2;
-
 	return qed_find_nvram_image(p_hwfn,
 				    p_ptt,
 				    nvram_image_type,
-				    trace_meta_offset_bytes,
-				    trace_meta_size_bytes);
+				    trace_meta_offset, trace_meta_size);
 }
 
-/* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified
- * buffer.
- */
+/* Reads the MCP Trace meta data from NVRAM into the specified buffer */
 static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
 					       struct qed_ptt *p_ptt,
 					       u32 nvram_offset_in_bytes,
 					       u32 size_in_bytes, u32 *buf)
 {
-	u8 *byte_buf = (u8 *)buf;
-	u8 modules_num, i;
+	u8 modules_num, module_len, i, *byte_buf = (u8 *)buf;
+	enum dbg_status status;
 	u32 signature;
 
 	/* Read meta data from NVRAM */
-	enum dbg_status status = qed_nvram_read(p_hwfn,
-						p_ptt,
-						nvram_offset_in_bytes,
-						size_in_bytes,
-						buf);
-
+	status = qed_nvram_read(p_hwfn,
+				p_ptt,
+				nvram_offset_in_bytes, size_in_bytes, buf);
 	if (status != DBG_STATUS_OK)
 		return status;
 
 	/* Extract and check first signature */
 	signature = qed_read_unaligned_dword(byte_buf);
-	byte_buf += sizeof(u32);
-	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+	byte_buf += sizeof(signature);
+	if (signature != NVM_MAGIC_VALUE)
 		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
 
 	/* Extract number of modules */
@@ -4282,16 +4591,16 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
 
 	/* Skip all modules */
 	for (i = 0; i < modules_num; i++) {
-		u8 module_len = *(byte_buf++);
-
+		module_len = *(byte_buf++);
 		byte_buf += module_len;
 	}
 
 	/* Extract and check second signature */
 	signature = qed_read_unaligned_dword(byte_buf);
-	byte_buf += sizeof(u32);
-	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+	byte_buf += sizeof(signature);
+	if (signature != NVM_MAGIC_VALUE)
 		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
 	return DBG_STATUS_OK;
 }
 
@@ -4308,10 +4617,10 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	bool mcp_access;
 	int halted = 0;
 
-	mcp_access = !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP);
-
 	*num_dumped_dwords = 0;
 
+	mcp_access = !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP);
+
 	/* Get trace data info */
 	status = qed_mcp_trace_get_data_info(p_hwfn,
 					     p_ptt,
@@ -4328,7 +4637,7 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 				     dump, "dump-type", "mcp-trace");
 
 	/* Halt MCP while reading from scratchpad so the read data will be
-	 * consistent if halt fails, MCP trace is taken anyway, with a small
+	 * consistent. if halt fails, MCP trace is taken anyway, with a small
 	 * risk that it may be corrupt.
 	 */
 	if (dump && mcp_access) {
@@ -4339,8 +4648,8 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 
 	/* Find trace data size */
 	trace_data_size_dwords =
-		DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace),
-			     BYTES_IN_DWORD);
+	    DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace),
+			 BYTES_IN_DWORD);
 
 	/* Dump trace data section header and param */
 	offset += qed_dump_section_hdr(dump_buf + offset,
@@ -4354,17 +4663,17 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 					  dump_buf + offset,
 					  dump,
 					  BYTES_TO_DWORDS(trace_data_grc_addr),
-					  trace_data_size_dwords);
+					  trace_data_size_dwords, false);
 
 	/* Resume MCP (only if halt succeeded) */
-	if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0)
+	if (halted && qed_mcp_resume(p_hwfn, p_ptt))
 		DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n");
 
 	/* Dump trace meta section header */
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "mcp_trace_meta", 1);
 
-	/* Read trace meta info */
+	/* Read trace meta info (trace_meta_size_bytes is dword-aligned) */
 	if (mcp_access) {
 		status = qed_mcp_trace_get_meta_info(p_hwfn,
 						     p_ptt,
@@ -4391,6 +4700,9 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	if (status == DBG_STATUS_OK)
 		offset += trace_meta_size_dwords;
 
+	/* Dump last section */
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
+
 	*num_dumped_dwords = offset;
 
 	/* If no mcp access, indicate that the dump doesn't contain the meta
@@ -4405,7 +4717,7 @@ static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 					 u32 *dump_buf,
 					 bool dump, u32 *num_dumped_dwords)
 {
-	u32 offset = 0, dwords_read, size_param_offset;
+	u32 dwords_read, size_param_offset, offset = 0;
 	bool fifo_has_data;
 
 	*num_dumped_dwords = 0;
@@ -4417,8 +4729,8 @@ static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_str_param(dump_buf + offset,
 				     dump, "dump-type", "reg-fifo");
 
-	/* Dump fifo data section header and param. The size param is 0 for now,
-	 * and is overwritten after reading the FIFO.
+	/* Dump fifo data section header and param. The size param is 0 for
+	 * now, and is overwritten after reading the FIFO.
 	 */
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "reg_fifo_data", 1);
@@ -4430,8 +4742,7 @@ static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 		 * test how much data is available, except for reading it.
 		 */
 		offset += REG_FIFO_DEPTH_DWORDS;
-		*num_dumped_dwords = offset;
-		return DBG_STATUS_OK;
+		goto out;
 	}
 
 	fifo_has_data = qed_rd(p_hwfn, p_ptt,
@@ -4456,8 +4767,12 @@ static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 
 	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
 			   dwords_read);
+out:
+	/* Dump last section */
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
 
 	*num_dumped_dwords = offset;
+
 	return DBG_STATUS_OK;
 }
 
@@ -4467,7 +4782,7 @@ static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 					 u32 *dump_buf,
 					 bool dump, u32 *num_dumped_dwords)
 {
-	u32 offset = 0, dwords_read, size_param_offset;
+	u32 dwords_read, size_param_offset, offset = 0;
 	bool fifo_has_data;
 
 	*num_dumped_dwords = 0;
@@ -4479,8 +4794,8 @@ static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_str_param(dump_buf + offset,
 				     dump, "dump-type", "igu-fifo");
 
-	/* Dump fifo data section header and param. The size param is 0 for now,
-	 * and is overwritten after reading the FIFO.
+	/* Dump fifo data section header and param. The size param is 0 for
+	 * now, and is overwritten after reading the FIFO.
 	 */
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "igu_fifo_data", 1);
@@ -4492,8 +4807,7 @@ static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 		 * test how much data is available, except for reading it.
 		 */
 		offset += IGU_FIFO_DEPTH_DWORDS;
-		*num_dumped_dwords = offset;
-		return DBG_STATUS_OK;
+		goto out;
 	}
 
 	fifo_has_data = qed_rd(p_hwfn, p_ptt,
@@ -4519,8 +4833,12 @@ static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 
 	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
 			   dwords_read);
+out:
+	/* Dump last section */
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
 
 	*num_dumped_dwords = offset;
+
 	return DBG_STATUS_OK;
 }
 
@@ -4531,7 +4849,7 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
 						    bool dump,
 						    u32 *num_dumped_dwords)
 {
-	u32 offset = 0, size_param_offset, override_window_dwords;
+	u32 size_param_offset, override_window_dwords, offset = 0;
 
 	*num_dumped_dwords = 0;
 
@@ -4542,8 +4860,8 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_str_param(dump_buf + offset,
 				     dump, "dump-type", "protection-override");
 
-	/* Dump data section header and param. The size param is 0 for now, and
-	 * is overwritten after reading the data.
+	/* Dump data section header and param. The size param is 0 for now,
+	 * and is overwritten after reading the data.
 	 */
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "protection_override_data", 1);
@@ -4552,8 +4870,7 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
 
 	if (!dump) {
 		offset += PROTECTION_OVERRIDE_DEPTH_DWORDS;
-		*num_dumped_dwords = offset;
-		return DBG_STATUS_OK;
+		goto out;
 	}
 
 	/* Add override window info to buffer */
@@ -4569,8 +4886,12 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
 	offset += override_window_dwords;
 	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
 			   override_window_dwords);
+out:
+	/* Dump last section */
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
 
 	*num_dumped_dwords = offset;
+
 	return DBG_STATUS_OK;
 }
 
@@ -4593,11 +4914,14 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 						dump_buf + offset, dump, 1);
 	offset += qed_dump_str_param(dump_buf + offset,
 				     dump, "dump-type", "fw-asserts");
+
+	/* Find Storm dump size */
 	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
 		u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx;
+		struct storm_defs *storm = &s_storm_defs[storm_id];
 		u32 last_list_idx, addr;
 
-		if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id])
+		if (dev_data->block_in_reset[storm->block_id])
 			continue;
 
 		/* Read FW info for the current Storm */
@@ -4606,26 +4930,26 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 		asserts = &fw_info.fw_asserts_section;
 
 		/* Dump FW Asserts section header and params */
-		storm_letter_str[0] = s_storm_defs[storm_id].letter;
-		offset += qed_dump_section_hdr(dump_buf + offset, dump,
-					       "fw_asserts", 2);
-		offset += qed_dump_str_param(dump_buf + offset, dump, "storm",
-					     storm_letter_str);
-		offset += qed_dump_num_param(dump_buf + offset, dump, "size",
+		storm_letter_str[0] = storm->letter;
+		offset += qed_dump_section_hdr(dump_buf + offset,
+					       dump, "fw_asserts", 2);
+		offset += qed_dump_str_param(dump_buf + offset,
+					     dump, "storm", storm_letter_str);
+		offset += qed_dump_num_param(dump_buf + offset,
+					     dump,
+					     "size",
 					     asserts->list_element_dword_size);
 
+		/* Read and dump FW Asserts data */
 		if (!dump) {
 			offset += asserts->list_element_dword_size;
 			continue;
 		}
 
-		/* Read and dump FW Asserts data */
-		fw_asserts_section_addr =
-			s_storm_defs[storm_id].sem_fast_mem_addr +
+		fw_asserts_section_addr = storm->sem_fast_mem_addr +
 			SEM_FAST_REG_INT_RAM +
 			RAM_LINES_TO_BYTES(asserts->section_ram_line_offset);
-		next_list_idx_addr =
-			fw_asserts_section_addr +
+		next_list_idx_addr = fw_asserts_section_addr +
 			DWORDS_TO_BYTES(asserts->list_next_index_dword_offset);
 		next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr);
 		last_list_idx = (next_list_idx > 0
@@ -4638,11 +4962,13 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 		    qed_grc_dump_addr_range(p_hwfn, p_ptt,
 					    dump_buf + offset,
 					    dump, addr,
-					    asserts->list_element_dword_size);
+					    asserts->list_element_dword_size,
+					    false);
 	}
 
 	/* Dump last section */
-	offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0);
+	offset += qed_dump_last_section(p_hwfn, dump_buf, offset, dump);
+
 	return offset;
 }
 
@@ -4650,10 +4976,10 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 
 enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr)
 {
-	/* Convert binary data to debug arrays */
 	struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr;
 	u8 buf_id;
 
+	/* convert binary data to debug arrays */
 	for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) {
 		s_dbg_arrays[buf_id].ptr =
 		    (u32 *)(bin_ptr + buf_array[buf_id].offset);
@@ -4682,14 +5008,17 @@ enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 
 	*buf_size = 0;
+
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)
 		return DBG_STATUS_DBG_ARRAY_NOT_SET;
+
 	return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size);
 }
 
@@ -4702,12 +5031,14 @@ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
 	u32 needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt,
-					       &needed_buf_size_in_dwords);
-
 	*num_dumped_dwords = 0;
+
+	status = qed_dbg_grc_get_dump_buf_size(p_hwfn,
+					       p_ptt,
+					       &needed_buf_size_in_dwords);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
 		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
@@ -4724,25 +5055,31 @@ enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size)
 {
-	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	struct idle_chk_data *idle_chk;
+	enum dbg_status status;
 
+	idle_chk = &dev_data->idle_chk;
 	*buf_size = 0;
+
+	status = qed_dbg_dev_init(p_hwfn, p_ptt);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr ||
 	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr)
 		return DBG_STATUS_DBG_ARRAY_NOT_SET;
-	if (!dev_data->idle_chk.buf_size_set) {
-		dev_data->idle_chk.buf_size = qed_idle_chk_dump(p_hwfn,
-								p_ptt,
-								NULL, false);
-		dev_data->idle_chk.buf_size_set = true;
+
+	if (!idle_chk->buf_size_set) {
+		idle_chk->buf_size = qed_idle_chk_dump(p_hwfn,
+						       p_ptt, NULL, false);
+		idle_chk->buf_size_set = true;
 	}
 
-	*buf_size = dev_data->idle_chk.buf_size;
+	*buf_size = idle_chk->buf_size;
+
 	return DBG_STATUS_OK;
 }
 
@@ -4755,12 +5092,14 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
 	u32 needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt,
-						    &needed_buf_size_in_dwords);
-
 	*num_dumped_dwords = 0;
+
+	status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn,
+						    p_ptt,
+						    &needed_buf_size_in_dwords);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
 		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
@@ -4783,8 +5122,10 @@ enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 
 	*buf_size = 0;
+
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	return qed_mcp_trace_dump(p_hwfn, p_ptt, NULL, false, buf_size);
 }
 
@@ -4797,13 +5138,12 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	u32 needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	/* validate buffer size */
 	status =
-	    qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt,
-						&needed_buf_size_in_dwords);
-
-	if (status != DBG_STATUS_OK &&
-	    status != DBG_STATUS_NVRAM_GET_IMAGE_FAILED)
+		qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn,
+						    p_ptt,
+						    &needed_buf_size_in_dwords);
+	if (status != DBG_STATUS_OK && status !=
+	    DBG_STATUS_NVRAM_GET_IMAGE_FAILED)
 		return status;
 
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
@@ -4829,8 +5169,10 @@ enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 
 	*buf_size = 0;
+
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	return qed_reg_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
 }
 
@@ -4843,12 +5185,14 @@ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 	u32 needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn, p_ptt,
-						    &needed_buf_size_in_dwords);
-
 	*num_dumped_dwords = 0;
+
+	status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn,
+						    p_ptt,
+						    &needed_buf_size_in_dwords);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
 		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
@@ -4871,8 +5215,10 @@ enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 
 	*buf_size = 0;
+
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	return qed_igu_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
 }
 
@@ -4885,12 +5231,14 @@ enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 	u32 needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn, p_ptt,
-						    &needed_buf_size_in_dwords);
-
 	*num_dumped_dwords = 0;
+
+	status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn,
+						    p_ptt,
+						    &needed_buf_size_in_dwords);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
 		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
@@ -4913,8 +5261,10 @@ qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 
 	*buf_size = 0;
+
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	return qed_protection_override_dump(p_hwfn,
 					    p_ptt, NULL, false, buf_size);
 }
@@ -4925,15 +5275,18 @@ enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
 						 u32 buf_size_in_dwords,
 						 u32 *num_dumped_dwords)
 {
-	u32 needed_buf_size_in_dwords;
+	u32 needed_buf_size_in_dwords, *p_size = &needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	status = qed_dbg_protection_override_get_dump_buf_size(p_hwfn, p_ptt,
-						&needed_buf_size_in_dwords);
-
 	*num_dumped_dwords = 0;
+
+	status =
+		qed_dbg_protection_override_get_dump_buf_size(p_hwfn,
+							      p_ptt,
+							      p_size);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
 		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
@@ -4958,12 +5311,15 @@ enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
 
 	*buf_size = 0;
+
 	if (status != DBG_STATUS_OK)
 		return status;
 
 	/* Update reset state */
 	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
 	*buf_size = qed_fw_asserts_dump(p_hwfn, p_ptt, NULL, false);
+
 	return DBG_STATUS_OK;
 }
 
@@ -4973,19 +5329,26 @@ enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 					u32 buf_size_in_dwords,
 					u32 *num_dumped_dwords)
 {
-	u32 needed_buf_size_in_dwords;
+	u32 needed_buf_size_in_dwords, *p_size = &needed_buf_size_in_dwords;
 	enum dbg_status status;
 
-	status = qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn, p_ptt,
-						&needed_buf_size_in_dwords);
-
 	*num_dumped_dwords = 0;
+
+	status =
+		qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn,
+						     p_ptt,
+						     p_size);
 	if (status != DBG_STATUS_OK)
 		return status;
+
 	if (buf_size_in_dwords < needed_buf_size_in_dwords)
 		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
 
 	*num_dumped_dwords = qed_fw_asserts_dump(p_hwfn, p_ptt, dump_buf, true);
+
+	/* Revert GRC params to their default */
+	qed_dbg_grc_set_params_default(p_hwfn);
+
 	return DBG_STATUS_OK;
 }
 
@@ -5005,9 +5368,14 @@ struct mcp_trace_format {
 #define MCP_TRACE_FORMAT_P3_SIZE_SHIFT	22
 #define MCP_TRACE_FORMAT_LEN_MASK	0xff000000
 #define MCP_TRACE_FORMAT_LEN_SHIFT	24
+
 	char *format_str;
 };
 
+/* Meta data structure, generated by a perl script during MFW build. therefore,
+ * the structs mcp_trace_meta and mcp_trace_format are duplicated in the perl
+ * script.
+ */
 struct mcp_trace_meta {
 	u32 modules_num;
 	char **modules;
@@ -5015,7 +5383,7 @@ struct mcp_trace_meta {
 	struct mcp_trace_format *formats;
 };
 
-/* Reg fifo element */
+/* REG fifo element */
 struct reg_fifo_element {
 	u64 data;
 #define REG_FIFO_ELEMENT_ADDRESS_SHIFT		0
@@ -5140,12 +5508,15 @@ struct igu_fifo_addr_data {
 /******************************** Constants **********************************/
 
 #define MAX_MSG_LEN				1024
+
 #define MCP_TRACE_MAX_MODULE_LEN		8
 #define MCP_TRACE_FORMAT_MAX_PARAMS		3
 #define MCP_TRACE_FORMAT_PARAM_WIDTH \
 	(MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT)
+
 #define REG_FIFO_ELEMENT_ADDR_FACTOR		4
 #define REG_FIFO_ELEMENT_IS_PF_VF_VAL		127
+
 #define PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR	4
 
 /********************************* Macros ************************************/
@@ -5154,59 +5525,178 @@ struct igu_fifo_addr_data {
 
 /***************************** Constant Arrays *******************************/
 
+struct user_dbg_array {
+	const u32 *ptr;
+	u32 size_in_dwords;
+};
+
+/* Debug arrays */
+static struct user_dbg_array
+s_user_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} };
+
 /* Status string array */
 static const char * const s_status_str[] = {
+	/* DBG_STATUS_OK */
 	"Operation completed successfully",
+
+	/* DBG_STATUS_APP_VERSION_NOT_SET */
 	"Debug application version wasn't set",
+
+	/* DBG_STATUS_UNSUPPORTED_APP_VERSION */
 	"Unsupported debug application version",
+
+	/* DBG_STATUS_DBG_BLOCK_NOT_RESET */
 	"The debug block wasn't reset since the last recording",
+
+	/* DBG_STATUS_INVALID_ARGS */
 	"Invalid arguments",
+
+	/* DBG_STATUS_OUTPUT_ALREADY_SET */
 	"The debug output was already set",
+
+	/* DBG_STATUS_INVALID_PCI_BUF_SIZE */
 	"Invalid PCI buffer size",
+
+	/* DBG_STATUS_PCI_BUF_ALLOC_FAILED */
 	"PCI buffer allocation failed",
+
+	/* DBG_STATUS_PCI_BUF_NOT_ALLOCATED */
 	"A PCI buffer wasn't allocated",
+
+	/* DBG_STATUS_TOO_MANY_INPUTS */
 	"Too many inputs were enabled. Enabled less inputs, or set 'unifyInputs' to true",
-	"GRC/Timestamp input overlap in cycle dword 0",
+
+	/* DBG_STATUS_INPUT_OVERLAP */
+	"Overlapping debug bus inputs",
+
+	/* DBG_STATUS_HW_ONLY_RECORDING */
 	"Cannot record Storm data since the entire recording cycle is used by HW",
+
+	/* DBG_STATUS_STORM_ALREADY_ENABLED */
 	"The Storm was already enabled",
+
+	/* DBG_STATUS_STORM_NOT_ENABLED */
 	"The specified Storm wasn't enabled",
+
+	/* DBG_STATUS_BLOCK_ALREADY_ENABLED */
 	"The block was already enabled",
+
+	/* DBG_STATUS_BLOCK_NOT_ENABLED */
 	"The specified block wasn't enabled",
+
+	/* DBG_STATUS_NO_INPUT_ENABLED */
 	"No input was enabled for recording",
+
+	/* DBG_STATUS_NO_FILTER_TRIGGER_64B */
 	"Filters and triggers are not allowed when recording in 64b units",
+
+	/* DBG_STATUS_FILTER_ALREADY_ENABLED */
 	"The filter was already enabled",
+
+	/* DBG_STATUS_TRIGGER_ALREADY_ENABLED */
 	"The trigger was already enabled",
+
+	/* DBG_STATUS_TRIGGER_NOT_ENABLED */
 	"The trigger wasn't enabled",
+
+	/* DBG_STATUS_CANT_ADD_CONSTRAINT */
 	"A constraint can be added only after a filter was enabled or a trigger state was added",
+
+	/* DBG_STATUS_TOO_MANY_TRIGGER_STATES */
 	"Cannot add more than 3 trigger states",
+
+	/* DBG_STATUS_TOO_MANY_CONSTRAINTS */
 	"Cannot add more than 4 constraints per filter or trigger state",
+
+	/* DBG_STATUS_RECORDING_NOT_STARTED */
 	"The recording wasn't started",
+
+	/* DBG_STATUS_DATA_DIDNT_TRIGGER */
 	"A trigger was configured, but it didn't trigger",
+
+	/* DBG_STATUS_NO_DATA_RECORDED */
 	"No data was recorded",
+
+	/* DBG_STATUS_DUMP_BUF_TOO_SMALL */
 	"Dump buffer is too small",
+
+	/* DBG_STATUS_DUMP_NOT_CHUNK_ALIGNED */
 	"Dumped data is not aligned to chunks",
+
+	/* DBG_STATUS_UNKNOWN_CHIP */
 	"Unknown chip",
+
+	/* DBG_STATUS_VIRT_MEM_ALLOC_FAILED */
 	"Failed allocating virtual memory",
+
+	/* DBG_STATUS_BLOCK_IN_RESET */
 	"The input block is in reset",
+
+	/* DBG_STATUS_INVALID_TRACE_SIGNATURE */
 	"Invalid MCP trace signature found in NVRAM",
+
+	/* DBG_STATUS_INVALID_NVRAM_BUNDLE */
 	"Invalid bundle ID found in NVRAM",
+
+	/* DBG_STATUS_NVRAM_GET_IMAGE_FAILED */
 	"Failed getting NVRAM image",
+
+	/* DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE */
 	"NVRAM image is not dword-aligned",
+
+	/* DBG_STATUS_NVRAM_READ_FAILED */
 	"Failed reading from NVRAM",
+
+	/* DBG_STATUS_IDLE_CHK_PARSE_FAILED */
 	"Idle check parsing failed",
+
+	/* DBG_STATUS_MCP_TRACE_BAD_DATA */
 	"MCP Trace data is corrupt",
-	"Dump doesn't contain meta data - it must be provided in an image file",
+
+	/* DBG_STATUS_MCP_TRACE_NO_META */
+	"Dump doesn't contain meta data - it must be provided in image file",
+
+	/* DBG_STATUS_MCP_COULD_NOT_HALT */
 	"Failed to halt MCP",
+
+	/* DBG_STATUS_MCP_COULD_NOT_RESUME */
 	"Failed to resume MCP after halt",
+
+	/* DBG_STATUS_DMAE_FAILED */
 	"DMAE transaction failed",
+
+	/* DBG_STATUS_SEMI_FIFO_NOT_EMPTY */
 	"Failed to empty SEMI sync FIFO",
+
+	/* DBG_STATUS_IGU_FIFO_BAD_DATA */
 	"IGU FIFO data is corrupt",
+
+	/* DBG_STATUS_MCP_COULD_NOT_MASK_PRTY */
 	"MCP failed to mask parities",
+
+	/* DBG_STATUS_FW_ASSERTS_PARSE_FAILED */
 	"FW Asserts parsing failed",
+
+	/* DBG_STATUS_REG_FIFO_BAD_DATA */
 	"GRC FIFO data is corrupt",
+
+	/* DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA */
 	"Protection Override data is corrupt",
+
+	/* DBG_STATUS_DBG_ARRAY_NOT_SET */
 	"Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)",
-	"When a block is filtered, no other blocks can be recorded unless inputs are unified (due to a HW bug)"
+
+	/* DBG_STATUS_FILTER_BUG */
+	"Debug Bus filtering requires the -unifyInputs option (due to a HW bug)",
+
+	/* DBG_STATUS_NON_MATCHING_LINES */
+	"Non-matching debug lines - all lines must be of the same type (either 128b or 256b)",
+
+	/* DBG_STATUS_INVALID_TRIGGER_DWORD_OFFSET */
+	"The selected trigger dword offset wasn't enabled in the recorded HW block",
+
+	/* DBG_STATUS_DBG_BUS_IN_USE */
+	"The debug bus is in use"
 };
 
 /* Idle check severity names array */
@@ -5223,12 +5713,13 @@ static const char * const s_mcp_trace_level_str[] = {
 	"DEBUG"
 };
 
-/* Parsing strings */
+/* Access type names array */
 static const char * const s_access_strs[] = {
 	"read",
 	"write"
 };
 
+/* Privilege type names array */
 static const char * const s_privilege_strs[] = {
 	"VF",
 	"PDA",
@@ -5236,6 +5727,7 @@ static const char * const s_privilege_strs[] = {
 	"UA"
 };
 
+/* Protection type names array */
 static const char * const s_protection_strs[] = {
 	"(default)",
 	"(default)",
@@ -5247,6 +5739,7 @@ static const char * const s_protection_strs[] = {
 	"override UA"
 };
 
+/* Master type names array */
 static const char * const s_master_strs[] = {
 	"???",
 	"pxp",
@@ -5266,6 +5759,7 @@ static const char * const s_master_strs[] = {
 	"???"
 };
 
+/* REG FIFO error messages array */
 static const char * const s_reg_fifo_error_strs[] = {
 	"grc timeout",
 	"address doesn't belong to any block",
@@ -5274,6 +5768,7 @@ static const char * const s_reg_fifo_error_strs[] = {
 	"path isolation error"
 };
 
+/* IGU FIFO sources array */
 static const char * const s_igu_fifo_source_strs[] = {
 	"TSTORM",
 	"MSTORM",
@@ -5288,6 +5783,7 @@ static const char * const s_igu_fifo_source_strs[] = {
 	"GRC",
 };
 
+/* IGU FIFO error messages */
 static const char * const s_igu_fifo_error_strs[] = {
 	"no error",
 	"length error",
@@ -5308,13 +5804,18 @@ static const char * const s_igu_fifo_error_strs[] = {
 
 /* IGU FIFO address data */
 static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
-	{0x0, 0x101, "MSI-X Memory", NULL, IGU_ADDR_TYPE_MSIX_MEM},
-	{0x102, 0x1ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
-	{0x200, 0x200, "Write PBA[0:63]", NULL, IGU_ADDR_TYPE_WRITE_PBA},
+	{0x0, 0x101, "MSI-X Memory", NULL,
+	 IGU_ADDR_TYPE_MSIX_MEM},
+	{0x102, 0x1ff, "reserved", NULL,
+	 IGU_ADDR_TYPE_RESERVED},
+	{0x200, 0x200, "Write PBA[0:63]", NULL,
+	 IGU_ADDR_TYPE_WRITE_PBA},
 	{0x201, 0x201, "Write PBA[64:127]", "reserved",
 	 IGU_ADDR_TYPE_WRITE_PBA},
-	{0x202, 0x202, "Write PBA[128]", "reserved", IGU_ADDR_TYPE_WRITE_PBA},
-	{0x203, 0x3ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+	{0x202, 0x202, "Write PBA[128]", "reserved",
+	 IGU_ADDR_TYPE_WRITE_PBA},
+	{0x203, 0x3ff, "reserved", NULL,
+	 IGU_ADDR_TYPE_RESERVED},
 	{0x400, 0x5ef, "Write interrupt acknowledgment", NULL,
 	 IGU_ADDR_TYPE_WRITE_INT_ACK},
 	{0x5f0, 0x5f0, "Attention bits update", NULL,
@@ -5331,8 +5832,10 @@ static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
 	 IGU_ADDR_TYPE_READ_INT},
 	{0x5f6, 0x5f6, "Read interrupt 0:63 without mask", NULL,
 	 IGU_ADDR_TYPE_READ_INT},
-	{0x5f7, 0x5ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
-	{0x600, 0x7ff, "Producer update", NULL, IGU_ADDR_TYPE_WRITE_PROD_UPDATE}
+	{0x5f7, 0x5ff, "reserved", NULL,
+	 IGU_ADDR_TYPE_RESERVED},
+	{0x600, 0x7ff, "Producer update", NULL,
+	 IGU_ADDR_TYPE_WRITE_PROD_UPDATE}
 };
 
 /******************************** Variables **********************************/
@@ -5340,28 +5843,12 @@ static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
 /* MCP Trace meta data - used in case the dump doesn't contain the meta data
  * (e.g. due to no NVRAM access).
  */
-static struct dbg_array s_mcp_trace_meta = { NULL, 0 };
+static struct user_dbg_array s_mcp_trace_meta = { NULL, 0 };
 
 /* Temporary buffer, used for print size calculations */
 static char s_temp_buf[MAX_MSG_LEN];
 
-/***************************** Public Functions *******************************/
-
-enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
-{
-	/* Convert binary data to debug arrays */
-	struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr;
-	u8 buf_id;
-
-	for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) {
-		s_dbg_arrays[buf_id].ptr =
-		    (u32 *)(bin_ptr + buf_array[buf_id].offset);
-		s_dbg_arrays[buf_id].size_in_dwords =
-		    BYTES_TO_DWORDS(buf_array[buf_id].length);
-	}
-
-	return DBG_STATUS_OK;
-}
+/**************************** Private Functions ******************************/
 
 static u32 qed_cyclic_add(u32 a, u32 b, u32 size)
 {
@@ -5381,10 +5868,8 @@ static u32 qed_read_from_cyclic_buf(void *buf,
 				    u32 *offset,
 				    u32 buf_size, u8 num_bytes_to_read)
 {
-	u8 *bytes_buf = (u8 *)buf;
-	u8 *val_ptr;
+	u8 i, *val_ptr, *bytes_buf = (u8 *)buf;
 	u32 val = 0;
-	u8 i;
 
 	val_ptr = (u8 *)&val;
 
@@ -5412,6 +5897,7 @@ static u32 qed_read_dword_from_buf(void *buf, u32 *offset)
 	u32 dword_val = *(u32 *)&((u8 *)buf)[*offset];
 
 	*offset += 4;
+
 	return dword_val;
 }
 
@@ -5445,7 +5931,7 @@ static u32 qed_read_param(u32 *dump_buf,
 			  const char **param_str_val, u32 *param_num_val)
 {
 	char *char_buf = (char *)dump_buf;
-	u32 offset = 0; /* In bytes */
+	size_t offset = 0;
 
 	/* Extract param name */
 	*param_name = char_buf;
@@ -5493,37 +5979,31 @@ static u32 qed_print_section_params(u32 *dump_buf,
 	u32 i, dump_offset = 0, results_offset = 0;
 
 	for (i = 0; i < num_section_params; i++) {
-		const char *param_name;
-		const char *param_str_val;
+		const char *param_name, *param_str_val;
 		u32 param_num_val = 0;
 
 		dump_offset += qed_read_param(dump_buf + dump_offset,
 					      &param_name,
 					      &param_str_val, &param_num_val);
+
 		if (param_str_val)
-			/* String param */
 			results_offset +=
 				sprintf(qed_get_buf_ptr(results_buf,
 							results_offset),
 					"%s: %s\n", param_name, param_str_val);
 		else if (strcmp(param_name, "fw-timestamp"))
-			/* Numeric param */
 			results_offset +=
 				sprintf(qed_get_buf_ptr(results_buf,
 							results_offset),
 					"%s: %d\n", param_name, param_num_val);
 	}
 
-	results_offset +=
-	    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	results_offset += sprintf(qed_get_buf_ptr(results_buf, results_offset),
+				  "\n");
+
 	*num_chars_printed = results_offset;
-	return dump_offset;
-}
 
-const char *qed_dbg_get_status_str(enum dbg_status status)
-{
-	return (status <
-		MAX_DBG_STATUS) ? s_status_str[status] : "Invalid debug status";
+	return dump_offset;
 }
 
 /* Parses the idle check rules and returns the number of characters printed.
@@ -5537,7 +6017,10 @@ static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
 					 char *results_buf,
 					 u32 *num_errors, u32 *num_warnings)
 {
-	u32 rule_idx, results_offset = 0; /* Offset in results_buf in bytes */
+	/* Offset in results_buf in bytes */
+	u32 results_offset = 0;
+
+	u32 rule_idx;
 	u16 i, j;
 
 	*num_errors = 0;
@@ -5548,16 +6031,15 @@ static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
 	     rule_idx++) {
 		const struct dbg_idle_chk_rule_parsing_data *rule_parsing_data;
 		struct dbg_idle_chk_result_hdr *hdr;
-		const char *parsing_str;
+		const char *parsing_str, *lsi_msg;
 		u32 parsing_str_offset;
-		const char *lsi_msg;
-		u8 curr_reg_id = 0;
 		bool has_fw_msg;
+		u8 curr_reg_id;
 
 		hdr = (struct dbg_idle_chk_result_hdr *)dump_buf;
 		rule_parsing_data =
 			(const struct dbg_idle_chk_rule_parsing_data *)
-			&s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].
+			&s_user_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].
 			ptr[hdr->rule_id];
 		parsing_str_offset =
 			GET_FIELD(rule_parsing_data->data,
@@ -5565,16 +6047,18 @@ static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
 		has_fw_msg =
 			GET_FIELD(rule_parsing_data->data,
 				DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0;
-		parsing_str = &((const char *)
-				s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr)
-				[parsing_str_offset];
+		parsing_str =
+			&((const char *)
+			s_user_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr)
+			[parsing_str_offset];
 		lsi_msg = parsing_str;
+		curr_reg_id = 0;
 
 		if (hdr->severity >= MAX_DBG_IDLE_CHK_SEVERITY_TYPES)
 			return 0;
 
 		/* Skip rule header */
-		dump_buf += (sizeof(struct dbg_idle_chk_result_hdr) / 4);
+		dump_buf += BYTES_TO_DWORDS(sizeof(*hdr));
 
 		/* Update errors/warnings count */
 		if (hdr->severity == IDLE_CHK_SEVERITY_ERROR ||
@@ -5606,19 +6090,19 @@ static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
 		for (i = 0;
 		     i < hdr->num_dumped_cond_regs + hdr->num_dumped_info_regs;
 		     i++) {
-			struct dbg_idle_chk_result_reg_hdr *reg_hdr
-			    = (struct dbg_idle_chk_result_reg_hdr *)
-			    dump_buf;
-			bool is_mem =
-				GET_FIELD(reg_hdr->data,
-					  DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM);
-			u8 reg_id =
-				GET_FIELD(reg_hdr->data,
-					  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID);
+			struct dbg_idle_chk_result_reg_hdr *reg_hdr;
+			bool is_mem;
+			u8 reg_id;
+
+			reg_hdr =
+				(struct dbg_idle_chk_result_reg_hdr *)dump_buf;
+			is_mem = GET_FIELD(reg_hdr->data,
+					   DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM);
+			reg_id = GET_FIELD(reg_hdr->data,
+					   DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID);
 
 			/* Skip reg header */
-			dump_buf +=
-			    (sizeof(struct dbg_idle_chk_result_reg_hdr) / 4);
+			dump_buf += BYTES_TO_DWORDS(sizeof(*reg_hdr));
 
 			/* Skip register names until the required reg_id is
 			 * reached.
@@ -5660,6 +6144,7 @@ static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
 	/* Check if end of dump buffer was exceeded */
 	if (dump_buf > dump_buf_end)
 		return 0;
+
 	return results_offset;
 }
 
@@ -5680,13 +6165,16 @@ static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
 	const char *section_name, *param_name, *param_str_val;
 	u32 *dump_buf_end = dump_buf + num_dumped_dwords;
 	u32 num_section_params = 0, num_rules;
-	u32 results_offset = 0;	/* Offset in results_buf in bytes */
+
+	/* Offset in results_buf in bytes */
+	u32 results_offset = 0;
 
 	*parsed_results_bytes = 0;
 	*num_errors = 0;
 	*num_warnings = 0;
-	if (!s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr ||
-	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr)
+
+	if (!s_user_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr ||
+	    !s_user_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr)
 		return DBG_STATUS_DBG_ARRAY_NOT_SET;
 
 	/* Read global_params section */
@@ -5705,10 +6193,9 @@ static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
 					 &section_name, &num_section_params);
 	if (strcmp(section_name, "idle_chk") || num_section_params != 1)
 		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
-
 	dump_buf += qed_read_param(dump_buf,
 				   &param_name, &param_str_val, &num_rules);
-	if (strcmp(param_name, "num_rules") != 0)
+	if (strcmp(param_name, "num_rules"))
 		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
 
 	if (num_rules) {
@@ -5728,7 +6215,7 @@ static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
 						      results_offset : NULL,
 						      num_errors, num_warnings);
 		results_offset += rules_print_size;
-		if (rules_print_size == 0)
+		if (!rules_print_size)
 			return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
 
 		/* Print LSI output */
@@ -5745,69 +6232,38 @@ static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
 						      results_offset : NULL,
 						      num_errors, num_warnings);
 		results_offset += rules_print_size;
-		if (rules_print_size == 0)
+		if (!rules_print_size)
 			return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
 	}
 
 	/* Print errors/warnings count */
-	if (*num_errors) {
+	if (*num_errors)
 		results_offset +=
 		    sprintf(qed_get_buf_ptr(results_buf,
 					    results_offset),
 			    "\nIdle Check failed!!! (with %d errors and %d warnings)\n",
 			    *num_errors, *num_warnings);
-	} else if (*num_warnings) {
+	else if (*num_warnings)
 		results_offset +=
 		    sprintf(qed_get_buf_ptr(results_buf,
 					    results_offset),
-			    "\nIdle Check completed successfuly (with %d warnings)\n",
+			    "\nIdle Check completed successfully (with %d warnings)\n",
 			    *num_warnings);
-	} else {
+	else
 		results_offset +=
 		    sprintf(qed_get_buf_ptr(results_buf,
 					    results_offset),
-			    "\nIdle Check completed successfuly\n");
-	}
+			    "\nIdle Check completed successfully\n");
 
 	/* Add 1 for string NULL termination */
 	*parsed_results_bytes = results_offset + 1;
+
 	return DBG_STATUS_OK;
 }
 
-enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
-						  u32 *dump_buf,
-						  u32 num_dumped_dwords,
-						  u32 *results_buf_size)
-{
-	u32 num_errors, num_warnings;
-
-	return qed_parse_idle_chk_dump(p_hwfn,
-				       dump_buf,
-				       num_dumped_dwords,
-				       NULL,
-				       results_buf_size,
-				       &num_errors, &num_warnings);
-}
-
-enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
-					   u32 *dump_buf,
-					   u32 num_dumped_dwords,
-					   char *results_buf,
-					   u32 *num_errors, u32 *num_warnings)
-{
-	u32 parsed_buf_size;
-
-	return qed_parse_idle_chk_dump(p_hwfn,
-				       dump_buf,
-				       num_dumped_dwords,
-				       results_buf,
-				       &parsed_buf_size,
-				       num_errors, num_warnings);
-}
-
-/* Frees the specified MCP Trace meta data */
-static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
-				    struct mcp_trace_meta *meta)
+/* Frees the specified MCP Trace meta data */
+static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
+				    struct mcp_trace_meta *meta)
 {
 	u32 i;
 
@@ -5841,12 +6297,10 @@ static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
 
 	/* Read first signature */
 	signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
-	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+	if (signature != NVM_MAGIC_VALUE)
 		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
 
-	/* Read number of modules and allocate memory for all the modules
-	 * pointers.
-	 */
+	/* Read no. of modules and allocate memory for their pointers */
 	meta->modules_num = qed_read_byte_from_buf(meta_buf_bytes, &offset);
 	meta->modules = kzalloc(meta->modules_num * sizeof(char *), GFP_KERNEL);
 	if (!meta->modules)
@@ -5871,7 +6325,7 @@ static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
 
 	/* Read second signature */
 	signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
-	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+	if (signature != NVM_MAGIC_VALUE)
 		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
 
 	/* Read number of formats and allocate memory for all formats */
@@ -5919,10 +6373,10 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 						char *results_buf,
 						u32 *parsed_results_bytes)
 {
-	u32 results_offset = 0, param_mask, param_shift, param_num_val;
-	u32 num_section_params, offset, end_offset, bytes_left;
+	u32 end_offset, bytes_left, trace_data_dwords, trace_meta_dwords;
+	u32 param_mask, param_shift, param_num_val, num_section_params;
 	const char *section_name, *param_name, *param_str_val;
-	u32 trace_data_dwords, trace_meta_dwords;
+	u32 offset, results_offset = 0;
 	struct mcp_trace_meta meta;
 	struct mcp_trace *trace;
 	enum dbg_status status;
@@ -5955,7 +6409,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 
 	/* Prepare trace info */
 	trace = (struct mcp_trace *)dump_buf;
-	trace_buf = (u8 *)dump_buf + sizeof(struct mcp_trace);
+	trace_buf = (u8 *)dump_buf + sizeof(*trace);
 	offset = trace->trace_oldest;
 	end_offset = trace->trace_prod;
 	bytes_left = qed_cyclic_sub(end_offset, offset, trace->size);
@@ -5968,7 +6422,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 		return DBG_STATUS_MCP_TRACE_BAD_DATA;
 	dump_buf += qed_read_param(dump_buf,
 				   &param_name, &param_str_val, &param_num_val);
-	if (strcmp(param_name, "size") != 0)
+	if (strcmp(param_name, "size"))
 		return DBG_STATUS_MCP_TRACE_BAD_DATA;
 	trace_meta_dwords = param_num_val;
 
@@ -6028,6 +6482,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 		}
 
 		format_ptr = &meta.formats[format_idx];
+
 		for (i = 0,
 		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift =
 		     MCP_TRACE_FORMAT_P1_SIZE_SHIFT;
@@ -6050,6 +6505,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 			 */
 			if (param_size == 3)
 				param_size = 4;
+
 			if (bytes_left < param_size) {
 				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
 				goto free_mem;
@@ -6059,13 +6515,14 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 							     &offset,
 							     trace->size,
 							     param_size);
+
 			bytes_left -= param_size;
 		}
 
 		format_level =
 		    (u8)((format_ptr->data &
 			  MCP_TRACE_FORMAT_LEVEL_MASK) >>
-			  MCP_TRACE_FORMAT_LEVEL_SHIFT);
+			 MCP_TRACE_FORMAT_LEVEL_SHIFT);
 		format_module =
 		    (u8)((format_ptr->data &
 			  MCP_TRACE_FORMAT_MODULE_MASK) >>
@@ -6094,30 +6551,6 @@ free_mem:
 	return status;
 }
 
-enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
-						   u32 *dump_buf,
-						   u32 num_dumped_dwords,
-						   u32 *results_buf_size)
-{
-	return qed_parse_mcp_trace_dump(p_hwfn,
-					dump_buf,
-					num_dumped_dwords,
-					NULL, results_buf_size);
-}
-
-enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
-					    u32 *dump_buf,
-					    u32 num_dumped_dwords,
-					    char *results_buf)
-{
-	u32 parsed_buf_size;
-
-	return qed_parse_mcp_trace_dump(p_hwfn,
-					dump_buf,
-					num_dumped_dwords,
-					results_buf, &parsed_buf_size);
-}
-
 /* Parses a Reg FIFO dump buffer.
  * If result_buf is not NULL, the Reg FIFO results are printed to it.
  * In any case, the required results buffer size is assigned to
@@ -6130,10 +6563,11 @@ static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 					       char *results_buf,
 					       u32 *parsed_results_bytes)
 {
-	u32 results_offset = 0, param_num_val, num_section_params, num_elements;
 	const char *section_name, *param_name, *param_str_val;
+	u32 param_num_val, num_section_params, num_elements;
 	struct reg_fifo_element *elements;
 	u8 i, j, err_val, vf_val;
+	u32 results_offset = 0;
 	char vf_str[4];
 
 	/* Read global_params section */
@@ -6179,17 +6613,17 @@ static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 			    "raw: 0x%016llx, address: 0x%07x, access: %-5s, pf: %2d, vf: %s, port: %d, privilege: %-3s, protection: %-12s, master: %-4s, errors: ",
 			    elements[i].data,
 			    (u32)GET_FIELD(elements[i].data,
-				      REG_FIFO_ELEMENT_ADDRESS) *
-				      REG_FIFO_ELEMENT_ADDR_FACTOR,
-				      s_access_strs[GET_FIELD(elements[i].data,
+					   REG_FIFO_ELEMENT_ADDRESS) *
+			    REG_FIFO_ELEMENT_ADDR_FACTOR,
+			    s_access_strs[GET_FIELD(elements[i].data,
 						    REG_FIFO_ELEMENT_ACCESS)],
 			    (u32)GET_FIELD(elements[i].data,
-					   REG_FIFO_ELEMENT_PF), vf_str,
+					   REG_FIFO_ELEMENT_PF),
+			    vf_str,
 			    (u32)GET_FIELD(elements[i].data,
-				      REG_FIFO_ELEMENT_PORT),
-				      s_privilege_strs[GET_FIELD(elements[i].
-				      data,
-				      REG_FIFO_ELEMENT_PRIVILEGE)],
+					   REG_FIFO_ELEMENT_PORT),
+			    s_privilege_strs[GET_FIELD(elements[i].data,
+						REG_FIFO_ELEMENT_PRIVILEGE)],
 			    s_protection_strs[GET_FIELD(elements[i].data,
 						REG_FIFO_ELEMENT_PROTECTION)],
 			    s_master_strs[GET_FIELD(elements[i].data,
@@ -6201,18 +6635,18 @@ static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 					 REG_FIFO_ELEMENT_ERROR);
 		     j < ARRAY_SIZE(s_reg_fifo_error_strs);
 		     j++, err_val >>= 1) {
-			if (!(err_val & 0x1))
-				continue;
-			if (err_printed)
+			if (err_val & 0x1) {
+				if (err_printed)
+					results_offset +=
+					    sprintf(qed_get_buf_ptr
+						    (results_buf,
+						     results_offset), ", ");
 				results_offset +=
-					sprintf(qed_get_buf_ptr(results_buf,
-								results_offset),
-						", ");
-			results_offset +=
-				sprintf(qed_get_buf_ptr(results_buf,
-							results_offset), "%s",
-					s_reg_fifo_error_strs[j]);
-			err_printed = true;
+				    sprintf(qed_get_buf_ptr
+					    (results_buf, results_offset), "%s",
+					    s_reg_fifo_error_strs[j]);
+				err_printed = true;
+			}
 		}
 
 		results_offset +=
@@ -6225,31 +6659,140 @@ static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 
 	/* Add 1 for string NULL termination */
 	*parsed_results_bytes = results_offset + 1;
+
 	return DBG_STATUS_OK;
 }
 
-enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
-						  u32 *dump_buf,
-						  u32 num_dumped_dwords,
-						  u32 *results_buf_size)
+static enum dbg_status qed_parse_igu_fifo_element(struct igu_fifo_element
+						  *element, char
+						  *results_buf,
+						  u32 *results_offset,
+						  u32 *parsed_results_bytes)
 {
-	return qed_parse_reg_fifo_dump(p_hwfn,
-				       dump_buf,
-				       num_dumped_dwords,
-				       NULL, results_buf_size);
-}
+	const struct igu_fifo_addr_data *found_addr = NULL;
+	u8 source, err_type, i, is_cleanup;
+	char parsed_addr_data[32];
+	char parsed_wr_data[256];
+	u32 wr_data, prod_cons;
+	bool is_wr_cmd, is_pf;
+	u16 cmd_addr;
+	u64 dword12;
 
-enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
-					   u32 *dump_buf,
-					   u32 num_dumped_dwords,
-					   char *results_buf)
-{
-	u32 parsed_buf_size;
+	/* Dword12 (dword index 1 and 2) contains bits 32..95 of the
+	 * FIFO element.
+	 */
+	dword12 = ((u64)element->dword2 << 32) | element->dword1;
+	is_wr_cmd = GET_FIELD(dword12, IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD);
+	is_pf = GET_FIELD(element->dword0, IGU_FIFO_ELEMENT_DWORD0_IS_PF);
+	cmd_addr = GET_FIELD(element->dword0, IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR);
+	source = GET_FIELD(element->dword0, IGU_FIFO_ELEMENT_DWORD0_SOURCE);
+	err_type = GET_FIELD(element->dword0, IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE);
+
+	if (source >= ARRAY_SIZE(s_igu_fifo_source_strs))
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
+	if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs))
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
 
-	return qed_parse_reg_fifo_dump(p_hwfn,
-				       dump_buf,
-				       num_dumped_dwords,
-				       results_buf, &parsed_buf_size);
+	/* Find address data */
+	for (i = 0; i < ARRAY_SIZE(s_igu_fifo_addr_data) && !found_addr; i++) {
+		const struct igu_fifo_addr_data *curr_addr =
+			&s_igu_fifo_addr_data[i];
+
+		if (cmd_addr >= curr_addr->start_addr && cmd_addr <=
+		    curr_addr->end_addr)
+			found_addr = curr_addr;
+	}
+
+	if (!found_addr)
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+	/* Prepare parsed address data */
+	switch (found_addr->type) {
+	case IGU_ADDR_TYPE_MSIX_MEM:
+		sprintf(parsed_addr_data, " vector_num = 0x%x", cmd_addr / 2);
+		break;
+	case IGU_ADDR_TYPE_WRITE_INT_ACK:
+	case IGU_ADDR_TYPE_WRITE_PROD_UPDATE:
+		sprintf(parsed_addr_data,
+			" SB = 0x%x", cmd_addr - found_addr->start_addr);
+		break;
+	default:
+		parsed_addr_data[0] = '\0';
+	}
+
+	if (!is_wr_cmd) {
+		parsed_wr_data[0] = '\0';
+		goto out;
+	}
+
+	/* Prepare parsed write data */
+	wr_data = GET_FIELD(dword12, IGU_FIFO_ELEMENT_DWORD12_WR_DATA);
+	prod_cons = GET_FIELD(wr_data, IGU_FIFO_WR_DATA_PROD_CONS);
+	is_cleanup = GET_FIELD(wr_data, IGU_FIFO_WR_DATA_CMD_TYPE);
+
+	if (source == IGU_SRC_ATTN) {
+		sprintf(parsed_wr_data, "prod: 0x%x, ", prod_cons);
+	} else {
+		if (is_cleanup) {
+			u8 cleanup_val, cleanup_type;
+
+			cleanup_val =
+				GET_FIELD(wr_data,
+					  IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL);
+			cleanup_type =
+			    GET_FIELD(wr_data,
+				      IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE);
+
+			sprintf(parsed_wr_data,
+				"cmd_type: cleanup, cleanup_val: %s, cleanup_type : %d, ",
+				cleanup_val ? "set" : "clear",
+				cleanup_type);
+		} else {
+			u8 update_flag, en_dis_int_for_sb, segment;
+			u8 timer_mask;
+
+			update_flag = GET_FIELD(wr_data,
+						IGU_FIFO_WR_DATA_UPDATE_FLAG);
+			en_dis_int_for_sb =
+				GET_FIELD(wr_data,
+					  IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB);
+			segment = GET_FIELD(wr_data,
+					    IGU_FIFO_WR_DATA_SEGMENT);
+			timer_mask = GET_FIELD(wr_data,
+					       IGU_FIFO_WR_DATA_TIMER_MASK);
+
+			sprintf(parsed_wr_data,
+				"cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb : %s, segment : %s, timer_mask = %d, ",
+				prod_cons,
+				update_flag ? "update" : "nop",
+				en_dis_int_for_sb
+				? (en_dis_int_for_sb == 1 ? "disable" : "nop")
+				: "enable",
+				segment ? "attn" : "regular",
+				timer_mask);
+		}
+	}
+out:
+	/* Add parsed element to parsed buffer */
+	*results_offset += sprintf(qed_get_buf_ptr(results_buf,
+						   *results_offset),
+				   "raw: 0x%01x%08x%08x, %s: %d, source : %s, type : %s, cmd_addr : 0x%x(%s%s), %serror: %s\n",
+				   element->dword2, element->dword1,
+				   element->dword0,
+				   is_pf ? "pf" : "vf",
+				   GET_FIELD(element->dword0,
+					     IGU_FIFO_ELEMENT_DWORD0_FID),
+				   s_igu_fifo_source_strs[source],
+				   is_wr_cmd ? "wr" : "rd",
+				   cmd_addr,
+				   (!is_pf && found_addr->vf_desc)
+				   ? found_addr->vf_desc
+				   : found_addr->desc,
+				   parsed_addr_data,
+				   parsed_wr_data,
+				   s_igu_fifo_error_strs[err_type]);
+
+	return DBG_STATUS_OK;
 }
 
 /* Parses an IGU FIFO dump buffer.
@@ -6264,12 +6807,12 @@ static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 					       char *results_buf,
 					       u32 *parsed_results_bytes)
 {
-	u32 results_offset = 0, param_num_val, num_section_params, num_elements;
 	const char *section_name, *param_name, *param_str_val;
+	u32 param_num_val, num_section_params, num_elements;
 	struct igu_fifo_element *elements;
-	char parsed_addr_data[32];
-	char parsed_wr_data[256];
-	u8 i, j;
+	enum dbg_status status;
+	u32 results_offset = 0;
+	u8 i;
 
 	/* Read global_params section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6298,118 +6841,12 @@ static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 
 	/* Decode elements */
 	for (i = 0; i < num_elements; i++) {
-		/* dword12 (dword index 1 and 2) contains bits 32..95 of the
-		 * FIFO element.
-		 */
-		u64 dword12 =
-		    ((u64)elements[i].dword2 << 32) | elements[i].dword1;
-		bool is_wr_cmd = GET_FIELD(dword12,
-					   IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD);
-		bool is_pf = GET_FIELD(elements[i].dword0,
-				       IGU_FIFO_ELEMENT_DWORD0_IS_PF);
-		u16 cmd_addr = GET_FIELD(elements[i].dword0,
-					 IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR);
-		u8 source = GET_FIELD(elements[i].dword0,
-				      IGU_FIFO_ELEMENT_DWORD0_SOURCE);
-		u8 err_type = GET_FIELD(elements[i].dword0,
-					IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE);
-		const struct igu_fifo_addr_data *addr_data = NULL;
-
-		if (source >= ARRAY_SIZE(s_igu_fifo_source_strs))
-			return DBG_STATUS_IGU_FIFO_BAD_DATA;
-		if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs))
-			return DBG_STATUS_IGU_FIFO_BAD_DATA;
-
-		/* Find address data */
-		for (j = 0; j < ARRAY_SIZE(s_igu_fifo_addr_data) && !addr_data;
-		     j++)
-			if (cmd_addr >= s_igu_fifo_addr_data[j].start_addr &&
-			    cmd_addr <= s_igu_fifo_addr_data[j].end_addr)
-				addr_data = &s_igu_fifo_addr_data[j];
-		if (!addr_data)
-			return DBG_STATUS_IGU_FIFO_BAD_DATA;
-
-		/* Prepare parsed address data */
-		switch (addr_data->type) {
-		case IGU_ADDR_TYPE_MSIX_MEM:
-			sprintf(parsed_addr_data,
-				" vector_num=0x%x", cmd_addr / 2);
-			break;
-		case IGU_ADDR_TYPE_WRITE_INT_ACK:
-		case IGU_ADDR_TYPE_WRITE_PROD_UPDATE:
-			sprintf(parsed_addr_data,
-				" SB=0x%x", cmd_addr - addr_data->start_addr);
-			break;
-		default:
-			parsed_addr_data[0] = '\0';
-		}
-
-		/* Prepare parsed write data */
-		if (is_wr_cmd) {
-			u32 wr_data = GET_FIELD(dword12,
-					IGU_FIFO_ELEMENT_DWORD12_WR_DATA);
-			u32 prod_cons = GET_FIELD(wr_data,
-						  IGU_FIFO_WR_DATA_PROD_CONS);
-			u8 is_cleanup = GET_FIELD(wr_data,
-						  IGU_FIFO_WR_DATA_CMD_TYPE);
-
-			if (source == IGU_SRC_ATTN) {
-				sprintf(parsed_wr_data,
-					"prod: 0x%x, ", prod_cons);
-			} else {
-				if (is_cleanup) {
-					u8 cleanup_val = GET_FIELD(wr_data,
-								   IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL);
-					u8 cleanup_type = GET_FIELD(wr_data,
-								    IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE);
-
-					sprintf(parsed_wr_data,
-						"cmd_type: cleanup, cleanup_val: %s, cleanup_type: %d, ",
-						cleanup_val ? "set" : "clear",
-						cleanup_type);
-				} else {
-					u8 update_flag = GET_FIELD(wr_data,
-								   IGU_FIFO_WR_DATA_UPDATE_FLAG);
-					u8 en_dis_int_for_sb =
-					    GET_FIELD(wr_data,
-						      IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB);
-					u8 segment = GET_FIELD(wr_data,
-							       IGU_FIFO_WR_DATA_SEGMENT);
-					u8 timer_mask = GET_FIELD(wr_data,
-								  IGU_FIFO_WR_DATA_TIMER_MASK);
-
-					sprintf(parsed_wr_data,
-						"cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb: %s, segment: %s, timer_mask=%d, ",
-						prod_cons,
-						update_flag ? "update" : "nop",
-						en_dis_int_for_sb
-						? (en_dis_int_for_sb ==
-						   1 ? "disable" : "nop") :
-						"enable",
-						segment ? "attn" : "regular",
-						timer_mask);
-				}
-			}
-		} else {
-			parsed_wr_data[0] = '\0';
-		}
-
-		/* Add parsed element to parsed buffer */
-		results_offset +=
-		    sprintf(qed_get_buf_ptr(results_buf,
-					    results_offset),
-			    "raw: 0x%01x%08x%08x, %s: %d, source: %s, type: %s, cmd_addr: 0x%x (%s%s), %serror: %s\n",
-			    elements[i].dword2, elements[i].dword1,
-			    elements[i].dword0,
-			    is_pf ? "pf" : "vf",
-			    GET_FIELD(elements[i].dword0,
-				      IGU_FIFO_ELEMENT_DWORD0_FID),
-			    s_igu_fifo_source_strs[source],
-			    is_wr_cmd ? "wr" : "rd", cmd_addr,
-			    (!is_pf && addr_data->vf_desc)
-			    ? addr_data->vf_desc : addr_data->desc,
-			    parsed_addr_data, parsed_wr_data,
-			    s_igu_fifo_error_strs[err_type]);
+		status = qed_parse_igu_fifo_element(&elements[i],
+						    results_buf,
+						    &results_offset,
+						    parsed_results_bytes);
+		if (status != DBG_STATUS_OK)
+			return status;
 	}
 
 	results_offset += sprintf(qed_get_buf_ptr(results_buf,
@@ -6418,31 +6855,8 @@ static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 
 	/* Add 1 for string NULL termination */
 	*parsed_results_bytes = results_offset + 1;
-	return DBG_STATUS_OK;
-}
-
-enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
-						  u32 *dump_buf,
-						  u32 num_dumped_dwords,
-						  u32 *results_buf_size)
-{
-	return qed_parse_igu_fifo_dump(p_hwfn,
-				       dump_buf,
-				       num_dumped_dwords,
-				       NULL, results_buf_size);
-}
-
-enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
-					   u32 *dump_buf,
-					   u32 num_dumped_dwords,
-					   char *results_buf)
-{
-	u32 parsed_buf_size;
 
-	return qed_parse_igu_fifo_dump(p_hwfn,
-				       dump_buf,
-				       num_dumped_dwords,
-				       results_buf, &parsed_buf_size);
+	return DBG_STATUS_OK;
 }
 
 static enum dbg_status
@@ -6452,9 +6866,10 @@ qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
 				   char *results_buf,
 				   u32 *parsed_results_bytes)
 {
-	u32 results_offset = 0, param_num_val, num_section_params, num_elements;
 	const char *section_name, *param_name, *param_str_val;
+	u32 param_num_val, num_section_params, num_elements;
 	struct protection_override_element *elements;
+	u32 results_offset = 0;
 	u8 i;
 
 	/* Read global_params section */
@@ -6477,7 +6892,7 @@ qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
 				   &param_name, &param_str_val, &param_num_val);
 	if (strcmp(param_name, "size"))
 		return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
-	if (param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS != 0)
+	if (param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS)
 		return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
 	num_elements = param_num_val / PROTECTION_OVERRIDE_ELEMENT_DWORDS;
 	elements = (struct protection_override_element *)dump_buf;
@@ -6486,7 +6901,7 @@ qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
 	for (i = 0; i < num_elements; i++) {
 		u32 address = GET_FIELD(elements[i].data,
 					PROTECTION_OVERRIDE_ELEMENT_ADDRESS) *
-					PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR;
+			      PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR;
 
 		results_offset +=
 		    sprintf(qed_get_buf_ptr(results_buf,
@@ -6512,33 +6927,8 @@ qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
 
 	/* Add 1 for string NULL termination */
 	*parsed_results_bytes = results_offset + 1;
-	return DBG_STATUS_OK;
-}
-
-enum dbg_status
-qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
-					     u32 *dump_buf,
-					     u32 num_dumped_dwords,
-					     u32 *results_buf_size)
-{
-	return qed_parse_protection_override_dump(p_hwfn,
-						  dump_buf,
-						  num_dumped_dwords,
-						  NULL, results_buf_size);
-}
 
-enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
-						      u32 *dump_buf,
-						      u32 num_dumped_dwords,
-						      char *results_buf)
-{
-	u32 parsed_buf_size;
-
-	return qed_parse_protection_override_dump(p_hwfn,
-						  dump_buf,
-						  num_dumped_dwords,
-						  results_buf,
-						  &parsed_buf_size);
+	return DBG_STATUS_OK;
 }
 
 /* Parses a FW Asserts dump buffer.
@@ -6553,7 +6943,7 @@ static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 						 char *results_buf,
 						 u32 *parsed_results_bytes)
 {
-	u32 results_offset = 0, num_section_params, param_num_val, i;
+	u32 num_section_params, param_num_val, i, results_offset = 0;
 	const char *param_name, *param_str_val, *section_name;
 	bool last_section_found = false;
 
@@ -6569,54 +6959,216 @@ static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 	dump_buf += qed_print_section_params(dump_buf,
 					     num_section_params,
 					     results_buf, &results_offset);
-	while (!last_section_found) {
-		const char *storm_letter = NULL;
-		u32 storm_dump_size = 0;
 
+	while (!last_section_found) {
 		dump_buf += qed_read_section_hdr(dump_buf,
 						 &section_name,
 						 &num_section_params);
-		if (!strcmp(section_name, "last")) {
-			last_section_found = true;
-			continue;
-		} else if (strcmp(section_name, "fw_asserts")) {
-			return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
-		}
+		if (!strcmp(section_name, "fw_asserts")) {
+			/* Extract params */
+			const char *storm_letter = NULL;
+			u32 storm_dump_size = 0;
+
+			for (i = 0; i < num_section_params; i++) {
+				dump_buf += qed_read_param(dump_buf,
+							   &param_name,
+							   &param_str_val,
+							   &param_num_val);
+				if (!strcmp(param_name, "storm"))
+					storm_letter = param_str_val;
+				else if (!strcmp(param_name, "size"))
+					storm_dump_size = param_num_val;
+				else
+					return
+					    DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+			}
 
-		/* Extract params */
-		for (i = 0; i < num_section_params; i++) {
-			dump_buf += qed_read_param(dump_buf,
-						   &param_name,
-						   &param_str_val,
-						   &param_num_val);
-			if (!strcmp(param_name, "storm"))
-				storm_letter = param_str_val;
-			else if (!strcmp(param_name, "size"))
-				storm_dump_size = param_num_val;
-			else
+			if (!storm_letter || !storm_dump_size)
 				return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
-		}
-
-		if (!storm_letter || !storm_dump_size)
-			return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
 
-		/* Print data */
-		results_offset += sprintf(qed_get_buf_ptr(results_buf,
-							  results_offset),
-					  "\n%sSTORM_ASSERT: size=%d\n",
-					  storm_letter, storm_dump_size);
-		for (i = 0; i < storm_dump_size; i++, dump_buf++)
+			/* Print data */
 			results_offset +=
 			    sprintf(qed_get_buf_ptr(results_buf,
 						    results_offset),
-				    "%08x\n", *dump_buf);
+				    "\n%sSTORM_ASSERT: size=%d\n",
+				    storm_letter, storm_dump_size);
+			for (i = 0; i < storm_dump_size; i++, dump_buf++)
+				results_offset +=
+				    sprintf(qed_get_buf_ptr(results_buf,
+							    results_offset),
+					    "%08x\n", *dump_buf);
+		} else if (!strcmp(section_name, "last")) {
+			last_section_found = true;
+		} else {
+			return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+		}
 	}
 
 	/* Add 1 for string NULL termination */
 	*parsed_results_bytes = results_offset + 1;
+
+	return DBG_STATUS_OK;
+}
+
+/***************************** Public Functions *******************************/
+
+enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
+{
+	struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr;
+	u8 buf_id;
+
+	/* Convert binary data to debug arrays */
+	for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) {
+		s_user_dbg_arrays[buf_id].ptr =
+			(u32 *)(bin_ptr + buf_array[buf_id].offset);
+		s_user_dbg_arrays[buf_id].size_in_dwords =
+			BYTES_TO_DWORDS(buf_array[buf_id].length);
+	}
+
 	return DBG_STATUS_OK;
 }
 
+const char *qed_dbg_get_status_str(enum dbg_status status)
+{
+	return (status <
+		MAX_DBG_STATUS) ? s_status_str[status] : "Invalid debug status";
+}
+
+enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	u32 num_errors, num_warnings;
+
+	return qed_parse_idle_chk_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL,
+				       results_buf_size,
+				       &num_errors, &num_warnings);
+}
+
+enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf,
+					   u32 *num_errors, u32 *num_warnings)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_idle_chk_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf,
+				       &parsed_buf_size,
+				       num_errors, num_warnings);
+}
+
+void qed_dbg_mcp_trace_set_meta_data(u32 *data, u32 size)
+{
+	s_mcp_trace_meta.ptr = data;
+	s_mcp_trace_meta.size_in_dwords = size;
+}
+
+enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+						   u32 *dump_buf,
+						   u32 num_dumped_dwords,
+						   u32 *results_buf_size)
+{
+	return qed_parse_mcp_trace_dump(p_hwfn,
+					dump_buf,
+					num_dumped_dwords,
+					NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+					    u32 *dump_buf,
+					    u32 num_dumped_dwords,
+					    char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_mcp_trace_dump(p_hwfn,
+					dump_buf,
+					num_dumped_dwords,
+					results_buf, &parsed_buf_size);
+}
+
+enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	return qed_parse_reg_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_reg_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf, &parsed_buf_size);
+}
+
+enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	return qed_parse_igu_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_igu_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf, &parsed_buf_size);
+}
+
+enum dbg_status
+qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+					     u32 *dump_buf,
+					     u32 num_dumped_dwords,
+					     u32 *results_buf_size)
+{
+	return qed_parse_protection_override_dump(p_hwfn,
+						  dump_buf,
+						  num_dumped_dwords,
+						  NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+						      u32 *dump_buf,
+						      u32 num_dumped_dwords,
+						      char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_protection_override_dump(p_hwfn,
+						  dump_buf,
+						  num_dumped_dwords,
+						  results_buf,
+						  &parsed_buf_size);
+}
+
 enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
 						    u32 *dump_buf,
 						    u32 num_dumped_dwords,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h
index f872d7324814..ea1cc8eaa125 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h
@@ -20,6 +20,9 @@ enum qed_dbg_features {
 	DBG_FEATURE_NUM
 };
 
+/* Forward Declaration */
+struct qed_dev;
+
 int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes);
 int qed_dbg_grc_size(struct qed_dev *cdev);
 int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 858a57a73589..eedf79a026a2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -346,7 +346,7 @@ struct xstorm_core_conn_ag_ctx {
 	u8 byte13;
 	u8 byte14;
 	u8 byte15;
-	u8 byte16;
+	u8 e5_reserved;
 	__le16 word11;
 	__le32 reg10;
 	__le32 reg11;
@@ -368,85 +368,85 @@ struct tstorm_core_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT	2
-#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT	3
-#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT	4
-#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT	5
-#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK     0x1	/* exist_in_qm0 */
+#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT    0
+#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK     0x1	/* exist_in_qm1 */
+#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT    1
+#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK     0x1	/* bit2 */
+#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT    2
+#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK     0x1	/* bit3 */
+#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT    3
+#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK     0x1	/* bit4 */
+#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT    4
+#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK     0x1	/* bit5 */
+#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT    5
+#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK      0x3	/* timer0cf */
+#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT     6
 	u8 flags1;
-#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	0
-#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	2
-#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT	4
-#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK      0x3	/* timer1cf */
+#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT     0
+#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK      0x3	/* timer2cf */
+#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT     2
+#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK      0x3	/* timer_stop_all */
+#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT     4
+#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK      0x3	/* cf4 */
+#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT     6
 	u8 flags2;
-#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT	0
-#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT	2
-#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT	4
-#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK      0x3	/* cf5 */
+#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT     0
+#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK      0x3	/* cf6 */
+#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT     2
+#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK      0x3	/* cf7 */
+#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT     4
+#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK      0x3	/* cf8 */
+#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT     6
 	u8 flags3;
-#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT	0
-#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK	0x3
-#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT	2
-#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	4
-#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	5
-#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	6
-#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT	7
+#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK      0x3	/* cf9 */
+#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT     0
+#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK     0x3	/* cf10 */
+#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT    2
+#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK    0x1	/* cf0en */
+#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT   4
+#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK    0x1	/* cf1en */
+#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT   5
+#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK    0x1	/* cf2en */
+#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT   6
+#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK    0x1	/* cf3en */
+#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT   7
 	u8 flags4;
-#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT	0
-#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT	1
-#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT	2
-#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT	3
-#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT	4
-#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT	5
-#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT	6
-#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK    0x1	/* cf4en */
+#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT   0
+#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK    0x1	/* cf5en */
+#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT   1
+#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK    0x1	/* cf6en */
+#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT   2
+#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK    0x1	/* cf7en */
+#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT   3
+#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK    0x1	/* cf8en */
+#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT   4
+#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK    0x1	/* cf9en */
+#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT   5
+#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK   0x1	/* cf10en */
+#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT  6
+#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK  0x1	/* rule0en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7
 	u8 flags5;
-#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK  0x1	/* rule1en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0
+#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK  0x1	/* rule2en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1
+#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK  0x1	/* rule3en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2
+#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK  0x1	/* rule4en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3
+#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK  0x1	/* rule5en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4
+#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK  0x1	/* rule6en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5
+#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK  0x1	/* rule7en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6
+#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK  0x1	/* rule8en */
+#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7
 	__le32 reg0;
 	__le32 reg1;
 	__le32 reg2;
@@ -681,7 +681,9 @@ struct core_rx_fast_path_cqe {
 	__le16 packet_length;
 	__le16 vlan;
 	struct core_rx_cqe_opaque_data opaque_data;
-	__le32 reserved[4];
+	struct parsing_err_flags err_flags;
+	__le16 reserved0;
+	__le32 reserved1[3];
 };
 
 struct core_rx_gsi_offload_cqe {
@@ -692,7 +694,7 @@ struct core_rx_gsi_offload_cqe {
 	__le16 vlan;
 	__le32 src_mac_addrhi;
 	__le16 src_mac_addrlo;
-	u8 reserved1[2];
+	__le16 qp_id;
 	__le32 gid_dst[4];
 };
 
@@ -774,15 +776,15 @@ struct core_tx_bd {
 	__le16 bitfield1;
 #define CORE_TX_BD_L4_HDR_OFFSET_W_MASK	0x3FFF
 #define CORE_TX_BD_L4_HDR_OFFSET_W_SHIFT 0
-#define CORE_TX_BD_TX_DST_MASK	0x1
-#define CORE_TX_BD_TX_DST_SHIFT	14
-#define CORE_TX_BD_RESERVED_MASK         0x1
-#define CORE_TX_BD_RESERVED_SHIFT        15
+#define CORE_TX_BD_TX_DST_MASK		0x3
+#define CORE_TX_BD_TX_DST_SHIFT		14
 };
 
 enum core_tx_dest {
 	CORE_TX_DEST_NW,
 	CORE_TX_DEST_LB,
+	CORE_TX_DEST_RESERVED,
+	CORE_TX_DEST_DROP,
 	MAX_CORE_TX_DEST
 };
 
@@ -804,12 +806,12 @@ struct core_tx_stop_ramrod_data {
 	__le32 reserved0[2];
 };
 
-enum dcb_dhcp_update_flag {
-	DONT_UPDATE_DCB_DHCP,
+enum dcb_dscp_update_mode {
+	DONT_UPDATE_DCB_DSCP,
 	UPDATE_DCB,
 	UPDATE_DSCP,
 	UPDATE_DCB_DSCP,
-	MAX_DCB_DHCP_UPDATE_FLAG
+	MAX_DCB_DSCP_UPDATE_MODE
 };
 
 struct eth_mstorm_per_pf_stat {
@@ -917,6 +919,14 @@ struct hsi_fp_ver_struct {
 	u8 major_ver_arr[2];
 };
 
+enum iwarp_ll2_tx_queues {
+	IWARP_LL2_IN_ORDER_TX_QUEUE =			1,
+	IWARP_LL2_ALIGNED_TX_QUEUE,
+	IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE,
+	IWARP_LL2_ERROR,
+	MAX_IWARP_LL2_TX_QUEUES
+};
+
 /* Mstorm non-triggering VF zone */
 enum malicious_vf_error_id {
 	MALICIOUS_VF_NO_ERROR,
@@ -960,7 +970,7 @@ enum personality_type {
 	PERSONALITY_ISCSI,
 	PERSONALITY_FCOE,
 	PERSONALITY_RDMA_AND_ETH,
-	PERSONALITY_RESERVED3,
+	PERSONALITY_RDMA,
 	PERSONALITY_CORE,
 	PERSONALITY_ETH,
 	PERSONALITY_RESERVED4,
@@ -971,16 +981,12 @@ enum personality_type {
 struct pf_start_tunnel_config {
 	u8 set_vxlan_udp_port_flg;
 	u8 set_geneve_udp_port_flg;
-	u8 tx_enable_vxlan;
-	u8 tx_enable_l2geneve;
-	u8 tx_enable_ipgeneve;
-	u8 tx_enable_l2gre;
-	u8 tx_enable_ipgre;
 	u8 tunnel_clss_vxlan;
 	u8 tunnel_clss_l2geneve;
 	u8 tunnel_clss_ipgeneve;
 	u8 tunnel_clss_l2gre;
 	u8 tunnel_clss_ipgre;
+	u8 reserved;
 	__le16 vxlan_udp_port;
 	__le16 geneve_udp_port;
 };
@@ -990,6 +996,7 @@ struct pf_start_ramrod_data {
 	struct regpair event_ring_pbl_addr;
 	struct regpair consolid_q_pbl_addr;
 	struct pf_start_tunnel_config tunnel_config;
+	__le32 reserved;
 	__le16 event_ring_sb_id;
 	u8 base_vf_id;
 	u8 num_vfs;
@@ -1007,7 +1014,6 @@ struct pf_start_ramrod_data {
 	u8 pri_map_valid;
 	__le32 outer_tag;
 	struct hsi_fp_ver_struct hsi_fp_ver;
-
 };
 
 struct protocol_dcb_data {
@@ -1023,14 +1029,8 @@ struct pf_update_tunnel_config {
 	u8 update_rx_pf_clss;
 	u8 update_rx_def_ucast_clss;
 	u8 update_rx_def_non_ucast_clss;
-	u8 update_tx_pf_clss;
 	u8 set_vxlan_udp_port_flg;
 	u8 set_geneve_udp_port_flg;
-	u8 tx_enable_vxlan;
-	u8 tx_enable_l2geneve;
-	u8 tx_enable_ipgeneve;
-	u8 tx_enable_l2gre;
-	u8 tx_enable_ipgre;
 	u8 tunnel_clss_vxlan;
 	u8 tunnel_clss_l2geneve;
 	u8 tunnel_clss_ipgeneve;
@@ -1038,17 +1038,17 @@ struct pf_update_tunnel_config {
 	u8 tunnel_clss_ipgre;
 	__le16 vxlan_udp_port;
 	__le16 geneve_udp_port;
-	__le16 reserved[2];
+	__le16 reserved;
 };
 
 struct pf_update_ramrod_data {
 	u8 pf_id;
-	u8 update_eth_dcb_data_flag;
-	u8 update_fcoe_dcb_data_flag;
-	u8 update_iscsi_dcb_data_flag;
-	u8 update_roce_dcb_data_flag;
-	u8 update_rroce_dcb_data_flag;
-	u8 update_iwarp_dcb_data_flag;
+	u8 update_eth_dcb_data_mode;
+	u8 update_fcoe_dcb_data_mode;
+	u8 update_iscsi_dcb_data_mode;
+	u8 update_roce_dcb_data_mode;
+	u8 update_rroce_dcb_data_mode;
+	u8 update_iwarp_dcb_data_mode;
 	u8 update_mf_vlan_flag;
 	struct protocol_dcb_data eth_dcb_data;
 	struct protocol_dcb_data fcoe_dcb_data;
@@ -1127,7 +1127,7 @@ struct tstorm_per_port_stat {
 	struct regpair iscsi_irregular_pkt;
 	struct regpair fcoe_irregular_pkt;
 	struct regpair roce_irregular_pkt;
-	struct regpair reserved;
+	struct regpair iwarp_irregular_pkt;
 	struct regpair eth_irregular_pkt;
 	struct regpair reserved1;
 	struct regpair preroce_irregular_pkt;
@@ -1326,6 +1326,87 @@ enum dmae_cmd_src_enum {
 	MAX_DMAE_CMD_SRC_ENUM
 };
 
+struct mstorm_core_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define MSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
+	u8 flags1;
+#define MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	0
+#define MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	1
+#define MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	2
+#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
+	__le16 word0;
+	__le16 word1;
+	__le32 reg0;
+	__le32 reg1;
+};
+
+struct ystorm_core_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define YSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
+	u8 flags1;
+#define YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	0
+#define YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	1
+#define YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	2
+#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+	__le32 reg0;
+	__le32 reg1;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le32 reg2;
+	__le32 reg3;
+};
+
 /* IGU cleanup command */
 struct igu_cleanup {
 	__le32 sb_id_and_flags;
@@ -1389,44 +1470,6 @@ struct igu_msix_vector {
 #define IGU_MSIX_VECTOR_RESERVED1_MASK		0xFF
 #define IGU_MSIX_VECTOR_RESERVED1_SHIFT		24
 };
-
-struct mstorm_core_conn_ag_ctx {
-	u8 byte0;
-	u8 byte1;
-	u8 flags0;
-#define MSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define MSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define MSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
-#define MSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
-#define MSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
-	u8 flags1;
-#define MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	0
-#define MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	1
-#define MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	2
-#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
-	__le16 word0;
-	__le16 word1;
-	__le32 reg0;
-	__le32 reg1;
-};
-
 /* per encapsulation type enabling flags */
 struct prs_reg_encapsulation_type_en {
 	u8 flags;
@@ -1541,50 +1584,6 @@ struct sdm_op_gen {
 #define SDM_OP_GEN_RESERVED_SHIFT	20
 };
 
-struct ystorm_core_conn_ag_ctx {
-	u8 byte0;
-	u8 byte1;
-	u8 flags0;
-#define YSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define YSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define YSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
-#define YSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
-#define YSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
-	u8 flags1;
-#define YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	0
-#define YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	1
-#define YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	2
-#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
-	u8 byte2;
-	u8 byte3;
-	__le16 word0;
-	__le32 reg0;
-	__le32 reg1;
-	__le16 word1;
-	__le16 word2;
-	__le16 word3;
-	__le16 word4;
-	__le32 reg2;
-	__le32 reg3;
-};
-
 /****************************************/
 /* Debug Tools HSI constants and macros */
 /****************************************/
@@ -1643,6 +1642,8 @@ enum block_addr {
 	GRCBASE_MULD = 0x4e0000,
 	GRCBASE_YULD = 0x4c8000,
 	GRCBASE_XYLD = 0x4c0000,
+	GRCBASE_PTLD = 0x590000,
+	GRCBASE_YPLD = 0x5b0000,
 	GRCBASE_PRM = 0x230000,
 	GRCBASE_PBF_PB1 = 0xda0000,
 	GRCBASE_PBF_PB2 = 0xda4000,
@@ -1656,6 +1657,10 @@ enum block_addr {
 	GRCBASE_TCFC = 0x2d0000,
 	GRCBASE_IGU = 0x180000,
 	GRCBASE_CAU = 0x1c0000,
+	GRCBASE_RGFS = 0xf00000,
+	GRCBASE_RGSRC = 0x320000,
+	GRCBASE_TGFS = 0xd00000,
+	GRCBASE_TGSRC = 0x322000,
 	GRCBASE_UMAC = 0x51000,
 	GRCBASE_XMAC = 0x210000,
 	GRCBASE_DBG = 0x10000,
@@ -1669,10 +1674,6 @@ enum block_addr {
 	GRCBASE_PHY_PCIE = 0x620000,
 	GRCBASE_LED = 0x6b8000,
 	GRCBASE_AVS_WRAP = 0x6b0000,
-	GRCBASE_RGFS = 0x19d0000,
-	GRCBASE_TGFS = 0x19e0000,
-	GRCBASE_PTLD = 0x19f0000,
-	GRCBASE_YPLD = 0x1a10000,
 	GRCBASE_MISC_AEU = 0x8000,
 	GRCBASE_BAR0_MAP = 0x1c00000,
 	MAX_BLOCK_ADDR
@@ -1732,6 +1733,8 @@ enum block_id {
 	BLOCK_MULD,
 	BLOCK_YULD,
 	BLOCK_XYLD,
+	BLOCK_PTLD,
+	BLOCK_YPLD,
 	BLOCK_PRM,
 	BLOCK_PBF_PB1,
 	BLOCK_PBF_PB2,
@@ -1745,6 +1748,10 @@ enum block_id {
 	BLOCK_TCFC,
 	BLOCK_IGU,
 	BLOCK_CAU,
+	BLOCK_RGFS,
+	BLOCK_RGSRC,
+	BLOCK_TGFS,
+	BLOCK_TGSRC,
 	BLOCK_UMAC,
 	BLOCK_XMAC,
 	BLOCK_DBG,
@@ -1758,10 +1765,6 @@ enum block_id {
 	BLOCK_PHY_PCIE,
 	BLOCK_LED,
 	BLOCK_AVS_WRAP,
-	BLOCK_RGFS,
-	BLOCK_TGFS,
-	BLOCK_PTLD,
-	BLOCK_YPLD,
 	BLOCK_MISC_AEU,
 	BLOCK_BAR0_MAP,
 	MAX_BLOCK_ID
@@ -1780,6 +1783,10 @@ enum bin_dbg_buffer_type {
 	BIN_BUF_DBG_ATTN_REGS,
 	BIN_BUF_DBG_ATTN_INDEXES,
 	BIN_BUF_DBG_ATTN_NAME_OFFSETS,
+	BIN_BUF_DBG_BUS_BLOCKS,
+	BIN_BUF_DBG_BUS_LINES,
+	BIN_BUF_DBG_BUS_BLOCKS_USER_DATA,
+	BIN_BUF_DBG_BUS_LINE_NAME_OFFSETS,
 	BIN_BUF_DBG_PARSING_STRINGS,
 	MAX_BIN_DBG_BUFFER_TYPE
 };
@@ -1862,6 +1869,29 @@ enum dbg_attn_type {
 	MAX_DBG_ATTN_TYPE
 };
 
+struct dbg_bus_block {
+	u8 num_of_lines;
+	u8 has_latency_events;
+	__le16 lines_offset;
+};
+
+struct dbg_bus_block_user_data {
+	u8 num_of_lines;
+	u8 has_latency_events;
+	__le16 names_offset;
+};
+
+struct dbg_bus_line {
+	u8 data;
+#define DBG_BUS_LINE_NUM_OF_GROUPS_MASK  0xF
+#define DBG_BUS_LINE_NUM_OF_GROUPS_SHIFT 0
+#define DBG_BUS_LINE_IS_256B_MASK        0x1
+#define DBG_BUS_LINE_IS_256B_SHIFT       4
+#define DBG_BUS_LINE_RESERVED_MASK       0x7
+#define DBG_BUS_LINE_RESERVED_SHIFT      5
+	u8 group_sizes;
+};
+
 /* condition header for registers dump */
 struct dbg_dump_cond_hdr {
 	struct dbg_mode_hdr mode; /* Mode header */
@@ -1879,17 +1909,21 @@ struct dbg_dump_mem {
 	__le32 dword1;
 #define DBG_DUMP_MEM_LENGTH_MASK        0xFFFFFF
 #define DBG_DUMP_MEM_LENGTH_SHIFT       0
-#define DBG_DUMP_MEM_RESERVED_MASK      0xFF
-#define DBG_DUMP_MEM_RESERVED_SHIFT     24
+#define DBG_DUMP_MEM_WIDE_BUS_MASK      0x1
+#define DBG_DUMP_MEM_WIDE_BUS_SHIFT     24
+#define DBG_DUMP_MEM_RESERVED_MASK      0x7F
+#define DBG_DUMP_MEM_RESERVED_SHIFT     25
 };
 
 /* register data for registers dump */
 struct dbg_dump_reg {
 	__le32 data;
-#define DBG_DUMP_REG_ADDRESS_MASK  0xFFFFFF /* register address (in dwords) */
+#define DBG_DUMP_REG_ADDRESS_MASK 0x7FFFFF /* register address (in dwords) */
 #define DBG_DUMP_REG_ADDRESS_SHIFT 0
-#define DBG_DUMP_REG_LENGTH_MASK   0xFF /* register size (in dwords) */
-#define DBG_DUMP_REG_LENGTH_SHIFT  24
+#define DBG_DUMP_REG_WIDE_BUS_MASK 0x1 /* indicates register is wide-bus */
+#define DBG_DUMP_REG_WIDE_BUS_SHIFT 23
+#define DBG_DUMP_REG_LENGTH_MASK  0xFF /* register size (in dwords) */
+#define DBG_DUMP_REG_LENGTH_SHIFT 24
 };
 
 /* split header for registers dump */
@@ -1910,20 +1944,24 @@ struct dbg_idle_chk_cond_hdr {
 /* Idle Check condition register */
 struct dbg_idle_chk_cond_reg {
 	__le32 data;
-#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK   0xFFFFFF
+#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK   0x7FFFFF
 #define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT  0
+#define DBG_IDLE_CHK_COND_REG_WIDE_BUS_MASK  0x1
+#define DBG_IDLE_CHK_COND_REG_WIDE_BUS_SHIFT 23
 #define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK  0xFF
 #define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT 24
-	__le16 num_entries; /* number of registers entries to check */
-	u8 entry_size; /* size of registers entry (in dwords) */
-	u8 start_entry; /* index of the first entry to check */
+	__le16 num_entries;
+	u8 entry_size;
+	u8 start_entry;
 };
 
 /* Idle Check info register */
 struct dbg_idle_chk_info_reg {
 	__le32 data;
-#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK   0xFFFFFF
+#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK   0x7FFFFF
 #define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT  0
+#define DBG_IDLE_CHK_INFO_REG_WIDE_BUS_MASK  0x1
+#define DBG_IDLE_CHK_INFO_REG_WIDE_BUS_SHIFT 23
 #define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK  0xFF
 #define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT 24
 	__le16 size; /* register size in dwords */
@@ -1996,15 +2034,17 @@ enum dbg_idle_chk_severity_types {
 
 /* Debug Bus block data */
 struct dbg_bus_block_data {
-	u8 enabled; /* Indicates if the block is enabled for recording (0/1) */
-	u8 hw_id; /* HW ID associated with the block */
-	u8 line_num; /* Debug line number to select */
-	u8 right_shift; /* Number of units to  right the debug data (0-3) */
-	u8 cycle_en; /* 4-bit value: bit i set -> unit i is enabled. */
-	u8 force_valid; /* 4-bit value: bit i set -> unit i is forced valid. */
-	u8 force_frame; /* 4-bit value: bit i set -> unit i frame bit is forced.
-			 */
-	u8 reserved;
+	__le16 data;
+#define DBG_BUS_BLOCK_DATA_ENABLE_MASK_MASK       0xF
+#define DBG_BUS_BLOCK_DATA_ENABLE_MASK_SHIFT      0
+#define DBG_BUS_BLOCK_DATA_RIGHT_SHIFT_MASK       0xF
+#define DBG_BUS_BLOCK_DATA_RIGHT_SHIFT_SHIFT      4
+#define DBG_BUS_BLOCK_DATA_FORCE_VALID_MASK_MASK  0xF
+#define DBG_BUS_BLOCK_DATA_FORCE_VALID_MASK_SHIFT 8
+#define DBG_BUS_BLOCK_DATA_FORCE_FRAME_MASK_MASK  0xF
+#define DBG_BUS_BLOCK_DATA_FORCE_FRAME_MASK_SHIFT 12
+	u8 line_num;
+	u8 hw_id;
 };
 
 /* Debug Bus Clients */
@@ -2045,6 +2085,14 @@ enum dbg_bus_constraint_ops {
 	MAX_DBG_BUS_CONSTRAINT_OPS
 };
 
+struct dbg_bus_trigger_state_data {
+	u8 data;
+#define DBG_BUS_TRIGGER_STATE_DATA_BLOCK_SHIFTED_ENABLE_MASK_MASK  0xF
+#define DBG_BUS_TRIGGER_STATE_DATA_BLOCK_SHIFTED_ENABLE_MASK_SHIFT 0
+#define DBG_BUS_TRIGGER_STATE_DATA_CONSTRAINT_DWORD_MASK_MASK      0xF
+#define DBG_BUS_TRIGGER_STATE_DATA_CONSTRAINT_DWORD_MASK_SHIFT     4
+};
+
 /* Debug Bus memory address */
 struct dbg_bus_mem_addr {
 	__le32 lo;
@@ -2078,66 +2126,42 @@ union dbg_bus_storm_eid_params {
 
 /* Debug Bus Storm data */
 struct dbg_bus_storm_data {
-	u8 fast_enabled;
-	u8 fast_mode;
-	u8 slow_enabled;
-	u8 slow_mode;
+	u8 enabled;
+	u8 mode;
 	u8 hw_id;
 	u8 eid_filter_en;
 	u8 eid_range_not_mask;
 	u8 cid_filter_en;
 	union dbg_bus_storm_eid_params eid_filter_params;
-	__le16 reserved;
 	__le32 cid;
 };
 
 /* Debug Bus data */
 struct dbg_bus_data {
-	__le32 app_version; /* The tools version number of the application */
-	u8 state; /* The current debug bus state */
-	u8 hw_dwords; /* HW dwords per cycle */
-	u8 next_hw_id; /* Next HW ID to be associated with an input */
-	u8 num_enabled_blocks; /* Number of blocks enabled for recording */
-	u8 num_enabled_storms; /* Number of Storms enabled for recording */
-	u8 target; /* Output target */
-	u8 next_trigger_state; /* ID of next trigger state to be added */
-	u8 next_constraint_id; /* ID of next filter/trigger constraint to be
-				* added.
-				*/
-	u8 one_shot_en; /* Indicates if one-shot mode is enabled (0/1) */
-	u8 grc_input_en; /* Indicates if GRC recording is enabled (0/1) */
-	u8 timestamp_input_en; /* Indicates if timestamp recording is enabled
-				* (0/1).
-				*/
-	u8 filter_en; /* Indicates if the recording filter is enabled (0/1) */
-	u8 trigger_en; /* Indicates if the recording trigger is enabled (0/1) */
-	u8 adding_filter; /* If true, the next added constraint belong to the
-			   * filter. Otherwise, it belongs to the last added
-			   * trigger state. Valid only if either filter or
-			   * triggers are enabled.
-			   */
-	u8 filter_pre_trigger; /* Indicates if the recording filter should be
-				* applied before the trigger. Valid only if both
-				* filter and trigger are enabled (0/1).
-				*/
-	u8 filter_post_trigger; /* Indicates if the recording filter should be
-				 * applied after the trigger. Valid only if both
-				 * filter and trigger are enabled (0/1).
-				 */
-	u8 unify_inputs; /* If true, all inputs are associated with HW ID 0.
-			  * Otherwise, each input is assigned a different HW ID
-			  * (0/1).
-			  */
-	u8 rcv_from_other_engine; /* Indicates if the other engine sends it NW
-				   * recording to this engine (0/1).
-				   */
-	struct dbg_bus_pci_buf_data pci_buf; /* Debug Bus PCI buffer data. Valid
-					      * only when the target is
-					      * DBG_BUS_TARGET_ID_PCI.
-					      */
+	__le32 app_version;
+	u8 state;
+	u8 hw_dwords;
+	__le16 hw_id_mask;
+	u8 num_enabled_blocks;
+	u8 num_enabled_storms;
+	u8 target;
+	u8 one_shot_en;
+	u8 grc_input_en;
+	u8 timestamp_input_en;
+	u8 filter_en;
+	u8 adding_filter;
+	u8 filter_pre_trigger;
+	u8 filter_post_trigger;
 	__le16 reserved;
-	struct dbg_bus_block_data blocks[88];/* Debug Bus data for each block */
-	struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each block */
+	u8 trigger_en;
+	struct dbg_bus_trigger_state_data trigger_states[3];
+	u8 next_trigger_state;
+	u8 next_constraint_id;
+	u8 unify_inputs;
+	u8 rcv_from_other_engine;
+	struct dbg_bus_pci_buf_data pci_buf;
+	struct dbg_bus_block_data blocks[88];
+	struct dbg_bus_storm_data storms[6];
 };
 
 enum dbg_bus_filter_types {
@@ -2156,12 +2180,6 @@ enum dbg_bus_frame_modes {
 	MAX_DBG_BUS_FRAME_MODES
 };
 
-enum dbg_bus_input_types {
-	DBG_BUS_INPUT_TYPE_STORM,
-	DBG_BUS_INPUT_TYPE_BLOCK,
-	MAX_DBG_BUS_INPUT_TYPES
-};
-
 enum dbg_bus_other_engine_modes {
 	DBG_BUS_OTHER_ENGINE_MODE_NONE,
 	DBG_BUS_OTHER_ENGINE_MODE_DOUBLE_BW_TX,
@@ -2185,19 +2203,19 @@ enum dbg_bus_pre_trigger_types {
 };
 
 enum dbg_bus_semi_frame_modes {
-	DBG_BUS_SEMI_FRAME_MODE_0SLOW_4FAST = 0,
-	DBG_BUS_SEMI_FRAME_MODE_4SLOW_0FAST = 3,
+	DBG_BUS_SEMI_FRAME_MODE_0SLOW_4FAST =
+	    0,
+	DBG_BUS_SEMI_FRAME_MODE_4SLOW_0FAST =
+	    3,
 	MAX_DBG_BUS_SEMI_FRAME_MODES
 };
 
 /* Debug bus states */
 enum dbg_bus_states {
-	DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */
-	DBG_BUS_STATE_READY, /* debug bus is ready for configuration and
-			      * recording.
-			      */
-	DBG_BUS_STATE_RECORDING, /* debug bus is currently recording */
-	DBG_BUS_STATE_STOPPED, /* debug bus recording has stopped */
+	DBG_BUS_STATE_IDLE,
+	DBG_BUS_STATE_READY,
+	DBG_BUS_STATE_RECORDING,
+	DBG_BUS_STATE_STOPPED,
 	MAX_DBG_BUS_STATES
 };
 
@@ -2216,11 +2234,8 @@ enum dbg_bus_storm_modes {
 
 /* Debug bus target IDs */
 enum dbg_bus_targets {
-	/* records debug bus to DBG block internal buffer */
 	DBG_BUS_TARGET_ID_INT_BUF,
-	/* records debug bus to the NW */
 	DBG_BUS_TARGET_ID_NIG,
-	/* records debug bus to a PCI buffer */
 	DBG_BUS_TARGET_ID_PCI,
 	MAX_DBG_BUS_TARGETS
 };
@@ -2235,48 +2250,45 @@ struct dbg_grc_data {
 
 /* Debug GRC params */
 enum dbg_grc_params {
-	DBG_GRC_PARAM_DUMP_TSTORM, /* dump Tstorm memories (0/1) */
-	DBG_GRC_PARAM_DUMP_MSTORM, /* dump Mstorm memories (0/1) */
-	DBG_GRC_PARAM_DUMP_USTORM, /* dump Ustorm memories (0/1) */
-	DBG_GRC_PARAM_DUMP_XSTORM, /* dump Xstorm memories (0/1) */
-	DBG_GRC_PARAM_DUMP_YSTORM, /* dump Ystorm memories (0/1) */
-	DBG_GRC_PARAM_DUMP_PSTORM, /* dump Pstorm memories (0/1) */
-	DBG_GRC_PARAM_DUMP_REGS, /* dump non-memory registers (0/1) */
-	DBG_GRC_PARAM_DUMP_RAM, /* dump Storm internal RAMs (0/1) */
-	DBG_GRC_PARAM_DUMP_PBUF, /* dump Storm passive buffer (0/1) */
-	DBG_GRC_PARAM_DUMP_IOR, /* dump Storm IORs (0/1) */
-	DBG_GRC_PARAM_DUMP_VFC, /* dump VFC memories (0/1) */
-	DBG_GRC_PARAM_DUMP_CM_CTX, /* dump CM contexts (0/1) */
-	DBG_GRC_PARAM_DUMP_PXP, /* dump PXP memories (0/1) */
-	DBG_GRC_PARAM_DUMP_RSS, /* dump RSS memories (0/1) */
-	DBG_GRC_PARAM_DUMP_CAU, /* dump CAU memories (0/1) */
-	DBG_GRC_PARAM_DUMP_QM, /* dump QM memories (0/1) */
-	DBG_GRC_PARAM_DUMP_MCP, /* dump MCP memories (0/1) */
-	DBG_GRC_PARAM_RESERVED, /* reserved */
-	DBG_GRC_PARAM_DUMP_CFC, /* dump CFC memories (0/1) */
-	DBG_GRC_PARAM_DUMP_IGU, /* dump IGU memories (0/1) */
-	DBG_GRC_PARAM_DUMP_BRB, /* dump BRB memories (0/1) */
-	DBG_GRC_PARAM_DUMP_BTB, /* dump BTB memories (0/1) */
-	DBG_GRC_PARAM_DUMP_BMB, /* dump BMB memories (0/1) */
-	DBG_GRC_PARAM_DUMP_NIG, /* dump NIG memories (0/1) */
-	DBG_GRC_PARAM_DUMP_MULD, /* dump MULD memories (0/1) */
-	DBG_GRC_PARAM_DUMP_PRS, /* dump PRS memories (0/1) */
-	DBG_GRC_PARAM_DUMP_DMAE, /* dump PRS memories (0/1) */
-	DBG_GRC_PARAM_DUMP_TM, /* dump TM (timers) memories (0/1) */
-	DBG_GRC_PARAM_DUMP_SDM, /* dump SDM memories (0/1) */
-	DBG_GRC_PARAM_DUMP_DIF, /* dump DIF memories (0/1) */
-	DBG_GRC_PARAM_DUMP_STATIC, /* dump static debug data (0/1) */
-	DBG_GRC_PARAM_UNSTALL, /* un-stall Storms after dump (0/1) */
-	DBG_GRC_PARAM_NUM_LCIDS, /* number of LCIDs (0..320) */
-	DBG_GRC_PARAM_NUM_LTIDS, /* number of LTIDs (0..320) */
-	/* preset: exclude all memories from dump (1 only) */
+	DBG_GRC_PARAM_DUMP_TSTORM,
+	DBG_GRC_PARAM_DUMP_MSTORM,
+	DBG_GRC_PARAM_DUMP_USTORM,
+	DBG_GRC_PARAM_DUMP_XSTORM,
+	DBG_GRC_PARAM_DUMP_YSTORM,
+	DBG_GRC_PARAM_DUMP_PSTORM,
+	DBG_GRC_PARAM_DUMP_REGS,
+	DBG_GRC_PARAM_DUMP_RAM,
+	DBG_GRC_PARAM_DUMP_PBUF,
+	DBG_GRC_PARAM_DUMP_IOR,
+	DBG_GRC_PARAM_DUMP_VFC,
+	DBG_GRC_PARAM_DUMP_CM_CTX,
+	DBG_GRC_PARAM_DUMP_PXP,
+	DBG_GRC_PARAM_DUMP_RSS,
+	DBG_GRC_PARAM_DUMP_CAU,
+	DBG_GRC_PARAM_DUMP_QM,
+	DBG_GRC_PARAM_DUMP_MCP,
+	DBG_GRC_PARAM_RESERVED,
+	DBG_GRC_PARAM_DUMP_CFC,
+	DBG_GRC_PARAM_DUMP_IGU,
+	DBG_GRC_PARAM_DUMP_BRB,
+	DBG_GRC_PARAM_DUMP_BTB,
+	DBG_GRC_PARAM_DUMP_BMB,
+	DBG_GRC_PARAM_DUMP_NIG,
+	DBG_GRC_PARAM_DUMP_MULD,
+	DBG_GRC_PARAM_DUMP_PRS,
+	DBG_GRC_PARAM_DUMP_DMAE,
+	DBG_GRC_PARAM_DUMP_TM,
+	DBG_GRC_PARAM_DUMP_SDM,
+	DBG_GRC_PARAM_DUMP_DIF,
+	DBG_GRC_PARAM_DUMP_STATIC,
+	DBG_GRC_PARAM_UNSTALL,
+	DBG_GRC_PARAM_NUM_LCIDS,
+	DBG_GRC_PARAM_NUM_LTIDS,
 	DBG_GRC_PARAM_EXCLUDE_ALL,
-	/* preset: include memories for crash dump (1 only) */
 	DBG_GRC_PARAM_CRASH,
-	/* perform dump only if MFW is responding (0/1) */
 	DBG_GRC_PARAM_PARITY_SAFE,
-	DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */
-	DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */
+	DBG_GRC_PARAM_DUMP_CM,
+	DBG_GRC_PARAM_DUMP_PHY,
 	DBG_GRC_PARAM_NO_MCP,
 	DBG_GRC_PARAM_NO_FW_VER,
 	MAX_DBG_GRC_PARAMS
@@ -2347,7 +2359,10 @@ enum dbg_status {
 	DBG_STATUS_REG_FIFO_BAD_DATA,
 	DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA,
 	DBG_STATUS_DBG_ARRAY_NOT_SET,
-	DBG_STATUS_MULTI_BLOCKS_WITH_FILTER,
+	DBG_STATUS_FILTER_BUG,
+	DBG_STATUS_NON_MATCHING_LINES,
+	DBG_STATUS_INVALID_TRIGGER_DWORD_OFFSET,
+	DBG_STATUS_DBG_BUS_IN_USE,
 	MAX_DBG_STATUS
 };
 
@@ -2364,25 +2379,22 @@ enum dbg_storms {
 
 /* Idle Check data */
 struct idle_chk_data {
-	__le32 buf_size; /* Idle check buffer size in dwords */
-	u8 buf_size_set; /* Indicates if the idle check buffer size was set
-			  * (0/1).
-			  */
+	__le32 buf_size;
+	u8 buf_size_set;
 	u8 reserved1;
 	__le16 reserved2;
 };
 
 /* Debug Tools data (per HW function) */
 struct dbg_tools_data {
-	struct dbg_grc_data grc; /* GRC Dump data */
-	struct dbg_bus_data bus; /* Debug Bus data */
-	struct idle_chk_data idle_chk; /* Idle Check data */
-	u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */
-	u8 block_in_reset[88]; /* Indicates if a block is in reset state (0/1).
-				*/
-	u8 chip_id; /* Chip ID (from enum chip_ids) */
-	u8 platform_id; /* Platform ID (from enum platform_ids) */
-	u8 initialized; /* Indicates if the data was initialized */
+	struct dbg_grc_data grc;
+	struct dbg_bus_data bus;
+	struct idle_chk_data idle_chk;
+	u8 mode_enable[40];
+	u8 block_in_reset[88];
+	u8 chip_id;
+	u8 platform_id;
+	u8 initialized;
 	u8 reserved;
 };
 
@@ -2464,6 +2476,12 @@ struct init_qm_vport_params {
 
 /* Max size in dwords of a zipped array */
 #define MAX_ZIPPED_SIZE	8192
+enum chip_ids {
+	CHIP_BB,
+	CHIP_K2,
+	CHIP_RESERVED,
+	MAX_CHIP_IDS
+};
 
 struct fw_asserts_ram_section {
 	__le16 section_ram_line_offset;
@@ -2475,18 +2493,18 @@ struct fw_asserts_ram_section {
 };
 
 struct fw_ver_num {
-	u8 major; /* Firmware major version number */
-	u8 minor; /* Firmware minor version number */
-	u8 rev; /* Firmware revision version number */
-	u8 eng; /* Firmware engineering version number (for bootleg versions) */
+	u8 major;
+	u8 minor;
+	u8 rev;
+	u8 eng;
 };
 
 struct fw_ver_info {
-	__le16 tools_ver; /* Tools version number */
-	u8 image_id; /* FW image ID (e.g. main) */
+	__le16 tools_ver;
+	u8 image_id;
 	u8 reserved1;
-	struct fw_ver_num num; /* FW version number */
-	__le32 timestamp; /* FW Timestamp in unix time  (sec. since 1970) */
+	struct fw_ver_num num;
+	__le32 timestamp;
 	__le32 reserved2;
 };
 
@@ -2722,7 +2740,6 @@ struct init_read_op {
 #define INIT_READ_OP_ADDRESS_MASK	0x7FFFFF
 #define INIT_READ_OP_ADDRESS_SHIFT	9
 	__le32 expected_val;
-
 };
 
 /* Init operations union */
@@ -2782,6 +2799,7 @@ struct iro {
  * @param bin_ptr - a pointer to the binary data with debug arrays.
  */
 enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr);
+
 /**
  * @brief qed_dbg_grc_set_params_default - Reverts all GRC parameters to their
  *	default value.
@@ -2805,6 +2823,7 @@ void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn);
 enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 					      struct qed_ptt *p_ptt,
 					      u32 *buf_size);
+
 /**
  * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer.
  *
@@ -2824,6 +2843,7 @@ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
 				 u32 *dump_buf,
 				 u32 buf_size_in_dwords,
 				 u32 *num_dumped_dwords);
+
 /**
  * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size
  *	for idle check results.
@@ -2840,6 +2860,7 @@ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
 enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size);
+
 /**
  * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results
  *	into the specified buffer.
@@ -2860,6 +2881,7 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
 				      u32 *dump_buf,
 				      u32 buf_size_in_dwords,
 				      u32 *num_dumped_dwords);
+
 /**
  * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size
  *	for mcp trace results.
@@ -2878,6 +2900,7 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
 enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						    struct qed_ptt *p_ptt,
 						    u32 *buf_size);
+
 /**
  * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results
  *	into the specified buffer.
@@ -2902,6 +2925,7 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 				       u32 *dump_buf,
 				       u32 buf_size_in_dwords,
 				       u32 *num_dumped_dwords);
+
 /**
  * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size
  *	for grc trace fifo results.
@@ -2917,6 +2941,7 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size);
+
 /**
  * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into
  *	the specified buffer.
@@ -2938,6 +2963,7 @@ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 				      u32 *dump_buf,
 				      u32 buf_size_in_dwords,
 				      u32 *num_dumped_dwords);
+
 /**
  * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size
  *	for the IGU fifo results.
@@ -2954,6 +2980,7 @@ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size);
+
 /**
  * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into
  *	the specified buffer.
@@ -2975,6 +3002,7 @@ enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 				      u32 *dump_buf,
 				      u32 buf_size_in_dwords,
 				      u32 *num_dumped_dwords);
+
 /**
  * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required
  *	buffer size for protection override window results.
@@ -3074,6 +3102,7 @@ enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
  * @param bin_ptr - a pointer to the binary data with debug arrays.
  */
 enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr);
+
 /**
  * @brief qed_dbg_get_status_str - Returns a string for the specified status.
  *
@@ -3082,6 +3111,7 @@ enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr);
  * @return a string for the specified status
  */
 const char *qed_dbg_get_status_str(enum dbg_status status);
+
 /**
  * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size
  *	for idle check results (in bytes).
@@ -3116,6 +3146,7 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
 					   char *results_buf,
 					   u32 *num_errors,
 					   u32 *num_warnings);
+
 /**
  * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
  *	for MCP Trace results (in bytes).
@@ -3132,6 +3163,7 @@ enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
 						   u32 *dump_buf,
 						   u32 num_dumped_dwords,
 						   u32 *results_buf_size);
+
 /**
  * @brief qed_print_mcp_trace_results - Prints MCP Trace results
  *
@@ -3146,6 +3178,7 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 					    u32 *dump_buf,
 					    u32 num_dumped_dwords,
 					    char *results_buf);
+
 /**
  * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
  *	for reg_fifo results (in bytes).
@@ -3162,6 +3195,7 @@ enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *dump_buf,
 						  u32 num_dumped_dwords,
 						  u32 *results_buf_size);
+
 /**
  * @brief qed_print_reg_fifo_results - Prints reg fifo results
  *
@@ -3176,6 +3210,7 @@ enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
 					   u32 *dump_buf,
 					   u32 num_dumped_dwords,
 					   char *results_buf);
+
 /**
  * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size
  *	for igu_fifo results (in bytes).
@@ -3192,6 +3227,7 @@ enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *dump_buf,
 						  u32 num_dumped_dwords,
 						  u32 *results_buf_size);
+
 /**
  * @brief qed_print_igu_fifo_results - Prints IGU fifo results
  *
@@ -3206,6 +3242,7 @@ enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
 					   u32 *dump_buf,
 					   u32 num_dumped_dwords,
 					   char *results_buf);
+
 /**
  * @brief qed_get_protection_override_results_buf_size - Returns the required
  *	buffer size for protection override results (in bytes).
@@ -3223,6 +3260,7 @@ qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
 					     u32 *dump_buf,
 					     u32 num_dumped_dwords,
 					     u32 *results_buf_size);
+
 /**
  * @brief qed_print_protection_override_results - Prints protection override
  *	results.
@@ -3238,6 +3276,7 @@ enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
 						      u32 *dump_buf,
 						      u32 num_dumped_dwords,
 						      char *results_buf);
+
 /**
  * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size
  *	for FW Asserts results (in bytes).
@@ -3254,6 +3293,7 @@ enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
 						    u32 *dump_buf,
 						    u32 num_dumped_dwords,
 						    u32 *results_buf_size);
+
 /**
  * @brief qed_print_fw_asserts_results - Prints FW Asserts results
  *
@@ -3268,6 +3308,269 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
 					     u32 *dump_buf,
 					     u32 num_dumped_dwords,
 					     char *results_buf);
+
+/* Debug Bus blocks */
+static const u32 dbg_bus_blocks[] = {
+	0x0000000f,		/* grc, bb, 15 lines */
+	0x0000000f,		/* grc, k2, 15 lines */
+	0x00000000,
+	0x00000000,		/* miscs, bb, 0 lines */
+	0x00000000,		/* miscs, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* misc, bb, 0 lines */
+	0x00000000,		/* misc, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* dbu, bb, 0 lines */
+	0x00000000,		/* dbu, k2, 0 lines */
+	0x00000000,
+	0x000f0127,		/* pglue_b, bb, 39 lines */
+	0x0036012a,		/* pglue_b, k2, 42 lines */
+	0x00000000,
+	0x00000000,		/* cnig, bb, 0 lines */
+	0x00120102,		/* cnig, k2, 2 lines */
+	0x00000000,
+	0x00000000,		/* cpmu, bb, 0 lines */
+	0x00000000,		/* cpmu, k2, 0 lines */
+	0x00000000,
+	0x00000001,		/* ncsi, bb, 1 lines */
+	0x00000001,		/* ncsi, k2, 1 lines */
+	0x00000000,
+	0x00000000,		/* opte, bb, 0 lines */
+	0x00000000,		/* opte, k2, 0 lines */
+	0x00000000,
+	0x00600085,		/* bmb, bb, 133 lines */
+	0x00600085,		/* bmb, k2, 133 lines */
+	0x00000000,
+	0x00000000,		/* pcie, bb, 0 lines */
+	0x00e50033,		/* pcie, k2, 51 lines */
+	0x00000000,
+	0x00000000,		/* mcp, bb, 0 lines */
+	0x00000000,		/* mcp, k2, 0 lines */
+	0x00000000,
+	0x01180009,		/* mcp2, bb, 9 lines */
+	0x01180009,		/* mcp2, k2, 9 lines */
+	0x00000000,
+	0x01210104,		/* pswhst, bb, 4 lines */
+	0x01210104,		/* pswhst, k2, 4 lines */
+	0x00000000,
+	0x01250103,		/* pswhst2, bb, 3 lines */
+	0x01250103,		/* pswhst2, k2, 3 lines */
+	0x00000000,
+	0x00340101,		/* pswrd, bb, 1 lines */
+	0x00340101,		/* pswrd, k2, 1 lines */
+	0x00000000,
+	0x01280119,		/* pswrd2, bb, 25 lines */
+	0x01280119,		/* pswrd2, k2, 25 lines */
+	0x00000000,
+	0x01410109,		/* pswwr, bb, 9 lines */
+	0x01410109,		/* pswwr, k2, 9 lines */
+	0x00000000,
+	0x00000000,		/* pswwr2, bb, 0 lines */
+	0x00000000,		/* pswwr2, k2, 0 lines */
+	0x00000000,
+	0x001c0001,		/* pswrq, bb, 1 lines */
+	0x001c0001,		/* pswrq, k2, 1 lines */
+	0x00000000,
+	0x014a0015,		/* pswrq2, bb, 21 lines */
+	0x014a0015,		/* pswrq2, k2, 21 lines */
+	0x00000000,
+	0x00000000,		/* pglcs, bb, 0 lines */
+	0x00120006,		/* pglcs, k2, 6 lines */
+	0x00000000,
+	0x00100001,		/* dmae, bb, 1 lines */
+	0x00100001,		/* dmae, k2, 1 lines */
+	0x00000000,
+	0x015f0105,		/* ptu, bb, 5 lines */
+	0x015f0105,		/* ptu, k2, 5 lines */
+	0x00000000,
+	0x01640120,		/* tcm, bb, 32 lines */
+	0x01640120,		/* tcm, k2, 32 lines */
+	0x00000000,
+	0x01640120,		/* mcm, bb, 32 lines */
+	0x01640120,		/* mcm, k2, 32 lines */
+	0x00000000,
+	0x01640120,		/* ucm, bb, 32 lines */
+	0x01640120,		/* ucm, k2, 32 lines */
+	0x00000000,
+	0x01640120,		/* xcm, bb, 32 lines */
+	0x01640120,		/* xcm, k2, 32 lines */
+	0x00000000,
+	0x01640120,		/* ycm, bb, 32 lines */
+	0x01640120,		/* ycm, k2, 32 lines */
+	0x00000000,
+	0x01640120,		/* pcm, bb, 32 lines */
+	0x01640120,		/* pcm, k2, 32 lines */
+	0x00000000,
+	0x01840062,		/* qm, bb, 98 lines */
+	0x01840062,		/* qm, k2, 98 lines */
+	0x00000000,
+	0x01e60021,		/* tm, bb, 33 lines */
+	0x01e60021,		/* tm, k2, 33 lines */
+	0x00000000,
+	0x02070107,		/* dorq, bb, 7 lines */
+	0x02070107,		/* dorq, k2, 7 lines */
+	0x00000000,
+	0x00600185,		/* brb, bb, 133 lines */
+	0x00600185,		/* brb, k2, 133 lines */
+	0x00000000,
+	0x020e0019,		/* src, bb, 25 lines */
+	0x020c001a,		/* src, k2, 26 lines */
+	0x00000000,
+	0x02270104,		/* prs, bb, 4 lines */
+	0x02270104,		/* prs, k2, 4 lines */
+	0x00000000,
+	0x022b0133,		/* tsdm, bb, 51 lines */
+	0x022b0133,		/* tsdm, k2, 51 lines */
+	0x00000000,
+	0x022b0133,		/* msdm, bb, 51 lines */
+	0x022b0133,		/* msdm, k2, 51 lines */
+	0x00000000,
+	0x022b0133,		/* usdm, bb, 51 lines */
+	0x022b0133,		/* usdm, k2, 51 lines */
+	0x00000000,
+	0x022b0133,		/* xsdm, bb, 51 lines */
+	0x022b0133,		/* xsdm, k2, 51 lines */
+	0x00000000,
+	0x022b0133,		/* ysdm, bb, 51 lines */
+	0x022b0133,		/* ysdm, k2, 51 lines */
+	0x00000000,
+	0x022b0133,		/* psdm, bb, 51 lines */
+	0x022b0133,		/* psdm, k2, 51 lines */
+	0x00000000,
+	0x025e010c,		/* tsem, bb, 12 lines */
+	0x025e010c,		/* tsem, k2, 12 lines */
+	0x00000000,
+	0x025e010c,		/* msem, bb, 12 lines */
+	0x025e010c,		/* msem, k2, 12 lines */
+	0x00000000,
+	0x025e010c,		/* usem, bb, 12 lines */
+	0x025e010c,		/* usem, k2, 12 lines */
+	0x00000000,
+	0x025e010c,		/* xsem, bb, 12 lines */
+	0x025e010c,		/* xsem, k2, 12 lines */
+	0x00000000,
+	0x025e010c,		/* ysem, bb, 12 lines */
+	0x025e010c,		/* ysem, k2, 12 lines */
+	0x00000000,
+	0x025e010c,		/* psem, bb, 12 lines */
+	0x025e010c,		/* psem, k2, 12 lines */
+	0x00000000,
+	0x026a000d,		/* rss, bb, 13 lines */
+	0x026a000d,		/* rss, k2, 13 lines */
+	0x00000000,
+	0x02770106,		/* tmld, bb, 6 lines */
+	0x02770106,		/* tmld, k2, 6 lines */
+	0x00000000,
+	0x027d0106,		/* muld, bb, 6 lines */
+	0x027d0106,		/* muld, k2, 6 lines */
+	0x00000000,
+	0x02770005,		/* yuld, bb, 5 lines */
+	0x02770005,		/* yuld, k2, 5 lines */
+	0x00000000,
+	0x02830107,		/* xyld, bb, 7 lines */
+	0x027d0107,		/* xyld, k2, 7 lines */
+	0x00000000,
+	0x00000000,		/* ptld, bb, 0 lines */
+	0x00000000,		/* ptld, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* ypld, bb, 0 lines */
+	0x00000000,		/* ypld, k2, 0 lines */
+	0x00000000,
+	0x028a010e,		/* prm, bb, 14 lines */
+	0x02980110,		/* prm, k2, 16 lines */
+	0x00000000,
+	0x02a8000d,		/* pbf_pb1, bb, 13 lines */
+	0x02a8000d,		/* pbf_pb1, k2, 13 lines */
+	0x00000000,
+	0x02a8000d,		/* pbf_pb2, bb, 13 lines */
+	0x02a8000d,		/* pbf_pb2, k2, 13 lines */
+	0x00000000,
+	0x02a8000d,		/* rpb, bb, 13 lines */
+	0x02a8000d,		/* rpb, k2, 13 lines */
+	0x00000000,
+	0x00600185,		/* btb, bb, 133 lines */
+	0x00600185,		/* btb, k2, 133 lines */
+	0x00000000,
+	0x02b50117,		/* pbf, bb, 23 lines */
+	0x02b50117,		/* pbf, k2, 23 lines */
+	0x00000000,
+	0x02cc0006,		/* rdif, bb, 6 lines */
+	0x02cc0006,		/* rdif, k2, 6 lines */
+	0x00000000,
+	0x02d20006,		/* tdif, bb, 6 lines */
+	0x02d20006,		/* tdif, k2, 6 lines */
+	0x00000000,
+	0x02d80003,		/* cdu, bb, 3 lines */
+	0x02db000e,		/* cdu, k2, 14 lines */
+	0x00000000,
+	0x02e9010d,		/* ccfc, bb, 13 lines */
+	0x02f60117,		/* ccfc, k2, 23 lines */
+	0x00000000,
+	0x02e9010d,		/* tcfc, bb, 13 lines */
+	0x02f60117,		/* tcfc, k2, 23 lines */
+	0x00000000,
+	0x030d0133,		/* igu, bb, 51 lines */
+	0x030d0133,		/* igu, k2, 51 lines */
+	0x00000000,
+	0x03400106,		/* cau, bb, 6 lines */
+	0x03400106,		/* cau, k2, 6 lines */
+	0x00000000,
+	0x00000000,		/* rgfs, bb, 0 lines */
+	0x00000000,		/* rgfs, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* rgsrc, bb, 0 lines */
+	0x00000000,		/* rgsrc, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* tgfs, bb, 0 lines */
+	0x00000000,		/* tgfs, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* tgsrc, bb, 0 lines */
+	0x00000000,		/* tgsrc, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* umac, bb, 0 lines */
+	0x00120006,		/* umac, k2, 6 lines */
+	0x00000000,
+	0x00000000,		/* xmac, bb, 0 lines */
+	0x00000000,		/* xmac, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* dbg, bb, 0 lines */
+	0x00000000,		/* dbg, k2, 0 lines */
+	0x00000000,
+	0x0346012b,		/* nig, bb, 43 lines */
+	0x0346011d,		/* nig, k2, 29 lines */
+	0x00000000,
+	0x00000000,		/* wol, bb, 0 lines */
+	0x001c0002,		/* wol, k2, 2 lines */
+	0x00000000,
+	0x00000000,		/* bmbn, bb, 0 lines */
+	0x00210008,		/* bmbn, k2, 8 lines */
+	0x00000000,
+	0x00000000,		/* ipc, bb, 0 lines */
+	0x00000000,		/* ipc, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* nwm, bb, 0 lines */
+	0x0371000b,		/* nwm, k2, 11 lines */
+	0x00000000,
+	0x00000000,		/* nws, bb, 0 lines */
+	0x037c0009,		/* nws, k2, 9 lines */
+	0x00000000,
+	0x00000000,		/* ms, bb, 0 lines */
+	0x00120004,		/* ms, k2, 4 lines */
+	0x00000000,
+	0x00000000,		/* phy_pcie, bb, 0 lines */
+	0x00e5001a,		/* phy_pcie, k2, 26 lines */
+	0x00000000,
+	0x00000000,		/* led, bb, 0 lines */
+	0x00000000,		/* led, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* avs_wrap, bb, 0 lines */
+	0x00000000,		/* avs_wrap, k2, 0 lines */
+	0x00000000,
+	0x00000000,		/* bar0_map, bb, 0 lines */
+	0x00000000,		/* bar0_map, k2, 0 lines */
+	0x00000000,
+};
+
 /* Win 2 */
 #define GTT_BAR0_MAP_REG_IGU_CMD	0x00f000UL
 
@@ -3589,37 +3892,37 @@ void qed_set_rfs_mode_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 #define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) \
 	(IRO[44].base + ((pf_id) * IRO[44].m1))
 
-static const struct iro iro_arr[47] = {
+static const struct iro iro_arr[49] = {
 	{0x0, 0x0, 0x0, 0x0, 0x8},
 	{0x4cb0, 0x80, 0x0, 0x0, 0x80},
-	{0x6318, 0x20, 0x0, 0x0, 0x20},
+	{0x6518, 0x20, 0x0, 0x0, 0x20},
 	{0xb00, 0x8, 0x0, 0x0, 0x4},
 	{0xa80, 0x8, 0x0, 0x0, 0x4},
 	{0x0, 0x8, 0x0, 0x0, 0x2},
 	{0x80, 0x8, 0x0, 0x0, 0x4},
 	{0x84, 0x8, 0x0, 0x0, 0x2},
-	{0x4bc0, 0x0, 0x0, 0x0, 0x78},
+	{0x4c40, 0x0, 0x0, 0x0, 0x78},
 	{0x3df0, 0x0, 0x0, 0x0, 0x78},
 	{0x29b0, 0x0, 0x0, 0x0, 0x78},
 	{0x4c38, 0x0, 0x0, 0x0, 0x78},
 	{0x4990, 0x0, 0x0, 0x0, 0x78},
-	{0x7e48, 0x0, 0x0, 0x0, 0x78},
+	{0x7f48, 0x0, 0x0, 0x0, 0x78},
 	{0xa28, 0x8, 0x0, 0x0, 0x8},
-	{0x60f8, 0x10, 0x0, 0x0, 0x10},
-	{0xb820, 0x30, 0x0, 0x0, 0x30},
+	{0x61f8, 0x10, 0x0, 0x0, 0x10},
+	{0xbd20, 0x30, 0x0, 0x0, 0x30},
 	{0x95b8, 0x30, 0x0, 0x0, 0x30},
 	{0x4b60, 0x80, 0x0, 0x0, 0x40},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0x53a0, 0x80, 0x4, 0x0, 0x4},
-	{0xc8f0, 0x0, 0x0, 0x0, 0x4},
+	{0xc7c8, 0x0, 0x0, 0x0, 0x4},
 	{0x4ba0, 0x80, 0x0, 0x0, 0x20},
-	{0x8050, 0x40, 0x0, 0x0, 0x30},
-	{0xe770, 0x60, 0x0, 0x0, 0x60},
+	{0x8150, 0x40, 0x0, 0x0, 0x30},
+	{0xec70, 0x60, 0x0, 0x0, 0x60},
 	{0x2b48, 0x80, 0x0, 0x0, 0x38},
-	{0xf188, 0x78, 0x0, 0x0, 0x78},
+	{0xf1b0, 0x78, 0x0, 0x0, 0x78},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
-	{0xacf0, 0x0, 0x0, 0x0, 0xf0},
-	{0xade0, 0x8, 0x0, 0x0, 0x8},
+	{0xaef8, 0x0, 0x0, 0x0, 0xf0},
+	{0xafe8, 0x8, 0x0, 0x0, 0x8},
 	{0x1f8, 0x8, 0x0, 0x0, 0x8},
 	{0xac0, 0x8, 0x0, 0x0, 0x8},
 	{0x2578, 0x8, 0x0, 0x0, 0x8},
@@ -3627,16 +3930,18 @@ static const struct iro iro_arr[47] = {
 	{0x0, 0x8, 0x0, 0x0, 0x8},
 	{0x200, 0x10, 0x8, 0x0, 0x8},
 	{0xb78, 0x10, 0x8, 0x0, 0x2},
-	{0xd888, 0x38, 0x0, 0x0, 0x24},
-	{0x12c38, 0x10, 0x0, 0x0, 0x8},
-	{0x11aa0, 0x38, 0x0, 0x0, 0x18},
-	{0xa8c0, 0x38, 0x0, 0x0, 0x10},
+	{0xd9a8, 0x38, 0x0, 0x0, 0x24},
+	{0x12988, 0x10, 0x0, 0x0, 0x8},
+	{0x11fa0, 0x38, 0x0, 0x0, 0x18},
+	{0xa580, 0x38, 0x0, 0x0, 0x10},
 	{0x86f8, 0x30, 0x0, 0x0, 0x18},
 	{0x101f8, 0x10, 0x0, 0x0, 0x10},
-	{0xdd08, 0x48, 0x0, 0x0, 0x38},
+	{0xde28, 0x48, 0x0, 0x0, 0x38},
 	{0x10660, 0x20, 0x0, 0x0, 0x20},
 	{0x2b80, 0x80, 0x0, 0x0, 0x10},
 	{0x5020, 0x10, 0x0, 0x0, 0x10},
+	{0xc9b0, 0x30, 0x0, 0x0, 0x10},
+	{0xeec0, 0x10, 0x0, 0x0, 0x10},
 };
 
 /* Runtime array offsets */
@@ -3724,361 +4029,359 @@ static const struct iro iro_arr[47] = {
 #define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET	6697
 #define PSWRQ2_REG_VF_BASE_RT_OFFSET	6698
 #define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET	6699
-#define PSWRQ2_REG_WR_MBS0_RT_OFFSET	6700
-#define PSWRQ2_REG_RD_MBS0_RT_OFFSET	6701
-#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET	6702
-#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET	6703
-#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET	6704
+#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET	6700
+#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET	6701
+#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET	6702
 #define PSWRQ2_REG_ILT_MEMORY_RT_SIZE	22000
-#define PGLUE_REG_B_VF_BASE_RT_OFFSET	28704
-#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET	28705
-#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET	28706
-#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET	28707
-#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET	28708
-#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET	28709
-#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET	28710
-#define TM_REG_VF_ENABLE_CONN_RT_OFFSET	28711
-#define TM_REG_PF_ENABLE_CONN_RT_OFFSET	28712
-#define TM_REG_PF_ENABLE_TASK_RT_OFFSET	28713
-#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET	28714
-#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET	28715
-#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET	28716
+#define PGLUE_REG_B_VF_BASE_RT_OFFSET	28702
+#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET	28703
+#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET	28704
+#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET	28705
+#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET	28706
+#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET	28707
+#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET	28708
+#define TM_REG_VF_ENABLE_CONN_RT_OFFSET	28709
+#define TM_REG_PF_ENABLE_CONN_RT_OFFSET	28710
+#define TM_REG_PF_ENABLE_TASK_RT_OFFSET	28711
+#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET	28712
+#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET	28713
+#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET	28714
 #define TM_REG_CONFIG_CONN_MEM_RT_SIZE	416
-#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET	29132
-#define TM_REG_CONFIG_TASK_MEM_RT_SIZE	512
-#define QM_REG_MAXPQSIZE_0_RT_OFFSET	29644
-#define QM_REG_MAXPQSIZE_1_RT_OFFSET	29645
-#define QM_REG_MAXPQSIZE_2_RT_OFFSET	29646
-#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET	29647
-#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET	29648
-#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET	29649
-#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET	29650
-#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET	29651
-#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET	29652
-#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET	29653
-#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET	29654
-#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET	29655
-#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET	29656
-#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET	29657
-#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET	29658
-#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET	29659
-#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET	29660
-#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET	29661
-#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET	29662
-#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET	29663
-#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET	29664
-#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET	29665
-#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET	29666
-#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET	29667
-#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET	29668
-#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET	29669
-#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET	29670
-#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET	29671
-#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET	29672
-#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET	29673
-#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET	29674
-#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET	29675
-#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET	29676
-#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET	29677
-#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET	29678
-#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET	29679
-#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET	29680
-#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET	29681
-#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET	29682
-#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET	29683
-#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET	29684
-#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET	29685
-#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET	29686
-#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET	29687
-#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET	29688
-#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET	29689
-#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET	29690
-#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET	29691
-#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET	29692
-#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET	29693
-#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET	29694
-#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET	29695
-#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET	29696
-#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET	29697
-#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET	29698
-#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET	29699
-#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET	29700
-#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET	29701
-#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET	29702
-#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET	29703
-#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET	29704
-#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET	29705
-#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET	29706
-#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET	29707
-#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET	29708
-#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET	29709
-#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET	29710
-#define QM_REG_BASEADDROTHERPQ_RT_OFFSET	29711
+#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET	29130
+#define TM_REG_CONFIG_TASK_MEM_RT_SIZE	608
+#define QM_REG_MAXPQSIZE_0_RT_OFFSET	29738
+#define QM_REG_MAXPQSIZE_1_RT_OFFSET	29739
+#define QM_REG_MAXPQSIZE_2_RT_OFFSET	29740
+#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET	29741
+#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET	29742
+#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET	29743
+#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET	29744
+#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET	29745
+#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET	29746
+#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET	29747
+#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET	29748
+#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET	29749
+#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET	29750
+#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET	29751
+#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET	29752
+#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET	29753
+#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET	29754
+#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET	29755
+#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET	29756
+#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET	29757
+#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET	29758
+#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET	29759
+#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET	29760
+#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET	29761
+#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET	29762
+#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET	29763
+#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET	29764
+#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET	29765
+#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET	29766
+#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET	29767
+#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET	29768
+#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET	29769
+#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET	29770
+#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET	29771
+#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET	29772
+#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET	29773
+#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET	29774
+#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET	29775
+#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET	29776
+#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET	29777
+#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET	29778
+#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET	29779
+#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET	29780
+#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET	29781
+#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET	29782
+#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET	29783
+#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET	29784
+#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET	29785
+#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET	29786
+#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET	29787
+#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET	29788
+#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET	29789
+#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET	29790
+#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET	29791
+#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET	29792
+#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET	29793
+#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET	29794
+#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET	29795
+#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET	29796
+#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET	29797
+#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET	29798
+#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET	29799
+#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET	29800
+#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET	29801
+#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET	29802
+#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET	29803
+#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET	29804
+#define QM_REG_BASEADDROTHERPQ_RT_OFFSET	29805
 #define QM_REG_BASEADDROTHERPQ_RT_SIZE	128
-#define QM_REG_VOQCRDLINE_RT_OFFSET	29839
-#define QM_REG_VOQCRDLINE_RT_SIZE	20
-#define QM_REG_VOQINITCRDLINE_RT_OFFSET	29859
-#define QM_REG_VOQINITCRDLINE_RT_SIZE	20
-#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET	29879
-#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET	29880
-#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET	29881
-#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET	29882
-#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET	29883
-#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET	29884
-#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET	29885
-#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET	29886
-#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET	29887
-#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET	29888
-#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET	29889
-#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET	29890
-#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET	29891
-#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET	29892
-#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET	29893
-#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET	29894
-#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET	29895
-#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET	29896
-#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET	29897
-#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET	29898
-#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET	29899
-#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET	29900
-#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET	29901
-#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET	29902
-#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET	29903
-#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET	29904
-#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET	29905
-#define QM_REG_PQTX2PF_0_RT_OFFSET	29906
-#define QM_REG_PQTX2PF_1_RT_OFFSET	29907
-#define QM_REG_PQTX2PF_2_RT_OFFSET	29908
-#define QM_REG_PQTX2PF_3_RT_OFFSET	29909
-#define QM_REG_PQTX2PF_4_RT_OFFSET	29910
-#define QM_REG_PQTX2PF_5_RT_OFFSET	29911
-#define QM_REG_PQTX2PF_6_RT_OFFSET	29912
-#define QM_REG_PQTX2PF_7_RT_OFFSET	29913
-#define QM_REG_PQTX2PF_8_RT_OFFSET	29914
-#define QM_REG_PQTX2PF_9_RT_OFFSET	29915
-#define QM_REG_PQTX2PF_10_RT_OFFSET	29916
-#define QM_REG_PQTX2PF_11_RT_OFFSET	29917
-#define QM_REG_PQTX2PF_12_RT_OFFSET	29918
-#define QM_REG_PQTX2PF_13_RT_OFFSET	29919
-#define QM_REG_PQTX2PF_14_RT_OFFSET	29920
-#define QM_REG_PQTX2PF_15_RT_OFFSET	29921
-#define QM_REG_PQTX2PF_16_RT_OFFSET	29922
-#define QM_REG_PQTX2PF_17_RT_OFFSET	29923
-#define QM_REG_PQTX2PF_18_RT_OFFSET	29924
-#define QM_REG_PQTX2PF_19_RT_OFFSET	29925
-#define QM_REG_PQTX2PF_20_RT_OFFSET	29926
-#define QM_REG_PQTX2PF_21_RT_OFFSET	29927
-#define QM_REG_PQTX2PF_22_RT_OFFSET	29928
-#define QM_REG_PQTX2PF_23_RT_OFFSET	29929
-#define QM_REG_PQTX2PF_24_RT_OFFSET	29930
-#define QM_REG_PQTX2PF_25_RT_OFFSET	29931
-#define QM_REG_PQTX2PF_26_RT_OFFSET	29932
-#define QM_REG_PQTX2PF_27_RT_OFFSET	29933
-#define QM_REG_PQTX2PF_28_RT_OFFSET	29934
-#define QM_REG_PQTX2PF_29_RT_OFFSET	29935
-#define QM_REG_PQTX2PF_30_RT_OFFSET	29936
-#define QM_REG_PQTX2PF_31_RT_OFFSET	29937
-#define QM_REG_PQTX2PF_32_RT_OFFSET	29938
-#define QM_REG_PQTX2PF_33_RT_OFFSET	29939
-#define QM_REG_PQTX2PF_34_RT_OFFSET	29940
-#define QM_REG_PQTX2PF_35_RT_OFFSET	29941
-#define QM_REG_PQTX2PF_36_RT_OFFSET	29942
-#define QM_REG_PQTX2PF_37_RT_OFFSET	29943
-#define QM_REG_PQTX2PF_38_RT_OFFSET	29944
-#define QM_REG_PQTX2PF_39_RT_OFFSET	29945
-#define QM_REG_PQTX2PF_40_RT_OFFSET	29946
-#define QM_REG_PQTX2PF_41_RT_OFFSET	29947
-#define QM_REG_PQTX2PF_42_RT_OFFSET	29948
-#define QM_REG_PQTX2PF_43_RT_OFFSET	29949
-#define QM_REG_PQTX2PF_44_RT_OFFSET	29950
-#define QM_REG_PQTX2PF_45_RT_OFFSET	29951
-#define QM_REG_PQTX2PF_46_RT_OFFSET	29952
-#define QM_REG_PQTX2PF_47_RT_OFFSET	29953
-#define QM_REG_PQTX2PF_48_RT_OFFSET	29954
-#define QM_REG_PQTX2PF_49_RT_OFFSET	29955
-#define QM_REG_PQTX2PF_50_RT_OFFSET	29956
-#define QM_REG_PQTX2PF_51_RT_OFFSET	29957
-#define QM_REG_PQTX2PF_52_RT_OFFSET	29958
-#define QM_REG_PQTX2PF_53_RT_OFFSET	29959
-#define QM_REG_PQTX2PF_54_RT_OFFSET	29960
-#define QM_REG_PQTX2PF_55_RT_OFFSET	29961
-#define QM_REG_PQTX2PF_56_RT_OFFSET	29962
-#define QM_REG_PQTX2PF_57_RT_OFFSET	29963
-#define QM_REG_PQTX2PF_58_RT_OFFSET	29964
-#define QM_REG_PQTX2PF_59_RT_OFFSET	29965
-#define QM_REG_PQTX2PF_60_RT_OFFSET	29966
-#define QM_REG_PQTX2PF_61_RT_OFFSET	29967
-#define QM_REG_PQTX2PF_62_RT_OFFSET	29968
-#define QM_REG_PQTX2PF_63_RT_OFFSET	29969
-#define QM_REG_PQOTHER2PF_0_RT_OFFSET	29970
-#define QM_REG_PQOTHER2PF_1_RT_OFFSET	29971
-#define QM_REG_PQOTHER2PF_2_RT_OFFSET	29972
-#define QM_REG_PQOTHER2PF_3_RT_OFFSET	29973
-#define QM_REG_PQOTHER2PF_4_RT_OFFSET	29974
-#define QM_REG_PQOTHER2PF_5_RT_OFFSET	29975
-#define QM_REG_PQOTHER2PF_6_RT_OFFSET	29976
-#define QM_REG_PQOTHER2PF_7_RT_OFFSET	29977
-#define QM_REG_PQOTHER2PF_8_RT_OFFSET	29978
-#define QM_REG_PQOTHER2PF_9_RT_OFFSET	29979
-#define QM_REG_PQOTHER2PF_10_RT_OFFSET	29980
-#define QM_REG_PQOTHER2PF_11_RT_OFFSET	29981
-#define QM_REG_PQOTHER2PF_12_RT_OFFSET	29982
-#define QM_REG_PQOTHER2PF_13_RT_OFFSET	29983
-#define QM_REG_PQOTHER2PF_14_RT_OFFSET	29984
-#define QM_REG_PQOTHER2PF_15_RT_OFFSET	29985
-#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET	29986
-#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET	29987
-#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET	29988
-#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET	29989
-#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET	29990
-#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET	29991
-#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET	29992
-#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET	29993
-#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET	29994
-#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET	29995
-#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET	29996
-#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET	29997
-#define QM_REG_RLGLBLINCVAL_RT_OFFSET	29998
+#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET	29933
+#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET	29934
+#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET	29935
+#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET	29936
+#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET	29937
+#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET	29938
+#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET	29939
+#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET	29940
+#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET	29941
+#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET	29942
+#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET	29943
+#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET	29944
+#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET	29945
+#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET	29946
+#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET	29947
+#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET	29948
+#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET	29949
+#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET	29950
+#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET	29951
+#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET	29952
+#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET	29953
+#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET	29954
+#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET	29955
+#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET	29956
+#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET	29957
+#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET	29958
+#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET	29959
+#define QM_REG_PQTX2PF_0_RT_OFFSET	29960
+#define QM_REG_PQTX2PF_1_RT_OFFSET	29961
+#define QM_REG_PQTX2PF_2_RT_OFFSET	29962
+#define QM_REG_PQTX2PF_3_RT_OFFSET	29963
+#define QM_REG_PQTX2PF_4_RT_OFFSET	29964
+#define QM_REG_PQTX2PF_5_RT_OFFSET	29965
+#define QM_REG_PQTX2PF_6_RT_OFFSET	29966
+#define QM_REG_PQTX2PF_7_RT_OFFSET	29967
+#define QM_REG_PQTX2PF_8_RT_OFFSET	29968
+#define QM_REG_PQTX2PF_9_RT_OFFSET	29969
+#define QM_REG_PQTX2PF_10_RT_OFFSET	29970
+#define QM_REG_PQTX2PF_11_RT_OFFSET	29971
+#define QM_REG_PQTX2PF_12_RT_OFFSET	29972
+#define QM_REG_PQTX2PF_13_RT_OFFSET	29973
+#define QM_REG_PQTX2PF_14_RT_OFFSET	29974
+#define QM_REG_PQTX2PF_15_RT_OFFSET	29975
+#define QM_REG_PQTX2PF_16_RT_OFFSET	29976
+#define QM_REG_PQTX2PF_17_RT_OFFSET	29977
+#define QM_REG_PQTX2PF_18_RT_OFFSET	29978
+#define QM_REG_PQTX2PF_19_RT_OFFSET	29979
+#define QM_REG_PQTX2PF_20_RT_OFFSET	29980
+#define QM_REG_PQTX2PF_21_RT_OFFSET	29981
+#define QM_REG_PQTX2PF_22_RT_OFFSET	29982
+#define QM_REG_PQTX2PF_23_RT_OFFSET	29983
+#define QM_REG_PQTX2PF_24_RT_OFFSET	29984
+#define QM_REG_PQTX2PF_25_RT_OFFSET	29985
+#define QM_REG_PQTX2PF_26_RT_OFFSET	29986
+#define QM_REG_PQTX2PF_27_RT_OFFSET	29987
+#define QM_REG_PQTX2PF_28_RT_OFFSET	29988
+#define QM_REG_PQTX2PF_29_RT_OFFSET	29989
+#define QM_REG_PQTX2PF_30_RT_OFFSET	29990
+#define QM_REG_PQTX2PF_31_RT_OFFSET	29991
+#define QM_REG_PQTX2PF_32_RT_OFFSET	29992
+#define QM_REG_PQTX2PF_33_RT_OFFSET	29993
+#define QM_REG_PQTX2PF_34_RT_OFFSET	29994
+#define QM_REG_PQTX2PF_35_RT_OFFSET	29995
+#define QM_REG_PQTX2PF_36_RT_OFFSET	29996
+#define QM_REG_PQTX2PF_37_RT_OFFSET	29997
+#define QM_REG_PQTX2PF_38_RT_OFFSET	29998
+#define QM_REG_PQTX2PF_39_RT_OFFSET	29999
+#define QM_REG_PQTX2PF_40_RT_OFFSET	30000
+#define QM_REG_PQTX2PF_41_RT_OFFSET	30001
+#define QM_REG_PQTX2PF_42_RT_OFFSET	30002
+#define QM_REG_PQTX2PF_43_RT_OFFSET	30003
+#define QM_REG_PQTX2PF_44_RT_OFFSET	30004
+#define QM_REG_PQTX2PF_45_RT_OFFSET	30005
+#define QM_REG_PQTX2PF_46_RT_OFFSET	30006
+#define QM_REG_PQTX2PF_47_RT_OFFSET	30007
+#define QM_REG_PQTX2PF_48_RT_OFFSET	30008
+#define QM_REG_PQTX2PF_49_RT_OFFSET	30009
+#define QM_REG_PQTX2PF_50_RT_OFFSET	30010
+#define QM_REG_PQTX2PF_51_RT_OFFSET	30011
+#define QM_REG_PQTX2PF_52_RT_OFFSET	30012
+#define QM_REG_PQTX2PF_53_RT_OFFSET	30013
+#define QM_REG_PQTX2PF_54_RT_OFFSET	30014
+#define QM_REG_PQTX2PF_55_RT_OFFSET	30015
+#define QM_REG_PQTX2PF_56_RT_OFFSET	30016
+#define QM_REG_PQTX2PF_57_RT_OFFSET	30017
+#define QM_REG_PQTX2PF_58_RT_OFFSET	30018
+#define QM_REG_PQTX2PF_59_RT_OFFSET	30019
+#define QM_REG_PQTX2PF_60_RT_OFFSET	30020
+#define QM_REG_PQTX2PF_61_RT_OFFSET	30021
+#define QM_REG_PQTX2PF_62_RT_OFFSET	30022
+#define QM_REG_PQTX2PF_63_RT_OFFSET	30023
+#define QM_REG_PQOTHER2PF_0_RT_OFFSET	30024
+#define QM_REG_PQOTHER2PF_1_RT_OFFSET	30025
+#define QM_REG_PQOTHER2PF_2_RT_OFFSET	30026
+#define QM_REG_PQOTHER2PF_3_RT_OFFSET	30027
+#define QM_REG_PQOTHER2PF_4_RT_OFFSET	30028
+#define QM_REG_PQOTHER2PF_5_RT_OFFSET	30029
+#define QM_REG_PQOTHER2PF_6_RT_OFFSET	30030
+#define QM_REG_PQOTHER2PF_7_RT_OFFSET	30031
+#define QM_REG_PQOTHER2PF_8_RT_OFFSET	30032
+#define QM_REG_PQOTHER2PF_9_RT_OFFSET	30033
+#define QM_REG_PQOTHER2PF_10_RT_OFFSET	30034
+#define QM_REG_PQOTHER2PF_11_RT_OFFSET	30035
+#define QM_REG_PQOTHER2PF_12_RT_OFFSET	30036
+#define QM_REG_PQOTHER2PF_13_RT_OFFSET	30037
+#define QM_REG_PQOTHER2PF_14_RT_OFFSET	30038
+#define QM_REG_PQOTHER2PF_15_RT_OFFSET	30039
+#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET	30040
+#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET	30041
+#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET	30042
+#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET	30043
+#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET	30044
+#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET	30045
+#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET	30046
+#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET	30047
+#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET	30048
+#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET	30049
+#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET	30050
+#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET	30051
+#define QM_REG_RLGLBLINCVAL_RT_OFFSET	30052
 #define QM_REG_RLGLBLINCVAL_RT_SIZE	256
-#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET	30254
+#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET	30308
 #define QM_REG_RLGLBLUPPERBOUND_RT_SIZE	256
-#define QM_REG_RLGLBLCRD_RT_OFFSET	30510
+#define QM_REG_RLGLBLCRD_RT_OFFSET	30564
 #define QM_REG_RLGLBLCRD_RT_SIZE	256
-#define QM_REG_RLGLBLENABLE_RT_OFFSET	30766
-#define QM_REG_RLPFPERIOD_RT_OFFSET	30767
-#define QM_REG_RLPFPERIODTIMER_RT_OFFSET	30768
-#define QM_REG_RLPFINCVAL_RT_OFFSET	30769
+#define QM_REG_RLGLBLENABLE_RT_OFFSET	30820
+#define QM_REG_RLPFPERIOD_RT_OFFSET	30821
+#define QM_REG_RLPFPERIODTIMER_RT_OFFSET	30822
+#define QM_REG_RLPFINCVAL_RT_OFFSET	30823
 #define QM_REG_RLPFINCVAL_RT_SIZE	16
-#define QM_REG_RLPFUPPERBOUND_RT_OFFSET	30785
+#define QM_REG_RLPFUPPERBOUND_RT_OFFSET	30839
 #define QM_REG_RLPFUPPERBOUND_RT_SIZE	16
-#define QM_REG_RLPFCRD_RT_OFFSET	30801
+#define QM_REG_RLPFCRD_RT_OFFSET	30855
 #define QM_REG_RLPFCRD_RT_SIZE	16
-#define QM_REG_RLPFENABLE_RT_OFFSET	30817
-#define QM_REG_RLPFVOQENABLE_RT_OFFSET	30818
-#define QM_REG_WFQPFWEIGHT_RT_OFFSET	30819
+#define QM_REG_RLPFENABLE_RT_OFFSET	30871
+#define QM_REG_RLPFVOQENABLE_RT_OFFSET	30872
+#define QM_REG_WFQPFWEIGHT_RT_OFFSET	30873
 #define QM_REG_WFQPFWEIGHT_RT_SIZE	16
-#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET	30835
+#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET	30889
 #define QM_REG_WFQPFUPPERBOUND_RT_SIZE	16
-#define QM_REG_WFQPFCRD_RT_OFFSET	30851
-#define QM_REG_WFQPFCRD_RT_SIZE	160
-#define QM_REG_WFQPFENABLE_RT_OFFSET	31011
-#define QM_REG_WFQVPENABLE_RT_OFFSET	31012
-#define QM_REG_BASEADDRTXPQ_RT_OFFSET	31013
+#define QM_REG_WFQPFCRD_RT_OFFSET	30905
+#define QM_REG_WFQPFCRD_RT_SIZE	256
+#define QM_REG_WFQPFENABLE_RT_OFFSET	31161
+#define QM_REG_WFQVPENABLE_RT_OFFSET	31162
+#define QM_REG_BASEADDRTXPQ_RT_OFFSET	31163
 #define QM_REG_BASEADDRTXPQ_RT_SIZE	512
-#define QM_REG_TXPQMAP_RT_OFFSET	31525
+#define QM_REG_TXPQMAP_RT_OFFSET	31675
 #define QM_REG_TXPQMAP_RT_SIZE	512
-#define QM_REG_WFQVPWEIGHT_RT_OFFSET	32037
+#define QM_REG_WFQVPWEIGHT_RT_OFFSET	32187
 #define QM_REG_WFQVPWEIGHT_RT_SIZE	512
-#define QM_REG_WFQVPCRD_RT_OFFSET	32549
+#define QM_REG_WFQVPCRD_RT_OFFSET	32699
 #define QM_REG_WFQVPCRD_RT_SIZE	512
-#define QM_REG_WFQVPMAP_RT_OFFSET	33061
+#define QM_REG_WFQVPMAP_RT_OFFSET	33211
 #define QM_REG_WFQVPMAP_RT_SIZE	512
-#define QM_REG_WFQPFCRD_MSB_RT_OFFSET	33573
-#define QM_REG_WFQPFCRD_MSB_RT_SIZE	160
-#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET	33733
-#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET	33734
-#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET	33735
-#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET	33736
-#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET	33737
-#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET	33738
-#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET	33739
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET	33740
+#define QM_REG_WFQPFCRD_MSB_RT_OFFSET	33723
+#define QM_REG_WFQPFCRD_MSB_RT_SIZE	320
+#define QM_REG_VOQCRDLINE_RT_OFFSET	34043
+#define QM_REG_VOQCRDLINE_RT_SIZE	36
+#define QM_REG_VOQINITCRDLINE_RT_OFFSET	34079
+#define QM_REG_VOQINITCRDLINE_RT_SIZE	36
+#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET	34115
+#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET	34116
+#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET	34117
+#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET	34118
+#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET	34119
+#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET	34120
+#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET	34121
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET	34122
 #define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE	4
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET	33744
+#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET	34126
 #define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE	4
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET	33748
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET	34130
 #define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE	4
-#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET	33752
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET	33753
+#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET	34134
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET	34135
 #define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE	32
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET	33785
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET	34167
 #define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE	16
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET	33801
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET	34183
 #define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE	16
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET	33817
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET	34199
 #define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE	16
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET	33833
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET	34215
 #define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE	16
-#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET	33849
-#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET	33850
-#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET	33851
-#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET	33852
-#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET	33853
-#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET	33854
-#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET	33855
-#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET	33856
-#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET	33857
-#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET	33858
-#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET	33859
-#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET	33860
-#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET	33861
-#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET	33862
-#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET	33863
-#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET	33864
-#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET	33865
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET	33866
-#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET	33867
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET	33868
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET	33869
-#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET	33870
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET	33871
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET	33872
-#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET	33873
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET	33874
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET	33875
-#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET	33876
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET	33877
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET	33878
-#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET	33879
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET	33880
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET	33881
-#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET	33882
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET	33883
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET	33884
-#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET	33885
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET	33886
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET	33887
-#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET	33888
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET	33889
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET	33890
-#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET	33891
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET	33892
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET	33893
-#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET	33894
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET	33895
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET	33896
-#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET	33897
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET	33898
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET	33899
-#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET	33900
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET	33901
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET	33902
-#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET	33903
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET	33904
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET	33905
-#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET	33906
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET	33907
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET	33908
-#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET	33909
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET	33910
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET	33911
-#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET	33912
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET	33913
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET	33914
-#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET	33915
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET	33916
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET	33917
-#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET	33918
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET	33919
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET	33920
-#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET	33921
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET	33922
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET	33923
-#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET	33924
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET	33925
-#define XCM_REG_CON_PHY_Q3_RT_OFFSET	33926
-
-#define RUNTIME_ARRAY_SIZE 33927
+#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET	34231
+#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET	34232
+#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET	34233
+#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET	34234
+#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET	34235
+#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET	34236
+#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET	34237
+#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET	34238
+#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET	34239
+#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET	34240
+#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET	34241
+#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET	34242
+#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET	34243
+#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET	34244
+#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET	34245
+#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET	34246
+#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET	34247
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET	34248
+#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET	34249
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET	34250
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET	34251
+#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET	34252
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET	34253
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET	34254
+#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET	34255
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET	34256
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET	34257
+#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET	34258
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET	34259
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET	34260
+#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET	34261
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET	34262
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET	34263
+#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET	34264
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET	34265
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET	34266
+#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET	34267
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET	34268
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET	34269
+#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET	34270
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET	34271
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET	34272
+#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET	34273
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET	34274
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET	34275
+#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET	34276
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET	34277
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET	34278
+#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET	34279
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET	34280
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET	34281
+#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET	34282
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET	34283
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET	34284
+#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET	34285
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET	34286
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET	34287
+#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET	34288
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET	34289
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET	34290
+#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET	34291
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET	34292
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET	34293
+#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET	34294
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET	34295
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET	34296
+#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET	34297
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET	34298
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET	34299
+#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET	34300
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET	34301
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET	34302
+#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET	34303
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET	34304
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET	34305
+#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET	34306
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET	34307
+#define XCM_REG_CON_PHY_Q3_RT_OFFSET	34308
+
+#define RUNTIME_ARRAY_SIZE 34309
 
 /* The eth storm context for the Tstorm */
 struct tstorm_eth_conn_st_ctx {
@@ -4307,7 +4610,7 @@ struct xstorm_eth_conn_ag_ctx {
 #define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_SHIFT		6
 	u8 edpm_event_id;
 	__le16 physical_q0;
-	__le16 quota;
+	__le16 ereserved1;
 	__le16 edpm_num_bds;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_prod;
@@ -4340,7 +4643,7 @@ struct xstorm_eth_conn_ag_ctx {
 	u8 byte13;
 	u8 byte14;
 	u8 byte15;
-	u8 byte16;
+	u8 ereserved;
 	__le16 word11;
 	__le32 reg10;
 	__le32 reg11;
@@ -4627,6 +4930,7 @@ enum eth_error_code {
 	ETH_FILTERS_PAIR_ADD_FAIL_ZERO_MAC,
 	ETH_FILTERS_VNI_ADD_FAIL_FULL,
 	ETH_FILTERS_VNI_ADD_FAIL_DUP,
+	ETH_FILTERS_GFT_UPDATE_FAIL,
 	MAX_ETH_ERROR_CODE
 };
 
@@ -4879,6 +5183,39 @@ enum gft_logic_filter_type {
 	MAX_GFT_LOGIC_FILTER_TYPE
 };
 
+struct rx_add_openflow_filter_data {
+	__le16 action_icid;
+	u8 priority;
+	u8 reserved0;
+	__le32 tenant_id;
+	__le16 dst_mac_hi;
+	__le16 dst_mac_mid;
+	__le16 dst_mac_lo;
+	__le16 src_mac_hi;
+	__le16 src_mac_mid;
+	__le16 src_mac_lo;
+	__le16 vlan_id;
+	__le16 l2_eth_type;
+	u8 ipv4_dscp;
+	u8 ipv4_frag_type;
+	u8 ipv4_over_ip;
+	u8 tenant_id_exists;
+	__le32 ipv4_dst_addr;
+	__le32 ipv4_src_addr;
+	__le16 l4_dst_port;
+	__le16 l4_src_port;
+};
+
+struct rx_create_gft_action_data {
+	u8 vport_id;
+	u8 reserved[7];
+};
+
+struct rx_create_openflow_action_data {
+	u8 vport_id;
+	u8 reserved[7];
+};
+
 /* Ramrod data for rx queue start ramrod */
 struct rx_queue_start_ramrod_data {
 	__le16 rx_queue_id;
@@ -4956,7 +5293,7 @@ struct rx_update_gft_filter_data {
 	u8 vport_id;
 	u8 filter_type;
 	u8 filter_action;
-	u8 reserved;
+	u8 assert_on_error;
 };
 
 /* Ramrod data for rx queue start ramrod */
@@ -5102,203 +5439,6 @@ struct vport_update_ramrod_data {
 	struct eth_vport_rss_config rss_config;
 };
 
-struct gft_cam_line {
-	__le32 camline;
-#define GFT_CAM_LINE_VALID_MASK		0x1
-#define GFT_CAM_LINE_VALID_SHIFT	0
-#define GFT_CAM_LINE_DATA_MASK		0x3FFF
-#define GFT_CAM_LINE_DATA_SHIFT		1
-#define GFT_CAM_LINE_MASK_BITS_MASK	0x3FFF
-#define GFT_CAM_LINE_MASK_BITS_SHIFT	15
-#define GFT_CAM_LINE_RESERVED1_MASK	0x7
-#define GFT_CAM_LINE_RESERVED1_SHIFT	29
-};
-
-struct gft_cam_line_mapped {
-	__le32 camline;
-#define GFT_CAM_LINE_MAPPED_VALID_MASK				0x1
-#define GFT_CAM_LINE_MAPPED_VALID_SHIFT				0
-#define GFT_CAM_LINE_MAPPED_IP_VERSION_MASK			0x1
-#define GFT_CAM_LINE_MAPPED_IP_VERSION_SHIFT			1
-#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_MASK		0x1
-#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_SHIFT		2
-#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK		0xF
-#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_SHIFT		3
-#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_MASK			0xF
-#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_SHIFT			7
-#define GFT_CAM_LINE_MAPPED_PF_ID_MASK				0xF
-#define GFT_CAM_LINE_MAPPED_PF_ID_SHIFT				11
-#define GFT_CAM_LINE_MAPPED_IP_VERSION_MASK_MASK		0x1
-#define GFT_CAM_LINE_MAPPED_IP_VERSION_MASK_SHIFT		15
-#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_MASK_MASK		0x1
-#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_MASK_SHIFT	16
-#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK_MASK	0xF
-#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK_SHIFT	17
-#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_MASK_MASK		0xF
-#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_MASK_SHIFT		21
-#define GFT_CAM_LINE_MAPPED_PF_ID_MASK_MASK			0xF
-#define GFT_CAM_LINE_MAPPED_PF_ID_MASK_SHIFT			25
-#define GFT_CAM_LINE_MAPPED_RESERVED1_MASK			0x7
-#define GFT_CAM_LINE_MAPPED_RESERVED1_SHIFT			29
-};
-
-union gft_cam_line_union {
-	struct gft_cam_line cam_line;
-	struct gft_cam_line_mapped cam_line_mapped;
-};
-
-enum gft_profile_ip_version {
-	GFT_PROFILE_IPV4 = 0,
-	GFT_PROFILE_IPV6 = 1,
-	MAX_GFT_PROFILE_IP_VERSION
-};
-
-enum gft_profile_upper_protocol_type {
-	GFT_PROFILE_ROCE_PROTOCOL = 0,
-	GFT_PROFILE_RROCE_PROTOCOL = 1,
-	GFT_PROFILE_FCOE_PROTOCOL = 2,
-	GFT_PROFILE_ICMP_PROTOCOL = 3,
-	GFT_PROFILE_ARP_PROTOCOL = 4,
-	GFT_PROFILE_USER_TCP_SRC_PORT_1_INNER = 5,
-	GFT_PROFILE_USER_TCP_DST_PORT_1_INNER = 6,
-	GFT_PROFILE_TCP_PROTOCOL = 7,
-	GFT_PROFILE_USER_UDP_DST_PORT_1_INNER = 8,
-	GFT_PROFILE_USER_UDP_DST_PORT_2_OUTER = 9,
-	GFT_PROFILE_UDP_PROTOCOL = 10,
-	GFT_PROFILE_USER_IP_1_INNER = 11,
-	GFT_PROFILE_USER_IP_2_OUTER = 12,
-	GFT_PROFILE_USER_ETH_1_INNER = 13,
-	GFT_PROFILE_USER_ETH_2_OUTER = 14,
-	GFT_PROFILE_RAW = 15,
-	MAX_GFT_PROFILE_UPPER_PROTOCOL_TYPE
-};
-
-struct gft_ram_line {
-	__le32 low32bits;
-#define GFT_RAM_LINE_VLAN_SELECT_MASK			0x3
-#define GFT_RAM_LINE_VLAN_SELECT_SHIFT			0
-#define GFT_RAM_LINE_TUNNEL_ENTROPHY_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_ENTROPHY_SHIFT		2
-#define GFT_RAM_LINE_TUNNEL_TTL_EQUAL_ONE_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_TTL_EQUAL_ONE_SHIFT		3
-#define GFT_RAM_LINE_TUNNEL_TTL_MASK			0x1
-#define GFT_RAM_LINE_TUNNEL_TTL_SHIFT			4
-#define GFT_RAM_LINE_TUNNEL_ETHERTYPE_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_ETHERTYPE_SHIFT		5
-#define GFT_RAM_LINE_TUNNEL_DST_PORT_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_DST_PORT_SHIFT		6
-#define GFT_RAM_LINE_TUNNEL_SRC_PORT_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_SRC_PORT_SHIFT		7
-#define GFT_RAM_LINE_TUNNEL_DSCP_MASK			0x1
-#define GFT_RAM_LINE_TUNNEL_DSCP_SHIFT			8
-#define GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL_MASK	0x1
-#define GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL_SHIFT	9
-#define GFT_RAM_LINE_TUNNEL_DST_IP_MASK			0x1
-#define GFT_RAM_LINE_TUNNEL_DST_IP_SHIFT		10
-#define GFT_RAM_LINE_TUNNEL_SRC_IP_MASK			0x1
-#define GFT_RAM_LINE_TUNNEL_SRC_IP_SHIFT		11
-#define GFT_RAM_LINE_TUNNEL_PRIORITY_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_PRIORITY_SHIFT		12
-#define GFT_RAM_LINE_TUNNEL_PROVIDER_VLAN_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_PROVIDER_VLAN_SHIFT		13
-#define GFT_RAM_LINE_TUNNEL_VLAN_MASK			0x1
-#define GFT_RAM_LINE_TUNNEL_VLAN_SHIFT			14
-#define GFT_RAM_LINE_TUNNEL_DST_MAC_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_DST_MAC_SHIFT		15
-#define GFT_RAM_LINE_TUNNEL_SRC_MAC_MASK		0x1
-#define GFT_RAM_LINE_TUNNEL_SRC_MAC_SHIFT		16
-#define GFT_RAM_LINE_TTL_EQUAL_ONE_MASK			0x1
-#define GFT_RAM_LINE_TTL_EQUAL_ONE_SHIFT		17
-#define GFT_RAM_LINE_TTL_MASK				0x1
-#define GFT_RAM_LINE_TTL_SHIFT				18
-#define GFT_RAM_LINE_ETHERTYPE_MASK			0x1
-#define GFT_RAM_LINE_ETHERTYPE_SHIFT			19
-#define GFT_RAM_LINE_RESERVED0_MASK			0x1
-#define GFT_RAM_LINE_RESERVED0_SHIFT			20
-#define GFT_RAM_LINE_TCP_FLAG_FIN_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_FIN_SHIFT			21
-#define GFT_RAM_LINE_TCP_FLAG_SYN_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_SYN_SHIFT			22
-#define GFT_RAM_LINE_TCP_FLAG_RST_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_RST_SHIFT			23
-#define GFT_RAM_LINE_TCP_FLAG_PSH_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_PSH_SHIFT			24
-#define GFT_RAM_LINE_TCP_FLAG_ACK_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_ACK_SHIFT			25
-#define GFT_RAM_LINE_TCP_FLAG_URG_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_URG_SHIFT			26
-#define GFT_RAM_LINE_TCP_FLAG_ECE_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_ECE_SHIFT			27
-#define GFT_RAM_LINE_TCP_FLAG_CWR_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_CWR_SHIFT			28
-#define GFT_RAM_LINE_TCP_FLAG_NS_MASK			0x1
-#define GFT_RAM_LINE_TCP_FLAG_NS_SHIFT			29
-#define GFT_RAM_LINE_DST_PORT_MASK			0x1
-#define GFT_RAM_LINE_DST_PORT_SHIFT			30
-#define GFT_RAM_LINE_SRC_PORT_MASK			0x1
-#define GFT_RAM_LINE_SRC_PORT_SHIFT			31
-	__le32 high32bits;
-#define GFT_RAM_LINE_DSCP_MASK				0x1
-#define GFT_RAM_LINE_DSCP_SHIFT				0
-#define GFT_RAM_LINE_OVER_IP_PROTOCOL_MASK		0x1
-#define GFT_RAM_LINE_OVER_IP_PROTOCOL_SHIFT		1
-#define GFT_RAM_LINE_DST_IP_MASK			0x1
-#define GFT_RAM_LINE_DST_IP_SHIFT			2
-#define GFT_RAM_LINE_SRC_IP_MASK			0x1
-#define GFT_RAM_LINE_SRC_IP_SHIFT			3
-#define GFT_RAM_LINE_PRIORITY_MASK			0x1
-#define GFT_RAM_LINE_PRIORITY_SHIFT			4
-#define GFT_RAM_LINE_PROVIDER_VLAN_MASK			0x1
-#define GFT_RAM_LINE_PROVIDER_VLAN_SHIFT		5
-#define GFT_RAM_LINE_VLAN_MASK				0x1
-#define GFT_RAM_LINE_VLAN_SHIFT				6
-#define GFT_RAM_LINE_DST_MAC_MASK			0x1
-#define GFT_RAM_LINE_DST_MAC_SHIFT			7
-#define GFT_RAM_LINE_SRC_MAC_MASK			0x1
-#define GFT_RAM_LINE_SRC_MAC_SHIFT			8
-#define GFT_RAM_LINE_TENANT_ID_MASK			0x1
-#define GFT_RAM_LINE_TENANT_ID_SHIFT			9
-#define GFT_RAM_LINE_RESERVED1_MASK			0x3FFFFF
-#define GFT_RAM_LINE_RESERVED1_SHIFT			10
-};
-
-struct mstorm_eth_conn_ag_ctx {
-	u8 byte0;
-	u8 byte1;
-	u8 flags0;
-#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0
-#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT         1
-#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK	0x3
-#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT          2
-#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK	0x3
-#define MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT          4
-#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK	0x3
-#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT          6
-	u8 flags1;
-#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT        0
-#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT        1
-#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT        2
-#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT      3
-#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT      4
-#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT      5
-#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT      6
-#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT      7
-	__le16 word0;
-	__le16 word1;
-	__le32 reg0;
-	__le32 reg1;
-};
-
 struct xstorm_eth_conn_agctxdq_ext_ldpart {
 	u8 reserved0;
 	u8 eth_state;
@@ -5511,7 +5651,7 @@ struct xstorm_eth_conn_agctxdq_ext_ldpart {
 #define XSTORMETHCONNAGCTXDQEXTLDPART_TPH_ENABLE_SHIFT             6
 	u8 edpm_event_id;
 	__le16 physical_q0;
-	__le16 quota;
+	__le16 ereserved1;
 	__le16 edpm_num_bds;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_prod;
@@ -5528,6 +5668,43 @@ struct xstorm_eth_conn_agctxdq_ext_ldpart {
 	__le32 reg4;
 };
 
+struct mstorm_eth_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0
+#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT         1
+#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK	0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT          2
+#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT          4
+#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT          6
+	u8 flags1;
+#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT        0
+#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT        1
+#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT        2
+#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT      3
+#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT      4
+#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT      5
+#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT      6
+#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT      7
+	__le16 word0;
+	__le16 word1;
+	__le32 reg0;
+	__le32 reg1;
+};
+
 struct xstorm_eth_hw_conn_ag_ctx {
 	u8 reserved0;
 	u8 eth_state;
@@ -5740,7 +5917,7 @@ struct xstorm_eth_hw_conn_ag_ctx {
 #define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT             6
 	u8 edpm_event_id;
 	__le16 physical_q0;
-	__le16 quota;
+	__le16 ereserved1;
 	__le16 edpm_num_bds;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_prod;
@@ -5748,32 +5925,226 @@ struct xstorm_eth_hw_conn_ag_ctx {
 	__le16 conn_dpi;
 };
 
-struct mstorm_rdma_task_st_ctx {
-	struct regpair temp[4];
-};
-
-struct rdma_close_func_ramrod_data {
-	u8 cnq_start_offset;
-	u8 num_cnqs;
-	u8 vf_id;
-	u8 vf_valid;
-	u8 reserved[4];
-};
-
-struct rdma_cnq_params {
-	__le16 sb_num;
-	u8 sb_index;
-	u8 num_pbl_pages;
-	__le32 reserved;
-	struct regpair pbl_base_addr;
-	__le16 queue_zone_num;
-	u8 reserved1[6];
+struct gft_cam_line {
+	__le32 camline;
+#define GFT_CAM_LINE_VALID_MASK		0x1
+#define GFT_CAM_LINE_VALID_SHIFT	0
+#define GFT_CAM_LINE_DATA_MASK		0x3FFF
+#define GFT_CAM_LINE_DATA_SHIFT		1
+#define GFT_CAM_LINE_MASK_BITS_MASK	0x3FFF
+#define GFT_CAM_LINE_MASK_BITS_SHIFT	15
+#define GFT_CAM_LINE_RESERVED1_MASK	0x7
+#define GFT_CAM_LINE_RESERVED1_SHIFT	29
 };
 
-struct rdma_create_cq_ramrod_data {
-	struct regpair cq_handle;
-	struct regpair pbl_addr;
-	__le32 max_cqes;
+struct gft_cam_line_mapped {
+	__le32 camline;
+#define GFT_CAM_LINE_MAPPED_VALID_MASK				0x1
+#define GFT_CAM_LINE_MAPPED_VALID_SHIFT				0
+#define GFT_CAM_LINE_MAPPED_IP_VERSION_MASK			0x1
+#define GFT_CAM_LINE_MAPPED_IP_VERSION_SHIFT			1
+#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_MASK		0x1
+#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_SHIFT		2
+#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK		0xF
+#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_SHIFT		3
+#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_MASK			0xF
+#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_SHIFT			7
+#define GFT_CAM_LINE_MAPPED_PF_ID_MASK				0xF
+#define GFT_CAM_LINE_MAPPED_PF_ID_SHIFT				11
+#define GFT_CAM_LINE_MAPPED_IP_VERSION_MASK_MASK		0x1
+#define GFT_CAM_LINE_MAPPED_IP_VERSION_MASK_SHIFT		15
+#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_MASK_MASK		0x1
+#define GFT_CAM_LINE_MAPPED_TUNNEL_IP_VERSION_MASK_SHIFT	16
+#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK_MASK	0xF
+#define GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK_SHIFT	17
+#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_MASK_MASK		0xF
+#define GFT_CAM_LINE_MAPPED_TUNNEL_TYPE_MASK_SHIFT		21
+#define GFT_CAM_LINE_MAPPED_PF_ID_MASK_MASK			0xF
+#define GFT_CAM_LINE_MAPPED_PF_ID_MASK_SHIFT			25
+#define GFT_CAM_LINE_MAPPED_RESERVED1_MASK			0x7
+#define GFT_CAM_LINE_MAPPED_RESERVED1_SHIFT			29
+};
+
+union gft_cam_line_union {
+	struct gft_cam_line cam_line;
+	struct gft_cam_line_mapped cam_line_mapped;
+};
+
+enum gft_profile_ip_version {
+	GFT_PROFILE_IPV4 = 0,
+	GFT_PROFILE_IPV6 = 1,
+	MAX_GFT_PROFILE_IP_VERSION
+};
+
+struct gft_profile_key {
+	__le16 profile_key;
+#define GFT_PROFILE_KEY_IP_VERSION_MASK           0x1
+#define GFT_PROFILE_KEY_IP_VERSION_SHIFT          0
+#define GFT_PROFILE_KEY_TUNNEL_IP_VERSION_MASK    0x1
+#define GFT_PROFILE_KEY_TUNNEL_IP_VERSION_SHIFT   1
+#define GFT_PROFILE_KEY_UPPER_PROTOCOL_TYPE_MASK  0xF
+#define GFT_PROFILE_KEY_UPPER_PROTOCOL_TYPE_SHIFT 2
+#define GFT_PROFILE_KEY_TUNNEL_TYPE_MASK          0xF
+#define GFT_PROFILE_KEY_TUNNEL_TYPE_SHIFT         6
+#define GFT_PROFILE_KEY_PF_ID_MASK                0xF
+#define GFT_PROFILE_KEY_PF_ID_SHIFT               10
+#define GFT_PROFILE_KEY_RESERVED0_MASK            0x3
+#define GFT_PROFILE_KEY_RESERVED0_SHIFT           14
+};
+
+enum gft_profile_tunnel_type {
+	GFT_PROFILE_NO_TUNNEL = 0,
+	GFT_PROFILE_VXLAN_TUNNEL = 1,
+	GFT_PROFILE_GRE_MAC_OR_NVGRE_TUNNEL = 2,
+	GFT_PROFILE_GRE_IP_TUNNEL = 3,
+	GFT_PROFILE_GENEVE_MAC_TUNNEL = 4,
+	GFT_PROFILE_GENEVE_IP_TUNNEL = 5,
+	MAX_GFT_PROFILE_TUNNEL_TYPE
+};
+
+enum gft_profile_upper_protocol_type {
+	GFT_PROFILE_ROCE_PROTOCOL = 0,
+	GFT_PROFILE_RROCE_PROTOCOL = 1,
+	GFT_PROFILE_FCOE_PROTOCOL = 2,
+	GFT_PROFILE_ICMP_PROTOCOL = 3,
+	GFT_PROFILE_ARP_PROTOCOL = 4,
+	GFT_PROFILE_USER_TCP_SRC_PORT_1_INNER = 5,
+	GFT_PROFILE_USER_TCP_DST_PORT_1_INNER = 6,
+	GFT_PROFILE_TCP_PROTOCOL = 7,
+	GFT_PROFILE_USER_UDP_DST_PORT_1_INNER = 8,
+	GFT_PROFILE_USER_UDP_DST_PORT_2_OUTER = 9,
+	GFT_PROFILE_UDP_PROTOCOL = 10,
+	GFT_PROFILE_USER_IP_1_INNER = 11,
+	GFT_PROFILE_USER_IP_2_OUTER = 12,
+	GFT_PROFILE_USER_ETH_1_INNER = 13,
+	GFT_PROFILE_USER_ETH_2_OUTER = 14,
+	GFT_PROFILE_RAW = 15,
+	MAX_GFT_PROFILE_UPPER_PROTOCOL_TYPE
+};
+
+struct gft_ram_line {
+	__le32 lo;
+#define GFT_RAM_LINE_VLAN_SELECT_MASK			0x3
+#define GFT_RAM_LINE_VLAN_SELECT_SHIFT			0
+#define GFT_RAM_LINE_TUNNEL_ENTROPHY_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_ENTROPHY_SHIFT		2
+#define GFT_RAM_LINE_TUNNEL_TTL_EQUAL_ONE_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_TTL_EQUAL_ONE_SHIFT		3
+#define GFT_RAM_LINE_TUNNEL_TTL_MASK			0x1
+#define GFT_RAM_LINE_TUNNEL_TTL_SHIFT			4
+#define GFT_RAM_LINE_TUNNEL_ETHERTYPE_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_ETHERTYPE_SHIFT		5
+#define GFT_RAM_LINE_TUNNEL_DST_PORT_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_DST_PORT_SHIFT		6
+#define GFT_RAM_LINE_TUNNEL_SRC_PORT_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_SRC_PORT_SHIFT		7
+#define GFT_RAM_LINE_TUNNEL_DSCP_MASK			0x1
+#define GFT_RAM_LINE_TUNNEL_DSCP_SHIFT			8
+#define GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL_MASK	0x1
+#define GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL_SHIFT	9
+#define GFT_RAM_LINE_TUNNEL_DST_IP_MASK			0x1
+#define GFT_RAM_LINE_TUNNEL_DST_IP_SHIFT		10
+#define GFT_RAM_LINE_TUNNEL_SRC_IP_MASK			0x1
+#define GFT_RAM_LINE_TUNNEL_SRC_IP_SHIFT		11
+#define GFT_RAM_LINE_TUNNEL_PRIORITY_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_PRIORITY_SHIFT		12
+#define GFT_RAM_LINE_TUNNEL_PROVIDER_VLAN_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_PROVIDER_VLAN_SHIFT		13
+#define GFT_RAM_LINE_TUNNEL_VLAN_MASK			0x1
+#define GFT_RAM_LINE_TUNNEL_VLAN_SHIFT			14
+#define GFT_RAM_LINE_TUNNEL_DST_MAC_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_DST_MAC_SHIFT		15
+#define GFT_RAM_LINE_TUNNEL_SRC_MAC_MASK		0x1
+#define GFT_RAM_LINE_TUNNEL_SRC_MAC_SHIFT		16
+#define GFT_RAM_LINE_TTL_EQUAL_ONE_MASK			0x1
+#define GFT_RAM_LINE_TTL_EQUAL_ONE_SHIFT		17
+#define GFT_RAM_LINE_TTL_MASK				0x1
+#define GFT_RAM_LINE_TTL_SHIFT				18
+#define GFT_RAM_LINE_ETHERTYPE_MASK			0x1
+#define GFT_RAM_LINE_ETHERTYPE_SHIFT			19
+#define GFT_RAM_LINE_RESERVED0_MASK			0x1
+#define GFT_RAM_LINE_RESERVED0_SHIFT			20
+#define GFT_RAM_LINE_TCP_FLAG_FIN_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_FIN_SHIFT			21
+#define GFT_RAM_LINE_TCP_FLAG_SYN_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_SYN_SHIFT			22
+#define GFT_RAM_LINE_TCP_FLAG_RST_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_RST_SHIFT			23
+#define GFT_RAM_LINE_TCP_FLAG_PSH_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_PSH_SHIFT			24
+#define GFT_RAM_LINE_TCP_FLAG_ACK_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_ACK_SHIFT			25
+#define GFT_RAM_LINE_TCP_FLAG_URG_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_URG_SHIFT			26
+#define GFT_RAM_LINE_TCP_FLAG_ECE_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_ECE_SHIFT			27
+#define GFT_RAM_LINE_TCP_FLAG_CWR_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_CWR_SHIFT			28
+#define GFT_RAM_LINE_TCP_FLAG_NS_MASK			0x1
+#define GFT_RAM_LINE_TCP_FLAG_NS_SHIFT			29
+#define GFT_RAM_LINE_DST_PORT_MASK			0x1
+#define GFT_RAM_LINE_DST_PORT_SHIFT			30
+#define GFT_RAM_LINE_SRC_PORT_MASK			0x1
+#define GFT_RAM_LINE_SRC_PORT_SHIFT			31
+	__le32 hi;
+#define GFT_RAM_LINE_DSCP_MASK				0x1
+#define GFT_RAM_LINE_DSCP_SHIFT				0
+#define GFT_RAM_LINE_OVER_IP_PROTOCOL_MASK		0x1
+#define GFT_RAM_LINE_OVER_IP_PROTOCOL_SHIFT		1
+#define GFT_RAM_LINE_DST_IP_MASK			0x1
+#define GFT_RAM_LINE_DST_IP_SHIFT			2
+#define GFT_RAM_LINE_SRC_IP_MASK			0x1
+#define GFT_RAM_LINE_SRC_IP_SHIFT			3
+#define GFT_RAM_LINE_PRIORITY_MASK			0x1
+#define GFT_RAM_LINE_PRIORITY_SHIFT			4
+#define GFT_RAM_LINE_PROVIDER_VLAN_MASK			0x1
+#define GFT_RAM_LINE_PROVIDER_VLAN_SHIFT		5
+#define GFT_RAM_LINE_VLAN_MASK				0x1
+#define GFT_RAM_LINE_VLAN_SHIFT				6
+#define GFT_RAM_LINE_DST_MAC_MASK			0x1
+#define GFT_RAM_LINE_DST_MAC_SHIFT			7
+#define GFT_RAM_LINE_SRC_MAC_MASK			0x1
+#define GFT_RAM_LINE_SRC_MAC_SHIFT			8
+#define GFT_RAM_LINE_TENANT_ID_MASK			0x1
+#define GFT_RAM_LINE_TENANT_ID_SHIFT			9
+#define GFT_RAM_LINE_RESERVED1_MASK			0x3FFFFF
+#define GFT_RAM_LINE_RESERVED1_SHIFT			10
+};
+
+enum gft_vlan_select {
+	INNER_PROVIDER_VLAN = 0,
+	INNER_VLAN = 1,
+	OUTER_PROVIDER_VLAN = 2,
+	OUTER_VLAN = 3,
+	MAX_GFT_VLAN_SELECT
+};
+
+struct mstorm_rdma_task_st_ctx {
+	struct regpair temp[4];
+};
+
+struct rdma_close_func_ramrod_data {
+	u8 cnq_start_offset;
+	u8 num_cnqs;
+	u8 vf_id;
+	u8 vf_valid;
+	u8 reserved[4];
+};
+
+struct rdma_cnq_params {
+	__le16 sb_num;
+	u8 sb_index;
+	u8 num_pbl_pages;
+	__le32 reserved;
+	struct regpair pbl_base_addr;
+	__le16 queue_zone_num;
+	u8 reserved1[6];
+};
+
+struct rdma_create_cq_ramrod_data {
+	struct regpair cq_handle;
+	struct regpair pbl_addr;
+	__le32 max_cqes;
 	__le16 pbl_num_pages;
 	__le16 dpi;
 	u8 is_two_level_pbl;
@@ -5827,12 +6198,9 @@ struct rdma_init_func_hdr {
 	u8 cnq_start_offset;
 	u8 num_cnqs;
 	u8 cq_ring_mode;
-	u8 cnp_vlan_priority;
-	__le32 cnp_send_timeout;
-	u8 cnp_dscp;
 	u8 vf_id;
 	u8 vf_valid;
-	u8 reserved[5];
+	u8 reserved[3];
 };
 
 struct rdma_init_func_ramrod_data {
@@ -5856,54 +6224,55 @@ enum rdma_ramrod_cmd_id {
 };
 
 struct rdma_register_tid_ramrod_data {
-	__le32 flags;
-#define RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID_MASK             0x3FFFF
-#define RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID_SHIFT            0
-#define RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG_MASK      0x1F
-#define RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG_SHIFT     18
-#define RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL_MASK      0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL_SHIFT     23
-#define RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED_MASK         0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED_SHIFT        24
-#define RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR_MASK             0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR_SHIFT            25
-#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ_MASK        0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ_SHIFT       26
-#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE_MASK       0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE_SHIFT      27
-#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC_MASK      0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC_SHIFT     28
-#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE_MASK        0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE_SHIFT       29
-#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ_MASK         0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ_SHIFT        30
-#define RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND_MASK     0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND_SHIFT    31
+	__le16 flags;
+#define RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG_MASK	0x1F
+#define RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG_SHIFT	0
+#define RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL_SHIFT	5
+#define RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED_SHIFT	6
+#define RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR_SHIFT	7
+#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ_SHIFT	8
+#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE_SHIFT	9
+#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC_SHIFT	10
+#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE_SHIFT	11
+#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ_SHIFT	12
+#define RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND_SHIFT	13
+#define RDMA_REGISTER_TID_RAMROD_DATA_RESERVED_MASK	0x3
+#define RDMA_REGISTER_TID_RAMROD_DATA_RESERVED_SHIFT	14
 	u8 flags1;
-#define RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG_MASK  0x1F
+#define RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG_MASK	0x1F
 #define RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG_SHIFT 0
-#define RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE_MASK           0x7
-#define RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE_SHIFT          5
+#define RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE_MASK	0x7
+#define RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE_SHIFT	5
 	u8 flags2;
-#define RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR_MASK             0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR_SHIFT            0
-#define RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG_MASK    0x1
-#define RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG_SHIFT   1
-#define RDMA_REGISTER_TID_RAMROD_DATA_RESERVED1_MASK          0x3F
-#define RDMA_REGISTER_TID_RAMROD_DATA_RESERVED1_SHIFT         2
+#define RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR_SHIFT	0
+#define RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG_MASK	0x1
+#define RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG_SHIFT	1
+#define RDMA_REGISTER_TID_RAMROD_DATA_RESERVED1_MASK	0x3F
+#define RDMA_REGISTER_TID_RAMROD_DATA_RESERVED1_SHIFT	2
 	u8 key;
 	u8 length_hi;
 	u8 vf_id;
 	u8 vf_valid;
 	__le16 pd;
+	__le16 reserved2;
 	__le32 length_lo;
 	__le32 itid;
-	__le32 reserved2;
+	__le32 reserved3;
 	struct regpair va;
 	struct regpair pbl_base;
 	struct regpair dif_error_addr;
 	struct regpair dif_runt_addr;
-	__le32 reserved3[2];
+	__le32 reserved4[2];
 };
 
 struct rdma_resize_cq_output_params {
@@ -6149,298 +6518,9 @@ enum rdma_tid_type {
 	MAX_RDMA_TID_TYPE
 };
 
-struct mstorm_rdma_conn_ag_ctx {
-	u8 byte0;
-	u8 byte1;
-	u8 flags0;
-#define MSTORM_RDMA_CONN_AG_CTX_BIT0_MASK     0x1
-#define MSTORM_RDMA_CONN_AG_CTX_BIT0_SHIFT    0
-#define MSTORM_RDMA_CONN_AG_CTX_BIT1_MASK     0x1
-#define MSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT    1
-#define MSTORM_RDMA_CONN_AG_CTX_CF0_MASK      0x3
-#define MSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT     2
-#define MSTORM_RDMA_CONN_AG_CTX_CF1_MASK      0x3
-#define MSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT     4
-#define MSTORM_RDMA_CONN_AG_CTX_CF2_MASK      0x3
-#define MSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT     6
-	u8 flags1;
-#define MSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK    0x1
-#define MSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT   0
-#define MSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK    0x1
-#define MSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT   1
-#define MSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK    0x1
-#define MSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT   2
-#define MSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK  0x1
-#define MSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT 3
-#define MSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK  0x1
-#define MSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT 4
-#define MSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK  0x1
-#define MSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT 5
-#define MSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK  0x1
-#define MSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT 6
-#define MSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK  0x1
-#define MSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT 7
-	__le16 word0;
-	__le16 word1;
-	__le32 reg0;
-	__le32 reg1;
-};
-
-struct tstorm_rdma_conn_ag_ctx {
+struct xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 	u8 reserved0;
-	u8 byte1;
-	u8 flags0;
-#define TSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK          0x1
-#define TSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT         0
-#define TSTORM_RDMA_CONN_AG_CTX_BIT1_MASK                  0x1
-#define TSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT                 1
-#define TSTORM_RDMA_CONN_AG_CTX_BIT2_MASK                  0x1
-#define TSTORM_RDMA_CONN_AG_CTX_BIT2_SHIFT                 2
-#define TSTORM_RDMA_CONN_AG_CTX_BIT3_MASK                  0x1
-#define TSTORM_RDMA_CONN_AG_CTX_BIT3_SHIFT                 3
-#define TSTORM_RDMA_CONN_AG_CTX_BIT4_MASK                  0x1
-#define TSTORM_RDMA_CONN_AG_CTX_BIT4_SHIFT                 4
-#define TSTORM_RDMA_CONN_AG_CTX_BIT5_MASK                  0x1
-#define TSTORM_RDMA_CONN_AG_CTX_BIT5_SHIFT                 5
-#define TSTORM_RDMA_CONN_AG_CTX_CF0_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT                  6
-	u8 flags1;
-#define TSTORM_RDMA_CONN_AG_CTX_CF1_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT                  0
-#define TSTORM_RDMA_CONN_AG_CTX_CF2_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT                  2
-#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK     0x3
-#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT    4
-#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK           0x3
-#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT          6
-	u8 flags2;
-#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK       0x3
-#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT      0
-#define TSTORM_RDMA_CONN_AG_CTX_CF6_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF6_SHIFT                  2
-#define TSTORM_RDMA_CONN_AG_CTX_CF7_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF7_SHIFT                  4
-#define TSTORM_RDMA_CONN_AG_CTX_CF8_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF8_SHIFT                  6
-	u8 flags3;
-#define TSTORM_RDMA_CONN_AG_CTX_CF9_MASK                   0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF9_SHIFT                  0
-#define TSTORM_RDMA_CONN_AG_CTX_CF10_MASK                  0x3
-#define TSTORM_RDMA_CONN_AG_CTX_CF10_SHIFT                 2
-#define TSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT                4
-#define TSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT                5
-#define TSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT                6
-#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK  0x1
-#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT 7
-	u8 flags4;
-#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK        0x1
-#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT       0
-#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK    0x1
-#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT   1
-#define TSTORM_RDMA_CONN_AG_CTX_CF6EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT                2
-#define TSTORM_RDMA_CONN_AG_CTX_CF7EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF7EN_SHIFT                3
-#define TSTORM_RDMA_CONN_AG_CTX_CF8EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF8EN_SHIFT                4
-#define TSTORM_RDMA_CONN_AG_CTX_CF9EN_MASK                 0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF9EN_SHIFT                5
-#define TSTORM_RDMA_CONN_AG_CTX_CF10EN_MASK                0x1
-#define TSTORM_RDMA_CONN_AG_CTX_CF10EN_SHIFT               6
-#define TSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT              7
-	u8 flags5;
-#define TSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT              0
-#define TSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT              1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT              2
-#define TSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT              3
-#define TSTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT              4
-#define TSTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT              5
-#define TSTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT              6
-#define TSTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK               0x1
-#define TSTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT              7
-	__le32 reg0;
-	__le32 reg1;
-	__le32 reg2;
-	__le32 reg3;
-	__le32 reg4;
-	__le32 reg5;
-	__le32 reg6;
-	__le32 reg7;
-	__le32 reg8;
-	u8 byte2;
-	u8 byte3;
-	__le16 word0;
-	u8 byte4;
-	u8 byte5;
-	__le16 word1;
-	__le16 word2;
-	__le16 word3;
-	__le32 reg9;
-	__le32 reg10;
-};
-
-struct tstorm_rdma_task_ag_ctx {
-	u8 byte0;
-	u8 byte1;
-	__le16 word0;
-	u8 flags0;
-#define TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_MASK  0xF
-#define TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_SHIFT 0
-#define TSTORM_RDMA_TASK_AG_CTX_BIT0_MASK     0x1
-#define TSTORM_RDMA_TASK_AG_CTX_BIT0_SHIFT    4
-#define TSTORM_RDMA_TASK_AG_CTX_BIT1_MASK     0x1
-#define TSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT    5
-#define TSTORM_RDMA_TASK_AG_CTX_BIT2_MASK     0x1
-#define TSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT    6
-#define TSTORM_RDMA_TASK_AG_CTX_BIT3_MASK     0x1
-#define TSTORM_RDMA_TASK_AG_CTX_BIT3_SHIFT    7
-	u8 flags1;
-#define TSTORM_RDMA_TASK_AG_CTX_BIT4_MASK     0x1
-#define TSTORM_RDMA_TASK_AG_CTX_BIT4_SHIFT    0
-#define TSTORM_RDMA_TASK_AG_CTX_BIT5_MASK     0x1
-#define TSTORM_RDMA_TASK_AG_CTX_BIT5_SHIFT    1
-#define TSTORM_RDMA_TASK_AG_CTX_CF0_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT     2
-#define TSTORM_RDMA_TASK_AG_CTX_CF1_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT     4
-#define TSTORM_RDMA_TASK_AG_CTX_CF2_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF2_SHIFT     6
-	u8 flags2;
-#define TSTORM_RDMA_TASK_AG_CTX_CF3_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF3_SHIFT     0
-#define TSTORM_RDMA_TASK_AG_CTX_CF4_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF4_SHIFT     2
-#define TSTORM_RDMA_TASK_AG_CTX_CF5_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF5_SHIFT     4
-#define TSTORM_RDMA_TASK_AG_CTX_CF6_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF6_SHIFT     6
-	u8 flags3;
-#define TSTORM_RDMA_TASK_AG_CTX_CF7_MASK      0x3
-#define TSTORM_RDMA_TASK_AG_CTX_CF7_SHIFT     0
-#define TSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT   2
-#define TSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT   3
-#define TSTORM_RDMA_TASK_AG_CTX_CF2EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF2EN_SHIFT   4
-#define TSTORM_RDMA_TASK_AG_CTX_CF3EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF3EN_SHIFT   5
-#define TSTORM_RDMA_TASK_AG_CTX_CF4EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF4EN_SHIFT   6
-#define TSTORM_RDMA_TASK_AG_CTX_CF5EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF5EN_SHIFT   7
-	u8 flags4;
-#define TSTORM_RDMA_TASK_AG_CTX_CF6EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF6EN_SHIFT   0
-#define TSTORM_RDMA_TASK_AG_CTX_CF7EN_MASK    0x1
-#define TSTORM_RDMA_TASK_AG_CTX_CF7EN_SHIFT   1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK  0x1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT 2
-#define TSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK  0x1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT 3
-#define TSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK  0x1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT 4
-#define TSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK  0x1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT 5
-#define TSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK  0x1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT 6
-#define TSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK  0x1
-#define TSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT 7
-	u8 byte2;
-	__le16 word1;
-	__le32 reg0;
-	u8 byte3;
-	u8 byte4;
-	__le16 word2;
-	__le16 word3;
-	__le16 word4;
-	__le32 reg1;
-	__le32 reg2;
-};
-
-struct ustorm_rdma_conn_ag_ctx {
-	u8 reserved;
-	u8 byte1;
-	u8 flags0;
-#define USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK     0x1
-#define USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT    0
-#define USTORM_RDMA_CONN_AG_CTX_BIT1_MASK             0x1
-#define USTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT            1
-#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK      0x3
-#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT     2
-#define USTORM_RDMA_CONN_AG_CTX_CF1_MASK              0x3
-#define USTORM_RDMA_CONN_AG_CTX_CF1_SHIFT             4
-#define USTORM_RDMA_CONN_AG_CTX_CF2_MASK              0x3
-#define USTORM_RDMA_CONN_AG_CTX_CF2_SHIFT             6
-	u8 flags1;
-#define USTORM_RDMA_CONN_AG_CTX_CF3_MASK              0x3
-#define USTORM_RDMA_CONN_AG_CTX_CF3_SHIFT             0
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_MASK     0x3
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT    2
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_MASK        0x3
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_SHIFT       4
-#define USTORM_RDMA_CONN_AG_CTX_CF6_MASK              0x3
-#define USTORM_RDMA_CONN_AG_CTX_CF6_SHIFT             6
-	u8 flags2;
-#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK   0x1
-#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT  0
-#define USTORM_RDMA_CONN_AG_CTX_CF1EN_MASK            0x1
-#define USTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT           1
-#define USTORM_RDMA_CONN_AG_CTX_CF2EN_MASK            0x1
-#define USTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT           2
-#define USTORM_RDMA_CONN_AG_CTX_CF3EN_MASK            0x1
-#define USTORM_RDMA_CONN_AG_CTX_CF3EN_SHIFT           3
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK  0x1
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT 4
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_MASK     0x1
-#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT    5
-#define USTORM_RDMA_CONN_AG_CTX_CF6EN_MASK            0x1
-#define USTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT           6
-#define USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_MASK         0x1
-#define USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_SHIFT        7
-	u8 flags3;
-#define USTORM_RDMA_CONN_AG_CTX_CQ_EN_MASK            0x1
-#define USTORM_RDMA_CONN_AG_CTX_CQ_EN_SHIFT           0
-#define USTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT         1
-#define USTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT         2
-#define USTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT         3
-#define USTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT         4
-#define USTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT         5
-#define USTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT         6
-#define USTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK          0x1
-#define USTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT         7
-	u8 byte2;
-	u8 byte3;
-	__le16 conn_dpi;
-	__le16 word1;
-	__le32 cq_cons;
-	__le32 cq_se_prod;
-	__le32 cq_prod;
-	__le32 reg3;
-	__le16 int_timeout;
-	__le16 word3;
-};
-
-struct xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
-	u8 reserved0;
-	u8 state;
+	u8 state;
 	u8 flags0;
 #define XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM0_MASK      0x1
 #define XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM0_SHIFT     0
@@ -6469,8 +6549,8 @@ struct xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 #define XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_SHIFT            3
 #define XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_MASK             0x1
 #define XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_SHIFT            4
-#define XSTORMROCECONNAGCTXDQEXTLDPART_BIT13_MASK             0x1
-#define XSTORMROCECONNAGCTXDQEXTLDPART_BIT13_SHIFT            5
+#define XSTORMROCECONNAGCTXDQEXTLDPART_MSTORM_FLUSH_MASK      0x1
+#define XSTORMROCECONNAGCTXDQEXTLDPART_MSTORM_FLUSH_SHIFT     5
 #define XSTORMROCECONNAGCTXDQEXTLDPART_BIT14_MASK             0x1
 #define XSTORMROCECONNAGCTXDQEXTLDPART_BIT14_SHIFT            6
 #define XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_MASK      0x1
@@ -6647,22 +6727,311 @@ struct xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 #define XSTORMROCECONNAGCTXDQEXTLDPART_CF23_MASK              0x3
 #define XSTORMROCECONNAGCTXDQEXTLDPART_CF23_SHIFT             6
 	u8 byte2;
-	__le16 physical_q0;
+	__le16 physical_q0;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le16 word5;
+	__le16 conn_dpi;
+	u8 byte3;
+	u8 byte4;
+	u8 byte5;
+	u8 byte6;
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 snd_nxt_psn;
+	__le32 reg4;
+};
+
+struct mstorm_rdma_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define MSTORM_RDMA_CONN_AG_CTX_BIT0_MASK     0x1
+#define MSTORM_RDMA_CONN_AG_CTX_BIT0_SHIFT    0
+#define MSTORM_RDMA_CONN_AG_CTX_BIT1_MASK     0x1
+#define MSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT    1
+#define MSTORM_RDMA_CONN_AG_CTX_CF0_MASK      0x3
+#define MSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT     2
+#define MSTORM_RDMA_CONN_AG_CTX_CF1_MASK      0x3
+#define MSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT     4
+#define MSTORM_RDMA_CONN_AG_CTX_CF2_MASK      0x3
+#define MSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT     6
+	u8 flags1;
+#define MSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK    0x1
+#define MSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT   0
+#define MSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK    0x1
+#define MSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT   1
+#define MSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK    0x1
+#define MSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT   2
+#define MSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK  0x1
+#define MSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT 3
+#define MSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK  0x1
+#define MSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT 4
+#define MSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK  0x1
+#define MSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT 5
+#define MSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK  0x1
+#define MSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT 6
+#define MSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK  0x1
+#define MSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT 7
+	__le16 word0;
+	__le16 word1;
+	__le32 reg0;
+	__le32 reg1;
+};
+
+struct tstorm_rdma_conn_ag_ctx {
+	u8 reserved0;
+	u8 byte1;
+	u8 flags0;
+#define TSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK          0x1
+#define TSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT         0
+#define TSTORM_RDMA_CONN_AG_CTX_BIT1_MASK                  0x1
+#define TSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT                 1
+#define TSTORM_RDMA_CONN_AG_CTX_BIT2_MASK                  0x1
+#define TSTORM_RDMA_CONN_AG_CTX_BIT2_SHIFT                 2
+#define TSTORM_RDMA_CONN_AG_CTX_BIT3_MASK                  0x1
+#define TSTORM_RDMA_CONN_AG_CTX_BIT3_SHIFT                 3
+#define TSTORM_RDMA_CONN_AG_CTX_BIT4_MASK                  0x1
+#define TSTORM_RDMA_CONN_AG_CTX_BIT4_SHIFT                 4
+#define TSTORM_RDMA_CONN_AG_CTX_BIT5_MASK                  0x1
+#define TSTORM_RDMA_CONN_AG_CTX_BIT5_SHIFT                 5
+#define TSTORM_RDMA_CONN_AG_CTX_CF0_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT                  6
+	u8 flags1;
+#define TSTORM_RDMA_CONN_AG_CTX_CF1_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT                  0
+#define TSTORM_RDMA_CONN_AG_CTX_CF2_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT                  2
+#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK     0x3
+#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT    4
+#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK           0x3
+#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT          6
+	u8 flags2;
+#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK       0x3
+#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT      0
+#define TSTORM_RDMA_CONN_AG_CTX_CF6_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF6_SHIFT                  2
+#define TSTORM_RDMA_CONN_AG_CTX_CF7_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF7_SHIFT                  4
+#define TSTORM_RDMA_CONN_AG_CTX_CF8_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF8_SHIFT                  6
+	u8 flags3;
+#define TSTORM_RDMA_CONN_AG_CTX_CF9_MASK                   0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF9_SHIFT                  0
+#define TSTORM_RDMA_CONN_AG_CTX_CF10_MASK                  0x3
+#define TSTORM_RDMA_CONN_AG_CTX_CF10_SHIFT                 2
+#define TSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT                4
+#define TSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT                5
+#define TSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT                6
+#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK  0x1
+#define TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT 7
+	u8 flags4;
+#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK        0x1
+#define TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT       0
+#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK    0x1
+#define TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT   1
+#define TSTORM_RDMA_CONN_AG_CTX_CF6EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT                2
+#define TSTORM_RDMA_CONN_AG_CTX_CF7EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF7EN_SHIFT                3
+#define TSTORM_RDMA_CONN_AG_CTX_CF8EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF8EN_SHIFT                4
+#define TSTORM_RDMA_CONN_AG_CTX_CF9EN_MASK                 0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF9EN_SHIFT                5
+#define TSTORM_RDMA_CONN_AG_CTX_CF10EN_MASK                0x1
+#define TSTORM_RDMA_CONN_AG_CTX_CF10EN_SHIFT               6
+#define TSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT              7
+	u8 flags5;
+#define TSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT              0
+#define TSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT              1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT              2
+#define TSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT              3
+#define TSTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT              4
+#define TSTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT              5
+#define TSTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT              6
+#define TSTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK               0x1
+#define TSTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT              7
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 reg3;
+	__le32 reg4;
+	__le32 reg5;
+	__le32 reg6;
+	__le32 reg7;
+	__le32 reg8;
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+	u8 byte4;
+	u8 byte5;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le32 reg9;
+	__le32 reg10;
+};
+
+struct tstorm_rdma_task_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	__le16 word0;
+	u8 flags0;
+#define TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_MASK  0xF
+#define TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_SHIFT 0
+#define TSTORM_RDMA_TASK_AG_CTX_BIT0_MASK     0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT0_SHIFT    4
+#define TSTORM_RDMA_TASK_AG_CTX_BIT1_MASK     0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT    5
+#define TSTORM_RDMA_TASK_AG_CTX_BIT2_MASK     0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT    6
+#define TSTORM_RDMA_TASK_AG_CTX_BIT3_MASK     0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT3_SHIFT    7
+	u8 flags1;
+#define TSTORM_RDMA_TASK_AG_CTX_BIT4_MASK     0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT4_SHIFT    0
+#define TSTORM_RDMA_TASK_AG_CTX_BIT5_MASK     0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT5_SHIFT    1
+#define TSTORM_RDMA_TASK_AG_CTX_CF0_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT     2
+#define TSTORM_RDMA_TASK_AG_CTX_CF1_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT     4
+#define TSTORM_RDMA_TASK_AG_CTX_CF2_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF2_SHIFT     6
+	u8 flags2;
+#define TSTORM_RDMA_TASK_AG_CTX_CF3_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF3_SHIFT     0
+#define TSTORM_RDMA_TASK_AG_CTX_CF4_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF4_SHIFT     2
+#define TSTORM_RDMA_TASK_AG_CTX_CF5_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF5_SHIFT     4
+#define TSTORM_RDMA_TASK_AG_CTX_CF6_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF6_SHIFT     6
+	u8 flags3;
+#define TSTORM_RDMA_TASK_AG_CTX_CF7_MASK      0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF7_SHIFT     0
+#define TSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT   2
+#define TSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT   3
+#define TSTORM_RDMA_TASK_AG_CTX_CF2EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF2EN_SHIFT   4
+#define TSTORM_RDMA_TASK_AG_CTX_CF3EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF3EN_SHIFT   5
+#define TSTORM_RDMA_TASK_AG_CTX_CF4EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF4EN_SHIFT   6
+#define TSTORM_RDMA_TASK_AG_CTX_CF5EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF5EN_SHIFT   7
+	u8 flags4;
+#define TSTORM_RDMA_TASK_AG_CTX_CF6EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF6EN_SHIFT   0
+#define TSTORM_RDMA_TASK_AG_CTX_CF7EN_MASK    0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF7EN_SHIFT   1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK  0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT 2
+#define TSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK  0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT 3
+#define TSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK  0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT 4
+#define TSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK  0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT 5
+#define TSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK  0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT 6
+#define TSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK  0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT 7
+	u8 byte2;
+	__le16 word1;
+	__le32 reg0;
+	u8 byte3;
+	u8 byte4;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le32 reg1;
+	__le32 reg2;
+};
+
+struct ustorm_rdma_conn_ag_ctx {
+	u8 reserved;
+	u8 byte1;
+	u8 flags0;
+#define USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK     0x1
+#define USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT    0
+#define USTORM_RDMA_CONN_AG_CTX_BIT1_MASK             0x1
+#define USTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT            1
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK      0x3
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT     2
+#define USTORM_RDMA_CONN_AG_CTX_CF1_MASK              0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF1_SHIFT             4
+#define USTORM_RDMA_CONN_AG_CTX_CF2_MASK              0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF2_SHIFT             6
+	u8 flags1;
+#define USTORM_RDMA_CONN_AG_CTX_CF3_MASK              0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF3_SHIFT             0
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_MASK     0x3
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT    2
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_MASK        0x3
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_SHIFT       4
+#define USTORM_RDMA_CONN_AG_CTX_CF6_MASK              0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF6_SHIFT             6
+	u8 flags2;
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK   0x1
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT  0
+#define USTORM_RDMA_CONN_AG_CTX_CF1EN_MASK            0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT           1
+#define USTORM_RDMA_CONN_AG_CTX_CF2EN_MASK            0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT           2
+#define USTORM_RDMA_CONN_AG_CTX_CF3EN_MASK            0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF3EN_SHIFT           3
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK  0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT 4
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_MASK     0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT    5
+#define USTORM_RDMA_CONN_AG_CTX_CF6EN_MASK            0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT           6
+#define USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_MASK         0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_SHIFT        7
+	u8 flags3;
+#define USTORM_RDMA_CONN_AG_CTX_CQ_EN_MASK            0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_EN_SHIFT           0
+#define USTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT         1
+#define USTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT         2
+#define USTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT         3
+#define USTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT         4
+#define USTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT         5
+#define USTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT         6
+#define USTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK          0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT         7
+	u8 byte2;
+	u8 byte3;
+	__le16 conn_dpi;
 	__le16 word1;
-	__le16 word2;
+	__le32 cq_cons;
+	__le32 cq_se_prod;
+	__le32 cq_prod;
+	__le32 reg3;
+	__le16 int_timeout;
 	__le16 word3;
-	__le16 word4;
-	__le16 word5;
-	__le16 conn_dpi;
-	u8 byte3;
-	u8 byte4;
-	u8 byte5;
-	u8 byte6;
-	__le32 reg0;
-	__le32 reg1;
-	__le32 reg2;
-	__le32 snd_nxt_psn;
-	__le32 reg4;
 };
 
 struct xstorm_rdma_conn_ag_ctx {
@@ -6696,8 +7065,8 @@ struct xstorm_rdma_conn_ag_ctx {
 #define XSTORM_RDMA_CONN_AG_CTX_BIT11_SHIFT            3
 #define XSTORM_RDMA_CONN_AG_CTX_BIT12_MASK             0x1
 #define XSTORM_RDMA_CONN_AG_CTX_BIT12_SHIFT            4
-#define XSTORM_RDMA_CONN_AG_CTX_BIT13_MASK             0x1
-#define XSTORM_RDMA_CONN_AG_CTX_BIT13_SHIFT            5
+#define XSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_MASK      0x1
+#define XSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_SHIFT     5
 #define XSTORM_RDMA_CONN_AG_CTX_BIT14_MASK             0x1
 #define XSTORM_RDMA_CONN_AG_CTX_BIT14_SHIFT            6
 #define XSTORM_RDMA_CONN_AG_CTX_YSTORM_FLUSH_MASK      0x1
@@ -7093,16 +7462,35 @@ struct roce_destroy_qp_resp_ramrod_data {
 	struct regpair output_params_addr;
 };
 
+struct roce_events_stats {
+	__le16 silent_drops;
+	__le16 rnr_naks_sent;
+	__le32 retransmit_count;
+	__le32 icrc_error_count;
+	__le32 reserved;
+};
+
 enum roce_event_opcode {
 	ROCE_EVENT_CREATE_QP = 11,
 	ROCE_EVENT_MODIFY_QP,
 	ROCE_EVENT_QUERY_QP,
 	ROCE_EVENT_DESTROY_QP,
+	ROCE_EVENT_CREATE_UD_QP,
+	ROCE_EVENT_DESTROY_UD_QP,
 	MAX_ROCE_EVENT_OPCODE
 };
 
+struct roce_init_func_params {
+	u8 ll2_queue_id;
+	u8 cnp_vlan_priority;
+	u8 cnp_dscp;
+	u8 reserved;
+	__le32 cnp_send_timeout;
+};
+
 struct roce_init_func_ramrod_data {
 	struct rdma_init_func_ramrod_data rdma;
+	struct roce_init_func_params roce;
 };
 
 struct roce_modify_qp_req_ramrod_data {
@@ -7222,6 +7610,8 @@ enum roce_ramrod_cmd_id {
 	ROCE_RAMROD_MODIFY_QP,
 	ROCE_RAMROD_QUERY_QP,
 	ROCE_RAMROD_DESTROY_QP,
+	ROCE_RAMROD_CREATE_UD_QP,
+	ROCE_RAMROD_DESTROY_UD_QP,
 	MAX_ROCE_RAMROD_CMD_ID
 };
 
@@ -7299,13 +7689,6 @@ struct mstorm_roce_resp_conn_ag_ctx {
 	__le32 reg1;
 };
 
-enum roce_flavor {
-	PLAIN_ROCE /* RoCE v1 */ ,
-	RROCE_IPV4 /* RoCE v2 (Routable RoCE) over ipv4 */ ,
-	RROCE_IPV6 /* RoCE v2 (Routable RoCE) over ipv6 */ ,
-	MAX_ROCE_FLAVOR
-};
-
 struct tstorm_roce_req_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
@@ -7416,8 +7799,8 @@ struct tstorm_roce_resp_conn_ag_ctx {
 	u8 flags0;
 #define TSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_MASK        0x1
 #define TSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT       0
-#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK                0x1
-#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT               1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_NOTIFY_REQUESTER_MASK  0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_NOTIFY_REQUESTER_SHIFT 1
 #define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT2_MASK                0x1
 #define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT2_SHIFT               2
 #define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT3_MASK                0x1
@@ -8097,7 +8480,7 @@ struct xstorm_roce_resp_conn_ag_ctx {
 	__le16 irq_prod;
 	__le16 word3;
 	__le16 word4;
-	__le16 word5;
+	__le16 ereserved1;
 	__le16 irq_cons;
 	u8 rxmit_opcode;
 	u8 byte4;
@@ -8200,6 +8583,812 @@ struct ystorm_roce_resp_conn_ag_ctx {
 	__le32 reg3;
 };
 
+enum roce_flavor {
+	PLAIN_ROCE,
+	RROCE_IPV4,
+	RROCE_IPV6,
+	MAX_ROCE_FLAVOR
+};
+
+struct ystorm_iwarp_conn_st_ctx {
+	__le32 reserved[4];
+};
+
+struct pstorm_iwarp_conn_st_ctx {
+	__le32 reserved[36];
+};
+
+struct xstorm_iwarp_conn_st_ctx {
+	__le32 reserved[44];
+};
+
+struct xstorm_iwarp_conn_ag_ctx {
+	u8 reserved0;
+	u8 state;
+	u8 flags0;
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM1_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM1_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM2_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_BIT4_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED2_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_BIT6_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT6_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_BIT7_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT7_SHIFT	7
+	u8 flags1;
+#define XSTORM_IWARP_CONN_AG_CTX_BIT8_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT8_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_BIT9_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT9_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT10_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT10_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_BIT11_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT11_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_BIT12_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT12_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_BIT13_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT13_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_BIT14_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT14_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_YSTORM_FLUSH_OR_REWIND_SND_MAX_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_YSTORM_FLUSH_OR_REWIND_SND_MAX_SHIFT	7
+	u8 flags2;
+#define XSTORM_IWARP_CONN_AG_CTX_CF0_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF1_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF2_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	6
+	u8 flags3;
+#define XSTORM_IWARP_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF7_SHIFT	6
+	u8 flags4;
+#define XSTORM_IWARP_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF11_SHIFT	6
+	u8 flags5;
+#define XSTORM_IWARP_CONN_AG_CTX_CF12_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF12_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF13_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF13_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF15_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF15_SHIFT	6
+	u8 flags6;
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF17_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF17_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF18_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF18_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_SHIFT	6
+	u8 flags7;
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT	7
+	u8 flags8;
+#define XSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_CF4EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF4EN_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF5EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF5EN_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_CF6EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF7EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF7EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_CF8EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF8EN_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_CF9EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF9EN_SHIFT	7
+	u8 flags9;
+#define XSTORM_IWARP_CONN_AG_CTX_CF10EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF10EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF11EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF11EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_CF12EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF12EN_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF13EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF13EN_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF15EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF15EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_EN_SHIFT 6
+#define XSTORM_IWARP_CONN_AG_CTX_CF17EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF17EN_SHIFT	7
+	u8 flags10;
+#define XSTORM_IWARP_CONN_AG_CTX_CF18EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF18EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF23EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF23EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_SHIFT	7
+	u8 flags11;
+#define XSTORM_IWARP_CONN_AG_CTX_TX_BLOCKED_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED3_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE6EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_RULE9EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE9EN_SHIFT	7
+	u8 flags12;
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_NOT_EMPTY_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_NOT_EMPTY_RULE_EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_RULE11EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE11EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FENCE_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FENCE_RULE_EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_RULE15EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE15EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_RULE16EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE16EN_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_RULE17EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE17EN_SHIFT	7
+	u8 flags13;
+#define XSTORM_IWARP_CONN_AG_CTX_IRQ_NOT_EMPTY_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_IRQ_NOT_EMPTY_RULE_EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_HQ_NOT_FULL_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_HQ_NOT_FULL_RULE_EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_RD_FENCE_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_RD_FENCE_RULE_EN_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_RULE21EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE21EN_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_NOT_FULL_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_NOT_FULL_RULE_EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+	u8 flags14;
+#define XSTORM_IWARP_CONN_AG_CTX_BIT16_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT16_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_BIT17_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT17_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT18_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT18_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED1_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED1_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_CF23_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF23_SHIFT	6
+	u8 byte2;
+	__le16 physical_q0;
+	__le16 physical_q1;
+	__le16 sq_comp_cons;
+	__le16 sq_tx_cons;
+	__le16 sq_prod;
+	__le16 word5;
+	__le16 conn_dpi;
+	u8 byte3;
+	u8 byte4;
+	u8 byte5;
+	u8 byte6;
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 more_to_send_seq;
+	__le32 reg4;
+	__le32 rewinded_snd_max;
+	__le32 rd_msn;
+	__le16 irq_prod_via_msdm;
+	__le16 irq_cons;
+	__le16 hq_cons_th_or_mpa_data;
+	__le16 hq_cons;
+	__le32 atom_msn;
+	__le32 orq_cons;
+	__le32 orq_cons_th;
+	u8 byte7;
+	u8 max_ord;
+	u8 wqe_data_pad_bytes;
+	u8 former_hq_prod;
+	u8 irq_prod_via_msem;
+	u8 byte12;
+	u8 max_pkt_pdu_size_lo;
+	u8 max_pkt_pdu_size_hi;
+	u8 byte15;
+	u8 e5_reserved;
+	__le16 e5_reserved4;
+	__le32 reg10;
+	__le32 reg11;
+	__le32 shared_queue_page_addr_lo;
+	__le32 shared_queue_page_addr_hi;
+	__le32 reg14;
+	__le32 reg15;
+	__le32 reg16;
+	__le32 reg17;
+};
+
+struct tstorm_iwarp_conn_ag_ctx {
+	u8 reserved0;
+	u8 state;
+	u8 flags0;
+#define TSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_BIT1_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT	1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT2_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT2_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_SHIFT	3
+#define TSTORM_IWARP_CONN_AG_CTX_BIT4_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_SHIFT	5
+#define TSTORM_IWARP_CONN_AG_CTX_CF0_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT	6
+	u8 flags1;
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_CF4_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF4_SHIFT	6
+	u8 flags2;
+#define TSTORM_IWARP_CONN_AG_CTX_CF5_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF5_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_CF7_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF7_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_CF8_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF8_SHIFT	6
+	u8 flags3;
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_EN_SHIFT	5
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_EN_SHIFT	6
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	7
+	u8 flags4;
+#define TSTORM_IWARP_CONN_AG_CTX_CF4EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF4EN_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_CF5EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF5EN_SHIFT	1
+#define TSTORM_IWARP_CONN_AG_CTX_CF6EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_CF7EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF7EN_SHIFT	3
+#define TSTORM_IWARP_CONN_AG_CTX_CF8EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF8EN_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT	5
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_EN_SHIFT 6
+#define TSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT	7
+	u8 flags5;
+#define TSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define TSTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_SND_SQ_CONS_RULE_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_SND_SQ_CONS_RULE_SHIFT	5
+#define TSTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define TSTORM_IWARP_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE8EN_SHIFT	7
+	__le32 reg0;
+	__le32 reg1;
+	__le32 unaligned_nxt_seq;
+	__le32 reg3;
+	__le32 reg4;
+	__le32 reg5;
+	__le32 reg6;
+	__le32 reg7;
+	__le32 reg8;
+	u8 orq_cache_idx;
+	u8 hq_prod;
+	__le16 sq_tx_cons_th;
+	u8 orq_prod;
+	u8 irq_cons;
+	__le16 sq_tx_cons;
+	__le16 conn_dpi;
+	__le16 rq_prod;
+	__le32 snd_seq;
+	__le32 last_hq_sequence;
+};
+
+struct tstorm_iwarp_conn_st_ctx {
+	__le32 reserved[60];
+};
+
+struct mstorm_iwarp_conn_st_ctx {
+	__le32 reserved[32];
+};
+
+struct ustorm_iwarp_conn_st_ctx {
+	__le32 reserved[24];
+};
+
+struct iwarp_conn_context {
+	struct ystorm_iwarp_conn_st_ctx ystorm_st_context;
+	struct regpair ystorm_st_padding[2];
+	struct pstorm_iwarp_conn_st_ctx pstorm_st_context;
+	struct regpair pstorm_st_padding[2];
+	struct xstorm_iwarp_conn_st_ctx xstorm_st_context;
+	struct regpair xstorm_st_padding[2];
+	struct xstorm_iwarp_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_iwarp_conn_ag_ctx tstorm_ag_context;
+	struct timers_context timer_context;
+	struct ustorm_rdma_conn_ag_ctx ustorm_ag_context;
+	struct tstorm_iwarp_conn_st_ctx tstorm_st_context;
+	struct regpair tstorm_st_padding[2];
+	struct mstorm_iwarp_conn_st_ctx mstorm_st_context;
+	struct ustorm_iwarp_conn_st_ctx ustorm_st_context;
+};
+
+struct iwarp_create_qp_ramrod_data {
+	u8 flags;
+#define IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN_SHIFT 0
+#define IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP_SHIFT	1
+#define IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN_SHIFT	2
+#define IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN_SHIFT	3
+#define IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN_SHIFT	4
+#define IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG_SHIFT	5
+#define IWARP_CREATE_QP_RAMROD_DATA_RESERVED0_MASK	0x3
+#define IWARP_CREATE_QP_RAMROD_DATA_RESERVED0_SHIFT	6
+	u8 reserved1;
+	__le16 pd;
+	__le16 sq_num_pages;
+	__le16 rq_num_pages;
+	__le32 reserved3[2];
+	struct regpair qp_handle_for_cqe;
+	struct rdma_srq_id srq_id;
+	__le32 cq_cid_for_sq;
+	__le32 cq_cid_for_rq;
+	__le16 dpi;
+	__le16 physical_q0;
+	__le16 physical_q1;
+	u8 reserved2[6];
+};
+
+enum iwarp_eqe_async_opcode {
+	IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE,
+	IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED,
+	IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE,
+	IWARP_EVENT_TYPE_ASYNC_CID_CLEANED,
+	IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED,
+	IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE,
+	IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW,
+	MAX_IWARP_EQE_ASYNC_OPCODE
+};
+
+struct iwarp_eqe_data_mpa_async_completion {
+	__le16 ulp_data_len;
+	u8 reserved[6];
+};
+
+struct iwarp_eqe_data_tcp_async_completion {
+	__le16 ulp_data_len;
+	u8 mpa_handshake_mode;
+	u8 reserved[5];
+};
+
+enum iwarp_eqe_sync_opcode {
+	IWARP_EVENT_TYPE_TCP_OFFLOAD =
+	11,
+	IWARP_EVENT_TYPE_MPA_OFFLOAD,
+	IWARP_EVENT_TYPE_MPA_OFFLOAD_SEND_RTR,
+	IWARP_EVENT_TYPE_CREATE_QP,
+	IWARP_EVENT_TYPE_QUERY_QP,
+	IWARP_EVENT_TYPE_MODIFY_QP,
+	IWARP_EVENT_TYPE_DESTROY_QP,
+	MAX_IWARP_EQE_SYNC_OPCODE
+};
+
+enum iwarp_fw_return_code {
+	IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET = 5,
+	IWARP_CONN_ERROR_TCP_CONNECTION_RST,
+	IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT,
+	IWARP_CONN_ERROR_MPA_ERROR_REJECT,
+	IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER,
+	IWARP_CONN_ERROR_MPA_RST,
+	IWARP_CONN_ERROR_MPA_FIN,
+	IWARP_CONN_ERROR_MPA_RTR_MISMATCH,
+	IWARP_CONN_ERROR_MPA_INSUF_IRD,
+	IWARP_CONN_ERROR_MPA_INVALID_PACKET,
+	IWARP_CONN_ERROR_MPA_LOCAL_ERROR,
+	IWARP_CONN_ERROR_MPA_TIMEOUT,
+	IWARP_CONN_ERROR_MPA_TERMINATE,
+	IWARP_QP_IN_ERROR_GOOD_CLOSE,
+	IWARP_QP_IN_ERROR_BAD_CLOSE,
+	IWARP_EXCEPTION_DETECTED_LLP_CLOSED,
+	IWARP_EXCEPTION_DETECTED_LLP_RESET,
+	IWARP_EXCEPTION_DETECTED_IRQ_FULL,
+	IWARP_EXCEPTION_DETECTED_RQ_EMPTY,
+	IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT,
+	IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR,
+	IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW,
+	IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC,
+	IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR,
+	IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR,
+	IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED,
+	MAX_IWARP_FW_RETURN_CODE
+};
+
+struct iwarp_init_func_params {
+	u8 ll2_ooo_q_index;
+	u8 reserved1[7];
+};
+
+struct iwarp_init_func_ramrod_data {
+	struct rdma_init_func_ramrod_data rdma;
+	struct tcp_init_params tcp;
+	struct iwarp_init_func_params iwarp;
+};
+
+enum iwarp_modify_qp_new_state_type {
+	IWARP_MODIFY_QP_STATE_CLOSING = 1,
+	IWARP_MODIFY_QP_STATE_ERROR =
+	2,
+	MAX_IWARP_MODIFY_QP_NEW_STATE_TYPE
+};
+
+struct iwarp_modify_qp_ramrod_data {
+	__le16 transition_to_state;
+	__le16 flags;
+#define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_RD_EN_MASK	0x1
+#define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_RD_EN_SHIFT	0
+#define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_WR_EN_MASK	0x1
+#define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_WR_EN_SHIFT	1
+#define IWARP_MODIFY_QP_RAMROD_DATA_ATOMIC_EN_MASK	0x1
+#define IWARP_MODIFY_QP_RAMROD_DATA_ATOMIC_EN_SHIFT	2
+#define IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN_MASK	0x1
+#define IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN_SHIFT	3
+#define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_OPS_EN_FLG_MASK	0x1
+#define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_OPS_EN_FLG_SHIFT 4
+#define IWARP_MODIFY_QP_RAMROD_DATA_RESERVED_MASK	0x7FF
+#define IWARP_MODIFY_QP_RAMROD_DATA_RESERVED_SHIFT	5
+	__le32 reserved3[3];
+	__le32 reserved4[8];
+};
+
+struct mpa_rq_params {
+	__le32 ird;
+	__le32 ord;
+};
+
+struct mpa_ulp_buffer {
+	struct regpair addr;
+	__le16 len;
+	__le16 reserved[3];
+};
+
+struct mpa_outgoing_params {
+	u8 crc_needed;
+	u8 reject;
+	u8 reserved[6];
+	struct mpa_rq_params out_rq;
+	struct mpa_ulp_buffer outgoing_ulp_buffer;
+};
+
+struct iwarp_mpa_offload_ramrod_data {
+	struct mpa_outgoing_params common;
+	__le32 tcp_cid;
+	u8 mode;
+	u8 tcp_connect_side;
+	u8 rtr_pref;
+#define IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED_MASK	0x7
+#define IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED_SHIFT 0
+#define IWARP_MPA_OFFLOAD_RAMROD_DATA_RESERVED1_MASK	0x1F
+#define IWARP_MPA_OFFLOAD_RAMROD_DATA_RESERVED1_SHIFT	3
+	u8 reserved2;
+	struct mpa_ulp_buffer incoming_ulp_buffer;
+	struct regpair async_eqe_output_buf;
+	struct regpair handle_for_async;
+	struct regpair shared_queue_addr;
+	u8 stats_counter_id;
+	u8 reserved3[15];
+};
+
+struct iwarp_offload_params {
+	struct mpa_ulp_buffer incoming_ulp_buffer;
+	struct regpair async_eqe_output_buf;
+	struct regpair handle_for_async;
+	__le16 physical_q0;
+	__le16 physical_q1;
+	u8 stats_counter_id;
+	u8 mpa_mode;
+	u8 reserved[10];
+};
+
+struct iwarp_query_qp_output_params {
+	__le32 flags;
+#define IWARP_QUERY_QP_OUTPUT_PARAMS_ERROR_FLG_MASK	0x1
+#define IWARP_QUERY_QP_OUTPUT_PARAMS_ERROR_FLG_SHIFT 0
+#define IWARP_QUERY_QP_OUTPUT_PARAMS_RESERVED0_MASK	0x7FFFFFFF
+#define IWARP_QUERY_QP_OUTPUT_PARAMS_RESERVED0_SHIFT 1
+	u8 reserved1[4];
+};
+
+struct iwarp_query_qp_ramrod_data {
+	struct regpair output_params_addr;
+};
+
+enum iwarp_ramrod_cmd_id {
+	IWARP_RAMROD_CMD_ID_TCP_OFFLOAD =
+	11,
+	IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
+	IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR,
+	IWARP_RAMROD_CMD_ID_CREATE_QP,
+	IWARP_RAMROD_CMD_ID_QUERY_QP,
+	IWARP_RAMROD_CMD_ID_MODIFY_QP,
+	IWARP_RAMROD_CMD_ID_DESTROY_QP,
+	MAX_IWARP_RAMROD_CMD_ID
+};
+
+struct iwarp_rxmit_stats_drv {
+	struct regpair tx_go_to_slow_start_event_cnt;
+	struct regpair tx_fast_retransmit_event_cnt;
+};
+
+struct iwarp_tcp_offload_ramrod_data {
+	struct iwarp_offload_params iwarp;
+	struct tcp_offload_params_opt2 tcp;
+};
+
+enum mpa_negotiation_mode {
+	MPA_NEGOTIATION_TYPE_BASIC = 1,
+	MPA_NEGOTIATION_TYPE_ENHANCED = 2,
+	MAX_MPA_NEGOTIATION_MODE
+};
+
+enum mpa_rtr_type {
+	MPA_RTR_TYPE_NONE = 0,
+	MPA_RTR_TYPE_ZERO_SEND = 1,
+	MPA_RTR_TYPE_ZERO_WRITE = 2,
+	MPA_RTR_TYPE_ZERO_SEND_AND_WRITE = 3,
+	MPA_RTR_TYPE_ZERO_READ = 4,
+	MPA_RTR_TYPE_ZERO_SEND_AND_READ = 5,
+	MPA_RTR_TYPE_ZERO_WRITE_AND_READ = 6,
+	MPA_RTR_TYPE_ZERO_SEND_AND_WRITE_AND_READ = 7,
+	MAX_MPA_RTR_TYPE
+};
+
+struct unaligned_opaque_data {
+	__le16 first_mpa_offset;
+	u8 tcp_payload_offset;
+	u8 flags;
+#define UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE_MASK	0x1
+#define UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE_SHIFT 0
+#define UNALIGNED_OPAQUE_DATA_CONNECTION_CLOSED_MASK	0x1
+#define UNALIGNED_OPAQUE_DATA_CONNECTION_CLOSED_SHIFT	1
+#define UNALIGNED_OPAQUE_DATA_RESERVED_MASK	0x3F
+#define UNALIGNED_OPAQUE_DATA_RESERVED_SHIFT	2
+	__le32 cid;
+};
+
+struct mstorm_iwarp_conn_ag_ctx {
+	u8 reserved;
+	u8 state;
+	u8 flags0;
+#define MSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define MSTORM_IWARP_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_MASK	0x3
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_SHIFT	2
+#define MSTORM_IWARP_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_IWARP_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT	6
+	u8 flags1;
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_EN_SHIFT 0
+#define MSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT	1
+#define MSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT	2
+#define MSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_IWARP_CONN_AG_CTX_RCQ_CONS_EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RCQ_CONS_EN_SHIFT	6
+#define MSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	7
+	__le16 rcq_cons;
+	__le16 rcq_cons_th;
+	__le32 reg0;
+	__le32 reg1;
+};
+
+struct ustorm_iwarp_conn_ag_ctx {
+	u8 reserved;
+	u8 byte1;
+	u8 flags0;
+#define USTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define USTORM_IWARP_CONN_AG_CTX_BIT1_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT	1
+#define USTORM_IWARP_CONN_AG_CTX_CF0_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF0_SHIFT	2
+#define USTORM_IWARP_CONN_AG_CTX_CF1_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF1_SHIFT	4
+#define USTORM_IWARP_CONN_AG_CTX_CF2_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF2_SHIFT	6
+	u8 flags1;
+#define USTORM_IWARP_CONN_AG_CTX_CF3_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF3_SHIFT	0
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT	2
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_SHIFT	4
+#define USTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	6
+	u8 flags2;
+#define USTORM_IWARP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT	0
+#define USTORM_IWARP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT	1
+#define USTORM_IWARP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT	2
+#define USTORM_IWARP_CONN_AG_CTX_CF3EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF3EN_SHIFT	3
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT 4
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT	5
+#define USTORM_IWARP_CONN_AG_CTX_CF6EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT	6
+#define USTORM_IWARP_CONN_AG_CTX_CQ_SE_EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_SE_EN_SHIFT	7
+	u8 flags3;
+#define USTORM_IWARP_CONN_AG_CTX_CQ_EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_EN_SHIFT	0
+#define USTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_IWARP_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_IWARP_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE8EN_SHIFT	7
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+	__le16 word1;
+	__le32 cq_cons;
+	__le32 cq_se_prod;
+	__le32 cq_prod;
+	__le32 reg3;
+	__le16 word2;
+	__le16 word3;
+};
+
+struct ystorm_iwarp_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define YSTORM_IWARP_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_IWARP_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_IWARP_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_IWARP_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_IWARP_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT	6
+	u8 flags1;
+#define YSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT	0
+#define YSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT	1
+#define YSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT	2
+#define YSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT 3
+#define YSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT 4
+#define YSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT 5
+#define YSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT 6
+#define YSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT 7
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+	__le32 reg0;
+	__le32 reg1;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le32 reg2;
+	__le32 reg3;
+};
+
 struct ystorm_fcoe_conn_st_ctx {
 	u8 func_mode;
 	u8 cos;
@@ -9222,7 +10411,7 @@ struct xstorm_iscsi_conn_ag_ctx {
 	u8 byte13;
 	u8 byte14;
 	u8 byte15;
-	u8 byte16;
+	u8 ereserved;
 	__le16 word11;
 	__le32 reg10;
 	__le32 reg11;
@@ -10758,6 +11947,8 @@ struct static_init {
 	u32 rsrv_persist[5];	/* Persist reserved for MFW upgrades */
 };
 
+#define NVM_MAGIC_VALUE		0x669955aa
+
 enum nvm_image_type {
 	NVM_TYPE_TIM1 = 0x01,
 	NVM_TYPE_TIM2 = 0x02,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index 0a8fde629991..b069ad088269 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -40,31 +40,17 @@
 #include "qed_init_ops.h"
 #include "qed_reg_addr.h"
 
-enum cminterface {
-	MCM_SEC,
-	MCM_PRI,
-	UCM_SEC,
-	UCM_PRI,
-	TCM_SEC,
-	TCM_PRI,
-	YCM_SEC,
-	YCM_PRI,
-	XCM_SEC,
-	XCM_PRI,
-	NUM_OF_CM_INTERFACES
-};
-
-/* general constants */
+/* General constants */
 #define QM_PQ_MEM_4KB(pq_size)	(pq_size ? DIV_ROUND_UP((pq_size + 1) *	\
 							QM_PQ_ELEMENT_SIZE, \
 							0x1000) : 0)
 #define QM_PQ_SIZE_256B(pq_size)	(pq_size ? DIV_ROUND_UP(pq_size, \
 								0x100) - 1 : 0)
 #define QM_INVALID_PQ_ID                        0xffff
-/* feature enable */
+/* Feature enable */
 #define QM_BYPASS_EN                            1
 #define QM_BYTE_CRD_EN                          1
-/* other PQ constants */
+/* Other PQ constants */
 #define QM_OTHER_PQS_PER_PF                     4
 /* WFQ constants */
 #define QM_WFQ_UPPER_BOUND		62500000
@@ -106,20 +92,21 @@ enum cminterface {
 #define BTB_PURE_LB_FACTOR                      10
 #define BTB_PURE_LB_RATIO                       7
 /* QM stop command constants */
-#define QM_STOP_PQ_MASK_WIDTH                   32
-#define QM_STOP_CMD_ADDR                                0x2
-#define QM_STOP_CMD_STRUCT_SIZE                 2
+#define QM_STOP_PQ_MASK_WIDTH           32
+#define QM_STOP_CMD_ADDR                2
+#define QM_STOP_CMD_STRUCT_SIZE         2
 #define QM_STOP_CMD_PAUSE_MASK_OFFSET   0
 #define QM_STOP_CMD_PAUSE_MASK_SHIFT    0
-#define QM_STOP_CMD_PAUSE_MASK_MASK             -1
-#define QM_STOP_CMD_GROUP_ID_OFFSET             1
-#define QM_STOP_CMD_GROUP_ID_SHIFT              16
-#define QM_STOP_CMD_GROUP_ID_MASK               15
-#define QM_STOP_CMD_PQ_TYPE_OFFSET              1
-#define QM_STOP_CMD_PQ_TYPE_SHIFT               24
-#define QM_STOP_CMD_PQ_TYPE_MASK                1
-#define QM_STOP_CMD_MAX_POLL_COUNT              100
-#define QM_STOP_CMD_POLL_PERIOD_US              500
+#define QM_STOP_CMD_PAUSE_MASK_MASK     -1
+#define QM_STOP_CMD_GROUP_ID_OFFSET     1
+#define QM_STOP_CMD_GROUP_ID_SHIFT      16
+#define QM_STOP_CMD_GROUP_ID_MASK       15
+#define QM_STOP_CMD_PQ_TYPE_OFFSET      1
+#define QM_STOP_CMD_PQ_TYPE_SHIFT       24
+#define QM_STOP_CMD_PQ_TYPE_MASK        1
+#define QM_STOP_CMD_MAX_POLL_COUNT      100
+#define QM_STOP_CMD_POLL_PERIOD_US      500
+
 /* QM command macros */
 #define QM_CMD_STRUCT_SIZE(cmd)			cmd ## \
 	_STRUCT_SIZE
@@ -146,16 +133,17 @@ static void qed_enable_pf_rl(struct qed_hwfn *p_hwfn, bool pf_rl_en)
 {
 	STORE_RT_REG(p_hwfn, QM_REG_RLPFENABLE_RT_OFFSET, pf_rl_en ? 1 : 0);
 	if (pf_rl_en) {
-		/* enable RLs for all VOQs */
+		/* Enable RLs for all VOQs */
 		STORE_RT_REG(p_hwfn, QM_REG_RLPFVOQENABLE_RT_OFFSET,
 			     (1 << MAX_NUM_VOQS) - 1);
-		/* write RL period */
+		/* Write RL period */
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_RLPFPERIOD_RT_OFFSET, QM_RL_PERIOD_CLK_25M);
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_RLPFPERIODTIMER_RT_OFFSET,
 			     QM_RL_PERIOD_CLK_25M);
-		/* set credit threshold for QM bypass flow */
+
+		/* Set credit threshold for QM bypass flow */
 		if (QM_BYPASS_EN)
 			STORE_RT_REG(p_hwfn,
 				     QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET,
@@ -167,7 +155,8 @@ static void qed_enable_pf_rl(struct qed_hwfn *p_hwfn, bool pf_rl_en)
 static void qed_enable_pf_wfq(struct qed_hwfn *p_hwfn, bool pf_wfq_en)
 {
 	STORE_RT_REG(p_hwfn, QM_REG_WFQPFENABLE_RT_OFFSET, pf_wfq_en ? 1 : 0);
-	/* set credit threshold for QM bypass flow */
+
+	/* Set credit threshold for QM bypass flow */
 	if (pf_wfq_en && QM_BYPASS_EN)
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET,
@@ -180,14 +169,15 @@ static void qed_enable_vport_rl(struct qed_hwfn *p_hwfn, bool vport_rl_en)
 	STORE_RT_REG(p_hwfn, QM_REG_RLGLBLENABLE_RT_OFFSET,
 		     vport_rl_en ? 1 : 0);
 	if (vport_rl_en) {
-		/* write RL period (use timer 0 only) */
+		/* Write RL period (use timer 0 only) */
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_RLGLBLPERIOD_0_RT_OFFSET,
 			     QM_RL_PERIOD_CLK_25M);
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET,
 			     QM_RL_PERIOD_CLK_25M);
-		/* set credit threshold for QM bypass flow */
+
+		/* Set credit threshold for QM bypass flow */
 		if (QM_BYPASS_EN)
 			STORE_RT_REG(p_hwfn,
 				     QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET,
@@ -200,7 +190,8 @@ static void qed_enable_vport_wfq(struct qed_hwfn *p_hwfn, bool vport_wfq_en)
 {
 	STORE_RT_REG(p_hwfn, QM_REG_WFQVPENABLE_RT_OFFSET,
 		     vport_wfq_en ? 1 : 0);
-	/* set credit threshold for QM bypass flow */
+
+	/* Set credit threshold for QM bypass flow */
 	if (vport_wfq_en && QM_BYPASS_EN)
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET,
@@ -208,7 +199,7 @@ static void qed_enable_vport_wfq(struct qed_hwfn *p_hwfn, bool vport_wfq_en)
 }
 
 /* Prepare runtime init values to allocate PBF command queue lines for
- * the specified VOQ
+ * the specified VOQ.
  */
 static void qed_cmdq_lines_voq_rt_init(struct qed_hwfn *p_hwfn,
 				       u8 voq, u16 cmdq_lines)
@@ -232,7 +223,7 @@ static void qed_cmdq_lines_rt_init(
 {
 	u8 tc, voq, port_id, num_tcs_in_port;
 
-	/* clear PBF lines for all VOQs */
+	/* Clear PBF lines for all VOQs */
 	for (voq = 0; voq < MAX_NUM_VOQS; voq++)
 		STORE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(voq), 0);
 	for (port_id = 0; port_id < max_ports_per_engine; port_id++) {
@@ -285,7 +276,7 @@ static void qed_btb_blocks_rt_init(
 		if (!port_params[port_id].active)
 			continue;
 
-		/* subtract headroom blocks */
+		/* Subtract headroom blocks */
 		usable_blocks = port_params[port_id].num_btb_blocks -
 				BTB_HEADROOM_BLOCKS;
 
@@ -305,7 +296,7 @@ static void qed_btb_blocks_rt_init(
 		phys_blocks = (usable_blocks - pure_lb_blocks) /
 			      num_tcs_in_port;
 
-		/* init physical TCs */
+		/* Init physical TCs */
 		for (tc = 0; tc < NUM_OF_PHYS_TCS; tc++) {
 			if (((port_params[port_id].active_phys_tcs >>
 			      tc) & 0x1) != 1)
@@ -317,7 +308,7 @@ static void qed_btb_blocks_rt_init(
 				     phys_blocks);
 		}
 
-		/* init pure LB TC */
+		/* Init pure LB TC */
 		temp = LB_VOQ(port_id);
 		STORE_RT_REG(p_hwfn, PBF_BTB_GUARANTEED_RT_OFFSET(temp),
 			     pure_lb_blocks);
@@ -338,24 +329,24 @@ static void qed_tx_pq_map_rt_init(
 			    QM_PF_QUEUE_GROUP_SIZE;
 	u16 i, pq_id, pq_group;
 
-	/* a bit per Tx PQ indicating if the PQ is associated with a VF */
+	/* A bit per Tx PQ indicating if the PQ is associated with a VF */
 	u32 tx_pq_vf_mask[MAX_QM_TX_QUEUES / QM_PF_QUEUE_GROUP_SIZE] = { 0 };
 	u32 num_tx_pq_vf_masks = MAX_QM_TX_QUEUES / QM_PF_QUEUE_GROUP_SIZE;
 	u32 pq_mem_4kb = QM_PQ_MEM_4KB(p_params->num_pf_cids);
 	u32 vport_pq_mem_4kb = QM_PQ_MEM_4KB(p_params->num_vf_cids);
 	u32 mem_addr_4kb = base_mem_addr_4kb;
 
-	/* set mapping from PQ group to PF */
+	/* Set mapping from PQ group to PF */
 	for (pq_group = first_pq_group; pq_group <= last_pq_group; pq_group++)
 		STORE_RT_REG(p_hwfn, QM_REG_PQTX2PF_0_RT_OFFSET + pq_group,
 			     (u32)(p_params->pf_id));
-	/* set PQ sizes */
+	/* Set PQ sizes */
 	STORE_RT_REG(p_hwfn, QM_REG_MAXPQSIZE_0_RT_OFFSET,
 		     QM_PQ_SIZE_256B(p_params->num_pf_cids));
 	STORE_RT_REG(p_hwfn, QM_REG_MAXPQSIZE_1_RT_OFFSET,
 		     QM_PQ_SIZE_256B(p_params->num_vf_cids));
 
-	/* go over all Tx PQs */
+	/* Go over all Tx PQs */
 	for (i = 0, pq_id = p_params->start_pq; i < num_pqs; i++, pq_id++) {
 		u8 voq = VOQ(p_params->port_id, p_params->pq_params[i].tc_id,
 			     p_params->max_phys_tcs_per_port);
@@ -366,17 +357,18 @@ static void qed_tx_pq_map_rt_init(
 				(p_params->pq_params[i].vport_id <
 				 MAX_QM_GLOBAL_RLS);
 
-		/* update first Tx PQ of VPORT/TC */
+		/* Update first Tx PQ of VPORT/TC */
 		u8 vport_id_in_pf = p_params->pq_params[i].vport_id -
 				    p_params->start_vport;
 		u16 *pq_ids = &vport_params[vport_id_in_pf].first_tx_pq_id[0];
 		u16 first_tx_pq_id = pq_ids[p_params->pq_params[i].tc_id];
 
 		if (first_tx_pq_id == QM_INVALID_PQ_ID) {
-			/* create new VP PQ */
+			/* Create new VP PQ */
 			pq_ids[p_params->pq_params[i].tc_id] = pq_id;
 			first_tx_pq_id = pq_id;
-			/* map VP PQ to VOQ and PF */
+
+			/* Map VP PQ to VOQ and PF */
 			STORE_RT_REG(p_hwfn,
 				     QM_REG_WFQVPMAP_RT_OFFSET +
 				     first_tx_pq_id,
@@ -388,7 +380,7 @@ static void qed_tx_pq_map_rt_init(
 		if (p_params->pq_params[i].rl_valid && !rl_valid)
 			DP_NOTICE(p_hwfn,
 				  "Invalid VPORT ID for rate limiter configuration");
-		/* fill PQ map entry */
+		/* Fill PQ map entry */
 		memset(&tx_pq_map, 0, sizeof(tx_pq_map));
 		SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_PQ_VALID, 1);
 		SET_FIELD(tx_pq_map.reg,
@@ -400,18 +392,16 @@ static void qed_tx_pq_map_rt_init(
 		SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VOQ, voq);
 		SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_WRR_WEIGHT_GROUP,
 			  p_params->pq_params[i].wrr_group);
-		/* write PQ map entry to CAM */
+		/* Write PQ map entry to CAM */
 		STORE_RT_REG(p_hwfn, QM_REG_TXPQMAP_RT_OFFSET + pq_id,
 			     *((u32 *)&tx_pq_map));
-		/* set base address */
+		/* Set base address */
 		STORE_RT_REG(p_hwfn,
 			     QM_REG_BASEADDRTXPQ_RT_OFFSET + pq_id,
 			     mem_addr_4kb);
-		/* check if VF PQ */
+
+		/* If VF PQ, add indication to PQ VF mask */
 		if (is_vf_pq) {
-			/* if PQ is associated with a VF, add indication
-			 * to PQ VF mask
-			 */
 			tx_pq_vf_mask[pq_id /
 				      QM_PF_QUEUE_GROUP_SIZE] |=
 			    BIT((pq_id % QM_PF_QUEUE_GROUP_SIZE));
@@ -421,16 +411,12 @@ static void qed_tx_pq_map_rt_init(
 		}
 	}
 
-	/* store Tx PQ VF mask to size select register */
-	for (i = 0; i < num_tx_pq_vf_masks; i++) {
-		if (tx_pq_vf_mask[i]) {
-			u32 addr;
-
-			addr = QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET + i;
-			STORE_RT_REG(p_hwfn, addr,
+	/* Store Tx PQ VF mask to size select register */
+	for (i = 0; i < num_tx_pq_vf_masks; i++)
+		if (tx_pq_vf_mask[i])
+			STORE_RT_REG(p_hwfn,
+				     QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET + i,
 				     tx_pq_vf_mask[i]);
-		}
-	}
 }
 
 /* Prepare Other PQ mapping runtime init values for the specified PF */
@@ -440,23 +426,25 @@ static void qed_other_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 				     u32 num_pf_cids,
 				     u32 num_tids, u32 base_mem_addr_4kb)
 {
-	u16 i, pq_id;
+	u32 pq_size, pq_mem_4kb, mem_addr_4kb;
+	u16 i, pq_id, pq_group;
 
 	/* a single other PQ group is used in each PF,
 	 * where PQ group i is used in PF i.
 	 */
-	u16 pq_group = pf_id;
-	u32 pq_size = num_pf_cids + num_tids;
-	u32 pq_mem_4kb = QM_PQ_MEM_4KB(pq_size);
-	u32 mem_addr_4kb = base_mem_addr_4kb;
+	pq_group = pf_id;
+	pq_size = num_pf_cids + num_tids;
+	pq_mem_4kb = QM_PQ_MEM_4KB(pq_size);
+	mem_addr_4kb = base_mem_addr_4kb;
 
-	/* map PQ group to PF */
+	/* Map PQ group to PF */
 	STORE_RT_REG(p_hwfn, QM_REG_PQOTHER2PF_0_RT_OFFSET + pq_group,
 		     (u32)(pf_id));
-	/* set PQ sizes */
+	/* Set PQ sizes */
 	STORE_RT_REG(p_hwfn, QM_REG_MAXPQSIZE_2_RT_OFFSET,
 		     QM_PQ_SIZE_256B(pq_size));
-	/* set base address */
+
+	/* Set base address */
 	for (i = 0, pq_id = pf_id * QM_PF_QUEUE_GROUP_SIZE;
 	     i < QM_OTHER_PQS_PER_PF; i++, pq_id++) {
 		STORE_RT_REG(p_hwfn,
@@ -485,7 +473,7 @@ static int qed_pf_wfq_rt_init(struct qed_hwfn *p_hwfn,
 
 	inc_val = QM_WFQ_INC_VAL(p_params->pf_wfq);
 	if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) {
-		DP_NOTICE(p_hwfn, "Invalid PF WFQ weight configuration");
+		DP_NOTICE(p_hwfn, "Invalid PF WFQ weight configuration\n");
 		return -1;
 	}
 
@@ -514,7 +502,7 @@ static int qed_pf_rl_rt_init(struct qed_hwfn *p_hwfn, u8 pf_id, u32 pf_rl)
 	u32 inc_val = QM_RL_INC_VAL(pf_rl);
 
 	if (inc_val > QM_RL_MAX_INC_VAL) {
-		DP_NOTICE(p_hwfn, "Invalid PF rate limit configuration");
+		DP_NOTICE(p_hwfn, "Invalid PF rate limit configuration\n");
 		return -1;
 	}
 	STORE_RT_REG(p_hwfn, QM_REG_RLPFCRD_RT_OFFSET + pf_id,
@@ -535,7 +523,7 @@ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn,
 	u32 inc_val;
 	u8 tc, i;
 
-	/* go over all PF VPORTs */
+	/* Go over all PF VPORTs */
 	for (i = 0; i < num_vports; i++) {
 
 		if (!vport_params[i].vport_wfq)
@@ -544,7 +532,7 @@ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn,
 		inc_val = QM_WFQ_INC_VAL(vport_params[i].vport_wfq);
 		if (inc_val > QM_WFQ_MAX_INC_VAL) {
 			DP_NOTICE(p_hwfn,
-				  "Invalid VPORT WFQ weight configuration");
+				  "Invalid VPORT WFQ weight configuration\n");
 			return -1;
 		}
 
@@ -578,17 +566,17 @@ static int qed_vport_rl_rt_init(struct qed_hwfn *p_hwfn,
 
 	if (start_vport + num_vports >= MAX_QM_GLOBAL_RLS) {
 		DP_NOTICE(p_hwfn,
-			  "Invalid VPORT ID for rate limiter configuration");
+			  "Invalid VPORT ID for rate limiter configuration\n");
 		return -1;
 	}
 
-	/* go over all PF VPORTs */
+	/* Go over all PF VPORTs */
 	for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) {
 		u32 inc_val = QM_RL_INC_VAL(vport_params[i].vport_rl);
 
 		if (inc_val > QM_RL_MAX_INC_VAL) {
 			DP_NOTICE(p_hwfn,
-				  "Invalid VPORT rate-limit configuration");
+				  "Invalid VPORT rate-limit configuration\n");
 			return -1;
 		}
 
@@ -617,7 +605,7 @@ static bool qed_poll_on_qm_cmd_ready(struct qed_hwfn *p_hwfn,
 		reg_val = qed_rd(p_hwfn, p_ptt, QM_REG_SDMCMDREADY);
 	}
 
-	/* check if timeout while waiting for SDM command ready */
+	/* Check if timeout while waiting for SDM command ready */
 	if (i == QM_STOP_CMD_MAX_POLL_COUNT) {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 			   "Timeout when waiting for QM SDM command ready signal\n");
@@ -701,16 +689,16 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn,
 				 QM_OTHER_PQS_PER_PF;
 	u8 tc, i;
 
-	/* clear first Tx PQ ID array for each VPORT */
+	/* Clear first Tx PQ ID array for each VPORT */
 	for (i = 0; i < p_params->num_vports; i++)
 		for (tc = 0; tc < NUM_OF_TCS; tc++)
 			vport_params[i].first_tx_pq_id[tc] = QM_INVALID_PQ_ID;
 
-	/* map Other PQs (if any) */
+	/* Map Other PQs (if any) */
 	qed_other_pq_map_rt_init(p_hwfn, p_params->port_id, p_params->pf_id,
 				 p_params->num_pf_cids, p_params->num_tids, 0);
 
-	/* map Tx PQs */
+	/* Map Tx PQs */
 	qed_tx_pq_map_rt_init(p_hwfn, p_ptt, p_params, other_mem_size_4kb);
 
 	if (p_params->pf_wfq)
@@ -736,7 +724,7 @@ int qed_init_pf_wfq(struct qed_hwfn *p_hwfn,
 	u32 inc_val = QM_WFQ_INC_VAL(pf_wfq);
 
 	if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) {
-		DP_NOTICE(p_hwfn, "Invalid PF WFQ weight configuration");
+		DP_NOTICE(p_hwfn, "Invalid PF WFQ weight configuration\n");
 		return -1;
 	}
 
@@ -750,7 +738,7 @@ int qed_init_pf_rl(struct qed_hwfn *p_hwfn,
 	u32 inc_val = QM_RL_INC_VAL(pf_rl);
 
 	if (inc_val > QM_RL_MAX_INC_VAL) {
-		DP_NOTICE(p_hwfn, "Invalid PF rate limit configuration");
+		DP_NOTICE(p_hwfn, "Invalid PF rate limit configuration\n");
 		return -1;
 	}
 
@@ -766,17 +754,18 @@ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
 		       u16 first_tx_pq_id[NUM_OF_TCS], u16 vport_wfq)
 {
-	u32 inc_val = QM_WFQ_INC_VAL(vport_wfq);
+	u16 vport_pq_id;
+	u32 inc_val;
 	u8 tc;
 
+	inc_val = QM_WFQ_INC_VAL(vport_wfq);
 	if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) {
-		DP_NOTICE(p_hwfn, "Invalid VPORT WFQ weight configuration");
+		DP_NOTICE(p_hwfn, "Invalid VPORT WFQ weight configuration\n");
 		return -1;
 	}
 
 	for (tc = 0; tc < NUM_OF_TCS; tc++) {
-		u16 vport_pq_id = first_tx_pq_id[tc];
-
+		vport_pq_id = first_tx_pq_id[tc];
 		if (vport_pq_id != QM_INVALID_PQ_ID)
 			qed_wr(p_hwfn, p_ptt,
 			       QM_REG_WFQVPWEIGHT + vport_pq_id * 4,
@@ -793,12 +782,12 @@ int qed_init_vport_rl(struct qed_hwfn *p_hwfn,
 
 	if (vport_id >= MAX_QM_GLOBAL_RLS) {
 		DP_NOTICE(p_hwfn,
-			  "Invalid VPORT ID for rate limiter configuration");
+			  "Invalid VPORT ID for rate limiter configuration\n");
 		return -1;
 	}
 
 	if (inc_val > QM_RL_MAX_INC_VAL) {
-		DP_NOTICE(p_hwfn, "Invalid VPORT rate-limit configuration");
+		DP_NOTICE(p_hwfn, "Invalid VPORT rate-limit configuration\n");
 		return -1;
 	}
 
@@ -818,15 +807,15 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
 	u32 cmd_arr[QM_CMD_STRUCT_SIZE(QM_STOP_CMD)] = { 0 };
 	u32 pq_mask = 0, last_pq = start_pq + num_pqs - 1, pq_id;
 
-	/* set command's PQ type */
+	/* Set command's PQ type */
 	QM_CMD_SET_FIELD(cmd_arr, QM_STOP_CMD, PQ_TYPE, is_tx_pq ? 0 : 1);
 
 	for (pq_id = start_pq; pq_id <= last_pq; pq_id++) {
-		/* set PQ bit in mask (stop command only) */
+		/* Set PQ bit in mask (stop command only) */
 		if (!is_release_cmd)
 			pq_mask |= (1 << (pq_id % QM_STOP_PQ_MASK_WIDTH));
 
-		/* if last PQ or end of PQ mask, write command */
+		/* If last PQ or end of PQ mask, write command */
 		if ((pq_id == last_pq) ||
 		    (pq_id % QM_STOP_PQ_MASK_WIDTH ==
 		     (QM_STOP_PQ_MASK_WIDTH - 1))) {
@@ -962,8 +951,10 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 	       ip_geneve_enable ? 1 : 0);
 }
 
+#define T_ETH_PACKET_ACTION_GFT_EVENTID  23
+#define PARSER_ETH_CONN_GFT_ACTION_CM_HDR  272
 #define T_ETH_PACKET_MATCH_RFS_EVENTID 25
-#define PARSER_ETH_CONN_CM_HDR (0x0)
+#define PARSER_ETH_CONN_CM_HDR 0
 #define CAM_LINE_SIZE sizeof(u32)
 #define RAM_LINE_SIZE sizeof(u64)
 #define REG_SIZE sizeof(u32)
@@ -971,40 +962,26 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 void qed_set_rfs_mode_disable(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt, u16 pf_id)
 {
-	union gft_cam_line_union camline;
-	struct gft_ram_line ramline;
-	u32 *p_ramline, i;
-
-	p_ramline = (u32 *)&ramline;
+	u32 hw_addr = PRS_REG_GFT_PROFILE_MASK_RAM +
+		      pf_id * RAM_LINE_SIZE;
 
 	/*stop using gft logic */
 	qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 0);
 	qed_wr(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT, 0x0);
-	memset(&camline, 0, sizeof(union gft_cam_line_union));
-	qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id,
-	       camline.cam_line_mapped.camline);
-	memset(&ramline, 0, sizeof(ramline));
-
-	for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++) {
-		u32 hw_addr = PRS_REG_GFT_PROFILE_MASK_RAM;
-
-		hw_addr += (RAM_LINE_SIZE * pf_id + i * REG_SIZE);
-
-		qed_wr(p_hwfn, p_ptt, hw_addr, *(p_ramline + i));
-	}
+	qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id, 0);
+	qed_wr(p_hwfn, p_ptt, hw_addr, 0);
+	qed_wr(p_hwfn, p_ptt, hw_addr + 4, 0);
 }
 
 void qed_set_rfs_mode_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			     u16 pf_id, bool tcp, bool udp,
 			     bool ipv4, bool ipv6)
 {
-	u32 rfs_cm_hdr_event_id, *p_ramline;
 	union gft_cam_line_union camline;
 	struct gft_ram_line ramline;
-	int i;
+	u32 rfs_cm_hdr_event_id;
 
 	rfs_cm_hdr_event_id = qed_rd(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT);
-	p_ramline = (u32 *)&ramline;
 
 	if (!ipv6 && !ipv4)
 		DP_NOTICE(p_hwfn,
@@ -1024,18 +1001,20 @@ void qed_set_rfs_mode_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	qed_wr(p_hwfn, p_ptt, PRS_REG_LOAD_L2_FILTER, 0);
 	camline.cam_line_mapped.camline = 0;
 
-	/* cam line is now valid!! */
+	/* Cam line is now valid!! */
 	SET_FIELD(camline.cam_line_mapped.camline,
 		  GFT_CAM_LINE_MAPPED_VALID, 1);
 
 	/* filters are per PF!! */
 	SET_FIELD(camline.cam_line_mapped.camline,
-		  GFT_CAM_LINE_MAPPED_PF_ID_MASK, 1);
+		  GFT_CAM_LINE_MAPPED_PF_ID_MASK,
+		  GFT_CAM_LINE_MAPPED_PF_ID_MASK_MASK);
 	SET_FIELD(camline.cam_line_mapped.camline,
 		  GFT_CAM_LINE_MAPPED_PF_ID, pf_id);
 	if (!(tcp && udp)) {
 		SET_FIELD(camline.cam_line_mapped.camline,
-			  GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK, 1);
+			  GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK,
+			  GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK_MASK);
 		if (tcp)
 			SET_FIELD(camline.cam_line_mapped.camline,
 				  GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE,
@@ -1059,34 +1038,38 @@ void qed_set_rfs_mode_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 				  GFT_PROFILE_IPV6);
 	}
 
-	/* write characteristics to cam */
+	/* Write characteristics to cam */
 	qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id,
 	       camline.cam_line_mapped.camline);
 	camline.cam_line_mapped.camline = qed_rd(p_hwfn, p_ptt,
 						 PRS_REG_GFT_CAM +
 						 CAM_LINE_SIZE * pf_id);
 
-	/* write line to RAM - compare to filter 4 tuple */
-	ramline.low32bits = 0;
-	ramline.high32bits = 0;
-	SET_FIELD(ramline.high32bits, GFT_RAM_LINE_DST_IP, 1);
-	SET_FIELD(ramline.high32bits, GFT_RAM_LINE_SRC_IP, 1);
-	SET_FIELD(ramline.low32bits, GFT_RAM_LINE_SRC_PORT, 1);
-	SET_FIELD(ramline.low32bits, GFT_RAM_LINE_DST_PORT, 1);
-
-	/* each iteration write to reg */
-	for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++)
-		qed_wr(p_hwfn, p_ptt,
-		       PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id +
-		       i * REG_SIZE, *(p_ramline + i));
-
-	/* set default profile so that no filter match will happen */
-	ramline.low32bits = 0xffff;
-	ramline.high32bits = 0xffff;
-
-	for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++)
-		qed_wr(p_hwfn, p_ptt,
-		       PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE *
-		       PRS_GFT_CAM_LINES_NO_MATCH + i * REG_SIZE,
-		       *(p_ramline + i));
+	/* Write line to RAM - compare to filter 4 tuple */
+	ramline.lo = 0;
+	ramline.hi = 0;
+	SET_FIELD(ramline.hi, GFT_RAM_LINE_DST_IP, 1);
+	SET_FIELD(ramline.hi, GFT_RAM_LINE_SRC_IP, 1);
+	SET_FIELD(ramline.hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1);
+	SET_FIELD(ramline.lo, GFT_RAM_LINE_ETHERTYPE, 1);
+	SET_FIELD(ramline.lo, GFT_RAM_LINE_SRC_PORT, 1);
+	SET_FIELD(ramline.lo, GFT_RAM_LINE_DST_PORT, 1);
+
+	/* Each iteration write to reg */
+	qed_wr(p_hwfn, p_ptt,
+	       PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id,
+	       ramline.lo);
+	qed_wr(p_hwfn, p_ptt,
+	       PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id + 4,
+	       ramline.hi);
+
+	/* Set default profile so that no filter match will happen */
+	qed_wr(p_hwfn, p_ptt,
+	       PRS_REG_GFT_PROFILE_MASK_RAM +
+	       RAM_LINE_SIZE * PRS_GFT_CAM_LINES_NO_MATCH,
+	       ramline.lo);
+	qed_wr(p_hwfn, p_ptt,
+	       PRS_REG_GFT_PROFILE_MASK_RAM +
+	       RAM_LINE_SIZE * PRS_GFT_CAM_LINES_NO_MATCH + 4,
+	       ramline.hi);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 339c91dfa658..3897ac0ae835 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -375,7 +375,6 @@ static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
 		p_tcp->ss_thresh = cpu_to_le32(p_conn->ss_thresh);
 		p_tcp->srtt = cpu_to_le16(p_conn->srtt);
 		p_tcp->rtt_var = cpu_to_le16(p_conn->rtt_var);
-		p_tcp->ts_time = cpu_to_le32(p_conn->ts_time);
 		p_tcp->ts_recent = cpu_to_le32(p_conn->ts_recent);
 		p_tcp->ts_recent_age = cpu_to_le32(p_conn->ts_recent_age);
 		p_tcp->total_rt = cpu_to_le32(p_conn->total_rt);
@@ -400,8 +399,6 @@ static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
 		p_tcp->mss = cpu_to_le16(p_conn->mss);
 		p_tcp->snd_wnd_scale = p_conn->snd_wnd_scale;
 		p_tcp->rcv_wnd_scale = p_conn->rcv_wnd_scale;
-		dval = p_conn->ts_ticks_per_second;
-		p_tcp->ts_ticks_per_second = cpu_to_le32(dval);
 		wval = p_conn->da_timeout_value;
 		p_tcp->da_timeout_value = cpu_to_le16(wval);
 		p_tcp->ack_frequency = p_conn->ack_frequency;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 1ae73b2d6d1e..f14772b9cda3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -592,15 +592,15 @@
 #define QM_REG_WFQPFWEIGHT	0x2f4e80UL
 #define QM_REG_WFQVPWEIGHT	0x2fa000UL
 
-#define PGLCS_REG_DBG_SELECT \
+#define PGLCS_REG_DBG_SELECT_K2 \
 	0x001d14UL
-#define PGLCS_REG_DBG_DWORD_ENABLE \
+#define PGLCS_REG_DBG_DWORD_ENABLE_K2 \
 	0x001d18UL
-#define PGLCS_REG_DBG_SHIFT \
+#define PGLCS_REG_DBG_SHIFT_K2 \
 	0x001d1cUL
-#define PGLCS_REG_DBG_FORCE_VALID \
+#define PGLCS_REG_DBG_FORCE_VALID_K2 \
 	0x001d20UL
-#define PGLCS_REG_DBG_FORCE_FRAME \
+#define PGLCS_REG_DBG_FORCE_FRAME_K2 \
 	0x001d24UL
 #define MISC_REG_RESET_PL_PDA_VMAIN_1 \
 	0x008070UL
@@ -612,7 +612,7 @@
 	0x009050UL
 #define MISCS_REG_RESET_PL_HV \
 	0x009060UL
-#define MISCS_REG_RESET_PL_HV_2	\
+#define MISCS_REG_RESET_PL_HV_2_K2	\
 	0x009150UL
 #define DMAE_REG_DBG_SELECT \
 	0x00c510UL
@@ -644,15 +644,15 @@
 	0x0500b0UL
 #define GRC_REG_DBG_FORCE_FRAME	\
 	0x0500b4UL
-#define UMAC_REG_DBG_SELECT \
+#define UMAC_REG_DBG_SELECT_K2 \
 	0x051094UL
-#define UMAC_REG_DBG_DWORD_ENABLE \
+#define UMAC_REG_DBG_DWORD_ENABLE_K2 \
 	0x051098UL
-#define UMAC_REG_DBG_SHIFT \
+#define UMAC_REG_DBG_SHIFT_K2 \
 	0x05109cUL
-#define UMAC_REG_DBG_FORCE_VALID \
+#define UMAC_REG_DBG_FORCE_VALID_K2 \
 	0x0510a0UL
-#define UMAC_REG_DBG_FORCE_FRAME \
+#define UMAC_REG_DBG_FORCE_FRAME_K2 \
 	0x0510a4UL
 #define MCP2_REG_DBG_SELECT \
 	0x052400UL
@@ -924,15 +924,15 @@
 	0x4c160cUL
 #define XYLD_REG_DBG_FORCE_FRAME \
 	0x4c1610UL
-#define YULD_REG_DBG_SELECT \
+#define YULD_REG_DBG_SELECT_BB_K2 \
 	0x4c9600UL
-#define YULD_REG_DBG_DWORD_ENABLE \
+#define YULD_REG_DBG_DWORD_ENABLE_BB_K2 \
 	0x4c9604UL
-#define YULD_REG_DBG_SHIFT \
+#define YULD_REG_DBG_SHIFT_BB_K2 \
 	0x4c9608UL
-#define YULD_REG_DBG_FORCE_VALID \
+#define YULD_REG_DBG_FORCE_VALID_BB_K2 \
 	0x4c960cUL
-#define YULD_REG_DBG_FORCE_FRAME \
+#define YULD_REG_DBG_FORCE_FRAME_BB_K2 \
 	0x4c9610UL
 #define TMLD_REG_DBG_SELECT \
 	0x4d1600UL
@@ -994,35 +994,35 @@
 	0x580710UL
 #define CDU_REG_DBG_FORCE_FRAME	\
 	0x580714UL
-#define WOL_REG_DBG_SELECT \
+#define WOL_REG_DBG_SELECT_K2 \
 	0x600140UL
-#define WOL_REG_DBG_DWORD_ENABLE \
+#define WOL_REG_DBG_DWORD_ENABLE_K2 \
 	0x600144UL
-#define WOL_REG_DBG_SHIFT \
+#define WOL_REG_DBG_SHIFT_K2 \
 	0x600148UL
-#define WOL_REG_DBG_FORCE_VALID	\
+#define WOL_REG_DBG_FORCE_VALID_K2 \
 	0x60014cUL
-#define WOL_REG_DBG_FORCE_FRAME	\
+#define WOL_REG_DBG_FORCE_FRAME_K2 \
 	0x600150UL
-#define BMBN_REG_DBG_SELECT \
+#define BMBN_REG_DBG_SELECT_K2 \
 	0x610140UL
-#define BMBN_REG_DBG_DWORD_ENABLE \
+#define BMBN_REG_DBG_DWORD_ENABLE_K2 \
 	0x610144UL
-#define BMBN_REG_DBG_SHIFT \
+#define BMBN_REG_DBG_SHIFT_K2 \
 	0x610148UL
-#define BMBN_REG_DBG_FORCE_VALID \
+#define BMBN_REG_DBG_FORCE_VALID_K2 \
 	0x61014cUL
-#define BMBN_REG_DBG_FORCE_FRAME \
+#define BMBN_REG_DBG_FORCE_FRAME_K2 \
 	0x610150UL
-#define NWM_REG_DBG_SELECT \
+#define NWM_REG_DBG_SELECT_K2 \
 	0x8000ecUL
-#define NWM_REG_DBG_DWORD_ENABLE \
+#define NWM_REG_DBG_DWORD_ENABLE_K2 \
 	0x8000f0UL
-#define NWM_REG_DBG_SHIFT \
+#define NWM_REG_DBG_SHIFT_K2 \
 	0x8000f4UL
-#define NWM_REG_DBG_FORCE_VALID	\
+#define NWM_REG_DBG_FORCE_VALID_K2 \
 	0x8000f8UL
-#define NWM_REG_DBG_FORCE_FRAME	\
+#define NWM_REG_DBG_FORCE_FRAME_K2\
 	0x8000fcUL
 #define PBF_REG_DBG_SELECT \
 	0xd80060UL
@@ -1244,35 +1244,35 @@
 	0x1901534UL
 #define USEM_REG_DBG_FORCE_FRAME \
 	0x1901538UL
-#define NWS_REG_DBG_SELECT \
+#define NWS_REG_DBG_SELECT_K2 \
 	0x700128UL
-#define NWS_REG_DBG_DWORD_ENABLE \
+#define NWS_REG_DBG_DWORD_ENABLE_K2 \
 	0x70012cUL
-#define NWS_REG_DBG_SHIFT \
+#define NWS_REG_DBG_SHIFT_K2 \
 	0x700130UL
-#define NWS_REG_DBG_FORCE_VALID	\
+#define NWS_REG_DBG_FORCE_VALID_K2 \
 	0x700134UL
-#define NWS_REG_DBG_FORCE_FRAME	\
+#define NWS_REG_DBG_FORCE_FRAME_K2 \
 	0x700138UL
-#define MS_REG_DBG_SELECT \
+#define MS_REG_DBG_SELECT_K2 \
 	0x6a0228UL
-#define MS_REG_DBG_DWORD_ENABLE \
+#define MS_REG_DBG_DWORD_ENABLE_K2 \
 	0x6a022cUL
-#define MS_REG_DBG_SHIFT \
+#define MS_REG_DBG_SHIFT_K2 \
 	0x6a0230UL
-#define MS_REG_DBG_FORCE_VALID \
+#define MS_REG_DBG_FORCE_VALID_K2 \
 	0x6a0234UL
-#define MS_REG_DBG_FORCE_FRAME \
+#define MS_REG_DBG_FORCE_FRAME_K2 \
 	0x6a0238UL
-#define PCIE_REG_DBG_COMMON_SELECT \
+#define PCIE_REG_DBG_COMMON_SELECT_K2 \
 	0x054398UL
-#define PCIE_REG_DBG_COMMON_DWORD_ENABLE \
+#define PCIE_REG_DBG_COMMON_DWORD_ENABLE_K2 \
 	0x05439cUL
-#define PCIE_REG_DBG_COMMON_SHIFT \
+#define PCIE_REG_DBG_COMMON_SHIFT_K2 \
 	0x0543a0UL
-#define PCIE_REG_DBG_COMMON_FORCE_VALID	\
+#define PCIE_REG_DBG_COMMON_FORCE_VALID_K2 \
 	0x0543a4UL
-#define PCIE_REG_DBG_COMMON_FORCE_FRAME	\
+#define PCIE_REG_DBG_COMMON_FORCE_FRAME_K2 \
 	0x0543a8UL
 #define MISC_REG_RESET_PL_UA \
 	0x008050UL
@@ -1328,85 +1328,85 @@
 	0x128170cUL
 #define UCM_REG_SM_TASK_CTX \
 	0x1281710UL
-#define XSEM_REG_SLOW_DBG_EMPTY	\
+#define XSEM_REG_SLOW_DBG_EMPTY_BB_K2	\
 	0x1401140UL
 #define XSEM_REG_SYNC_DBG_EMPTY	\
 	0x1401160UL
-#define XSEM_REG_SLOW_DBG_ACTIVE \
+#define XSEM_REG_SLOW_DBG_ACTIVE_BB_K2 \
 	0x1401400UL
-#define XSEM_REG_SLOW_DBG_MODE \
+#define XSEM_REG_SLOW_DBG_MODE_BB_K2 \
 	0x1401404UL
-#define XSEM_REG_DBG_FRAME_MODE	\
+#define XSEM_REG_DBG_FRAME_MODE_BB_K2	\
 	0x1401408UL
-#define XSEM_REG_DBG_MODE1_CFG \
+#define XSEM_REG_DBG_MODE1_CFG_BB_K2 \
 	0x1401420UL
 #define XSEM_REG_FAST_MEMORY \
 	0x1440000UL
 #define YSEM_REG_SYNC_DBG_EMPTY	\
 	0x1501160UL
-#define YSEM_REG_SLOW_DBG_ACTIVE \
+#define YSEM_REG_SLOW_DBG_ACTIVE_BB_K2 \
 	0x1501400UL
-#define YSEM_REG_SLOW_DBG_MODE \
+#define YSEM_REG_SLOW_DBG_MODE_BB_K2 \
 	0x1501404UL
-#define YSEM_REG_DBG_FRAME_MODE	\
+#define YSEM_REG_DBG_FRAME_MODE_BB_K2	\
 	0x1501408UL
-#define YSEM_REG_DBG_MODE1_CFG \
+#define YSEM_REG_DBG_MODE1_CFG_BB_K2 \
 	0x1501420UL
 #define YSEM_REG_FAST_MEMORY \
 	0x1540000UL
-#define PSEM_REG_SLOW_DBG_EMPTY	\
+#define PSEM_REG_SLOW_DBG_EMPTY_BB_K2	\
 	0x1601140UL
 #define PSEM_REG_SYNC_DBG_EMPTY	\
 	0x1601160UL
-#define PSEM_REG_SLOW_DBG_ACTIVE \
+#define PSEM_REG_SLOW_DBG_ACTIVE_BB_K2 \
 	0x1601400UL
-#define PSEM_REG_SLOW_DBG_MODE \
+#define PSEM_REG_SLOW_DBG_MODE_BB_K2 \
 	0x1601404UL
-#define PSEM_REG_DBG_FRAME_MODE	\
+#define PSEM_REG_DBG_FRAME_MODE_BB_K2	\
 	0x1601408UL
-#define PSEM_REG_DBG_MODE1_CFG \
+#define PSEM_REG_DBG_MODE1_CFG_BB_K2 \
 	0x1601420UL
 #define PSEM_REG_FAST_MEMORY \
 	0x1640000UL
-#define TSEM_REG_SLOW_DBG_EMPTY	\
+#define TSEM_REG_SLOW_DBG_EMPTY_BB_K2	\
 	0x1701140UL
 #define TSEM_REG_SYNC_DBG_EMPTY	\
 	0x1701160UL
-#define TSEM_REG_SLOW_DBG_ACTIVE \
+#define TSEM_REG_SLOW_DBG_ACTIVE_BB_K2 \
 	0x1701400UL
-#define TSEM_REG_SLOW_DBG_MODE \
+#define TSEM_REG_SLOW_DBG_MODE_BB_K2 \
 	0x1701404UL
-#define TSEM_REG_DBG_FRAME_MODE	\
+#define TSEM_REG_DBG_FRAME_MODE_BB_K2	\
 	0x1701408UL
-#define TSEM_REG_DBG_MODE1_CFG \
+#define TSEM_REG_DBG_MODE1_CFG_BB_K2 \
 	0x1701420UL
 #define TSEM_REG_FAST_MEMORY \
 	0x1740000UL
-#define MSEM_REG_SLOW_DBG_EMPTY	\
+#define MSEM_REG_SLOW_DBG_EMPTY_BB_K2	\
 	0x1801140UL
 #define MSEM_REG_SYNC_DBG_EMPTY	\
 	0x1801160UL
-#define MSEM_REG_SLOW_DBG_ACTIVE \
+#define MSEM_REG_SLOW_DBG_ACTIVE_BB_K2 \
 	0x1801400UL
-#define MSEM_REG_SLOW_DBG_MODE \
+#define MSEM_REG_SLOW_DBG_MODE_BB_K2 \
 	0x1801404UL
-#define MSEM_REG_DBG_FRAME_MODE	\
+#define MSEM_REG_DBG_FRAME_MODE_BB_K2	\
 	0x1801408UL
-#define MSEM_REG_DBG_MODE1_CFG \
+#define MSEM_REG_DBG_MODE1_CFG_BB_K2 \
 	0x1801420UL
 #define MSEM_REG_FAST_MEMORY \
 	0x1840000UL
-#define USEM_REG_SLOW_DBG_EMPTY	\
+#define USEM_REG_SLOW_DBG_EMPTY_BB_K2	\
 	0x1901140UL
 #define USEM_REG_SYNC_DBG_EMPTY	\
 	0x1901160UL
-#define USEM_REG_SLOW_DBG_ACTIVE \
+#define USEM_REG_SLOW_DBG_ACTIVE_BB_K2 \
 	0x1901400UL
-#define USEM_REG_SLOW_DBG_MODE \
+#define USEM_REG_SLOW_DBG_MODE_BB_K2 \
 	0x1901404UL
-#define USEM_REG_DBG_FRAME_MODE	\
+#define USEM_REG_DBG_FRAME_MODE_BB_K2	\
 	0x1901408UL
-#define USEM_REG_DBG_MODE1_CFG \
+#define USEM_REG_DBG_MODE1_CFG_BB_K2 \
 	0x1901420UL
 #define USEM_REG_FAST_MEMORY \
 	0x1940000UL
@@ -1430,7 +1430,7 @@
 	0x340800UL
 #define BRB_REG_BIG_RAM_DATA \
 	0x341500UL
-#define SEM_FAST_REG_STALL_0 \
+#define SEM_FAST_REG_STALL_0_BB_K2 \
 	0x000488UL
 #define SEM_FAST_REG_STALLED \
 	0x000494UL
@@ -1480,37 +1480,37 @@
 	4
 #define MISC_REG_BLOCK_256B_EN \
 	0x008c14UL
-#define NWS_REG_NWS_CMU	\
+#define NWS_REG_NWS_CMU_K2	\
 	0x720000UL
-#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0 \
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0_K2 \
 	0x000680UL
-#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8 \
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8_K2 \
 	0x000684UL
-#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0 \
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0_K2 \
 	0x0006c0UL
-#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8 \
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8_K2 \
 	0x0006c4UL
-#define MS_REG_MS_CMU \
+#define MS_REG_MS_CMU_K2 \
 	0x6a4000UL
-#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130 \
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130_K2 \
 	0x000208UL
-#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132 \
-	0x000210UL
-#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131 \
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131_K2 \
 	0x00020cUL
-#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133 \
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132_K2 \
+	0x000210UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133_K2 \
 	0x000214UL
-#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130 \
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130_K2 \
 	0x000208UL
-#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131 \
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131_K2 \
 	0x00020cUL
-#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132 \
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132_K2 \
 	0x000210UL
-#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133 \
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133_K2 \
 	0x000214UL
-#define PHY_PCIE_REG_PHY0 \
+#define PHY_PCIE_REG_PHY0_K2 \
 	0x620000UL
-#define PHY_PCIE_REG_PHY1 \
+#define PHY_PCIE_REG_PHY1_K2 \
 	0x624000UL
 #define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL
 #define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index 56289d7cd306..eb1a5cfc49c0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -2431,10 +2431,6 @@ qed_rdma_register_tid(void *rdma_cxt,
 			  RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG,
 			  params->page_size_log - 12);
 
-	SET_FIELD(p_ramrod->flags,
-		  RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID,
-		  p_hwfn->p_rdma_info->last_tid);
-
 	SET_FIELD(p_ramrod->flags,
 		  RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ,
 		  params->remote_read);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index bc3694e91b85..5abcac64d969 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -185,22 +185,20 @@ static void qed_set_tunn_ports(struct qed_tunnel_info *p_tun,
 }
 
 static void
-__qed_set_ramrod_tunnel_param(u8 *p_tunn_cls, u8 *p_enable_tx_clas,
+__qed_set_ramrod_tunnel_param(u8 *p_tunn_cls,
 			      struct qed_tunn_update_type *tun_type)
 {
 	*p_tunn_cls = tun_type->tun_cls;
-
-	if (tun_type->b_mode_enabled)
-		*p_enable_tx_clas = 1;
 }
 
 static void
-qed_set_ramrod_tunnel_param(u8 *p_tunn_cls, u8 *p_enable_tx_clas,
+qed_set_ramrod_tunnel_param(u8 *p_tunn_cls,
 			    struct qed_tunn_update_type *tun_type,
-			    u8 *p_update_port, __le16 *p_port,
+			    u8 *p_update_port,
+			    __le16 *p_port,
 			    struct qed_tunn_update_udp_port *p_udp_port)
 {
-	__qed_set_ramrod_tunnel_param(p_tunn_cls, p_enable_tx_clas, tun_type);
+	__qed_set_ramrod_tunnel_param(p_tunn_cls, tun_type);
 	if (p_udp_port->b_update_port) {
 		*p_update_port = 1;
 		*p_port = cpu_to_le16(p_udp_port->port);
@@ -219,33 +217,27 @@ qed_tunn_set_pf_update_params(struct qed_hwfn *p_hwfn,
 	qed_set_tunn_ports(p_tun, p_src);
 
 	qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_vxlan,
-				    &p_tunn_cfg->tx_enable_vxlan,
 				    &p_tun->vxlan,
 				    &p_tunn_cfg->set_vxlan_udp_port_flg,
 				    &p_tunn_cfg->vxlan_udp_port,
 				    &p_tun->vxlan_port);
 
 	qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_l2geneve,
-				    &p_tunn_cfg->tx_enable_l2geneve,
 				    &p_tun->l2_geneve,
 				    &p_tunn_cfg->set_geneve_udp_port_flg,
 				    &p_tunn_cfg->geneve_udp_port,
 				    &p_tun->geneve_port);
 
 	__qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_ipgeneve,
-				      &p_tunn_cfg->tx_enable_ipgeneve,
 				      &p_tun->ip_geneve);
 
 	__qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_l2gre,
-				      &p_tunn_cfg->tx_enable_l2gre,
 				      &p_tun->l2_gre);
 
 	__qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_ipgre,
-				      &p_tunn_cfg->tx_enable_ipgre,
 				      &p_tun->ip_gre);
 
 	p_tunn_cfg->update_rx_pf_clss = p_tun->b_update_rx_cls;
-	p_tunn_cfg->update_tx_pf_clss = p_tun->b_update_tx_cls;
 }
 
 static void qed_set_hw_tunn_mode(struct qed_hwfn *p_hwfn,
@@ -289,29 +281,24 @@ qed_tunn_set_pf_start_params(struct qed_hwfn *p_hwfn,
 	qed_set_tunn_ports(p_tun, p_src);
 
 	qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_vxlan,
-				    &p_tunn_cfg->tx_enable_vxlan,
 				    &p_tun->vxlan,
 				    &p_tunn_cfg->set_vxlan_udp_port_flg,
 				    &p_tunn_cfg->vxlan_udp_port,
 				    &p_tun->vxlan_port);
 
 	qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_l2geneve,
-				    &p_tunn_cfg->tx_enable_l2geneve,
 				    &p_tun->l2_geneve,
 				    &p_tunn_cfg->set_geneve_udp_port_flg,
 				    &p_tunn_cfg->geneve_udp_port,
 				    &p_tun->geneve_port);
 
 	__qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_ipgeneve,
-				      &p_tunn_cfg->tx_enable_ipgeneve,
 				      &p_tun->ip_geneve);
 
 	__qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_l2gre,
-				      &p_tunn_cfg->tx_enable_l2gre,
 				      &p_tun->l2_gre);
 
 	__qed_set_ramrod_tunnel_param(&p_tunn_cfg->tunnel_clss_ipgre,
-				      &p_tunn_cfg->tx_enable_ipgre,
 				      &p_tun->ip_gre);
 }
 
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index d6978cbc56f0..7658138f2283 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -2099,14 +2099,16 @@ int qedi_iscsi_send_ioreq(struct iscsi_task *task)
 	/* Update header info */
 	SET_FIELD(cmd_pdu_header.flags_attr, ISCSI_CMD_HDR_ATTR,
 		  ISCSI_ATTR_SIMPLE);
-	if (sc->sc_data_direction == DMA_TO_DEVICE) {
-		SET_FIELD(cmd_pdu_header.flags_attr,
-			  ISCSI_CMD_HDR_WRITE, 1);
-		task_type = ISCSI_TASK_TYPE_INITIATOR_WRITE;
-	} else {
-		SET_FIELD(cmd_pdu_header.flags_attr,
-			  ISCSI_CMD_HDR_READ, 1);
-		task_type = ISCSI_TASK_TYPE_INITIATOR_READ;
+	if (hdr->cdb[0] != TEST_UNIT_READY) {
+		if (sc->sc_data_direction == DMA_TO_DEVICE) {
+			SET_FIELD(cmd_pdu_header.flags_attr,
+				  ISCSI_CMD_HDR_WRITE, 1);
+			task_type = ISCSI_TASK_TYPE_INITIATOR_WRITE;
+		} else {
+			SET_FIELD(cmd_pdu_header.flags_attr,
+				  ISCSI_CMD_HDR_READ, 1);
+			task_type = ISCSI_TASK_TYPE_INITIATOR_READ;
+		}
 	}
 
 	cmd_pdu_header.lun.lo = be32_to_cpu(scsi_lun[0]);
@@ -2117,7 +2119,7 @@ int qedi_iscsi_send_ioreq(struct iscsi_task *task)
 	cmd_pdu_header.expected_transfer_length = cpu_to_be32(hdr->data_length);
 	cmd_pdu_header.hdr_second_dword = ntoh24(hdr->dlength);
 	cmd_pdu_header.cmd_sn = be32_to_cpu(hdr->cmdsn);
-	cmd_pdu_header.opcode = hdr->opcode;
+	cmd_pdu_header.hdr_first_byte = hdr->opcode;
 	qedi_cpy_scsi_cdb(sc, (u32 *)cmd_pdu_header.cdb);
 
 	/* Fill tx AHS and rx buffer */
diff --git a/drivers/scsi/qedi/qedi_fw_api.c b/drivers/scsi/qedi/qedi_fw_api.c
index fd354d4e03eb..7df32a68bd54 100644
--- a/drivers/scsi/qedi/qedi_fw_api.c
+++ b/drivers/scsi/qedi/qedi_fw_api.c
@@ -578,7 +578,8 @@ int init_initiator_rw_iscsi_task(struct iscsi_task_params *task_params,
 					  (struct iscsi_common_hdr *)cmd_header,
 					  tx_sgl_params, cmd_params,
 					  dif_task_params);
-	else if (GET_FIELD(cmd_header->flags_attr, ISCSI_CMD_HDR_READ))
+	else if (GET_FIELD(cmd_header->flags_attr, ISCSI_CMD_HDR_READ) ||
+		 (task_params->rx_io_size == 0 && task_params->tx_io_size == 0))
 		return init_rw_iscsi_task(task_params,
 					  ISCSI_TASK_TYPE_INITIATOR_READ,
 					  conn_params,
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index 3548d46f9b27..0c8ccffa4c38 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -1461,9 +1461,6 @@ static const struct {
 	{ ISCSI_CONN_ERROR_OUT_OF_SGES_ERROR,
 	  "out of sge error"
 	},
-	{ ISCSI_CONN_ERROR_TCP_SEG_PROC_IP_OPTIONS_ERROR,
-	  "tcp seg ip options error"
-	},
 	{ ISCSI_CONN_ERROR_TCP_IP_FRAGMENT_ERROR,
 	  "tcp ip fragment error"
 	},
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index fbab6e0514f0..a567cbf8c5b4 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -96,12 +96,12 @@
 
 #define CORE_SPQE_PAGE_SIZE_BYTES	4096
 
-#define MAX_NUM_LL2_RX_QUEUES		32
-#define MAX_NUM_LL2_TX_STATS_COUNTERS	32
+#define MAX_NUM_LL2_RX_QUEUES		48
+#define MAX_NUM_LL2_TX_STATS_COUNTERS	48
 
 #define FW_MAJOR_VERSION	8
-#define FW_MINOR_VERSION	15
-#define FW_REVISION_VERSION	3
+#define FW_MINOR_VERSION	20
+#define FW_REVISION_VERSION	0
 #define FW_ENGINEERING_VERSION	0
 
 /***********************/
@@ -181,6 +181,14 @@
 
 #define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT	(12)
 #define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK	(0xfff)
+
+#define CDU_CONTEXT_VALIDATION_CFG_ENABLE_SHIFT			(0)
+#define CDU_CONTEXT_VALIDATION_CFG_VALIDATION_TYPE_SHIFT	(1)
+#define CDU_CONTEXT_VALIDATION_CFG_USE_TYPE			(2)
+#define CDU_CONTEXT_VALIDATION_CFG_USE_REGION			(3)
+#define CDU_CONTEXT_VALIDATION_CFG_USE_CID			(4)
+#define CDU_CONTEXT_VALIDATION_CFG_USE_ACTIVE			(5)
+
 /*****************/
 /* DQ CONSTANTS  */
 /*****************/
@@ -457,7 +465,6 @@
 #define PXP_BAR_DQ	1
 
 /* PTT and GTT */
-#define PXP_NUM_PF_WINDOWS		12
 #define PXP_PER_PF_ENTRY_SIZE		8
 #define PXP_NUM_GLOBAL_WINDOWS		243
 #define PXP_GLOBAL_ENTRY_SIZE		4
@@ -482,6 +489,7 @@
 #define PXP_PF_ME_OPAQUE_ADDR		0x1f8
 #define PXP_PF_ME_CONCRETE_ADDR		0x1fc
 
+#define PXP_NUM_PF_WINDOWS	12
 #define PXP_EXTERNAL_BAR_PF_WINDOW_START	0x1000
 #define PXP_EXTERNAL_BAR_PF_WINDOW_NUM		PXP_NUM_PF_WINDOWS
 #define PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE	0x1000
@@ -618,16 +626,21 @@
 /*****************/
 /* PRM CONSTANTS */
 /*****************/
-#define PRM_DMA_PAD_BYTES_NUM	2
-/******************/
-/* SDMs CONSTANTS */
-/******************/
-#define SDM_OP_GEN_TRIG_NONE	0
-#define SDM_OP_GEN_TRIG_WAKE_THREAD	1
-#define SDM_OP_GEN_TRIG_AGG_INT	2
-#define SDM_OP_GEN_TRIG_LOADER	4
-#define SDM_OP_GEN_TRIG_INDICATE_ERROR	6
-#define SDM_OP_GEN_TRIG_RELEASE_THREAD	7
+#define PRM_DMA_PAD_BYTES_NUM  2
+/*****************/
+/* SDMs CONSTANTS  */
+/*****************/
+
+#define SDM_OP_GEN_TRIG_NONE                    0
+#define SDM_OP_GEN_TRIG_WAKE_THREAD             1
+#define SDM_OP_GEN_TRIG_AGG_INT                 2
+#define SDM_OP_GEN_TRIG_LOADER                  4
+#define SDM_OP_GEN_TRIG_INDICATE_ERROR  6
+#define SDM_OP_GEN_TRIG_INC_ORDER_CNT   9
+
+/********************/
+/* Completion types */
+/********************/
 
 #define SDM_COMP_TYPE_NONE              0
 #define SDM_COMP_TYPE_WAKE_THREAD       1
@@ -638,10 +651,11 @@
 #define SDM_COMP_TYPE_INDICATE_ERROR    6
 #define SDM_COMP_TYPE_RELEASE_THREAD    7
 #define SDM_COMP_TYPE_RAM               8
+#define SDM_COMP_TYPE_INC_ORDER_CNT     9
 
-/******************/
-/* PBF CONSTANTS  */
-/******************/
+/*****************/
+/* PBF Constants */
+/*****************/
 
 /* Number of PBF command queue lines. Each line is 32B. */
 #define PBF_MAX_CMD_LINES 3328
@@ -861,7 +875,7 @@ enum db_dest {
 /* Enum of doorbell DPM types */
 enum db_dpm_type {
 	DPM_LEGACY,
-	DPM_ROCE,
+	DPM_RDMA,
 	DPM_L2_INLINE,
 	DPM_L2_BD,
 	MAX_DB_DPM_TYPE
@@ -884,8 +898,8 @@ struct db_l2_dpm_data {
 #define DB_L2_DPM_DATA_RESERVED0_SHIFT 27
 #define DB_L2_DPM_DATA_SGE_NUM_MASK	0x7
 #define DB_L2_DPM_DATA_SGE_NUM_SHIFT	28
-#define DB_L2_DPM_DATA_RESERVED1_MASK	0x1
-#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31
+#define DB_L2_DPM_DATA_GFS_SRC_EN_MASK	0x1
+#define DB_L2_DPM_DATA_GFS_SRC_EN_SHIFT	31
 };
 
 /* Structure for SGE in a DPM doorbell of type DPM_L2_BD */
@@ -931,31 +945,33 @@ struct db_pwm_addr {
 };
 
 /* Parameters to RoCE firmware, passed in EDPM doorbell */
-struct db_roce_dpm_params {
+struct db_rdma_dpm_params {
 	__le32 params;
-#define DB_ROCE_DPM_PARAMS_SIZE_MASK		0x3F
-#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT		0
-#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK	0x3
-#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT	6
-#define DB_ROCE_DPM_PARAMS_OPCODE_MASK		0xFF
-#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT		8
-#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK	0x7FF
-#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT	16
-#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK	0x1
-#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT	27
-#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK	0x1
-#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28
-#define DB_ROCE_DPM_PARAMS_S_FLG_MASK		0x1
-#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT		29
-#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK	0x3
-#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT	30
+#define DB_RDMA_DPM_PARAMS_SIZE_MASK		0x3F
+#define DB_RDMA_DPM_PARAMS_SIZE_SHIFT		0
+#define DB_RDMA_DPM_PARAMS_DPM_TYPE_MASK	0x3
+#define DB_RDMA_DPM_PARAMS_DPM_TYPE_SHIFT	6
+#define DB_RDMA_DPM_PARAMS_OPCODE_MASK		0xFF
+#define DB_RDMA_DPM_PARAMS_OPCODE_SHIFT		8
+#define DB_RDMA_DPM_PARAMS_WQE_SIZE_MASK	0x7FF
+#define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT	16
+#define DB_RDMA_DPM_PARAMS_RESERVED0_MASK	0x1
+#define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT	27
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK	0x1
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 28
+#define DB_RDMA_DPM_PARAMS_S_FLG_MASK		0x1
+#define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT		29
+#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK	0x1
+#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT	30
+#define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK	0x1
+#define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT	31
 };
 
 /* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */
-struct db_roce_dpm_data {
+struct db_rdma_dpm_data {
 	__le16 icid;
 	__le16 prod_val;
-	struct db_roce_dpm_params params;
+	struct db_rdma_dpm_params params;
 };
 
 /* Igu interrupt command */
@@ -1026,6 +1042,42 @@ struct parsing_and_err_flags {
 #define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT         15
 };
 
+struct parsing_err_flags {
+	__le16 flags;
+#define PARSING_ERR_FLAGS_MAC_ERROR_MASK				0x1
+#define PARSING_ERR_FLAGS_MAC_ERROR_SHIFT				0
+#define PARSING_ERR_FLAGS_TRUNC_ERROR_MASK				0x1
+#define PARSING_ERR_FLAGS_TRUNC_ERROR_SHIFT				1
+#define PARSING_ERR_FLAGS_PKT_TOO_SMALL_MASK				0x1
+#define PARSING_ERR_FLAGS_PKT_TOO_SMALL_SHIFT				2
+#define PARSING_ERR_FLAGS_ANY_HDR_MISSING_TAG_MASK			0x1
+#define PARSING_ERR_FLAGS_ANY_HDR_MISSING_TAG_SHIFT			3
+#define PARSING_ERR_FLAGS_ANY_HDR_IP_VER_MISMTCH_MASK			0x1
+#define PARSING_ERR_FLAGS_ANY_HDR_IP_VER_MISMTCH_SHIFT			4
+#define PARSING_ERR_FLAGS_ANY_HDR_IP_V4_HDR_LEN_TOO_SMALL_MASK		0x1
+#define PARSING_ERR_FLAGS_ANY_HDR_IP_V4_HDR_LEN_TOO_SMALL_SHIFT		5
+#define PARSING_ERR_FLAGS_ANY_HDR_IP_BAD_TOTAL_LEN_MASK			0x1
+#define PARSING_ERR_FLAGS_ANY_HDR_IP_BAD_TOTAL_LEN_SHIFT		6
+#define PARSING_ERR_FLAGS_IP_V4_CHKSM_ERROR_MASK			0x1
+#define PARSING_ERR_FLAGS_IP_V4_CHKSM_ERROR_SHIFT			7
+#define PARSING_ERR_FLAGS_ANY_HDR_L4_IP_LEN_MISMTCH_MASK		0x1
+#define PARSING_ERR_FLAGS_ANY_HDR_L4_IP_LEN_MISMTCH_SHIFT		8
+#define PARSING_ERR_FLAGS_ZERO_UDP_IP_V6_CHKSM_MASK			0x1
+#define PARSING_ERR_FLAGS_ZERO_UDP_IP_V6_CHKSM_SHIFT			9
+#define PARSING_ERR_FLAGS_INNER_L4_CHKSM_ERROR_MASK			0x1
+#define PARSING_ERR_FLAGS_INNER_L4_CHKSM_ERROR_SHIFT			10
+#define PARSING_ERR_FLAGS_ANY_HDR_ZERO_TTL_OR_HOP_LIM_MASK		0x1
+#define PARSING_ERR_FLAGS_ANY_HDR_ZERO_TTL_OR_HOP_LIM_SHIFT		11
+#define PARSING_ERR_FLAGS_NON_8021Q_TAG_EXISTS_IN_BOTH_HDRS_MASK	0x1
+#define PARSING_ERR_FLAGS_NON_8021Q_TAG_EXISTS_IN_BOTH_HDRS_SHIFT	12
+#define PARSING_ERR_FLAGS_GENEVE_OPTION_OVERSIZED_MASK			0x1
+#define PARSING_ERR_FLAGS_GENEVE_OPTION_OVERSIZED_SHIFT			13
+#define PARSING_ERR_FLAGS_TUNNEL_IP_V4_CHKSM_ERROR_MASK			0x1
+#define PARSING_ERR_FLAGS_TUNNEL_IP_V4_CHKSM_ERROR_SHIFT		14
+#define PARSING_ERR_FLAGS_TUNNEL_L4_CHKSM_ERROR_MASK			0x1
+#define PARSING_ERR_FLAGS_TUNNEL_L4_CHKSM_ERROR_SHIFT			15
+};
+
 struct pb_context {
 	__le32 crc[4];
 };
@@ -1288,39 +1340,56 @@ struct tdif_task_context {
 
 struct timers_context {
 	__le32 logical_client_0;
-#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK     0xFFFFFFF
-#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT    0
-#define TIMERS_CONTEXT_VALIDLC0_MASK              0x1
-#define TIMERS_CONTEXT_VALIDLC0_SHIFT             28
-#define TIMERS_CONTEXT_ACTIVELC0_MASK             0x1
-#define TIMERS_CONTEXT_ACTIVELC0_SHIFT            29
-#define TIMERS_CONTEXT_RESERVED0_MASK             0x3
-#define TIMERS_CONTEXT_RESERVED0_SHIFT            30
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK	0x7FFFFFF
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT	0
+#define TIMERS_CONTEXT_RESERVED0_MASK		0x1
+#define TIMERS_CONTEXT_RESERVED0_SHIFT		27
+#define TIMERS_CONTEXT_VALIDLC0_MASK		0x1
+#define TIMERS_CONTEXT_VALIDLC0_SHIFT		28
+#define TIMERS_CONTEXT_ACTIVELC0_MASK		0x1
+#define TIMERS_CONTEXT_ACTIVELC0_SHIFT		29
+#define TIMERS_CONTEXT_RESERVED1_MASK		0x3
+#define TIMERS_CONTEXT_RESERVED1_SHIFT		30
 	__le32 logical_client_1;
-#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK     0xFFFFFFF
-#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT    0
-#define TIMERS_CONTEXT_VALIDLC1_MASK              0x1
-#define TIMERS_CONTEXT_VALIDLC1_SHIFT             28
-#define TIMERS_CONTEXT_ACTIVELC1_MASK             0x1
-#define TIMERS_CONTEXT_ACTIVELC1_SHIFT            29
-#define TIMERS_CONTEXT_RESERVED1_MASK             0x3
-#define TIMERS_CONTEXT_RESERVED1_SHIFT            30
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK	0x7FFFFFF
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT	0
+#define TIMERS_CONTEXT_RESERVED2_MASK		0x1
+#define TIMERS_CONTEXT_RESERVED2_SHIFT		27
+#define TIMERS_CONTEXT_VALIDLC1_MASK		0x1
+#define TIMERS_CONTEXT_VALIDLC1_SHIFT		28
+#define TIMERS_CONTEXT_ACTIVELC1_MASK		0x1
+#define TIMERS_CONTEXT_ACTIVELC1_SHIFT		29
+#define TIMERS_CONTEXT_RESERVED3_MASK		0x3
+#define TIMERS_CONTEXT_RESERVED3_SHIFT		30
 	__le32 logical_client_2;
-#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK     0xFFFFFFF
-#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT    0
-#define TIMERS_CONTEXT_VALIDLC2_MASK              0x1
-#define TIMERS_CONTEXT_VALIDLC2_SHIFT             28
-#define TIMERS_CONTEXT_ACTIVELC2_MASK             0x1
-#define TIMERS_CONTEXT_ACTIVELC2_SHIFT            29
-#define TIMERS_CONTEXT_RESERVED2_MASK             0x3
-#define TIMERS_CONTEXT_RESERVED2_SHIFT            30
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK	0x7FFFFFF
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT	0
+#define TIMERS_CONTEXT_RESERVED4_MASK		0x1
+#define TIMERS_CONTEXT_RESERVED4_SHIFT		27
+#define TIMERS_CONTEXT_VALIDLC2_MASK		0x1
+#define TIMERS_CONTEXT_VALIDLC2_SHIFT		28
+#define TIMERS_CONTEXT_ACTIVELC2_MASK		0x1
+#define TIMERS_CONTEXT_ACTIVELC2_SHIFT		29
+#define TIMERS_CONTEXT_RESERVED5_MASK		0x3
+#define TIMERS_CONTEXT_RESERVED5_SHIFT		30
 	__le32 host_expiration_fields;
-#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK  0xFFFFFFF
-#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT 0
-#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK  0x1
-#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT 28
-#define TIMERS_CONTEXT_RESERVED3_MASK             0x7
-#define TIMERS_CONTEXT_RESERVED3_SHIFT            29
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK	0x7FFFFFF
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT	0
+#define TIMERS_CONTEXT_RESERVED6_MASK			0x1
+#define TIMERS_CONTEXT_RESERVED6_SHIFT			27
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK	0x1
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT	 28
+#define TIMERS_CONTEXT_RESERVED7_MASK			0x7
+#define TIMERS_CONTEXT_RESERVED7_SHIFT			29
 };
+
+enum tunnel_next_protocol {
+	e_unknown = 0,
+	e_l2 = 1,
+	e_ipv4 = 2,
+	e_ipv6 = 3,
+	MAX_TUNNEL_NEXT_PROTOCOL
+};
+
 #endif /* __COMMON_HSI__ */
 #endif
diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h
index 34d93eb5bfba..cb06e6e368e1 100644
--- a/include/linux/qed/eth_common.h
+++ b/include/linux/qed/eth_common.h
@@ -75,7 +75,8 @@
 	(ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4)
 
 /* Maximum number of buffers, used for RX packet placement */
-#define ETH_RX_MAX_BUFF_PER_PKT             5
+#define ETH_RX_MAX_BUFF_PER_PKT	5
+#define ETH_RX_BD_THRESHOLD	12
 
 /* num of MAC/VLAN filters */
 #define ETH_NUM_MAC_FILTERS                                     512
diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h
index 947a635d04bb..12fc9e788eea 100644
--- a/include/linux/qed/fcoe_common.h
+++ b/include/linux/qed/fcoe_common.h
@@ -13,7 +13,6 @@
 /*********************/
 
 #define FC_ABTS_REPLY_MAX_PAYLOAD_LEN	12
-#define FCOE_MAX_SIZE_FCP_DATA_SUPER	(8600)
 
 struct fcoe_abts_pkt {
 	__le32 abts_rsp_fc_payload_lo;
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index 69949f8e354b..85e086cba639 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -75,25 +75,13 @@
 #define ISCSI_TARGET_MODE 1
 
 /* iSCSI request op codes */
-#define ISCSI_OPCODE_NOP_OUT_NO_IMM                     (0)
-#define ISCSI_OPCODE_NOP_OUT                            ( \
-		ISCSI_OPCODE_NOP_OUT_NO_IMM | 0x40)
-#define ISCSI_OPCODE_SCSI_CMD_NO_IMM            (1)
-#define ISCSI_OPCODE_SCSI_CMD                           ( \
-		ISCSI_OPCODE_SCSI_CMD_NO_IMM | 0x40)
-#define ISCSI_OPCODE_TMF_REQUEST_NO_IMM         (2)
-#define ISCSI_OPCODE_TMF_REQUEST                        ( \
-		ISCSI_OPCODE_TMF_REQUEST_NO_IMM | 0x40)
-#define ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM       (3)
-#define ISCSI_OPCODE_LOGIN_REQUEST                      ( \
-		ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM | 0x40)
-#define ISCSI_OPCODE_TEXT_REQUEST_NO_IMM        (4)
-#define ISCSI_OPCODE_TEXT_REQUEST                       ( \
-		ISCSI_OPCODE_TEXT_REQUEST_NO_IMM | 0x40)
-#define ISCSI_OPCODE_DATA_OUT                           (5)
-#define ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM      (6)
-#define ISCSI_OPCODE_LOGOUT_REQUEST                     ( \
-		ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM | 0x40)
+#define ISCSI_OPCODE_NOP_OUT		(0)
+#define ISCSI_OPCODE_SCSI_CMD		(1)
+#define ISCSI_OPCODE_TMF_REQUEST	(2)
+#define ISCSI_OPCODE_LOGIN_REQUEST	(3)
+#define ISCSI_OPCODE_TEXT_REQUEST	(4)
+#define ISCSI_OPCODE_DATA_OUT		(5)
+#define ISCSI_OPCODE_LOGOUT_REQUEST	(6)
 
 /* iSCSI response/messages op codes */
 #define ISCSI_OPCODE_NOP_IN             (0x20)
@@ -172,17 +160,23 @@ struct iscsi_async_msg_hdr {
 struct iscsi_cmd_hdr {
 	__le16 reserved1;
 	u8 flags_attr;
-#define ISCSI_CMD_HDR_ATTR_MASK           0x7
-#define ISCSI_CMD_HDR_ATTR_SHIFT          0
-#define ISCSI_CMD_HDR_RSRV_MASK           0x3
-#define ISCSI_CMD_HDR_RSRV_SHIFT          3
-#define ISCSI_CMD_HDR_WRITE_MASK          0x1
-#define ISCSI_CMD_HDR_WRITE_SHIFT         5
-#define ISCSI_CMD_HDR_READ_MASK           0x1
-#define ISCSI_CMD_HDR_READ_SHIFT          6
-#define ISCSI_CMD_HDR_FINAL_MASK          0x1
-#define ISCSI_CMD_HDR_FINAL_SHIFT         7
-	u8 opcode;
+#define ISCSI_CMD_HDR_ATTR_MASK		0x7
+#define ISCSI_CMD_HDR_ATTR_SHIFT	0
+#define ISCSI_CMD_HDR_RSRV_MASK		0x3
+#define ISCSI_CMD_HDR_RSRV_SHIFT	3
+#define ISCSI_CMD_HDR_WRITE_MASK	0x1
+#define ISCSI_CMD_HDR_WRITE_SHIFT	5
+#define ISCSI_CMD_HDR_READ_MASK		0x1
+#define ISCSI_CMD_HDR_READ_SHIFT	6
+#define ISCSI_CMD_HDR_FINAL_MASK	0x1
+#define ISCSI_CMD_HDR_FINAL_SHIFT	7
+	u8 hdr_first_byte;
+#define ISCSI_CMD_HDR_OPCODE_MASK	0x3F
+#define ISCSI_CMD_HDR_OPCODE_SHIFT	0
+#define ISCSI_CMD_HDR_IMM_MASK		0x1
+#define ISCSI_CMD_HDR_IMM_SHIFT		6
+#define ISCSI_CMD_HDR_RSRV1_MASK	0x1
+#define ISCSI_CMD_HDR_RSRV1_SHIFT	7
 	__le32 hdr_second_dword;
 #define ISCSI_CMD_HDR_DATA_SEG_LEN_MASK   0xFFFFFF
 #define ISCSI_CMD_HDR_DATA_SEG_LEN_SHIFT  0
@@ -790,9 +784,9 @@ enum iscsi_error_types {
 	ISCSI_CONN_ERROR_LOCAL_COMPLETION_ERROR,
 	ISCSI_CONN_ERROR_DATA_OVERRUN,
 	ISCSI_CONN_ERROR_OUT_OF_SGES_ERROR,
-	ISCSI_CONN_ERROR_TCP_SEG_PROC_URG_ERROR,
-	ISCSI_CONN_ERROR_TCP_SEG_PROC_IP_OPTIONS_ERROR,
-	ISCSI_CONN_ERROR_TCP_SEG_PROC_CONNECT_INVALID_WS_OPTION,
+	ISCSI_CONN_ERROR_IP_OPTIONS_ERROR,
+	ISCSI_CONN_ERROR_PRS_ERRORS,
+	ISCSI_CONN_ERROR_CONNECT_INVALID_TCP_OPTION,
 	ISCSI_CONN_ERROR_TCP_IP_FRAGMENT_ERROR,
 	ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_LEN,
 	ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_TYPE,
@@ -1304,22 +1298,6 @@ struct ystorm_iscsi_stats_drv {
 	struct regpair iscsi_tx_total_pdu_cnt;
 };
 
-struct iscsi_db_data {
-	u8 params;
-#define ISCSI_DB_DATA_DEST_MASK         0x3
-#define ISCSI_DB_DATA_DEST_SHIFT        0
-#define ISCSI_DB_DATA_AGG_CMD_MASK      0x3
-#define ISCSI_DB_DATA_AGG_CMD_SHIFT     2
-#define ISCSI_DB_DATA_BYPASS_EN_MASK    0x1
-#define ISCSI_DB_DATA_BYPASS_EN_SHIFT   4
-#define ISCSI_DB_DATA_RESERVED_MASK     0x1
-#define ISCSI_DB_DATA_RESERVED_SHIFT    5
-#define ISCSI_DB_DATA_AGG_VAL_SEL_MASK  0x3
-#define ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT 6
-	u8 agg_flags;
-	__le16 sq_prod;
-};
-
 struct tstorm_iscsi_task_ag_ctx {
 	u8 byte0;
 	u8 byte1;
@@ -1398,5 +1376,20 @@ struct tstorm_iscsi_task_ag_ctx {
 	__le32 reg1;
 	__le32 reg2;
 };
+struct iscsi_db_data {
+	u8 params;
+#define ISCSI_DB_DATA_DEST_MASK         0x3
+#define ISCSI_DB_DATA_DEST_SHIFT        0
+#define ISCSI_DB_DATA_AGG_CMD_MASK      0x3
+#define ISCSI_DB_DATA_AGG_CMD_SHIFT     2
+#define ISCSI_DB_DATA_BYPASS_EN_MASK    0x1
+#define ISCSI_DB_DATA_BYPASS_EN_SHIFT   4
+#define ISCSI_DB_DATA_RESERVED_MASK     0x1
+#define ISCSI_DB_DATA_RESERVED_SHIFT    5
+#define ISCSI_DB_DATA_AGG_VAL_SEL_MASK  0x3
+#define ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT 6
+	u8 agg_flags;
+	__le16 sq_prod;
+};
 
 #endif /* __ISCSI_COMMON__ */
diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h
index 72c770f9f666..a9b3050f469c 100644
--- a/include/linux/qed/rdma_common.h
+++ b/include/linux/qed/rdma_common.h
@@ -42,7 +42,7 @@
 #define RDMA_MAX_SGE_PER_SQ_WQE         (4)
 #define RDMA_MAX_SGE_PER_RQ_WQE         (4)
 
-#define RDMA_MAX_DATA_SIZE_IN_WQE       (0x7FFFFFFF)
+#define RDMA_MAX_DATA_SIZE_IN_WQE	(0x80000000)
 
 #define RDMA_REQ_RD_ATOMIC_ELM_SIZE             (0x50)
 #define RDMA_RESP_RD_ATOMIC_ELM_SIZE    (0x20)
diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h
index 866f063026de..fe6a33e45977 100644
--- a/include/linux/qed/roce_common.h
+++ b/include/linux/qed/roce_common.h
@@ -37,6 +37,8 @@
 #define ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE (288)
 
 #define ROCE_MAX_QPS	(32 * 1024)
+#define ROCE_DCQCN_NP_MAX_QPS	(64)
+#define ROCE_DCQCN_RP_MAX_QPS	(64)
 
 enum roce_async_events_type {
 	ROCE_ASYNC_EVENT_NONE = 0,
diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h
index a5e843268f0e..dbf7a43c3e1f 100644
--- a/include/linux/qed/tcp_common.h
+++ b/include/linux/qed/tcp_common.h
@@ -111,7 +111,6 @@ struct tcp_offload_params {
 	__le32 snd_wnd;
 	__le32 rcv_wnd;
 	__le32 snd_wl1;
-	__le32 ts_time;
 	__le32 ts_recent;
 	__le32 ts_recent_age;
 	__le32 total_rt;
@@ -122,7 +121,7 @@ struct tcp_offload_params {
 	u8 ka_probe_cnt;
 	u8 rt_cnt;
 	__le16 rtt_var;
-	__le16 reserved2;
+	__le16 fw_internal;
 	__le32 ka_timeout;
 	__le32 ka_interval;
 	__le32 max_rt_time;
@@ -130,7 +129,7 @@ struct tcp_offload_params {
 	u8 snd_wnd_scale;
 	u8 ack_frequency;
 	__le16 da_timeout_value;
-	__le32 ts_ticks_per_second;
+	__le32 reserved3[2];
 };
 
 struct tcp_offload_params_opt2 {
-- 
cgit v1.2.3


From 1dbe0ccb0631c4ed399261934fe16f07407b078d Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Thu, 18 May 2017 15:16:49 +0800
Subject: regulator: axp20x-regulator: add support for AXP803

AXP803 PMIC also have a series of regulators (DCDCs and LDOs)
controllable via I2C/RSB bus.

Add support for them.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/axp20x-regulator.c | 153 ++++++++++++++++++++++++++++++-----
 include/linux/mfd/axp20x.h           |  37 +++++++++
 2 files changed, 168 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index 0b9d4e3e52c7..e2608fe770b9 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -244,6 +244,82 @@ static const struct regulator_desc axp22x_drivevbus_regulator = {
 	.ops		= &axp20x_ops_sw,
 };
 
+static const struct regulator_linear_range axp803_dcdc234_ranges[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0x0, 0x46, 10000),
+	REGULATOR_LINEAR_RANGE(1220000, 0x47, 0x4b, 20000),
+};
+
+static const struct regulator_linear_range axp803_dcdc5_ranges[] = {
+	REGULATOR_LINEAR_RANGE(800000, 0x0, 0x20, 10000),
+	REGULATOR_LINEAR_RANGE(1140000, 0x21, 0x44, 20000),
+};
+
+static const struct regulator_linear_range axp803_dcdc6_ranges[] = {
+	REGULATOR_LINEAR_RANGE(600000, 0x0, 0x32, 10000),
+	REGULATOR_LINEAR_RANGE(1120000, 0x33, 0x47, 20000),
+};
+
+/* AXP806's CLDO2 and AXP809's DLDO1 shares the same range */
+static const struct regulator_linear_range axp803_dldo2_ranges[] = {
+	REGULATOR_LINEAR_RANGE(700000, 0x0, 0x1a, 100000),
+	REGULATOR_LINEAR_RANGE(3400000, 0x1b, 0x1f, 200000),
+};
+
+static const struct regulator_desc axp803_regulators[] = {
+	AXP_DESC(AXP803, DCDC1, "dcdc1", "vin1", 1600, 3400, 100,
+		 AXP803_DCDC1_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL1, BIT(0)),
+	AXP_DESC_RANGES(AXP803, DCDC2, "dcdc2", "vin2", axp803_dcdc234_ranges,
+			76, AXP803_DCDC2_V_OUT, 0x7f, AXP22X_PWR_OUT_CTRL1,
+			BIT(1)),
+	AXP_DESC_RANGES(AXP803, DCDC3, "dcdc3", "vin3", axp803_dcdc234_ranges,
+			76, AXP803_DCDC3_V_OUT, 0x7f, AXP22X_PWR_OUT_CTRL1,
+			BIT(2)),
+	AXP_DESC_RANGES(AXP803, DCDC4, "dcdc4", "vin4", axp803_dcdc234_ranges,
+			76, AXP803_DCDC4_V_OUT, 0x7f, AXP22X_PWR_OUT_CTRL1,
+			BIT(3)),
+	AXP_DESC_RANGES(AXP803, DCDC5, "dcdc5", "vin5", axp803_dcdc5_ranges,
+			68, AXP803_DCDC5_V_OUT, 0x7f, AXP22X_PWR_OUT_CTRL1,
+			BIT(4)),
+	AXP_DESC_RANGES(AXP803, DCDC6, "dcdc6", "vin6", axp803_dcdc6_ranges,
+			72, AXP803_DCDC6_V_OUT, 0x7f, AXP22X_PWR_OUT_CTRL1,
+			BIT(5)),
+	/* secondary switchable output of DCDC1 */
+	AXP_DESC_SW(AXP803, DC1SW, "dc1sw", NULL, AXP22X_PWR_OUT_CTRL2,
+		    BIT(7)),
+	AXP_DESC(AXP803, ALDO1, "aldo1", "aldoin", 700, 3300, 100,
+		 AXP22X_ALDO1_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL3, BIT(5)),
+	AXP_DESC(AXP803, ALDO2, "aldo2", "aldoin", 700, 3300, 100,
+		 AXP22X_ALDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL3, BIT(6)),
+	AXP_DESC(AXP803, ALDO3, "aldo3", "aldoin", 700, 3300, 100,
+		 AXP22X_ALDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL3, BIT(7)),
+	AXP_DESC(AXP803, DLDO1, "dldo1", "dldoin", 700, 3300, 100,
+		 AXP22X_DLDO1_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(3)),
+	AXP_DESC_RANGES(AXP803, DLDO2, "dldo2", "dldoin", axp803_dldo2_ranges,
+			32, AXP22X_DLDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2,
+			BIT(4)),
+	AXP_DESC(AXP803, DLDO3, "dldo3", "dldoin", 700, 3300, 100,
+		 AXP22X_DLDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(5)),
+	AXP_DESC(AXP803, DLDO4, "dldo4", "dldoin", 700, 3300, 100,
+		 AXP22X_DLDO4_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(6)),
+	AXP_DESC(AXP803, ELDO1, "eldo1", "eldoin", 700, 1900, 50,
+		 AXP22X_ELDO1_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(0)),
+	AXP_DESC(AXP803, ELDO2, "eldo2", "eldoin", 700, 1900, 50,
+		 AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)),
+	AXP_DESC(AXP803, ELDO3, "eldo3", "eldoin", 700, 1900, 50,
+		 AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)),
+	AXP_DESC(AXP803, FLDO1, "fldo1", "fldoin", 700, 1450, 50,
+		 AXP803_FLDO1_V_OUT, 0x0f, AXP22X_PWR_OUT_CTRL3, BIT(2)),
+	AXP_DESC(AXP803, FLDO2, "fldo2", "fldoin", 700, 1450, 50,
+		 AXP803_FLDO2_V_OUT, 0x0f, AXP22X_PWR_OUT_CTRL3, BIT(3)),
+	AXP_DESC_IO(AXP803, LDO_IO0, "ldo-io0", "ips", 700, 3300, 100,
+		    AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07,
+		    AXP22X_IO_ENABLED, AXP22X_IO_DISABLED),
+	AXP_DESC_IO(AXP803, LDO_IO1, "ldo-io1", "ips", 700, 3300, 100,
+		    AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07,
+		    AXP22X_IO_ENABLED, AXP22X_IO_DISABLED),
+	AXP_DESC_FIXED(AXP803, RTC_LDO, "rtc-ldo", "ips", 3000),
+};
+
 static const struct regulator_linear_range axp806_dcdca_ranges[] = {
 	REGULATOR_LINEAR_RANGE(600000, 0x0, 0x32, 10000),
 	REGULATOR_LINEAR_RANGE(1120000, 0x33, 0x47, 20000),
@@ -254,11 +330,6 @@ static const struct regulator_linear_range axp806_dcdcd_ranges[] = {
 	REGULATOR_LINEAR_RANGE(1600000, 0x2e, 0x3f, 100000),
 };
 
-static const struct regulator_linear_range axp806_cldo2_ranges[] = {
-	REGULATOR_LINEAR_RANGE(700000, 0x0, 0x1a, 100000),
-	REGULATOR_LINEAR_RANGE(3400000, 0x1b, 0x1f, 200000),
-};
-
 static const struct regulator_desc axp806_regulators[] = {
 	AXP_DESC_RANGES(AXP806, DCDCA, "dcdca", "vina", axp806_dcdca_ranges,
 			72, AXP806_DCDCA_V_CTRL, 0x7f, AXP806_PWR_OUT_CTRL1,
@@ -289,7 +360,7 @@ static const struct regulator_desc axp806_regulators[] = {
 		 AXP806_BLDO4_V_CTRL, 0x0f, AXP806_PWR_OUT_CTRL2, BIT(3)),
 	AXP_DESC(AXP806, CLDO1, "cldo1", "cldoin", 700, 3300, 100,
 		 AXP806_CLDO1_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL2, BIT(4)),
-	AXP_DESC_RANGES(AXP806, CLDO2, "cldo2", "cldoin", axp806_cldo2_ranges,
+	AXP_DESC_RANGES(AXP806, CLDO2, "cldo2", "cldoin", axp803_dldo2_ranges,
 			32, AXP806_CLDO2_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL2,
 			BIT(5)),
 	AXP_DESC(AXP806, CLDO3, "cldo3", "cldoin", 700, 3300, 100,
@@ -326,7 +397,7 @@ static const struct regulator_desc axp809_regulators[] = {
 		 AXP22X_ALDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL1, BIT(7)),
 	AXP_DESC(AXP809, ALDO3, "aldo3", "aldoin", 700, 3300, 100,
 		 AXP22X_ALDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(5)),
-	AXP_DESC_RANGES(AXP809, DLDO1, "dldo1", "dldoin", axp806_cldo2_ranges,
+	AXP_DESC_RANGES(AXP809, DLDO1, "dldo1", "dldoin", axp803_dldo2_ranges,
 			32, AXP22X_DLDO1_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2,
 			BIT(3)),
 	AXP_DESC(AXP809, DLDO2, "dldo2", "dldoin", 700, 3300, 100,
@@ -369,14 +440,21 @@ static int axp20x_set_dcdc_freq(struct platform_device *pdev, u32 dcdcfreq)
 		def = 1500;
 		step = 75;
 		break;
-	case AXP806_ID:
+	case AXP803_ID:
 		/*
-		 * AXP806 DCDC work frequency setting has the same range and
+		 * AXP803 DCDC work frequency setting has the same range and
 		 * step as AXP22X, but at a different register.
 		 * Fall through to the check below.
 		 * (See include/linux/mfd/axp20x.h)
 		 */
-		reg = AXP806_DCDC_FREQ_CTRL;
+		reg = AXP803_DCDC_FREQ_CTRL;
+	case AXP806_ID:
+		/*
+		 * AXP806 also have DCDC work frequency setting register at a
+		 * different position.
+		 */
+		if (axp20x->variant == AXP806_ID)
+			reg = AXP806_DCDC_FREQ_CTRL;
 	case AXP221_ID:
 	case AXP223_ID:
 	case AXP809_ID:
@@ -475,6 +553,14 @@ static int axp20x_set_dcdc_workmode(struct regulator_dev *rdev, int id, u32 work
 		workmode <<= id - AXP22X_DCDC1;
 		break;
 
+	case AXP803_ID:
+		if (id < AXP803_DCDC1 || id > AXP803_DCDC6)
+			return -EINVAL;
+
+		mask = AXP22X_WORKMODE_DCDCX_MASK(id - AXP803_DCDC1);
+		workmode <<= id - AXP803_DCDC1;
+		break;
+
 	default:
 		/* should not happen */
 		WARN_ON(1);
@@ -492,20 +578,38 @@ static bool axp20x_is_polyphase_slave(struct axp20x_dev *axp20x, int id)
 {
 	u32 reg = 0;
 
-	/* Only AXP806 has poly-phase outputs */
-	if (axp20x->variant != AXP806_ID)
-		return false;
+	/*
+	 * Currently in our supported AXP variants, only AXP803 and AXP806
+	 * have polyphase regulators.
+	 */
+	switch (axp20x->variant) {
+	case AXP803_ID:
+		regmap_read(axp20x->regmap, AXP803_POLYPHASE_CTRL, &reg);
+
+		switch (id) {
+		case AXP803_DCDC3:
+			return !!(reg & BIT(6));
+		case AXP803_DCDC6:
+			return !!(reg & BIT(7));
+		}
+		break;
 
-	regmap_read(axp20x->regmap, AXP806_DCDC_MODE_CTRL2, &reg);
+	case AXP806_ID:
+		regmap_read(axp20x->regmap, AXP806_DCDC_MODE_CTRL2, &reg);
+
+		switch (id) {
+		case AXP806_DCDCB:
+			return (((reg & GENMASK(7, 6)) == BIT(6)) ||
+				((reg & GENMASK(7, 6)) == BIT(7)));
+		case AXP806_DCDCC:
+			return ((reg & GENMASK(7, 6)) == BIT(7));
+		case AXP806_DCDCE:
+			return !!(reg & BIT(5));
+		}
+		break;
 
-	switch (id) {
-	case AXP806_DCDCB:
-		return (((reg & GENMASK(7, 6)) == BIT(6)) ||
-			((reg & GENMASK(7, 6)) == BIT(7)));
-	case AXP806_DCDCC:
-		return ((reg & GENMASK(7, 6)) == BIT(7));
-	case AXP806_DCDCE:
-		return !!(reg & BIT(5));
+	default:
+		return false;
 	}
 
 	return false;
@@ -540,6 +644,10 @@ static int axp20x_regulator_probe(struct platform_device *pdev)
 		drivevbus = of_property_read_bool(pdev->dev.parent->of_node,
 						  "x-powers,drive-vbus-en");
 		break;
+	case AXP803_ID:
+		regulators = axp803_regulators;
+		nregulators = AXP803_REG_ID_MAX;
+		break;
 	case AXP806_ID:
 		regulators = axp806_regulators;
 		nregulators = AXP806_REG_ID_MAX;
@@ -579,6 +687,7 @@ static int axp20x_regulator_probe(struct platform_device *pdev)
 		 * name.
 		 */
 		if ((regulators == axp22x_regulators && i == AXP22X_DC1SW) ||
+		    (regulators == axp803_regulators && i == AXP803_DC1SW) ||
 		    (regulators == axp809_regulators && i == AXP809_DC1SW)) {
 			new_desc = devm_kzalloc(&pdev->dev, sizeof(*desc),
 						GFP_KERNEL);
diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index cde56cfe8446..965b027e31b3 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -119,6 +119,17 @@ enum axp20x_variants {
 #define AXP806_BUS_ADDR_EXT		0xfe
 #define AXP806_REG_ADDR_EXT		0xff
 
+#define AXP803_POLYPHASE_CTRL		0x14
+#define AXP803_FLDO1_V_OUT		0x1c
+#define AXP803_FLDO2_V_OUT		0x1d
+#define AXP803_DCDC1_V_OUT		0x20
+#define AXP803_DCDC2_V_OUT		0x21
+#define AXP803_DCDC3_V_OUT		0x22
+#define AXP803_DCDC4_V_OUT		0x23
+#define AXP803_DCDC5_V_OUT		0x24
+#define AXP803_DCDC6_V_OUT		0x25
+#define AXP803_DCDC_FREQ_CTRL		0x3b
+
 /* Interrupt */
 #define AXP152_IRQ1_EN			0x40
 #define AXP152_IRQ2_EN			0x41
@@ -350,6 +361,32 @@ enum {
 	AXP809_REG_ID_MAX,
 };
 
+enum {
+	AXP803_DCDC1 = 0,
+	AXP803_DCDC2,
+	AXP803_DCDC3,
+	AXP803_DCDC4,
+	AXP803_DCDC5,
+	AXP803_DCDC6,
+	AXP803_DC1SW,
+	AXP803_ALDO1,
+	AXP803_ALDO2,
+	AXP803_ALDO3,
+	AXP803_DLDO1,
+	AXP803_DLDO2,
+	AXP803_DLDO3,
+	AXP803_DLDO4,
+	AXP803_ELDO1,
+	AXP803_ELDO2,
+	AXP803_ELDO3,
+	AXP803_FLDO1,
+	AXP803_FLDO2,
+	AXP803_RTC_LDO,
+	AXP803_LDO_IO0,
+	AXP803_LDO_IO1,
+	AXP803_REG_ID_MAX,
+};
+
 /* IRQs */
 enum {
 	AXP152_IRQ_LDO0IN_CONNECT = 1,
-- 
cgit v1.2.3


From 032838f9cb4014af8a974374db9e2ce6f3aa8d3b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 8 May 2017 12:33:48 -0700
Subject: drm/pl111: Register the clock divider and use it.

This is required for the panel to work on bcm911360, where CLCDCLK is
the fixed 200Mhz AXI41 clock.  The rate set is still passed up to the
CLCDCLK, for platforms that have a settable rate on that one.

v2: Set SET_RATE_PARENT (caught by Linus Walleij), depend on
    COMMON_CLK.
v3: Mark the clk_ops static (caught by Stephen).

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170508193348.30236-1-eric@anholt.net
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/gpu/drm/pl111/Kconfig         |   1 +
 drivers/gpu/drm/pl111/pl111_display.c | 162 ++++++++++++++++++++++++++++++----
 drivers/gpu/drm/pl111/pl111_drm.h     |   8 ++
 drivers/gpu/drm/pl111/pl111_drv.c     |  11 +--
 include/linux/amba/clcd-regs.h        |   5 ++
 5 files changed, 163 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig
index ede49efd531f..309f4fd52de7 100644
--- a/drivers/gpu/drm/pl111/Kconfig
+++ b/drivers/gpu/drm/pl111/Kconfig
@@ -2,6 +2,7 @@ config DRM_PL111
 	tristate "DRM Support for PL111 CLCD Controller"
 	depends on DRM
 	depends on ARM || ARM64 || COMPILE_TEST
+	depends on COMMON_CLK
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c
index 39a5c33bce7d..fbf8fbec6c16 100644
--- a/drivers/gpu/drm/pl111/pl111_display.c
+++ b/drivers/gpu/drm/pl111/pl111_display.c
@@ -108,7 +108,7 @@ static void pl111_display_enable(struct drm_simple_display_pipe *pipe,
 	u32 cntl;
 	u32 ppl, hsw, hfp, hbp;
 	u32 lpp, vsw, vfp, vbp;
-	u32 cpl;
+	u32 cpl, tim2;
 	int ret;
 
 	ret = clk_set_rate(priv->clk, mode->clock * 1000);
@@ -142,20 +142,28 @@ static void pl111_display_enable(struct drm_simple_display_pipe *pipe,
 	       (vfp << 16) |
 	       (vbp << 24),
 	       priv->regs + CLCD_TIM1);
-	/* XXX: We currently always use CLCDCLK with no divisor.  We
-	 * could probably reduce power consumption by using HCLK
-	 * (apb_pclk) with a divisor when it gets us near our target
-	 * pixel clock.
-	 */
-	writel(((mode->flags & DRM_MODE_FLAG_NHSYNC) ? TIM2_IHS : 0) |
-	       ((mode->flags & DRM_MODE_FLAG_NVSYNC) ? TIM2_IVS : 0) |
-	       ((connector->display_info.bus_flags &
-		 DRM_BUS_FLAG_DE_LOW) ? TIM2_IOE : 0) |
-	       ((connector->display_info.bus_flags &
-		 DRM_BUS_FLAG_PIXDATA_NEGEDGE) ? TIM2_IPC : 0) |
-	       TIM2_BCD |
-	       (cpl << 16),
-	       priv->regs + CLCD_TIM2);
+
+	spin_lock(&priv->tim2_lock);
+
+	tim2 = readl(priv->regs + CLCD_TIM2);
+	tim2 &= (TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK);
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		tim2 |= TIM2_IHS;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		tim2 |= TIM2_IVS;
+
+	if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW)
+		tim2 |= TIM2_IOE;
+
+	if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+		tim2 |= TIM2_IPC;
+
+	tim2 |= cpl << 16;
+	writel(tim2, priv->regs + CLCD_TIM2);
+	spin_unlock(&priv->tim2_lock);
+
 	writel(0, priv->regs + CLCD_TIM3);
 
 	drm_panel_prepare(priv->connector.panel);
@@ -288,6 +296,126 @@ const struct drm_simple_display_pipe_funcs pl111_display_funcs = {
 	.prepare_fb = pl111_display_prepare_fb,
 };
 
+static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate,
+				    unsigned long *prate, bool set_parent)
+{
+	int best_div = 1, div;
+	struct clk_hw *parent = clk_hw_get_parent(hw);
+	unsigned long best_prate = 0;
+	unsigned long best_diff = ~0ul;
+	int max_div = (1 << (TIM2_PCD_LO_BITS + TIM2_PCD_HI_BITS)) - 1;
+
+	for (div = 1; div < max_div; div++) {
+		unsigned long this_prate, div_rate, diff;
+
+		if (set_parent)
+			this_prate = clk_hw_round_rate(parent, rate * div);
+		else
+			this_prate = *prate;
+		div_rate = DIV_ROUND_UP_ULL(this_prate, div);
+		diff = abs(rate - div_rate);
+
+		if (diff < best_diff) {
+			best_div = div;
+			best_diff = diff;
+			best_prate = this_prate;
+		}
+	}
+
+	*prate = best_prate;
+	return best_div;
+}
+
+static long pl111_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long *prate)
+{
+	int div = pl111_clk_div_choose_div(hw, rate, prate, true);
+
+	return DIV_ROUND_UP_ULL(*prate, div);
+}
+
+static unsigned long pl111_clk_div_recalc_rate(struct clk_hw *hw,
+					       unsigned long prate)
+{
+	struct pl111_drm_dev_private *priv =
+		container_of(hw, struct pl111_drm_dev_private, clk_div);
+	u32 tim2 = readl(priv->regs + CLCD_TIM2);
+	int div;
+
+	if (tim2 & TIM2_BCD)
+		return prate;
+
+	div = tim2 & TIM2_PCD_LO_MASK;
+	div |= (tim2 & TIM2_PCD_HI_MASK) >>
+		(TIM2_PCD_HI_SHIFT - TIM2_PCD_LO_BITS);
+	div += 2;
+
+	return DIV_ROUND_UP_ULL(prate, div);
+}
+
+static int pl111_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long prate)
+{
+	struct pl111_drm_dev_private *priv =
+		container_of(hw, struct pl111_drm_dev_private, clk_div);
+	int div = pl111_clk_div_choose_div(hw, rate, &prate, false);
+	u32 tim2;
+
+	spin_lock(&priv->tim2_lock);
+	tim2 = readl(priv->regs + CLCD_TIM2);
+	tim2 &= ~(TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK);
+
+	if (div == 1) {
+		tim2 |= TIM2_BCD;
+	} else {
+		div -= 2;
+		tim2 |= div & TIM2_PCD_LO_MASK;
+		tim2 |= (div >> TIM2_PCD_LO_BITS) << TIM2_PCD_HI_SHIFT;
+	}
+
+	writel(tim2, priv->regs + CLCD_TIM2);
+	spin_unlock(&priv->tim2_lock);
+
+	return 0;
+}
+
+static const struct clk_ops pl111_clk_div_ops = {
+	.recalc_rate = pl111_clk_div_recalc_rate,
+	.round_rate = pl111_clk_div_round_rate,
+	.set_rate = pl111_clk_div_set_rate,
+};
+
+static int
+pl111_init_clock_divider(struct drm_device *drm)
+{
+	struct pl111_drm_dev_private *priv = drm->dev_private;
+	struct clk *parent = devm_clk_get(drm->dev, "clcdclk");
+	struct clk_hw *div = &priv->clk_div;
+	const char *parent_name;
+	struct clk_init_data init = {
+		.name = "pl111_div",
+		.ops = &pl111_clk_div_ops,
+		.parent_names = &parent_name,
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	};
+	int ret;
+
+	if (IS_ERR(parent)) {
+		dev_err(drm->dev, "CLCD: unable to get clcdclk.\n");
+		return PTR_ERR(parent);
+	}
+	parent_name = __clk_get_name(parent);
+
+	spin_lock_init(&priv->tim2_lock);
+	div->init = &init;
+
+	ret = devm_clk_hw_register(drm->dev, div);
+
+	priv->clk = div->clk;
+	return ret;
+}
+
 int pl111_display_init(struct drm_device *drm)
 {
 	struct pl111_drm_dev_private *priv = drm->dev_private;
@@ -333,6 +461,10 @@ int pl111_display_init(struct drm_device *drm)
 		return -EINVAL;
 	}
 
+	ret = pl111_init_clock_divider(drm);
+	if (ret)
+		return ret;
+
 	ret = drm_simple_display_pipe_init(drm, &priv->pipe,
 					   &pl111_display_funcs,
 					   formats, ARRAY_SIZE(formats),
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index f381593921b7..4162c6aa5dbb 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -21,6 +21,7 @@
 
 #include <drm/drm_gem.h>
 #include <drm/drm_simple_kms_helper.h>
+#include <linux/clk-provider.h>
 
 #define CLCD_IRQ_NEXTBASE_UPDATE BIT(2)
 
@@ -37,7 +38,14 @@ struct pl111_drm_dev_private {
 	struct drm_fbdev_cma *fbdev;
 
 	void *regs;
+	/* The pixel clock (a reference to our clock divider off of CLCDCLK). */
 	struct clk *clk;
+	/* pl111's internal clock divider. */
+	struct clk_hw clk_div;
+	/* Lock to sync access to CLCD_TIM2 between the common clock
+	 * subsystem and pl111_display_enable().
+	 */
+	spinlock_t tim2_lock;
 };
 
 #define to_pl111_connector(x) \
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index 936403f65508..9d1467492cb9 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -50,8 +50,8 @@
  * - Read back hardware state at boot to skip reprogramming the
  *   hardware when doing a no-op modeset.
  *
- * - Use the internal clock divisor to reduce power consumption by
- *   using HCLK (apb_pclk) when appropriate.
+ * - Use the CLKSEL bit to support switching between the two external
+ *   clock parents.
  */
 
 #include <linux/amba/bus.h>
@@ -195,13 +195,6 @@ static int pl111_amba_probe(struct amba_device *amba_dev,
 	priv->drm = drm;
 	drm->dev_private = priv;
 
-	priv->clk = devm_clk_get(dev, "clcdclk");
-	if (IS_ERR(priv->clk)) {
-		dev_err(dev, "CLCD: unable to get clk.\n");
-		ret = PTR_ERR(priv->clk);
-		goto dev_unref;
-	}
-
 	priv->regs = devm_ioremap_resource(dev, &amba_dev->res);
 	if (!priv->regs) {
 		dev_err(dev, "%s failed mmio\n", __func__);
diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h
index 69c0e2143003..516a6fda83c5 100644
--- a/include/linux/amba/clcd-regs.h
+++ b/include/linux/amba/clcd-regs.h
@@ -39,12 +39,17 @@
 #define CLCD_PALL 		0x00000200
 #define CLCD_PALETTE		0x00000200
 
+#define TIM2_PCD_LO_MASK	GENMASK(4, 0)
+#define TIM2_PCD_LO_BITS	5
 #define TIM2_CLKSEL		(1 << 5)
 #define TIM2_IVS		(1 << 11)
 #define TIM2_IHS		(1 << 12)
 #define TIM2_IPC		(1 << 13)
 #define TIM2_IOE		(1 << 14)
 #define TIM2_BCD		(1 << 26)
+#define TIM2_PCD_HI_MASK	GENMASK(31, 27)
+#define TIM2_PCD_HI_BITS	5
+#define TIM2_PCD_HI_SHIFT	27
 
 #define CNTL_LCDEN		(1 << 0)
 #define CNTL_LCDBPP1		(0 << 1)
-- 
cgit v1.2.3


From 9617813dba5b6c112922c60cd2bc57c6e11ae907 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 18 May 2017 15:44:37 +0200
Subject: skbuff: add stub to help computing crc32c on SCTP packets

sctp_compute_checksum requires crc32c symbol (provided by libcrc32c), so
it can't be used in net core. Like it has been done previously with other
symbols (e.g. ipv6_dst_lookup), introduce a stub struct skb_checksum_ops
to allow computation of crc32c checksum in net core after sctp.ko (and thus
libcrc32c) has been loaded.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 26 ++++++++++++++++++++++++++
 net/sctp/offload.c     |  6 ++++++
 3 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c0cb2ce8b01..b1f46a0d18e2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3076,6 +3076,8 @@ struct skb_checksum_ops {
 	__wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
 };
 
+extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly;
+
 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 		      __wsum csum, const struct skb_checksum_ops *ops);
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e85dfbc..d5c98117cbce 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2243,6 +2243,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 }
 EXPORT_SYMBOL(skb_copy_and_csum_bits);
 
+static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
+{
+	net_warn_ratelimited(
+		"%s: attempt to compute crc32c without libcrc32c.ko\n",
+		__func__);
+	return 0;
+}
+
+static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
+				       int offset, int len)
+{
+	net_warn_ratelimited(
+		"%s: attempt to compute crc32c without libcrc32c.ko\n",
+		__func__);
+	return 0;
+}
+
+static const struct skb_checksum_ops default_crc32c_ops = {
+	.update  = warn_crc32c_csum_update,
+	.combine = warn_crc32c_csum_combine,
+};
+
+const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
+	&default_crc32c_ops;
+EXPORT_SYMBOL(crc32c_csum_stub);
+
  /**
  *	skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
  *	@from: source buffer
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 4f5a2b580aa5..b67198429db5 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -98,6 +98,11 @@ static const struct net_offload sctp6_offload = {
 	},
 };
 
+static const struct skb_checksum_ops crc32c_csum_ops = {
+	.update  = sctp_csum_update,
+	.combine = sctp_csum_combine,
+};
+
 int __init sctp_offload_init(void)
 {
 	int ret;
@@ -110,6 +115,7 @@ int __init sctp_offload_init(void)
 	if (ret)
 		goto ipv4;
 
+	crc32c_csum_stub = &crc32c_csum_ops;
 	return ret;
 
 ipv4:
-- 
cgit v1.2.3


From b72b5bf6a8fc9065f270ae135bbd47abb9d96790 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 18 May 2017 15:44:38 +0200
Subject: net: introduce skb_crc32c_csum_help

skb_crc32c_csum_help is like skb_checksum_help, but it is designed for
checksumming SCTP packets using crc32c (see RFC3309), provided that
libcrc32c.ko has been loaded before. In case libcrc32c is not loaded,
invoking skb_crc32c_csum_help on a skb results in one the following
printouts:

warn_crc32c_csum_update: attempt to compute crc32c without libcrc32c.ko
warn_crc32c_csum_combine: attempt to compute crc32c without libcrc32c.ko

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h    |  3 ++-
 net/core/dev.c            | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0150b2dd3031..abbc72e09f11 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3931,6 +3931,7 @@ void netdev_rss_key_fill(void *buffer, size_t len);
 
 int dev_get_nest_level(struct net_device *dev);
 int skb_checksum_help(struct sk_buff *skb);
+int skb_crc32c_csum_help(struct sk_buff *skb);
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b1f46a0d18e2..62d62964c743 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -193,7 +193,8 @@
  *     accordingly. Note the there is no indication in the skbuff that the
  *     CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports
  *     both IP checksum offload and SCTP CRC offload must verify which offload
- *     is configured for a packet presumably by inspecting packet headers.
+ *     is configured for a packet presumably by inspecting packet headers; in
+ *     case, skb_crc32c_csum_help is provided to compute CRC on SCTP packets.
  *
  *   NETIF_F_FCOE_CRC - This feature indicates that a device is capable of
  *     offloading the FCOE CRC in a packet. To perform this offload the stack
diff --git a/net/core/dev.c b/net/core/dev.c
index acd594c56f0a..8356d5f05f89 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -143,6 +143,7 @@
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
 #include <linux/crash_dump.h>
+#include <linux/sctp.h>
 
 #include "net-sysfs.h"
 
@@ -2612,6 +2613,46 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+int skb_crc32c_csum_help(struct sk_buff *skb)
+{
+	__le32 crc32c_csum;
+	int ret = 0, offset, start;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		goto out;
+
+	if (unlikely(skb_is_gso(skb)))
+		goto out;
+
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (unlikely(skb_has_shared_frag(skb))) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+	start = skb_checksum_start_offset(skb);
+	offset = start + offsetof(struct sctphdr, checksum);
+	if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (skb_cloned(skb) &&
+	    !skb_clone_writable(skb, offset + sizeof(__le32))) {
+		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		if (ret)
+			goto out;
+	}
+	crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
+						  skb->len - start, ~(__u32)0,
+						  crc32c_csum_stub));
+	*(__le32 *)(skb->data + offset) = crc32c_csum;
+	skb->ip_summed = CHECKSUM_NONE;
+out:
+	return ret;
+}
+
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
 	__be16 type = skb->protocol;
-- 
cgit v1.2.3


From 219f1d79871257e9603f504dce0fe8ebf47aad08 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 18 May 2017 15:44:39 +0200
Subject: sk_buff: remove support for csum_bad in sk_buff

This bit was introduced with commit 5a21232983aa ("net: Support for
csum_bad in skbuff") to reduce the stack workload when processing RX
packets carrying a wrong Internet Checksum. Up to now, only one driver and
GRO core are setting it.

Suggested-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c |  2 +-
 include/linux/netdevice.h                        |  4 +---
 include/linux/skbuff.h                           | 23 ++---------------------
 net/bridge/netfilter/nft_reject_bridge.c         |  5 +----
 net/core/dev.c                                   |  3 ---
 net/ipv4/netfilter/nf_reject_ipv4.c              |  2 +-
 net/ipv6/netfilter/nf_reject_ipv6.c              |  3 ---
 7 files changed, 6 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 3a8a4aa13687..9a0817938eca 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -223,7 +223,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
 		skb->protocol = eth_type_trans(skb, ndev);
 		if (unlikely(buff->is_cso_err)) {
 			++self->stats.rx.errors;
-			__skb_mark_checksum_bad(skb);
+			skb->ip_summed = CHECKSUM_NONE;
 		} else {
 			if (buff->is_ip_cso) {
 				__skb_incr_checksum_unnecessary(skb);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index abbc72e09f11..c1611ace5336 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2573,9 +2573,7 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
 		__ret = __skb_gro_checksum_validate_complete(skb,	\
 				compute_pseudo(skb, proto));		\
-	if (__ret)							\
-		__skb_mark_checksum_bad(skb);				\
-	else								\
+	if (!__ret)							\
 		skb_gro_incr_csum_unnecessary(skb);			\
 	__ret;								\
 })
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 62d62964c743..c38f890d425e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -685,7 +685,7 @@ struct sk_buff {
 	__u8			csum_valid:1;
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
-	__u8			csum_bad:1;
+	__u8			__csum_bad_unused:1; /* one bit hole */
 
 	__u8			dst_pending_confirm:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -3336,21 +3336,6 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
 	}
 }
 
-static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
-{
-	/* Mark current checksum as bad (typically called from GRO
-	 * path). In the case that ip_summed is CHECKSUM_NONE
-	 * this must be the first checksum encountered in the packet.
-	 * When ip_summed is CHECKSUM_UNNECESSARY, this is the first
-	 * checksum after the last one validated. For UDP, a zero
-	 * checksum can not be marked as bad.
-	 */
-
-	if (skb->ip_summed == CHECKSUM_NONE ||
-	    skb->ip_summed == CHECKSUM_UNNECESSARY)
-		skb->csum_bad = 1;
-}
-
 /* Check if we need to perform checksum complete validation.
  *
  * Returns true if checksum complete is needed, false otherwise
@@ -3404,9 +3389,6 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
 			skb->csum_valid = 1;
 			return 0;
 		}
-	} else if (skb->csum_bad) {
-		/* ip_summed == CHECKSUM_NONE in this case */
-		return (__force __sum16)1;
 	}
 
 	skb->csum = psum;
@@ -3466,8 +3448,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
 
 static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
 {
-	return (skb->ip_summed == CHECKSUM_NONE &&
-		skb->csum_valid && !skb->csum_bad);
+	return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
 }
 
 static inline void __skb_checksum_convert(struct sk_buff *skb,
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 346ef6b00b8f..c16dd3a47fc6 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -111,7 +111,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
 	__wsum csum;
 	u8 proto;
 
-	if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
+	if (!nft_bridge_iphdr_validate(oldskb))
 		return;
 
 	/* IP header checks: fragment. */
@@ -226,9 +226,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
 	__be16 fo;
 	u8 proto = ip6h->nexthdr;
 
-	if (skb->csum_bad)
-		return false;
-
 	if (skb_csum_unnecessary(skb))
 		return true;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 8356d5f05f89..f0281ff45e77 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4678,9 +4678,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	if (skb->csum_bad)
-		goto normal;
-
 	gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 7cd8d0d918f8..6f8d9e5e062b 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -172,7 +172,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 	struct iphdr *iph = ip_hdr(skb_in);
 	u8 proto;
 
-	if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+	if (iph->frag_off & htons(IP_OFFSET))
 		return;
 
 	if (skb_csum_unnecessary(skb_in)) {
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index eedee5d108d9..f63b18e05c69 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
 	__be16 fo;
 	u8 proto;
 
-	if (skb->csum_bad)
-		return false;
-
 	if (skb_csum_unnecessary(skb))
 		return true;
 
-- 
cgit v1.2.3


From dba003067a43a9699bef0c4bdbe320ece5a109b8 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 18 May 2017 15:44:40 +0200
Subject: net: use skb->csum_not_inet to identify packets needing crc32c

skb->csum_not_inet carries the indication on which algorithm is needed to
compute checksum on skb in the transmit path, when skb->ip_summed is equal
to CHECKSUM_PARTIAL. If skb carries a SCTP packet and crc32c hasn't been
yet written in L4 header, skb->csum_not_inet is assigned to 1; otherwise,
assume Internet Checksum is needed and thus set skb->csum_not_inet to 0.

Suggested-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 16 +++++++++-------
 net/core/dev.c         |  1 +
 net/sched/act_csum.c   |  1 +
 net/sctp/offload.c     |  1 +
 net/sctp/output.c      |  1 +
 5 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c38f890d425e..a43d2086bb7f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -189,12 +189,13 @@
  *
  *   NETIF_F_SCTP_CRC - This feature indicates that a device is capable of
  *     offloading the SCTP CRC in a packet. To perform this offload the stack
- *     will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset
- *     accordingly. Note the there is no indication in the skbuff that the
- *     CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports
- *     both IP checksum offload and SCTP CRC offload must verify which offload
- *     is configured for a packet presumably by inspecting packet headers; in
- *     case, skb_crc32c_csum_help is provided to compute CRC on SCTP packets.
+ *     will set set csum_start and csum_offset accordingly, set ip_summed to
+ *     CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in
+ *     the skbuff that the CHECKSUM_PARTIAL refers to CRC32c.
+ *     A driver that supports both IP checksum offload and SCTP CRC32c offload
+ *     must verify which offload is configured for a packet by testing the
+ *     value of skb->csum_not_inet; skb_crc32c_csum_help is provided to resolve
+ *     CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1.
  *
  *   NETIF_F_FCOE_CRC - This feature indicates that a device is capable of
  *     offloading the FCOE CRC in a packet. To perform this offload the stack
@@ -557,6 +558,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@wifi_acked_valid: wifi_acked was set
  *	@wifi_acked: whether frame was acked on wifi or not
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
+ *	@csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  *	@dst_pending_confirm: need to confirm neighbour
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
@@ -685,7 +687,7 @@ struct sk_buff {
 	__u8			csum_valid:1;
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
-	__u8			__csum_bad_unused:1; /* one bit hole */
+	__u8			csum_not_inet:1;
 
 	__u8			dst_pending_confirm:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
diff --git a/net/core/dev.c b/net/core/dev.c
index f0281ff45e77..71107d1f3051 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2649,6 +2649,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
 						  crc32c_csum_stub));
 	*(__le32 *)(skb->data + offset) = crc32c_csum;
 	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_not_inet = 0;
 out:
 	return ret;
 }
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index ab6fdbd34db7..3317a2f579da 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
 	sctph->checksum = sctp_compute_cksum(skb,
 					     skb_network_offset(skb) + ihl);
 	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_not_inet = 0;
 
 	return 1;
 }
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index b67198429db5..275925b93b29 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -35,6 +35,7 @@
 static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
 {
 	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_not_inet = 0;
 	return sctp_compute_cksum(skb, skb_transport_offset(skb));
 }
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1409a875ad8e..e2edf2ebbade 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -538,6 +538,7 @@ merge:
 	} else {
 chksum:
 		head->ip_summed = CHECKSUM_PARTIAL;
+		head->csum_not_inet = 1;
 		head->csum_start = skb_transport_header(head) - head->head;
 		head->csum_offset = offsetof(struct sctphdr, checksum);
 	}
-- 
cgit v1.2.3


From 43c26a1a45938624fb9301e8bf7dfabbed293619 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 18 May 2017 15:44:41 +0200
Subject: net: more accurate checksumming in validate_xmit_skb()

skb_csum_hwoffload_help() uses netdev features and skb->csum_not_inet to
determine if skb needs software computation of Internet Checksum or crc32c
(or nothing, if this computation can be done by the hardware). Use it in
place of skb_checksum_help() in validate_xmit_skb() to avoid corruption
of non-GSO SCTP packets having skb->ip_summed equal to CHECKSUM_PARTIAL.

While at it, remove references to skb_csum_off_chk* functions, since they
are not present anymore in Linux  _ see commit cf53b1da73bd ("Revert
 "net: Add driver helper functions to determine checksum offloadability"").

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/checksum-offloads.txt | 11 +++++++----
 include/linux/netdevice.h                      |  3 +++
 include/linux/skbuff.h                         | 13 +++++--------
 net/core/dev.c                                 | 14 ++++++++++++--
 4 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt
index 56e36861245f..d52d191bbb0c 100644
--- a/Documentation/networking/checksum-offloads.txt
+++ b/Documentation/networking/checksum-offloads.txt
@@ -35,6 +35,9 @@ This interface only allows a single checksum to be offloaded.  Where
  encapsulation is used, the packet may have multiple checksum fields in
  different header layers, and the rest will have to be handled by another
  mechanism such as LCO or RCO.
+CRC32c can also be offloaded using this interface, by means of filling
+ skb->csum_start and skb->csum_offset as described above, and setting
+ skb->csum_not_inet: see skbuff.h comment (section 'D') for more details.
 No offloading of the IP header checksum is performed; it is always done in
  software.  This is OK because when we build the IP header, we obviously
  have it in cache, so summing it isn't expensive.  It's also rather short.
@@ -49,9 +52,9 @@ A driver declares its offload capabilities in netdev->hw_features; see
  and csum_offset given in the SKB; if it tries to deduce these itself in
  hardware (as some NICs do) the driver should check that the values in the
  SKB match those which the hardware will deduce, and if not, fall back to
- checksumming in software instead (with skb_checksum_help or one of the
- skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h).  This
- is a pain, but that's what you get when hardware tries to be clever.
+ checksumming in software instead (with skb_csum_hwoffload_help() or one of
+ the skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in
+ include/linux/skbuff.h).
 
 The stack should, for the most part, assume that checksum offload is
  supported by the underlying device.  The only place that should check is
@@ -60,7 +63,7 @@ The stack should, for the most part, assume that checksum offload is
  may include other offloads besides TX Checksum Offload) and, if they are
  not supported or enabled on the device (determined by netdev->features),
  performs the corresponding offload in software.  In the case of TX
- Checksum Offload, that means calling skb_checksum_help(skb).
+ Checksum Offload, that means calling skb_csum_hwoffload_help(skb, features).
 
 
 LCO: Local Checksum Offload
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1611ace5336..f8f7cd52a0a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3930,6 +3930,9 @@ void netdev_rss_key_fill(void *buffer, size_t len);
 int dev_get_nest_level(struct net_device *dev);
 int skb_checksum_help(struct sk_buff *skb);
 int skb_crc32c_csum_help(struct sk_buff *skb);
+int skb_csum_hwoffload_help(struct sk_buff *skb,
+			    const netdev_features_t features);
+
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a43d2086bb7f..43d7ca07b2ff 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -162,14 +162,11 @@
  *
  *   NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of
  *   NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate
- *   checksum offload capability. If a	device has limited checksum capabilities
- *   (for instance can only perform NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM as
- *   described above) a helper function can be called to resolve
- *   CHECKSUM_PARTIAL. The helper functions are skb_csum_off_chk*. The helper
- *   function takes a spec argument that describes the protocol layer that is
- *   supported for checksum offload and can be called for each packet. If a
- *   packet does not match the specification for offload, skb_checksum_help
- *   is called to resolve the checksum.
+ *   checksum offload capability.
+ *   skb_csum_hwoffload_help() can be called to resolve CHECKSUM_PARTIAL based
+ *   on network device checksumming capabilities: if a packet does not match
+ *   them, skb_checksum_help or skb_crc32c_help (depending on the value of
+ *   csum_not_inet, see item D.) is called to resolve the checksum.
  *
  * CHECKSUM_NONE:
  *
diff --git a/net/core/dev.c b/net/core/dev.c
index 71107d1f3051..bb136f726890 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2996,6 +2996,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
 	return skb;
 }
 
+int skb_csum_hwoffload_help(struct sk_buff *skb,
+			    const netdev_features_t features)
+{
+	if (unlikely(skb->csum_not_inet))
+		return !!(features & NETIF_F_SCTP_CRC) ? 0 :
+			skb_crc32c_csum_help(skb);
+
+	return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+}
+EXPORT_SYMBOL(skb_csum_hwoffload_help);
+
 static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
 {
 	netdev_features_t features;
@@ -3034,8 +3045,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 			else
 				skb_set_transport_header(skb,
 							 skb_checksum_start_offset(skb));
-			if (!(features & NETIF_F_CSUM_MASK) &&
-			    skb_checksum_help(skb))
+			if (skb_csum_hwoffload_help(skb, features))
 				goto out_kfree_skb;
 		}
 	}
-- 
cgit v1.2.3


From b4759dcdcd8466e70f01ff07f33e17cd93131d34 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 18 May 2017 15:44:43 +0200
Subject: sk_buff.h: improve description of CHECKSUM_{COMPLETE, UNNECESSARY}

Add FCoE to the list of protocols that can set CHECKSUM_UNNECESSARY; add a
note to CHECKSUM_COMPLETE section to specify that it does not apply to SCTP
and FCoE protocols.

Suggested-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 43d7ca07b2ff..1713e4b7ea9f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -109,6 +109,7 @@
  *       may perform further validation in this case.
  *     GRE: only if the checksum is present in the header.
  *     SCTP: indicates the CRC in SCTP header has been validated.
+ *     FCOE: indicates the CRC in FC frame has been validated.
  *
  *   skb->csum_level indicates the number of consecutive checksums found in
  *   the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
@@ -126,8 +127,10 @@
  *   packet as seen by netif_rx() and fills out in skb->csum. Meaning, the
  *   hardware doesn't need to parse L3/L4 headers to implement this.
  *
- *   Note: Even if device supports only some protocols, but is able to produce
- *   skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY.
+ *   Notes:
+ *   - Even if device supports only some protocols, but is able to produce
+ *     skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY.
+ *   - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols.
  *
  * CHECKSUM_PARTIAL:
  *
-- 
cgit v1.2.3


From 32d0f7830d9be5b1652a718e050d808b4908155f Mon Sep 17 00:00:00 2001
From: Iyappan Subramanian <isubramanian@apm.com>
Date: Thu, 18 May 2017 15:13:43 -0700
Subject: phy: Add helper function to check phy interface mode

Added helper function that checks phy_mode is RGMII (all variants)
'bool phy_interface_mode_is_rgmii(phy_interface_t mode)'

Changed the following function, to use the above.
'bool phy_interface_is_rgmii(struct phy_device *phydev)'

Signed-off-by: Iyappan Subramanian <isubramanian@apm.com>
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 54ef45823fc1..5a808a26e4cf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -715,6 +715,17 @@ static inline bool phy_is_internal(struct phy_device *phydev)
 	return phydev->is_internal;
 }
 
+/**
+ * phy_interface_mode_is_rgmii - Convenience function for testing if a
+ * PHY interface mode is RGMII (all variants)
+ * @mode: the phy_interface_t enum
+ */
+static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
+{
+	return mode >= PHY_INTERFACE_MODE_RGMII &&
+		mode <= PHY_INTERFACE_MODE_RGMII_TXID;
+};
+
 /**
  * phy_interface_is_rgmii - Convenience function for testing if a PHY interface
  * is RGMII (all variants)
@@ -722,8 +733,7 @@ static inline bool phy_is_internal(struct phy_device *phydev)
  */
 static inline bool phy_interface_is_rgmii(struct phy_device *phydev)
 {
-	return phydev->interface >= PHY_INTERFACE_MODE_RGMII &&
-		phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID;
+	return phy_interface_mode_is_rgmii(phydev->interface);
 };
 
 /*
-- 
cgit v1.2.3


From ce72a16fa705f960ca2352e95a7c5f4801475e75 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 May 2017 20:25:02 -0400
Subject: wait4(2)/waitid(2): separate copying rusage to userland

New helpers: kernel_waitid() and kernel_wait4().  sys_waitid(),
sys_wait4() and their compat variants switched to those.  Copying
struct rusage to userland is left to syscall itself.  For
compat_sys_wait4() that eliminates the use of set_fs() completely.
For compat_sys_waitid() it's still needed (for siginfo handling);
that will change shortly.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/resource.h |  2 +-
 kernel/exit.c            | 89 ++++++++++++++++++++++++++++--------------------
 kernel/sys.c             | 16 ++++-----
 3 files changed, 59 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resource.h b/include/linux/resource.h
index 5bc3116e649c..277afdad6589 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -6,7 +6,7 @@
 
 struct task_struct;
 
-int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
+void getrusage(struct task_struct *p, int who, struct rusage *ru);
 int do_prlimit(struct task_struct *tsk, unsigned int resource,
 		struct rlimit *new_rlim, struct rlimit *old_rlim);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f98782bd27b6..d44f12948c5f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1003,7 +1003,7 @@ struct wait_opts {
 
 	struct siginfo __user	*wo_info;
 	int __user		*wo_stat;
-	struct rusage __user	*wo_rusage;
+	struct rusage		*wo_rusage;
 
 	wait_queue_t		child_wait;
 	int			notask_error;
@@ -1054,8 +1054,10 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 				pid_t pid, uid_t uid, int why, int status)
 {
 	struct siginfo __user *infop;
-	int retval = wo->wo_rusage
-		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
+	int retval = 0;
+
+	if (wo->wo_rusage)
+		getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
 
 	put_task_struct(p);
 	infop = wo->wo_info;
@@ -1182,8 +1184,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		spin_unlock_irq(&current->sighand->siglock);
 	}
 
-	retval = wo->wo_rusage
-		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
+	if (wo->wo_rusage)
+		getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
+	retval = 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
 	if (!retval && wo->wo_stat)
@@ -1316,8 +1319,9 @@ unlock_sig:
 	if (unlikely(wo->wo_flags & WNOWAIT))
 		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
 
-	retval = wo->wo_rusage
-		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
+	if (wo->wo_rusage)
+		getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
+	retval = 0;
 	if (!retval && wo->wo_stat)
 		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
 
@@ -1377,8 +1381,9 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 	sched_annotate_sleep();
 
 	if (!wo->wo_info) {
-		retval = wo->wo_rusage
-			? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
+		if (wo->wo_rusage)
+			getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
+		retval = 0;
 		put_task_struct(p);
 		if (!retval && wo->wo_stat)
 			retval = put_user(0xffff, wo->wo_stat);
@@ -1618,8 +1623,8 @@ end:
 	return retval;
 }
 
-SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
-		infop, int, options, struct rusage __user *, ru)
+static long kernel_waitid(int which, pid_t upid, struct siginfo __user *infop,
+			  int options, struct rusage *ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
@@ -1687,8 +1692,21 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	return ret;
 }
 
-SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
-		int, options, struct rusage __user *, ru)
+SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
+		infop, int, options, struct rusage __user *, ru)
+{
+	struct rusage r;
+	long err = kernel_waitid(which, upid, infop, options, ru ? &r : NULL);
+
+	if (!err) {
+		if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
+			return -EFAULT;
+	}
+	return err;
+}
+
+static long kernel_wait4(pid_t upid, int __user *stat_addr,
+			int options, struct rusage *ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
@@ -1724,6 +1742,19 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 	return ret;
 }
 
+SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
+		int, options, struct rusage __user *, ru)
+{
+	struct rusage r;
+	long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL);
+
+	if (err > 0) {
+		if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
+			return -EFAULT;
+	}
+	return err;
+}
+
 #ifdef __ARCH_WANT_SYS_WAITPID
 
 /*
@@ -1744,29 +1775,13 @@ COMPAT_SYSCALL_DEFINE4(wait4,
 	int, options,
 	struct compat_rusage __user *, ru)
 {
-	if (!ru) {
-		return sys_wait4(pid, stat_addr, options, NULL);
-	} else {
-		struct rusage r;
-		int ret;
-		unsigned int status;
-		mm_segment_t old_fs = get_fs();
-
-		set_fs (KERNEL_DS);
-		ret = sys_wait4(pid,
-				(stat_addr ?
-				 (unsigned int __user *) &status : NULL),
-				options, (struct rusage __user *) &r);
-		set_fs (old_fs);
-
-		if (ret > 0) {
-			if (put_compat_rusage(&r, ru))
-				return -EFAULT;
-			if (stat_addr && put_user(status, stat_addr))
-				return -EFAULT;
-		}
-		return ret;
+	struct rusage r;
+	long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL);
+	if (err > 0) {
+		if (ru && put_compat_rusage(&r, ru))
+			return -EFAULT;
 	}
+	return err;
 }
 
 COMPAT_SYSCALL_DEFINE5(waitid,
@@ -1782,8 +1797,8 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	memset(&info, 0, sizeof(info));
 
 	set_fs(KERNEL_DS);
-	ret = sys_waitid(which, pid, (siginfo_t __user *)&info, options,
-			 uru ? (struct rusage __user *)&ru : NULL);
+	ret = kernel_waitid(which, pid, (siginfo_t __user *)&info, options,
+			 uru ? &ru : NULL);
 	set_fs(old_fs);
 
 	if ((ret < 0) || (info.si_signo == 0))
diff --git a/kernel/sys.c b/kernel/sys.c
index 8a94b4eabcaa..dab1a0658a92 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1552,7 +1552,7 @@ static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 	r->ru_oublock += task_io_get_oublock(t);
 }
 
-static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
+void getrusage(struct task_struct *p, int who, struct rusage *r)
 {
 	struct task_struct *t;
 	unsigned long flags;
@@ -1626,20 +1626,16 @@ out:
 	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
 }
 
-int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
+SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
 {
 	struct rusage r;
 
-	k_getrusage(p, who, &r);
-	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
-}
-
-SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
-{
 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
 	    who != RUSAGE_THREAD)
 		return -EINVAL;
-	return getrusage(current, who, ru);
+
+	getrusage(current, who, &r);
+	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1651,7 +1647,7 @@ COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
 	    who != RUSAGE_THREAD)
 		return -EINVAL;
 
-	k_getrusage(current, who, &r);
+	getrusage(current, who, &r);
 	return put_compat_rusage(&r, ru);
 }
 #endif
-- 
cgit v1.2.3


From 92ebce5ac55dba258c608248dddf59eca3f7f514 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 May 2017 23:54:33 -0400
Subject: osf_wait4: switch to kernel_wait4()

... and sanitize copying rusage to userland

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/osf_sys.c | 53 ++++++++++++---------------------------------
 include/linux/sched/task.h  |  2 ++
 kernel/exit.c               |  4 ++--
 3 files changed, 18 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index ce93124a850b..b23d6fbbb225 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1183,48 +1183,23 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
 		struct rusage32 __user *, ur)
 {
-	struct rusage r;
-	long ret, err;
 	unsigned int status = 0;
-	mm_segment_t old_fs;
-
+	struct rusage r;
+	long err = kernel_wait4(pid, &status, options, &r);
+	if (err <= 0)
+		return err;
+	if (put_user(status, ustatus))
+		return -EFAULT;
 	if (!ur)
-		return sys_wait4(pid, ustatus, options, NULL);
-
-	old_fs = get_fs();
-		
-	set_fs (KERNEL_DS);
-	ret = sys_wait4(pid, (unsigned int __user *) &status, options,
-			(struct rusage __user *) &r);
-	set_fs (old_fs);
-
-	if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur)))
+		return err;
+	if (put_tv32(&ur->ru_utime, &r.ru_utime))
 		return -EFAULT;
-
-	err = put_user(status, ustatus);
-	if (ret < 0)
-		return err ? err : ret;
-
-	err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec);
-	err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec);
-	err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec);
-	err |= __put_user(r.ru_stime.tv_usec, &ur->ru_stime.tv_usec);
-	err |= __put_user(r.ru_maxrss, &ur->ru_maxrss);
-	err |= __put_user(r.ru_ixrss, &ur->ru_ixrss);
-	err |= __put_user(r.ru_idrss, &ur->ru_idrss);
-	err |= __put_user(r.ru_isrss, &ur->ru_isrss);
-	err |= __put_user(r.ru_minflt, &ur->ru_minflt);
-	err |= __put_user(r.ru_majflt, &ur->ru_majflt);
-	err |= __put_user(r.ru_nswap, &ur->ru_nswap);
-	err |= __put_user(r.ru_inblock, &ur->ru_inblock);
-	err |= __put_user(r.ru_oublock, &ur->ru_oublock);
-	err |= __put_user(r.ru_msgsnd, &ur->ru_msgsnd);
-	err |= __put_user(r.ru_msgrcv, &ur->ru_msgrcv);
-	err |= __put_user(r.ru_nsignals, &ur->ru_nsignals);
-	err |= __put_user(r.ru_nvcsw, &ur->ru_nvcsw);
-	err |= __put_user(r.ru_nivcsw, &ur->ru_nivcsw);
-
-	return err ? err : ret;
+	if (put_tv32(&ur->ru_stime, &r.ru_stime))
+		return -EFAULT;
+	if (copy_to_user(&ur->ru_maxrss, &r.ru_maxrss,
+	      sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss)))
+		return -EFAULT;
+	return err;
 }
 
 /*
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index a978d7189cfd..6b830fd9d809 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 
 struct task_struct;
+struct rusage;
 union thread_union;
 
 /*
@@ -74,6 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *,
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+extern long kernel_wait4(pid_t, int *, int, struct rusage *);
 
 extern void free_task(struct task_struct *tsk);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f3b8c3a87bc1..462fc25eec6e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1639,8 +1639,8 @@ Efault:
 	return -EFAULT;
 }
 
-static long kernel_wait4(pid_t upid, int __user *stat_addr,
-			int options, struct rusage *ru)
+long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
+		  struct rusage *ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
-- 
cgit v1.2.3


From 90b602f80397657429373ca009f98aec4dd3c553 Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Fri, 19 May 2017 17:52:37 +0200
Subject: net: add function to retrieve original skb device using NAPI ID

Since commit b68581778cd0 ("net: Make skb->skb_iif always track
skb->dev") skbs don't have the original index of the interface which
received the packet. This information is now needed for a new control
message related to hardware timestamping.

Instead of adding a new field to skb, we can find the device by the NAPI
ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the
driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to
hide the CONFIG_NET_RX_BUSY_POLL ifdef.

CC: Richard Cochran <richardcochran@gmail.com>
Suggested-by: Willem de Bruijn <willemb@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h    |  9 +++++++++
 net/core/dev.c            | 26 ++++++++++++++++++++++++++
 3 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8f7cd52a0a0..c50c9218e31e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1713e4b7ea9f..8acce7143f6a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -858,6 +858,15 @@ static inline bool skb_pkt_type_ok(u32 ptype)
 	return ptype <= PACKET_OTHERHOST;
 }
 
+static inline unsigned int skb_napi_id(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	return skb->napi_id;
+#else
+	return 0;
+#endif
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index bb136f726890..3d98fbf4cbb0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -163,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 					 struct net_device *dev,
 					 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -866,6 +867,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 }
 EXPORT_SYMBOL(dev_get_by_index);
 
+/**
+ *	dev_get_by_napi_id - find a device by napi_id
+ *	@napi_id: ID of the NAPI struct
+ *
+ *	Search for an interface by NAPI ID. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not had
+ *	its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+	struct napi_struct *napi;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (napi_id < MIN_NAPI_ID)
+		return NULL;
+
+	napi = napi_by_id(napi_id);
+
+	return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
 /**
  *	netdev_get_name - get a netdevice name, knowing its ifindex.
  *	@net: network namespace
-- 
cgit v1.2.3


From b50a5c70ffa4fd6b6da324ab54c84adf48fb17d9 Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Fri, 19 May 2017 17:52:40 +0200
Subject: net: allow simultaneous SW and HW transmit timestamping

Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcochran@gmail.com>
CC: Willem de Bruijn <willemb@google.com>
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/timestamping.txt |  8 ++++++++
 include/linux/skbuff.h                    | 10 ++--------
 include/uapi/linux/net_tstamp.h           |  3 ++-
 net/core/skbuff.c                         |  4 ++++
 net/socket.c                              | 20 ++++++++++++++++++--
 5 files changed, 34 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 50eb0e554778..196ba17cc344 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -203,6 +203,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   enabled and the driver is using NAPI. The struct contains also two
   other fields, but they are reserved and undefined.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8acce7143f6a..45a59c1e0cc7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3259,13 +3259,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		   struct skb_shared_hwtstamps *hwtstamps);
 
-static inline void sw_tx_timestamp(struct sk_buff *skb)
-{
-	if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-	    !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-		skb_tstamp_tx(skb, NULL);
-}
-
 /**
  * skb_tx_timestamp() - Driver hook for transmit timestamping
  *
@@ -3281,7 +3274,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
 	skb_clone_tx_timestamp(skb);
-	sw_tx_timestamp(skb);
+	if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+		skb_tstamp_tx(skb, NULL);
 }
 
 /**
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index dee74d39da94..3d421d912193 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -28,8 +28,9 @@ enum {
 	SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
 	SOF_TIMESTAMPING_OPT_STATS = (1<<12),
 	SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+	SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 				 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d5c98117cbce..780b7c1563d0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3901,6 +3901,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	if (!sk)
 		return;
 
+	if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+	    skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+		return;
+
 	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
 	if (!skb_may_tx_timestamp(sk, tsonly))
 		return;
diff --git a/net/socket.c b/net/socket.c
index 67db7d8a3b81..cb355a7ef135 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,19 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
 	return skb->pkt_type == PACKET_OUTGOING;
 }
 
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. Ignore false software
+ * timestamps, which may be made in the __sock_recv_timestamp() call when the
+ * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * hardware timestamp.
+ */
+static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+{
+	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
+}
+
 static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 {
 	struct scm_ts_pktinfo ts_pktinfo;
@@ -691,14 +704,16 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 	struct scm_timestamping tss;
-	int empty = 1;
+	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
 
 	/* Race occurred between timestamp enabling and packet
 	   receiving.  Fill in the current time for now. */
-	if (need_software_tstamp && skb->tstamp == 0)
+	if (need_software_tstamp && skb->tstamp == 0) {
 		__net_timestamp(skb);
+		false_tstamp = 1;
+	}
 
 	if (need_software_tstamp) {
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
@@ -720,6 +735,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		empty = 0;
 	if (shhwtstamps &&
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+	    !skb_is_swtx_tstamp(skb, false_tstamp) &&
 	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
 		empty = 0;
 		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
-- 
cgit v1.2.3


From b4d2ea2af95cb77e2f320e24da526280d4aa2f6b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 8 May 2017 10:48:21 +0200
Subject: Revert "pinctrl: generic: Add bi-directional and output-enable"

This reverts commit 8c58f1a7a4b6d1d723bf25fef9d842d5a11200d0.

It turns out that applying these generic properties was
premature: the properties used in the driver using this
are of unclear electrical nature and the subject need to
be discussed.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt | 2 --
 drivers/pinctrl/pinconf-generic.c                              | 3 ---
 include/linux/pinctrl/pinconf-generic.h                        | 3 ---
 3 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index 71a3c134af1b..f01d154090da 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -247,7 +247,6 @@ bias-bus-hold		- latch weakly
 bias-pull-up		- pull up the pin
 bias-pull-down		- pull down the pin
 bias-pull-pin-default	- use pin-default pull state
-bi-directional		- pin supports simultaneous input/output operations
 drive-push-pull		- drive actively high and low
 drive-open-drain	- drive with open drain
 drive-open-source	- drive with open source
@@ -260,7 +259,6 @@ input-debounce		- debounce mode with debound time X
 power-source		- select between different power supplies
 low-power-enable	- enable low power mode
 low-power-disable	- disable low power mode
-output-enable		- enable output on pin regardless of output value
 output-low		- set the pin to output mode with low level
 output-high		- set the pin to output mode with high level
 slew-rate		- set the slew rate
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 0d6b7f4b82af..720a19fd38d2 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -35,7 +35,6 @@ static const struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
 				"input bias pull to pin specific state", NULL, false),
 	PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false),
-	PCONFDUMP(PIN_CONFIG_BIDIRECTIONAL, "bi-directional pin operations", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false),
@@ -161,7 +160,6 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 },
 	{ "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 },
 	{ "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 },
-	{ "bi-directional", PIN_CONFIG_BIDIRECTIONAL, 1 },
 	{ "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 },
 	{ "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 },
 	{ "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 },
@@ -174,7 +172,6 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 },
 	{ "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 },
 	{ "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 },
-	{ "output-enable", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-high", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-low", PIN_CONFIG_OUTPUT, 0, },
 	{ "power-source", PIN_CONFIG_POWER_SOURCE, 0 },
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 279e3c5326e3..7620eb127cff 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -42,8 +42,6 @@
  * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high
  *	impedance to VDD). If the argument is != 0 pull-up is enabled,
  *	if it is 0, pull-up is total, i.e. the pin is connected to VDD.
- * @PIN_CONFIG_BIDIRECTIONAL: the pin will be configured to allow simultaneous
- *	input and output operations.
  * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open
  *	collector) which means it is usually wired with other output ports
  *	which are then pulled up with an external resistor. Setting this
@@ -98,7 +96,6 @@ enum pin_config_param {
 	PIN_CONFIG_BIAS_PULL_DOWN,
 	PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
 	PIN_CONFIG_BIAS_PULL_UP,
-	PIN_CONFIG_BIDIRECTIONAL,
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
 	PIN_CONFIG_DRIVE_PUSH_PULL,
-- 
cgit v1.2.3


From 020e0b1c8f19f1fc3bce23beeccd80c574ca0e49 Mon Sep 17 00:00:00 2001
From: Anatolij Gustschin <agust@denx.de>
Date: Thu, 11 May 2017 20:24:31 +0200
Subject: gpiolib: Add stubs for gpiod lookup table interface

Add stubs for gpiod_add_lookup_table() and gpiod_remove_lookup_table()
for the !GPIOLIB case to prevent build errors.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/machine.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index c0d712d22b07..f738d50cc17d 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -56,7 +56,14 @@ struct gpiod_lookup_table {
 	.flags = _flags,                                                  \
 }
 
+#ifdef CONFIG_GPIOLIB
 void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table);
+#else
+static inline
+void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
+static inline
+void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
+#endif
 
 #endif /* __LINUX_GPIO_MACHINE_H */
-- 
cgit v1.2.3


From 91b9ae48aadd7e634161372b0bc3ffc88a050e8b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:30:33 +0200
Subject: HID: i2c-hid: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/i2c-hid/i2c-hid.c         |  2 +-
 include/linux/i2c/i2c-hid.h           | 42 -----------------------------------
 include/linux/platform_data/i2c-hid.h | 42 +++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 43 deletions(-)
 delete mode 100644 include/linux/i2c/i2c-hid.h
 create mode 100644 include/linux/platform_data/i2c-hid.h

(limited to 'include/linux')

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 8daa8ce64ebb..841aa43526eb 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -40,7 +40,7 @@
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <linux/i2c/i2c-hid.h>
+#include <linux/platform_data/i2c-hid.h>
 
 #include "../hid-ids.h"
 
diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/i2c/i2c-hid.h
deleted file mode 100644
index 1fb088239d12..000000000000
--- a/include/linux/i2c/i2c-hid.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * HID over I2C protocol implementation
- *
- * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
- * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive for
- * more details.
- */
-
-#ifndef __LINUX_I2C_HID_H
-#define __LINUX_I2C_HID_H
-
-#include <linux/types.h>
-
-struct regulator;
-
-/**
- * struct i2chid_platform_data - used by hid over i2c implementation.
- * @hid_descriptor_address: i2c register where the HID descriptor is stored.
- * @supply: regulator for powering on the device.
- * @post_power_delay_ms: delay after powering on before device is usable.
- *
- * Note that it is the responsibility of the platform driver (or the acpi 5.0
- * driver, or the flattened device tree) to setup the irq related to the gpio in
- * the struct i2c_board_info.
- * The platform driver should also setup the gpio according to the device:
- *
- * A typical example is the following:
- *	irq = gpio_to_irq(intr_gpio);
- *	hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info
- *	gpio_request(intr_gpio, "elan-irq");
- *	s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP);
- */
-struct i2c_hid_platform_data {
-	u16 hid_descriptor_address;
-	struct regulator *supply;
-	int post_power_delay_ms;
-};
-
-#endif /* __LINUX_I2C_HID_H */
diff --git a/include/linux/platform_data/i2c-hid.h b/include/linux/platform_data/i2c-hid.h
new file mode 100644
index 000000000000..1fb088239d12
--- /dev/null
+++ b/include/linux/platform_data/i2c-hid.h
@@ -0,0 +1,42 @@
+/*
+ * HID over I2C protocol implementation
+ *
+ * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#ifndef __LINUX_I2C_HID_H
+#define __LINUX_I2C_HID_H
+
+#include <linux/types.h>
+
+struct regulator;
+
+/**
+ * struct i2chid_platform_data - used by hid over i2c implementation.
+ * @hid_descriptor_address: i2c register where the HID descriptor is stored.
+ * @supply: regulator for powering on the device.
+ * @post_power_delay_ms: delay after powering on before device is usable.
+ *
+ * Note that it is the responsibility of the platform driver (or the acpi 5.0
+ * driver, or the flattened device tree) to setup the irq related to the gpio in
+ * the struct i2c_board_info.
+ * The platform driver should also setup the gpio according to the device:
+ *
+ * A typical example is the following:
+ *	irq = gpio_to_irq(intr_gpio);
+ *	hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info
+ *	gpio_request(intr_gpio, "elan-irq");
+ *	s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP);
+ */
+struct i2c_hid_platform_data {
+	u16 hid_descriptor_address;
+	struct regulator *supply;
+	int post_power_delay_ms;
+};
+
+#endif /* __LINUX_I2C_HID_H */
-- 
cgit v1.2.3


From 6e7edabfc6a8ac5dce8c55363a7bb1576fc9348f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 11 May 2017 19:11:11 +0200
Subject: HID: Microsoft Win8 Wireless Radio Controls cleanup

Use a better URL for the HUTRR40 Radio HID Usages documentation and use the
HID_GD_WIRELESS_RADIO_CTLS define rather then hardcoding a check for
0x0001000c.

Fixes: 61df56bef9 ("HID: Add mapping for Microsoft Win8 Wireless Radio Controls extensions")
Suggested-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 2 +-
 include/linux/hid.h     | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 412040b11268..ccdff1ee1f0c 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -658,7 +658,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 
 		case HID_GD_RFKILL_BTN:
 			/* MS wireless radio ctl extension, also check CA */
-			if (field->application == 0x0001000c) {
+			if (field->application == HID_GD_WIRELESS_RADIO_CTLS) {
 				map_key_clear(KEY_RFKILL);
 				/* We need to simulate the btn release */
 				field->flags |= HID_MAIN_ITEM_RELATIVE;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 0b29466bbc21..bebbf4893448 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -183,9 +183,8 @@ struct hid_item {
 #define HID_GD_KEYPAD		0x00010007
 #define HID_GD_MULTIAXIS	0x00010008
 /*
- * Microsoft Win8 Wireless Radio Controls extensions CA, see (checked 09052017):
- * https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management
- * https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management
+ * Microsoft Win8 Wireless Radio Controls extensions CA, see:
+ * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf
  */
 #define HID_GD_WIRELESS_RADIO_CTLS	0x0001000c
 #define HID_GD_X		0x00010030
-- 
cgit v1.2.3


From c2372c20425bd75a5527b3e2204059762190f6ca Mon Sep 17 00:00:00 2001
From: Jan Glauber <jglauber@cavium.com>
Date: Mon, 22 May 2017 13:09:20 +0200
Subject: of/platform: Make of_platform_device_destroy globally visible

of_platform_device_destroy is the counterpart to
of_platform_device_create which is a non-static function.

After creating a platform device it might be neccessary
to destroy it to deal with -EPROBE_DEFER where a
repeated of_platform_device_create call would fail otherwise.

Therefore also make of_platform_device_destroy globally visible.

Signed-off-by: Jan Glauber <jglauber@cavium.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/of/platform.c       | 3 ++-
 include/linux/of_platform.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 71fecc2debfc..703a42118ffc 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void)
 arch_initcall_sync(of_platform_default_populate_init);
 #endif
 
-static int of_platform_device_destroy(struct device *dev, void *data)
+int of_platform_device_destroy(struct device *dev, void *data)
 {
 	/* Do not touch devices not populated from the device tree */
 	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
@@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(of_platform_device_destroy);
 
 /**
  * of_platform_depopulate() - Remove devices populated from device tree
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index dc8224ae28d5..e0d1946270f3 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np,
 						   const char *bus_id,
 						   struct device *parent);
 
+extern int of_platform_device_destroy(struct device *dev, void *data);
 extern int of_platform_bus_probe(struct device_node *root,
 				 const struct of_device_id *matches,
 				 struct device *parent);
-- 
cgit v1.2.3


From c3ab2b4ec8f7c0700bf10957171c479bf3dbca52 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 21 May 2017 10:12:03 -0600
Subject: net: ipv4: Add extack messages for route add failures

Add messages for non-obvious errors (e.g, no need to add text for malloc
failures or ENODEV failures). This mostly covers the annoying EINVAL errors
Some message strings violate the 80-columns but searchable strings need to
trump that rule.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |   5 +++
 net/ipv4/fib_frontend.c  |   2 +
 net/ipv4/fib_semantics.c | 115 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 100 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 5fff5ba5964e..a68aad484c69 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -97,6 +97,11 @@ struct netlink_ext_ack {
 #define NL_SET_ERR_MSG_MOD(extack, msg)			\
 	NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
 
+#define NL_SET_BAD_ATTR(extack, attr) do {		\
+	if ((extack))					\
+		(extack)->bad_attr = (attr);		\
+} while (0)
+
 extern void netlink_kernel_release(struct sock *sk);
 extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 511edff76c01..14d2f7bd7c76 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -656,6 +656,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 	cfg->fc_nlinfo.nl_net = net;
 
 	if (cfg->fc_type > RTN_MAX) {
+		NL_SET_ERR_MSG(extack, "Invalid route type");
 		err = -EINVAL;
 		goto errout;
 	}
@@ -726,6 +727,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	tb = fib_get_table(net, cfg.fc_table);
 	if (!tb) {
+		NL_SET_ERR_MSG(extack, "FIB table does not exist");
 		err = -ESRCH;
 		goto errout;
 	}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8587d1b55b53..4852e183afe0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/netlink.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -465,7 +466,13 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
 	}
 
 	/* leftover implies invalid nexthop configuration, discard it */
-	return remaining > 0 ? 0 : nhs;
+	if (remaining > 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid nexthop configuration - extra data after nexthops");
+		nhs = 0;
+	}
+
+	return nhs;
 }
 
 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
@@ -477,11 +484,17 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 	change_nexthops(fi) {
 		int attrlen;
 
-		if (!rtnh_ok(rtnh, remaining))
+		if (!rtnh_ok(rtnh, remaining)) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid nexthop configuration - extra data after nexthop");
 			return -EINVAL;
+		}
 
-		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
 			return -EINVAL;
+		}
 
 		nexthop_nh->nh_flags =
 			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
@@ -507,8 +520,12 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 
 				nla_entype = nla_find(attrs, attrlen,
 						      RTA_ENCAP_TYPE);
-				if (!nla_entype)
+				if (!nla_entype) {
+					NL_SET_BAD_ATTR(extack, nla);
+					NL_SET_ERR_MSG(extack,
+						       "Encap type is missing");
 					goto err_inval;
+				}
 
 				ret = lwtunnel_build_state(nla_get_u16(
 							   nla_entype),
@@ -729,16 +746,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 		if (nh->nh_flags & RTNH_F_ONLINK) {
 			unsigned int addr_type;
 
-			if (cfg->fc_scope >= RT_SCOPE_LINK)
+			if (cfg->fc_scope >= RT_SCOPE_LINK) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop has invalid scope");
 				return -EINVAL;
+			}
 			dev = __dev_get_by_index(net, nh->nh_oif);
 			if (!dev)
 				return -ENODEV;
-			if (!(dev->flags & IFF_UP))
+			if (!(dev->flags & IFF_UP)) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop device is not up");
 				return -ENETDOWN;
+			}
 			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
-			if (addr_type != RTN_UNICAST)
+			if (addr_type != RTN_UNICAST) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop has invalid gateway");
 				return -EINVAL;
+			}
 			if (!netif_carrier_ok(dev))
 				nh->nh_flags |= RTNH_F_LINKDOWN;
 			nh->nh_dev = dev;
@@ -778,18 +804,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 			}
 
 			if (err) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop has invalid gateway");
 				rcu_read_unlock();
 				return err;
 			}
 		}
 		err = -EINVAL;
-		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
 			goto out;
+		}
 		nh->nh_scope = res.scope;
 		nh->nh_oif = FIB_RES_OIF(res);
 		nh->nh_dev = dev = FIB_RES_DEV(res);
-		if (!dev)
+		if (!dev) {
+			NL_SET_ERR_MSG(extack,
+				       "No egress device for nexthop gateway");
 			goto out;
+		}
 		dev_hold(dev);
 		if (!netif_carrier_ok(dev))
 			nh->nh_flags |= RTNH_F_LINKDOWN;
@@ -797,16 +830,21 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 	} else {
 		struct in_device *in_dev;
 
-		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
+		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
 			return -EINVAL;
+		}
 		rcu_read_lock();
 		err = -ENODEV;
 		in_dev = inetdev_by_index(net, nh->nh_oif);
 		if (!in_dev)
 			goto out;
 		err = -ENETDOWN;
-		if (!(in_dev->dev->flags & IFF_UP))
+		if (!(in_dev->dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
 			goto out;
+		}
 		nh->nh_dev = in_dev->dev;
 		dev_hold(nh->nh_dev);
 		nh->nh_scope = RT_SCOPE_HOST;
@@ -994,11 +1032,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		goto err_inval;
 
 	/* Fast check to catch the most weird cases */
-	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
+	if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
+		NL_SET_ERR_MSG(extack, "Invalid scope");
 		goto err_inval;
+	}
 
-	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
 		goto err_inval;
+	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (cfg->fc_mp) {
@@ -1067,15 +1110,26 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
 		if (err != 0)
 			goto failure;
-		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
+		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop device index does not match RTA_OIF");
 			goto err_inval;
-		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
+		}
+		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop gateway does not match RTA_GATEWAY");
 			goto err_inval;
+		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
+		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop class id does not match RTA_FLOW");
 			goto err_inval;
+		}
 #endif
 #else
+		NL_SET_ERR_MSG(extack,
+			       "Multipath support not enabled in kernel");
 		goto err_inval;
 #endif
 	} else {
@@ -1084,8 +1138,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		if (cfg->fc_encap) {
 			struct lwtunnel_state *lwtstate;
 
-			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
+				NL_SET_ERR_MSG(extack,
+					       "LWT encap type not specified");
 				goto err_inval;
+			}
 			err = lwtunnel_build_state(cfg->fc_encap_type,
 						   cfg->fc_encap, AF_INET, cfg,
 						   &lwtstate);
@@ -1108,8 +1165,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	}
 
 	if (fib_props[cfg->fc_type].error) {
-		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+			NL_SET_ERR_MSG(extack,
+				       "Gateway, device and multipath can not be specified for this route type");
 			goto err_inval;
+		}
 		goto link_it;
 	} else {
 		switch (cfg->fc_type) {
@@ -1120,21 +1180,30 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		case RTN_MULTICAST:
 			break;
 		default:
+			NL_SET_ERR_MSG(extack, "Invalid route type");
 			goto err_inval;
 		}
 	}
 
-	if (cfg->fc_scope > RT_SCOPE_HOST)
+	if (cfg->fc_scope > RT_SCOPE_HOST) {
+		NL_SET_ERR_MSG(extack, "Invalid scope");
 		goto err_inval;
+	}
 
 	if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
-		if (nhs != 1)
+		if (nhs != 1) {
+			NL_SET_ERR_MSG(extack,
+				       "Route with host scope can not have multiple nexthops");
 			goto err_inval;
-		if (nh->nh_gw)
+		}
+		if (nh->nh_gw) {
+			NL_SET_ERR_MSG(extack,
+				       "Route with host scope can not have a gateway");
 			goto err_inval;
+		}
 		nh->nh_scope = RT_SCOPE_NOWHERE;
 		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
 		err = -ENODEV;
@@ -1154,8 +1223,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 			fi->fib_flags |= RTNH_F_LINKDOWN;
 	}
 
-	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
+	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
+		NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
 		goto err_inval;
+	}
 
 	change_nexthops(fi) {
 		fib_info_update_nh_saddr(net, nexthop_nh);
-- 
cgit v1.2.3


From 7254a50a5db40ca6739ddf37e0a45e6912532b2c Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 22 May 2017 23:05:05 +0800
Subject: blk-mq: remove blk_mq_abort_requeue_list()

No one uses it any more, so remove it.

Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c         | 19 -------------------
 include/linux/blk-mq.h |  1 -
 2 files changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a69ad122ed66..f2224ffd225d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-void blk_mq_abort_requeue_list(struct request_queue *q)
-{
-	unsigned long flags;
-	LIST_HEAD(rq_list);
-
-	spin_lock_irqsave(&q->requeue_lock, flags);
-	list_splice_init(&q->requeue_list, &rq_list);
-	spin_unlock_irqrestore(&q->requeue_lock, flags);
-
-	while (!list_empty(&rq_list)) {
-		struct request *rq;
-
-		rq = list_first_entry(&rq_list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		blk_mq_end_request(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(blk_mq_abort_requeue_list);
-
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	if (tag < tags->nr_tags) {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c47aa248c640..fcd641032f8d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
 				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 bool blk_mq_queue_stopped(struct request_queue *q);
-- 
cgit v1.2.3


From 6f9a22bc5775d231ab8fbe2c2f3c88e45e3e7c28 Mon Sep 17 00:00:00 2001
From: Michael Hernandez <michael.hernandez@cavium.com>
Date: Thu, 18 May 2017 10:47:47 -0700
Subject: PCI/MSI: Ignore affinity if pre/post vector count is more than
 min_vecs

min_vecs is the minimum amount of vectors needed to operate in MSI-X mode
which may just include the vectors that don't need affinity.

Disabling affinity settings causes the qla2xxx driver scsi_add_host() to fail
when blk_mq is enabled as the blk_mq_pci_map_queues() expects affinity masks
on each vector.

Fixes: dfef358bd1be ("PCI/MSI: Don't apply affinity if there aren't enough vectors left")
Signed-off-by: Michael Hernandez <michael.hernandez@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org	# v4.10+
---
 drivers/pci/msi.c         | 14 ++------------
 include/linux/interrupt.h |  4 ++--
 kernel/irq/affinity.c     | 13 ++++++++++++-
 3 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index ba44fdfda66b..9e1569107cd6 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1058,7 +1058,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 
 	for (;;) {
 		if (affd) {
-			nvec = irq_calc_affinity_vectors(nvec, affd);
+			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
@@ -1097,7 +1097,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 
 	for (;;) {
 		if (affd) {
-			nvec = irq_calc_affinity_vectors(nvec, affd);
+			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
@@ -1165,16 +1165,6 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 	if (flags & PCI_IRQ_AFFINITY) {
 		if (!affd)
 			affd = &msi_default_affd;
-
-		if (affd->pre_vectors + affd->post_vectors > min_vecs)
-			return -EINVAL;
-
-		/*
-		 * If there aren't any vectors left after applying the pre/post
-		 * vectors don't bother with assigning affinity.
-		 */
-		if (affd->pre_vectors + affd->post_vectors == min_vecs)
-			affd = NULL;
 	} else {
 		if (WARN_ON(affd))
 			affd = NULL;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a6fba4804672..0991f973f8ca 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -291,7 +291,7 @@ extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
 struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd);
-int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd);
+int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd);
 
 #else /* CONFIG_SMP */
 
@@ -331,7 +331,7 @@ irq_create_affinity_masks(int nvec, const struct irq_affinity *affd)
 }
 
 static inline int
-irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
+irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
 {
 	return maxvec;
 }
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index e2d356dd7581..9b71406d2eec 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -66,6 +66,13 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	struct cpumask *masks;
 	cpumask_var_t nmsk;
 
+	/*
+	 * If there aren't any vectors left after applying the pre/post
+	 * vectors don't bother with assigning affinity.
+	 */
+	if (!affv)
+		return NULL;
+
 	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 		return NULL;
 
@@ -140,15 +147,19 @@ out:
 
 /**
  * irq_calc_affinity_vectors - Calculate the optimal number of vectors
+ * @minvec:	The minimum number of vectors available
  * @maxvec:	The maximum number of vectors available
  * @affd:	Description of the affinity requirements
  */
-int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
+int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
 {
 	int resv = affd->pre_vectors + affd->post_vectors;
 	int vecs = maxvec - resv;
 	int cpus;
 
+	if (resv > minvec)
+		return 0;
+
 	/* Stabilize the cpumasks */
 	get_online_cpus();
 	cpus = cpumask_weight(cpu_online_mask);
-- 
cgit v1.2.3


From f81126b0b67c864b0b3d614d8adadc3a37ba5209 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 May 2017 16:24:22 -0700
Subject: Input: lm8323 - move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/lm8323.c      |  2 +-
 include/linux/i2c/lm8323.h           | 46 ------------------------------------
 include/linux/platform_data/lm8323.h | 46 ++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 47 deletions(-)
 delete mode 100644 include/linux/i2c/lm8323.h
 create mode 100644 include/linux/platform_data/lm8323.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index 21bea52d4365..04a5d7e134d7 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -30,8 +30,8 @@
 #include <linux/delay.h>
 #include <linux/input.h>
 #include <linux/leds.h>
+#include <linux/platform_data/lm8323.h>
 #include <linux/pm.h>
-#include <linux/i2c/lm8323.h>
 #include <linux/slab.h>
 
 /* Commands to send to the chip. */
diff --git a/include/linux/i2c/lm8323.h b/include/linux/i2c/lm8323.h
deleted file mode 100644
index 478d668bc590..000000000000
--- a/include/linux/i2c/lm8323.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * lm8323.h - Configuration for LM8323 keypad driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation (version 2 of the License only).
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __LINUX_LM8323_H
-#define __LINUX_LM8323_H
-
-#include <linux/types.h>
-
-/*
- * Largest keycode that the chip can send, plus one,
- * so keys can be mapped directly at the index of the
- * LM8323 keycode instead of subtracting one.
- */
-#define LM8323_KEYMAP_SIZE	(0x7f + 1)
-
-#define LM8323_NUM_PWMS		3
-
-struct lm8323_platform_data {
-	int debounce_time; /* Time to watch for key bouncing, in ms. */
-	int active_time; /* Idle time until sleep, in ms. */
-
-	int size_x;
-	int size_y;
-	bool repeat;
-	const unsigned short *keymap;
-
-	const char *pwm_names[LM8323_NUM_PWMS];
-
-	const char *name; /* Device name. */
-};
-
-#endif /* __LINUX_LM8323_H */
diff --git a/include/linux/platform_data/lm8323.h b/include/linux/platform_data/lm8323.h
new file mode 100644
index 000000000000..478d668bc590
--- /dev/null
+++ b/include/linux/platform_data/lm8323.h
@@ -0,0 +1,46 @@
+/*
+ * lm8323.h - Configuration for LM8323 keypad driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License only).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_LM8323_H
+#define __LINUX_LM8323_H
+
+#include <linux/types.h>
+
+/*
+ * Largest keycode that the chip can send, plus one,
+ * so keys can be mapped directly at the index of the
+ * LM8323 keycode instead of subtracting one.
+ */
+#define LM8323_KEYMAP_SIZE	(0x7f + 1)
+
+#define LM8323_NUM_PWMS		3
+
+struct lm8323_platform_data {
+	int debounce_time; /* Time to watch for key bouncing, in ms. */
+	int active_time; /* Idle time until sleep, in ms. */
+
+	int size_x;
+	int size_y;
+	bool repeat;
+	const unsigned short *keymap;
+
+	const char *pwm_names[LM8323_NUM_PWMS];
+
+	const char *name; /* Device name. */
+};
+
+#endif /* __LINUX_LM8323_H */
-- 
cgit v1.2.3


From 8cd9ab9e19b8cf23a7cae503af81ae70154e7956 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 May 2017 16:26:56 -0700
Subject: Input: mcs - move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/mcs_touchkey.c  |  2 +-
 drivers/input/touchscreen/mcs5000_ts.c |  2 +-
 include/linux/i2c/mcs.h                | 35 ----------------------------------
 include/linux/platform_data/mcs.h      | 35 ++++++++++++++++++++++++++++++++++
 4 files changed, 37 insertions(+), 37 deletions(-)
 delete mode 100644 include/linux/i2c/mcs.h
 create mode 100644 include/linux/platform_data/mcs.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/mcs_touchkey.c b/drivers/input/keyboard/mcs_touchkey.c
index 31090d71a685..be56d4f262a7 100644
--- a/drivers/input/keyboard/mcs_touchkey.c
+++ b/drivers/input/keyboard/mcs_touchkey.c
@@ -13,11 +13,11 @@
 
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/i2c/mcs.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
 #include <linux/irq.h>
 #include <linux/slab.h>
+#include <linux/platform_data/mcs.h>
 #include <linux/pm.h>
 
 /* MCS5000 Touchkey */
diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c
index 90fc07dc98a6..8868573133ab 100644
--- a/drivers/input/touchscreen/mcs5000_ts.c
+++ b/drivers/input/touchscreen/mcs5000_ts.c
@@ -15,10 +15,10 @@
 
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/i2c/mcs.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
 #include <linux/irq.h>
+#include <linux/platform_data/mcs.h>
 #include <linux/slab.h>
 
 /* Registers */
diff --git a/include/linux/i2c/mcs.h b/include/linux/i2c/mcs.h
deleted file mode 100644
index 61bb18a4fd3c..000000000000
--- a/include/linux/i2c/mcs.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- * Author: HeungJun Kim <riverful.kim@samsung.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- */
-
-#ifndef __LINUX_MCS_H
-#define __LINUX_MCS_H
-
-#define MCS_KEY_MAP(v, c)	((((v) & 0xff) << 16) | ((c) & 0xffff))
-#define MCS_KEY_VAL(v)		(((v) >> 16) & 0xff)
-#define MCS_KEY_CODE(v)		((v) & 0xffff)
-
-struct mcs_platform_data {
-	void (*poweron)(bool);
-	void (*cfg_pin)(void);
-
-	/* touchscreen */
-	unsigned int x_size;
-	unsigned int y_size;
-
-	/* touchkey */
-	const u32 *keymap;
-	unsigned int keymap_size;
-	unsigned int key_maxval;
-	bool no_autorepeat;
-};
-
-#endif	/* __LINUX_MCS_H */
diff --git a/include/linux/platform_data/mcs.h b/include/linux/platform_data/mcs.h
new file mode 100644
index 000000000000..61bb18a4fd3c
--- /dev/null
+++ b/include/linux/platform_data/mcs.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MCS_H
+#define __LINUX_MCS_H
+
+#define MCS_KEY_MAP(v, c)	((((v) & 0xff) << 16) | ((c) & 0xffff))
+#define MCS_KEY_VAL(v)		(((v) >> 16) & 0xff)
+#define MCS_KEY_CODE(v)		((v) & 0xffff)
+
+struct mcs_platform_data {
+	void (*poweron)(bool);
+	void (*cfg_pin)(void);
+
+	/* touchscreen */
+	unsigned int x_size;
+	unsigned int y_size;
+
+	/* touchkey */
+	const u32 *keymap;
+	unsigned int keymap_size;
+	unsigned int key_maxval;
+	bool no_autorepeat;
+};
+
+#endif	/* __LINUX_MCS_H */
-- 
cgit v1.2.3


From 0d846a4cbbcbf81e527542d15e165b8f774bace5 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 May 2017 16:28:42 -0700
Subject: Input: mms114 - move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/mms114.c   |  2 +-
 include/linux/i2c/mms114.h           | 24 ------------------------
 include/linux/platform_data/mms114.h | 24 ++++++++++++++++++++++++
 3 files changed, 25 insertions(+), 25 deletions(-)
 delete mode 100644 include/linux/i2c/mms114.h
 create mode 100644 include/linux/platform_data/mms114.h

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index 1fafc9f57af6..e5eeb6311f7d 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c
@@ -11,9 +11,9 @@
 #include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/i2c.h>
-#include <linux/i2c/mms114.h>
 #include <linux/input/mt.h>
 #include <linux/interrupt.h>
+#include <linux/platform_data/mms114.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 
diff --git a/include/linux/i2c/mms114.h b/include/linux/i2c/mms114.h
deleted file mode 100644
index 5722ebfb2738..000000000000
--- a/include/linux/i2c/mms114.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundationr
- */
-
-#ifndef __LINUX_MMS114_H
-#define __LINUX_MMS114_H
-
-struct mms114_platform_data {
-	unsigned int x_size;
-	unsigned int y_size;
-	unsigned int contact_threshold;
-	unsigned int moving_threshold;
-	bool x_invert;
-	bool y_invert;
-
-	void (*cfg_pin)(bool);
-};
-
-#endif	/* __LINUX_MMS114_H */
diff --git a/include/linux/platform_data/mms114.h b/include/linux/platform_data/mms114.h
new file mode 100644
index 000000000000..5722ebfb2738
--- /dev/null
+++ b/include/linux/platform_data/mms114.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundationr
+ */
+
+#ifndef __LINUX_MMS114_H
+#define __LINUX_MMS114_H
+
+struct mms114_platform_data {
+	unsigned int x_size;
+	unsigned int y_size;
+	unsigned int contact_threshold;
+	unsigned int moving_threshold;
+	bool x_invert;
+	bool y_invert;
+
+	void (*cfg_pin)(bool);
+};
+
+#endif	/* __LINUX_MMS114_H */
-- 
cgit v1.2.3


From 8fd708157a592a376c4d0b3b2ba23b9e9f79caa5 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 May 2017 16:30:04 -0700
Subject: Input: tsc2007 - move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/sh/boards/mach-ecovec24/setup.c     |  2 +-
 drivers/input/touchscreen/tsc2007_core.c |  2 +-
 drivers/mfd/timberdale.c                 |  2 +-
 include/linux/i2c/tsc2007.h              | 22 ----------------------
 include/linux/platform_data/tsc2007.h    | 22 ++++++++++++++++++++++
 5 files changed, 25 insertions(+), 25 deletions(-)
 delete mode 100644 include/linux/i2c/tsc2007.h
 create mode 100644 include/linux/platform_data/tsc2007.h

(limited to 'include/linux')

diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 6d612792f6b8..1faf6cb93dcb 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -24,7 +24,7 @@
 #include <linux/usb/r8a66597.h>
 #include <linux/usb/renesas_usbhs.h>
 #include <linux/i2c.h>
-#include <linux/i2c/tsc2007.h>
+#include <linux/platform_data/tsc2007.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/sh_msiof.h>
 #include <linux/spi/mmc_spi.h>
diff --git a/drivers/input/touchscreen/tsc2007_core.c b/drivers/input/touchscreen/tsc2007_core.c
index fc7384936011..8342e0c48a53 100644
--- a/drivers/input/touchscreen/tsc2007_core.c
+++ b/drivers/input/touchscreen/tsc2007_core.c
@@ -25,9 +25,9 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
-#include <linux/i2c/tsc2007.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
+#include <linux/platform_data/tsc2007.h>
 #include "tsc2007.h"
 
 int tsc2007_xfer(struct tsc2007 *tsc, u8 cmd)
diff --git a/drivers/mfd/timberdale.c b/drivers/mfd/timberdale.c
index c9339f85359b..cd4a6d7d6750 100644
--- a/drivers/mfd/timberdale.c
+++ b/drivers/mfd/timberdale.c
@@ -32,13 +32,13 @@
 #include <linux/i2c.h>
 #include <linux/i2c-ocores.h>
 #include <linux/i2c-xiic.h>
-#include <linux/i2c/tsc2007.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/xilinx_spi.h>
 #include <linux/spi/max7301.h>
 #include <linux/spi/mc33880.h>
 
+#include <linux/platform_data/tsc2007.h>
 #include <linux/platform_data/media/timb_radio.h>
 #include <linux/platform_data/media/timb_video.h>
 
diff --git a/include/linux/i2c/tsc2007.h b/include/linux/i2c/tsc2007.h
deleted file mode 100644
index 4f35b6ad3889..000000000000
--- a/include/linux/i2c/tsc2007.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __LINUX_I2C_TSC2007_H
-#define __LINUX_I2C_TSC2007_H
-
-/* linux/i2c/tsc2007.h */
-
-struct tsc2007_platform_data {
-	u16	model;				/* 2007. */
-	u16	x_plate_ohms;	/* must be non-zero value */
-	u16	max_rt; /* max. resistance above which samples are ignored */
-	unsigned long poll_period; /* time (in ms) between samples */
-	int	fuzzx; /* fuzz factor for X, Y and pressure axes */
-	int	fuzzy;
-	int	fuzzz;
-
-	int	(*get_pendown_state)(struct device *);
-	/* If needed, clear 2nd level interrupt source */
-	void	(*clear_penirq)(void);
-	int	(*init_platform_hw)(void);
-	void	(*exit_platform_hw)(void);
-};
-
-#endif
diff --git a/include/linux/platform_data/tsc2007.h b/include/linux/platform_data/tsc2007.h
new file mode 100644
index 000000000000..c2d3aa1dadd4
--- /dev/null
+++ b/include/linux/platform_data/tsc2007.h
@@ -0,0 +1,22 @@
+#ifndef __LINUX_I2C_TSC2007_H
+#define __LINUX_I2C_TSC2007_H
+
+/* linux/platform_data/tsc2007.h */
+
+struct tsc2007_platform_data {
+	u16	model;				/* 2007. */
+	u16	x_plate_ohms;	/* must be non-zero value */
+	u16	max_rt; /* max. resistance above which samples are ignored */
+	unsigned long poll_period; /* time (in ms) between samples */
+	int	fuzzx; /* fuzz factor for X, Y and pressure axes */
+	int	fuzzy;
+	int	fuzzz;
+
+	int	(*get_pendown_state)(struct device *);
+	/* If needed, clear 2nd level interrupt source */
+	void	(*clear_penirq)(void);
+	int	(*init_platform_hw)(void);
+	void	(*exit_platform_hw)(void);
+};
+
+#endif
-- 
cgit v1.2.3


From d795cb51dfee2a859b5585101a4e3ce5bc9bff75 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Mon, 15 May 2017 11:24:27 +0200
Subject: pinctrl: mcp23s08: drop pullup config from pdata

mcp23s08 support configuration of the pullups using the
pinconf framework. This removes the custom pullup configuration
from platform data, which has no upstream users.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-mcp23s08.c | 7 -------
 include/linux/spi/mcp23s08.h       | 1 -
 2 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c
index d957c4bbc8c1..8c684d179e29 100644
--- a/drivers/pinctrl/pinctrl-mcp23s08.c
+++ b/drivers/pinctrl/pinctrl-mcp23s08.c
@@ -782,11 +782,6 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 			goto fail;
 	}
 
-	/* configure ~100K pullups */
-	ret = mcp_write(mcp, MCP_GPPU, pdata->chip[cs].pullups);
-	if (ret < 0)
-		goto fail;
-
 	ret = mcp_update_cache(mcp);
 	if (ret < 0)
 		goto fail;
@@ -911,7 +906,6 @@ static int mcp230xx_probe(struct i2c_client *client,
 	if (match) {
 		pdata = &local_pdata;
 		pdata->base = -1;
-		pdata->chip[0].pullups = 0;
 		pdata->irq_controller =	of_property_read_bool(
 					client->dev.of_node,
 					"interrupt-controller");
@@ -1031,7 +1025,6 @@ static int mcp23s08_probe(struct spi_device *spi)
 		pdata = &local_pdata;
 		pdata->base = -1;
 		for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-			pdata->chip[addr].pullups = 0;
 			if (spi_present_mask & (1 << addr))
 				chips++;
 		}
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index aa07d7b32568..080ecc6bb270 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -3,7 +3,6 @@
 
 struct mcp23s08_chip_info {
 	bool		is_present;	/* true if populated */
-	unsigned	pullups;	/* BIT(x) means enable pullup x */
 };
 
 struct mcp23s08_platform_data {
-- 
cgit v1.2.3


From 5b1a7e803a9fd960b6d75a1d970519c57cfe2618 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Mon, 15 May 2017 11:24:35 +0200
Subject: pinctrl: mcp23s08: generalize irq property handling

This moves irq property handling from spi/i2c specific code into
the generic mcp23s08_probe_one. This is possible because the
device properties are named equally.

As a side-effect this drops support for setting the properties via
pdata, which has no mainline users. If boardcode wants to enable
the chip as interrupt controller it can attach the device properties
instead.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-mcp23s08.c | 27 +++++++++------------------
 include/linux/spi/mcp23s08.h       | 18 ------------------
 2 files changed, 9 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c
index 541bf80a2a13..b39da587f2fa 100644
--- a/drivers/pinctrl/pinctrl-mcp23s08.c
+++ b/drivers/pinctrl/pinctrl-mcp23s08.c
@@ -783,7 +783,7 @@ done:
 
 static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 			      void *data, unsigned addr, unsigned type,
-			      struct mcp23s08_platform_data *pdata, int cs)
+			      unsigned int base, int cs)
 {
 	int status, ret;
 	bool mirror = false;
@@ -855,7 +855,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 	if (IS_ERR(mcp->regmap))
 		return PTR_ERR(mcp->regmap);
 
-	mcp->chip.base = pdata->base;
+	mcp->chip.base = base;
 	mcp->chip.can_sleep = true;
 	mcp->chip.parent = dev;
 	mcp->chip.owner = THIS_MODULE;
@@ -868,13 +868,14 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 	if (ret < 0)
 		goto fail;
 
-	mcp->irq_controller = pdata->irq_controller;
+	mcp->irq_controller =
+		device_property_read_bool(dev, "interrupt-controller");
 	if (mcp->irq && mcp->irq_controller) {
 		mcp->irq_active_high =
-			of_property_read_bool(mcp->chip.parent->of_node,
+			device_property_read_bool(dev,
 					      "microchip,irq-active-high");
 
-		mirror = pdata->mirror;
+		mirror = device_property_read_bool(dev, "microchip,irq-mirror");
 	}
 
 	if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN) || mirror ||
@@ -1000,11 +1001,6 @@ static int mcp230xx_probe(struct i2c_client *client,
 	if (!pdata) {
 		pdata = &local_pdata;
 		pdata->base = -1;
-
-		pdata->irq_controller = device_property_read_bool(
-			&client->dev, "interrupt-controller");
-		pdata->mirror = device_property_read_bool(
-			&client->dev, "microchip,irq-mirror");
 	}
 
 	mcp = devm_kzalloc(&client->dev, sizeof(*mcp), GFP_KERNEL);
@@ -1013,7 +1009,7 @@ static int mcp230xx_probe(struct i2c_client *client,
 
 	mcp->irq = client->irq;
 	status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr,
-				    id->driver_data, pdata, 0);
+				    id->driver_data, pdata->base, 0);
 	if (status)
 		return status;
 
@@ -1081,11 +1077,6 @@ static int mcp23s08_probe(struct spi_device *spi)
 		pdata = &local_pdata;
 		pdata->base = -1;
 
-		pdata->irq_controller = device_property_read_bool(&spi->dev,
-			"interrupt-controller");
-		pdata->mirror = device_property_read_bool(&spi->dev,
-			"microchip,irq-mirror");
-
 		status = device_property_read_u32(&spi->dev,
 			"microchip,spi-present-mask", &spi_present_mask);
 		if (status) {
@@ -1138,8 +1129,8 @@ static int mcp23s08_probe(struct spi_device *spi)
 		data->mcp[addr] = &data->chip[chips];
 		data->mcp[addr]->irq = spi->irq;
 		status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi,
-					    0x40 | (addr << 1), type, pdata,
-					    addr);
+					    0x40 | (addr << 1), type,
+					    pdata->base, addr);
 		if (status < 0)
 			return status;
 
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 080ecc6bb270..4af82ee63329 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -21,22 +21,4 @@ struct mcp23s08_platform_data {
 	 * base to base+15 (or base+31 for s17 variant).
 	 */
 	unsigned	base;
-	/* Marks the device as a interrupt controller.
-	 * NOTE: The interrupt functionality is only supported for i2c
-	 * versions of the chips. The spi chips can also do the interrupts,
-	 * but this is not supported by the linux driver yet.
-	 */
-	bool		irq_controller;
-
-	/* Sets the mirror flag in the IOCON register. Devices
-	 * with two interrupt outputs (these are the devices ending with 17 and
-	 * those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and
-	 * IO 8-15 are bank 2. These chips have two different interrupt outputs:
-	 * One for bank 1 and another for bank 2. If irq-mirror is set, both
-	 * interrupts are generated regardless of the bank that an input change
-	 * occurred on. If it is not set, the interrupt are only generated for
-	 * the bank they belong to.
-	 * On devices with only one interrupt output this property is useless.
-	 */
-	bool		mirror;
 };
-- 
cgit v1.2.3


From ce9bd0a0ff106b478012dc2e4c2b10bb0138dd7a Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Mon, 15 May 2017 11:24:36 +0200
Subject: pinctrl: mcp23s08: simplify spi_present_mask handling

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/blackfin/mach-bf527/boards/tll6527m.c |  4 ++--
 drivers/pinctrl/pinctrl-mcp23s08.c         | 29 ++++++++++-------------------
 include/linux/spi/mcp23s08.h               |  6 +-----
 3 files changed, 13 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c
index be61477826f3..ce5488e8226b 100644
--- a/arch/blackfin/mach-bf527/boards/tll6527m.c
+++ b/arch/blackfin/mach-bf527/boards/tll6527m.c
@@ -351,11 +351,11 @@ static struct platform_device bfin_i2s = {
 #if IS_ENABLED(CONFIG_PINCTRL_MCP23S08)
 #include <linux/spi/mcp23s08.h>
 static const struct mcp23s08_platform_data bfin_mcp23s08_sys_gpio_info = {
-	.chip[0].is_present = true,
+	.spi_present_mask = BIT(0),
 	.base = 0x30,
 };
 static const struct mcp23s08_platform_data bfin_mcp23s08_usr_gpio_info = {
-	.chip[2].is_present = true,
+	.spi_present_mask = BIT(2),
 	.base = 0x38,
 };
 #endif
diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c
index b39da587f2fa..29d9c1fd4309 100644
--- a/drivers/pinctrl/pinctrl-mcp23s08.c
+++ b/drivers/pinctrl/pinctrl-mcp23s08.c
@@ -36,6 +36,8 @@
 #define MCP_TYPE_017	3
 #define MCP_TYPE_S18    4
 
+#define MCP_MAX_DEV_PER_CS	8
+
 /* Registers are all 8 bits wide.
  *
  * The mcp23s17 has twice as many bits, and can be configured to work
@@ -1064,7 +1066,6 @@ static int mcp23s08_probe(struct spi_device *spi)
 	int				status, type;
 	unsigned			ngpio = 0;
 	const struct			of_device_id *match;
-	u32				spi_present_mask = 0;
 
 	match = of_match_device(of_match_ptr(mcp23s08_spi_of_match), &spi->dev);
 	if (match)
@@ -1078,36 +1079,26 @@ static int mcp23s08_probe(struct spi_device *spi)
 		pdata->base = -1;
 
 		status = device_property_read_u32(&spi->dev,
-			"microchip,spi-present-mask", &spi_present_mask);
+			"microchip,spi-present-mask", &pdata->spi_present_mask);
 		if (status) {
 			status = device_property_read_u32(&spi->dev,
-				"mcp,spi-present-mask", &spi_present_mask);
+				"mcp,spi-present-mask",
+				&pdata->spi_present_mask);
 
 			if (status) {
 				dev_err(&spi->dev, "missing spi-present-mask");
 				return -ENODEV;
 			}
 		}
-	} else {
-		for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-			if (!pdata->chip[addr].is_present)
-				continue;
-			if ((type == MCP_TYPE_S08) && (addr > 3)) {
-				dev_err(&spi->dev,
-					"mcp23s08 only supports address 0..3");
-				return -EINVAL;
-			}
-			spi_present_mask |= BIT(addr);
-		}
 	}
 
-	if (!spi_present_mask || spi_present_mask >= 256) {
+	if (!pdata->spi_present_mask || pdata->spi_present_mask > 0xff) {
 		dev_err(&spi->dev, "invalid spi-present-mask");
 		return -ENODEV;
 	}
 
-	for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-		if (spi_present_mask & BIT(addr))
+	for (addr = 0; addr < MCP_MAX_DEV_PER_CS; addr++) {
+		if (pdata->spi_present_mask & BIT(addr))
 			chips++;
 	}
 
@@ -1122,8 +1113,8 @@ static int mcp23s08_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, data);
 
-	for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-		if (!(spi_present_mask & (1 << addr)))
+	for (addr = 0; addr < MCP_MAX_DEV_PER_CS; addr++) {
+		if (!(pdata->spi_present_mask & BIT(addr)))
 			continue;
 		chips--;
 		data->mcp[addr] = &data->chip[chips];
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 4af82ee63329..211f3c0ef49c 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,10 +1,6 @@
 
 /* FIXME driver should be able to handle IRQs...  */
 
-struct mcp23s08_chip_info {
-	bool		is_present;	/* true if populated */
-};
-
 struct mcp23s08_platform_data {
 	/* For mcp23s08, up to 4 slaves (numbered 0..3) can share one SPI
 	 * chipselect, each providing 1 gpio_chip instance with 8 gpios.
@@ -12,7 +8,7 @@ struct mcp23s08_platform_data {
 	 * chipselect, each providing 1 gpio_chip (port A + port B) with
 	 * 16 gpios.
 	 */
-	struct mcp23s08_chip_info	chip[8];
+	u32 spi_present_mask;
 
 	/* "base" is the number of the first GPIO.  Dynamic assignment is
 	 * not currently supported, and even if there are gaps in chip
-- 
cgit v1.2.3


From d8f4494e70ae5fef159719bfbb6abedc53619bf1 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Mon, 15 May 2017 11:24:37 +0200
Subject: pinctrl: mcp23s08: drop comment about missing irq support

The driver supports using mcp23xxx as interrupt controller, so
let's drop all comments stating otherwise.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-mcp23s08.c | 19 ++-----------------
 include/linux/spi/mcp23s08.h       |  3 ---
 2 files changed, 2 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c
index 29d9c1fd4309..3e40d4245512 100644
--- a/drivers/pinctrl/pinctrl-mcp23s08.c
+++ b/drivers/pinctrl/pinctrl-mcp23s08.c
@@ -1,14 +1,4 @@
-/*
- * MCP23S08 SPI/I2C GPIO gpio expander driver
- *
- * The inputs and outputs of the mcp23s08, mcp23s17, mcp23008 and mcp23017 are
- * supported.
- * For the I2C versions of the chips (mcp23008 and mcp23017) generation of
- * interrupts is also supported.
- * The hardware of the SPI versions of the chips (mcp23s08 and mcp23s17) is
- * also capable of generating interrupts, but the linux driver does not
- * support that yet.
- */
+/* MCP23S08 SPI/I2C GPIO driver */
 
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -27,7 +17,7 @@
 #include <linux/pinctrl/pinconf.h>
 #include <linux/pinctrl/pinconf-generic.h>
 
-/**
+/*
  * MCP types supported by driver
  */
 #define MCP_TYPE_S08	0
@@ -1131,11 +1121,6 @@ static int mcp23s08_probe(struct spi_device *spi)
 	}
 	data->ngpio = ngpio;
 
-	/* NOTE:  these chips have a relatively sane IRQ framework, with
-	 * per-signal masking and level/edge triggering.  It's not yet
-	 * handled here...
-	 */
-
 	return 0;
 }
 
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 211f3c0ef49c..4354beefd584 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,6 +1,3 @@
-
-/* FIXME driver should be able to handle IRQs...  */
-
 struct mcp23s08_platform_data {
 	/* For mcp23s08, up to 4 slaves (numbered 0..3) can share one SPI
 	 * chipselect, each providing 1 gpio_chip instance with 8 gpios.
-- 
cgit v1.2.3


From 7f38c5b99779554d6c6e5043cfda848b967f2ca9 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Mon, 15 May 2017 11:24:38 +0200
Subject: pinctrl: mcp23s08: fix comment for mcp23s08_platform_data.base

The comment does not match the driver, which actually supports
automatic assignment. Fix this by updating the comment.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/spi/mcp23s08.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 4354beefd584..82d96a346e6f 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -7,11 +7,11 @@ struct mcp23s08_platform_data {
 	 */
 	u32 spi_present_mask;
 
-	/* "base" is the number of the first GPIO.  Dynamic assignment is
-	 * not currently supported, and even if there are gaps in chip
-	 * addressing the GPIO numbers are sequential .. so for example
-	 * if only slaves 0 and 3 are present, their GPIOs range from
-	 * base to base+15 (or base+31 for s17 variant).
+	/* "base" is the number of the first GPIO or -1 for dynamic
+	 * assignment. If there are gaps in chip addressing the GPIO
+	 * numbers are sequential .. so for example if only slaves 0
+	 * and 3 are present, their GPIOs range from base to base+15
+	 * (or base+31 for s17 variant).
 	 */
 	unsigned	base;
 };
-- 
cgit v1.2.3


From 6c8557bdb28df3ae97476c5e2aed6373cd235aab Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 19 May 2017 12:58:25 +0200
Subject: smp, cpumask: Use non-atomic cpumask_{set,clear}_cpu()

The cpumasks in smp_call_function_many() are private and not subject
to concurrency, atomic bitops are pointless and expensive.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpumask.h | 11 +++++++++++
 kernel/smp.c            |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index a21b1fb9a968..4bf4479a3a80 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -293,6 +293,12 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+{
+	__set_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
+
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
@@ -303,6 +309,11 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
 	clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
 /**
  * cpumask_test_cpu - test for a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
diff --git a/kernel/smp.c b/kernel/smp.c
index 76d16fe3c427..3061483cb3ad 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -436,7 +436,7 @@ void smp_call_function_many(const struct cpumask *mask,
 	cfd = this_cpu_ptr(&cfd_data);
 
 	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
-	cpumask_clear_cpu(this_cpu, cfd->cpumask);
+	__cpumask_clear_cpu(this_cpu, cfd->cpumask);
 
 	/* Some callers race with other cpus changing the passed mask */
 	if (unlikely(!cpumask_weight(cfd->cpumask)))
@@ -452,7 +452,7 @@ void smp_call_function_many(const struct cpumask *mask,
 		csd->func = func;
 		csd->info = info;
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-			cpumask_set_cpu(cpu, cfd->cpumask_ipi);
+			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
 
 	/* Send a message to all CPUs in the map */
-- 
cgit v1.2.3


From d714893e61cd8c6e5c7e095f7dd615aa434bca95 Mon Sep 17 00:00:00 2001
From: Byungchul Park <byungchul.park@lge.com>
Date: Fri, 12 May 2017 09:36:56 +0900
Subject: llist: Provide a safe version for llist_for_each()

Sometimes we have to dereference next field of llist node before entering
loop becasue the node might be deleted or the next field might be
modified within the loop. So this adds the safe version of llist_for_each(),
that is, llist_for_each_safe().

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Huang, Ying <ying.huang@intel.com>
Cc: <kernel-team@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1494549416-10539-1-git-send-email-byungchul.park@lge.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/llist.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/llist.h b/include/linux/llist.h
index 171baa90f6f6..d11738110a7a 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -109,6 +109,25 @@ static inline void init_llist_head(struct llist_head *list)
 #define llist_for_each(pos, node)			\
 	for ((pos) = (node); pos; (pos) = (pos)->next)
 
+/**
+ * llist_for_each_safe - iterate over some deleted entries of a lock-less list
+ *			 safe against removal of list entry
+ * @pos:	the &struct llist_node to use as a loop cursor
+ * @n:		another &struct llist_node to use as temporary storage
+ * @node:	the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_safe(pos, n, node)			\
+	for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n))
+
 /**
  * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
  * @pos:	the type * to use as a loop cursor.
-- 
cgit v1.2.3


From 69a78ff226fe0241ab6cb9dd961667be477e3cf7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 16 May 2017 20:42:47 +0200
Subject: init: Introduce SYSTEM_SCHEDULING state

might_sleep() debugging and smp_processor_id() debugging should be active
right after the scheduler starts working. The init task can invoke
smp_processor_id() from preemptible context as it is pinned on the boot cpu
until sched_smp_init() removes the pinning and lets it schedule on all non
isolated cpus.

Add a new state which allows to enable those checks earlier and add it to
the xen do_poweroff() function.

No functional change.

Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170516184736.196214622@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/xen/manage.c   | 1 +
 include/linux/kernel.h | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index c1ec8ee80924..9e35032351a0 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -190,6 +190,7 @@ static void do_poweroff(void)
 {
 	switch (system_state) {
 	case SYSTEM_BOOTING:
+	case SYSTEM_SCHEDULING:
 		orderly_poweroff(true);
 		break;
 	case SYSTEM_RUNNING:
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 13bc08aba704..1c91f26e2996 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -490,9 +490,13 @@ extern int root_mountflags;
 
 extern bool early_boot_irqs_disabled;
 
-/* Values used for system_state */
+/*
+ * Values used for system_state. Ordering of the states must not be changed
+ * as code checks for <, <=, >, >= STATE.
+ */
 extern enum system_states {
 	SYSTEM_BOOTING,
+	SYSTEM_SCHEDULING,
 	SYSTEM_RUNNING,
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
-- 
cgit v1.2.3


From c61872c9833d17d3807fb999096917c1f9eaada0 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 17 May 2017 13:25:12 +0300
Subject: firmware: dmi: Add DMI_PRODUCT_FAMILY identification string

Sometimes it is more convenient to be able to match a whole family of
products, like in case of bunch of Chromebooks based on Intel_Strago to
apply a driver quirk instead of quirking each machine one-by-one.

This adds support for DMI_PRODUCT_FAMILY identification string and also
exports it to the userspace through sysfs attribute just like the
existing ones.

Suggested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/firmware/dmi-id.c       | 2 ++
 drivers/firmware/dmi_scan.c     | 1 +
 include/linux/mod_devicetable.h | 1 +
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c
index 44c01390d035..dc269cb288c2 100644
--- a/drivers/firmware/dmi-id.c
+++ b/drivers/firmware/dmi-id.c
@@ -47,6 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name,		0444, DMI_PRODUCT_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(product_version,	0444, DMI_PRODUCT_VERSION);
 DEFINE_DMI_ATTR_WITH_SHOW(product_serial,	0400, DMI_PRODUCT_SERIAL);
 DEFINE_DMI_ATTR_WITH_SHOW(product_uuid,		0400, DMI_PRODUCT_UUID);
+DEFINE_DMI_ATTR_WITH_SHOW(product_family,	0400, DMI_PRODUCT_FAMILY);
 DEFINE_DMI_ATTR_WITH_SHOW(board_vendor,		0444, DMI_BOARD_VENDOR);
 DEFINE_DMI_ATTR_WITH_SHOW(board_name,		0444, DMI_BOARD_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(board_version,	0444, DMI_BOARD_VERSION);
@@ -191,6 +192,7 @@ static void __init dmi_id_init_attr_table(void)
 	ADD_DMI_ATTR(product_version,   DMI_PRODUCT_VERSION);
 	ADD_DMI_ATTR(product_serial,    DMI_PRODUCT_SERIAL);
 	ADD_DMI_ATTR(product_uuid,      DMI_PRODUCT_UUID);
+	ADD_DMI_ATTR(product_family,      DMI_PRODUCT_FAMILY);
 	ADD_DMI_ATTR(board_vendor,      DMI_BOARD_VENDOR);
 	ADD_DMI_ATTR(board_name,        DMI_BOARD_NAME);
 	ADD_DMI_ATTR(board_version,     DMI_BOARD_VERSION);
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 54be60ead08f..93f7acdaac7a 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -430,6 +430,7 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy)
 		dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
 		dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
 		dmi_save_uuid(dm, DMI_PRODUCT_UUID, 8);
+		dmi_save_ident(dm, DMI_PRODUCT_FAMILY, 26);
 		break;
 	case 2:		/* Base Board Information */
 		dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 566fda587fcf..3f74ef2281e8 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -467,6 +467,7 @@ enum dmi_field {
 	DMI_PRODUCT_VERSION,
 	DMI_PRODUCT_SERIAL,
 	DMI_PRODUCT_UUID,
+	DMI_PRODUCT_FAMILY,
 	DMI_BOARD_VENDOR,
 	DMI_BOARD_NAME,
 	DMI_BOARD_VERSION,
-- 
cgit v1.2.3


From 0a848d638a25b4f2767b260ed83c271854e93cce Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 23:57:25 +0200
Subject: gpio: max732x: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-pxa/littleton.c         |  2 +-
 drivers/gpio/gpio-max732x.c           |  2 +-
 include/linux/i2c/max732x.h           | 22 ----------------------
 include/linux/platform_data/max732x.h | 22 ++++++++++++++++++++++
 4 files changed, 24 insertions(+), 24 deletions(-)
 delete mode 100644 include/linux/i2c/max732x.h
 create mode 100644 include/linux/platform_data/max732x.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 051c554776a6..fae38fdc8d8e 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -27,7 +27,7 @@
 #include <linux/i2c.h>
 #include <linux/leds.h>
 #include <linux/mfd/da903x.h>
-#include <linux/i2c/max732x.h>
+#include <linux/platform_data/max732x.h>
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/types.h>
diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index 4ea4c6a1313b..7f4d26ce5f23 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -20,7 +20,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
-#include <linux/i2c/max732x.h>
+#include <linux/platform_data/max732x.h>
 #include <linux/of.h>
 
 
diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h
deleted file mode 100644
index c04bac8bf2fe..000000000000
--- a/include/linux/i2c/max732x.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __LINUX_I2C_MAX732X_H
-#define __LINUX_I2C_MAX732X_H
-
-/* platform data for the MAX732x 8/16-bit I/O expander driver */
-
-struct max732x_platform_data {
-	/* number of the first GPIO */
-	unsigned	gpio_base;
-
-	/* interrupt base */
-	int		irq_base;
-
-	void		*context;	/* param to setup/teardown */
-
-	int		(*setup)(struct i2c_client *client,
-				unsigned gpio, unsigned ngpio,
-				void *context);
-	int		(*teardown)(struct i2c_client *client,
-				unsigned gpio, unsigned ngpio,
-				void *context);
-};
-#endif /* __LINUX_I2C_MAX732X_H */
diff --git a/include/linux/platform_data/max732x.h b/include/linux/platform_data/max732x.h
new file mode 100644
index 000000000000..c04bac8bf2fe
--- /dev/null
+++ b/include/linux/platform_data/max732x.h
@@ -0,0 +1,22 @@
+#ifndef __LINUX_I2C_MAX732X_H
+#define __LINUX_I2C_MAX732X_H
+
+/* platform data for the MAX732x 8/16-bit I/O expander driver */
+
+struct max732x_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	/* interrupt base */
+	int		irq_base;
+
+	void		*context;	/* param to setup/teardown */
+
+	int		(*setup)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	int		(*teardown)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+};
+#endif /* __LINUX_I2C_MAX732X_H */
-- 
cgit v1.2.3


From b6480faeee234829b315168aebcb281ecf95f178 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 23:57:26 +0200
Subject: gpio: pcf857x: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-davinci/board-da830-evm.c  |  2 +-
 arch/arm/mach-davinci/board-dm644x-evm.c |  2 +-
 arch/arm/mach-davinci/board-dm646x-evm.c |  2 +-
 arch/arm/mach-pxa/balloon3.c             |  2 +-
 arch/arm/mach-pxa/stargate2.c            |  2 +-
 arch/mips/ath79/mach-pb44.c              |  2 +-
 drivers/gpio/gpio-pcf857x.c              |  2 +-
 include/linux/i2c/pcf857x.h              | 44 --------------------------------
 include/linux/platform_data/pcf857x.h    | 44 ++++++++++++++++++++++++++++++++
 9 files changed, 51 insertions(+), 51 deletions(-)
 delete mode 100644 include/linux/i2c/pcf857x.h
 create mode 100644 include/linux/platform_data/pcf857x.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 58075627c6df..f673cd7a6766 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -17,7 +17,7 @@
 #include <linux/gpio/machine.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 #include <linux/platform_data/at24.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 20f1874a5657..70e00dbeec96 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -14,7 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/i2c.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 #include <linux/platform_data/at24.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index cb176826d1cb..ca69d0b96a4f 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -23,7 +23,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
 #include <linux/platform_data/at24.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 
 #include <media/i2c/tvp514x.h>
 #include <media/i2c/adv7343.h>
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index d452a49c0396..1467c1d1e541 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -27,7 +27,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/types.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 #include <linux/i2c/pxa-i2c.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 7b6610e9dae4..2d45d18b1a5e 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -26,7 +26,7 @@
 #include <linux/mtd/partitions.h>
 
 #include <linux/i2c/pxa-i2c.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 #include <linux/platform_data/at24.h>
 #include <linux/smc91x.h>
 #include <linux/gpio.h>
diff --git a/arch/mips/ath79/mach-pb44.c b/arch/mips/ath79/mach-pb44.c
index 67b980d94fb7..be78298dffb4 100644
--- a/arch/mips/ath79/mach-pb44.c
+++ b/arch/mips/ath79/mach-pb44.c
@@ -12,7 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
 #include <linux/i2c-gpio.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 
 #include "machtypes.h"
 #include "dev-gpio-buttons.h"
diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index 8ddf9302ce3b..a4fd78b9c0e4 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -20,7 +20,7 @@
 
 #include <linux/gpio.h>
 #include <linux/i2c.h>
-#include <linux/i2c/pcf857x.h>
+#include <linux/platform_data/pcf857x.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
diff --git a/include/linux/i2c/pcf857x.h b/include/linux/i2c/pcf857x.h
deleted file mode 100644
index 0767a2a6b2f1..000000000000
--- a/include/linux/i2c/pcf857x.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef __LINUX_PCF857X_H
-#define __LINUX_PCF857X_H
-
-/**
- * struct pcf857x_platform_data - data to set up pcf857x driver
- * @gpio_base: number of the chip's first GPIO
- * @n_latch: optional bit-inverse of initial register value; if
- *	you leave this initialized to zero the driver will act
- *	like the chip was just reset
- * @setup: optional callback issued once the GPIOs are valid
- * @teardown: optional callback issued before the GPIOs are invalidated
- * @context: optional parameter passed to setup() and teardown()
- *
- * In addition to the I2C_BOARD_INFO() state appropriate to each chip,
- * the i2c_board_info used with the pcf875x driver must provide its
- * platform_data (pointer to one of these structures) with at least
- * the gpio_base value initialized.
- *
- * The @setup callback may be used with the kind of board-specific glue
- * which hands the (now-valid) GPIOs to other drivers, or which puts
- * devices in their initial states using these GPIOs.
- *
- * These GPIO chips are only "quasi-bidirectional"; read the chip specs
- * to understand the behavior.  They don't have separate registers to
- * record which pins are used for input or output, record which output
- * values are driven, or provide access to input values.  That must be
- * inferred by reading the chip's value and knowing the last value written
- * to it.  If you leave n_latch initialized to zero, that last written
- * value is presumed to be all ones (as if the chip were just reset).
- */
-struct pcf857x_platform_data {
-	unsigned	gpio_base;
-	unsigned	n_latch;
-
-	int		(*setup)(struct i2c_client *client,
-					int gpio, unsigned ngpio,
-					void *context);
-	int		(*teardown)(struct i2c_client *client,
-					int gpio, unsigned ngpio,
-					void *context);
-	void		*context;
-};
-
-#endif /* __LINUX_PCF857X_H */
diff --git a/include/linux/platform_data/pcf857x.h b/include/linux/platform_data/pcf857x.h
new file mode 100644
index 000000000000..0767a2a6b2f1
--- /dev/null
+++ b/include/linux/platform_data/pcf857x.h
@@ -0,0 +1,44 @@
+#ifndef __LINUX_PCF857X_H
+#define __LINUX_PCF857X_H
+
+/**
+ * struct pcf857x_platform_data - data to set up pcf857x driver
+ * @gpio_base: number of the chip's first GPIO
+ * @n_latch: optional bit-inverse of initial register value; if
+ *	you leave this initialized to zero the driver will act
+ *	like the chip was just reset
+ * @setup: optional callback issued once the GPIOs are valid
+ * @teardown: optional callback issued before the GPIOs are invalidated
+ * @context: optional parameter passed to setup() and teardown()
+ *
+ * In addition to the I2C_BOARD_INFO() state appropriate to each chip,
+ * the i2c_board_info used with the pcf875x driver must provide its
+ * platform_data (pointer to one of these structures) with at least
+ * the gpio_base value initialized.
+ *
+ * The @setup callback may be used with the kind of board-specific glue
+ * which hands the (now-valid) GPIOs to other drivers, or which puts
+ * devices in their initial states using these GPIOs.
+ *
+ * These GPIO chips are only "quasi-bidirectional"; read the chip specs
+ * to understand the behavior.  They don't have separate registers to
+ * record which pins are used for input or output, record which output
+ * values are driven, or provide access to input values.  That must be
+ * inferred by reading the chip's value and knowing the last value written
+ * to it.  If you leave n_latch initialized to zero, that last written
+ * value is presumed to be all ones (as if the chip were just reset).
+ */
+struct pcf857x_platform_data {
+	unsigned	gpio_base;
+	unsigned	n_latch;
+
+	int		(*setup)(struct i2c_client *client,
+					int gpio, unsigned ngpio,
+					void *context);
+	int		(*teardown)(struct i2c_client *client,
+					int gpio, unsigned ngpio,
+					void *context);
+	void		*context;
+};
+
+#endif /* __LINUX_PCF857X_H */
-- 
cgit v1.2.3


From c70d9d809fdeecedb96972457ee45c49a232d97f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 May 2017 15:40:12 -0500
Subject: ptrace: Properly initialize ptracer_cred on fork

When I introduced ptracer_cred I failed to consider the weirdness of
fork where the task_struct copies the old value by default.  This
winds up leaving ptracer_cred set even when a process forks and
the child process does not wind up being ptraced.

Because ptracer_cred is not set on non-ptraced processes whose
parents were ptraced this has broken the ability of the enlightenment
window manager to start setuid children.

Fix this by properly initializing ptracer_cred in ptrace_init_task

This must be done with a little bit of care to preserve the current value
of ptracer_cred when ptrace carries through fork.  Re-reading the
ptracer_cred from the ptracing process at this point is inconsistent
with how PT_PTRACE_CAP has been maintained all of these years.

Tested-by: Takashi Iwai <tiwai@suse.de>
Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ptrace.h |  7 +++++--
 kernel/ptrace.c        | 20 +++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 422bc2e4cb6a..ef3eb8bbfee4 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
-			  struct task_struct *new_parent);
+			  struct task_struct *new_parent,
+			  const struct cred *ptracer_cred);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
@@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 	if (unlikely(ptrace) && current->ptrace) {
 		child->ptrace = current->ptrace;
-		__ptrace_link(child, current->parent);
+		__ptrace_link(child, current->parent, current->ptracer_cred);
 
 		if (child->ptrace & PT_SEIZED)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
@@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
+	else
+		child->ptracer_cred = NULL;
 }
 
 /**
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 266ddcc1d8bb..60f356d91060 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 }
 
 
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+		   const struct cred *ptracer_cred)
+{
+	BUG_ON(!list_empty(&child->ptrace_entry));
+	list_add(&child->ptrace_entry, &new_parent->ptraced);
+	child->parent = new_parent;
+	child->ptracer_cred = get_cred(ptracer_cred);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-	BUG_ON(!list_empty(&child->ptrace_entry));
-	list_add(&child->ptrace_entry, &new_parent->ptraced);
-	child->parent = new_parent;
 	rcu_read_lock();
-	child->ptracer_cred = get_cred(__task_cred(new_parent));
+	__ptrace_link(child, new_parent, __task_cred(new_parent));
 	rcu_read_unlock();
 }
 
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 		flags |= PT_SEIZED;
 	task->ptrace = flags;
 
-	__ptrace_link(task, current);
+	ptrace_link(task, current);
 
 	/* SEIZE doesn't trap tracee on attach */
 	if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
 		 */
 		if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 			current->ptrace = PT_PTRACED;
-			__ptrace_link(current, current->real_parent);
+			ptrace_link(current, current->real_parent);
 		}
 	}
 	write_unlock_irq(&tasklist_lock);
-- 
cgit v1.2.3


From 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 23 Feb 2017 11:19:36 +0200
Subject: net/mlx5: Avoid using pending command interface slots

Currently when firmware command gets stuck or it takes long time to
complete, the driver command will get timeout and the command slot is
freed and can be used for new commands, and if the firmware receive new
command on the old busy slot its behavior is unexpected and this could
be harmful.
To fix this when the driver command gets timeout we return failure,
but we don't free the command slot and we wait for the firmware to
explicitly respond to that command.
Once all the entries are busy we will stop processing new firmware
commands.

Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c    | 41 +++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c     |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/health.c |  2 +-
 include/linux/mlx5/driver.h                      |  7 +++-
 4 files changed, 44 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 5bdaf3d545b2..10d282841f5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work)
 	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
 		       mlx5_command_str(msg_to_opcode(ent->in)),
 		       msg_to_opcode(ent->in));
-	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 }
 
 static void cmd_work_handler(struct work_struct *work)
@@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work)
 	}
 
 	cmd->ent_arr[ent->idx] = ent;
+	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 	lay = get_inst(cmd, ent->idx);
 	ent->lay = lay;
 	memset(lay, 0, sizeof(*lay));
@@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work)
 	if (ent->callback)
 		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
 
+	/* Skip sending command to fw if internal error */
+	if (pci_channel_offline(dev->pdev) ||
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		u8 status = 0;
+		u32 drv_synd;
+
+		ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
+		MLX5_SET(mbox_out, ent->out, status, status);
+		MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
+
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+		return;
+	}
+
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work)
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
 	}
 }
 
@@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 		wait_for_completion(&ent->done);
 	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
 		ent->ret = -ETIMEDOUT;
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 	}
 
 	err = ent->ret;
@@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 	}
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 			struct semaphore *sem;
 
 			ent = cmd->ent_arr[i];
+
+			/* if we already completed the command, ignore it */
+			if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+						&ent->state)) {
+				/* only real completion can free the cmd slot */
+				if (!forced) {
+					mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+						      ent->idx);
+					free_ent(cmd, ent->idx);
+				}
+				continue;
+			}
+
 			if (ent->callback)
 				cancel_delayed_work(&ent->cb_timeout_work);
 			if (ent->page_queue)
@@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 				mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
 					      ent->ret, deliv_status_to_str(ent->status), ent->status);
 			}
-			free_ent(cmd, ent->idx);
+
+			/* only real completion will free the entry slot */
+			if (!forced)
+				free_ent(cmd, ent->idx);
 
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ea5d8d37a75c..33eae5ad2fb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 
 		case MLX5_EVENT_TYPE_CMD:
-			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
+			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
 			break;
 
 		case MLX5_EVENT_TYPE_PORT_CHANGE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d0515391d33b..44f59b1d6f0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
 	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 
 	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-	mlx5_cmd_comp_handler(dev, vector);
+	mlx5_cmd_comp_handler(dev, vector, true);
 	return;
 
 no_trig:
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bcdf739ee41a..93273d9ea4d1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -787,7 +787,12 @@ enum {
 
 typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 
+enum {
+	MLX5_CMD_ENT_STATE_PENDING_COMP,
+};
+
 struct mlx5_cmd_work_ent {
+	unsigned long		state;
 	struct mlx5_cmd_msg    *in;
 	struct mlx5_cmd_msg    *out;
 	void		       *uout;
@@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
-- 
cgit v1.2.3


From 7f65b1f5adc5f8496ca8bec4947de66fefe36220 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 22 May 2017 14:50:30 +0200
Subject: cdc-ether: divorce initialisation with a filter reset and a generic
 method

Some devices need their multicast filter reset but others are crashed by that.
So the methods need to be separated.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Reported-by: "Ridgway, Keith" <kridgway@harris.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 31 ++++++++++++++++++++++++-------
 include/linux/usb/usbnet.h  |  1 +
 2 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index f3ae88fdf332..8ab281b478f2 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -310,6 +310,26 @@ skip:
 		return -ENODEV;
 	}
 
+	return 0;
+
+bad_desc:
+	dev_info(&dev->udev->dev, "bad CDC descriptors\n");
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
+
+
+/* like usbnet_generic_cdc_bind() but handles filter initialization
+ * correctly
+ */
+int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+	int rv;
+
+	rv = usbnet_generic_cdc_bind(dev, intf);
+	if (rv < 0)
+		goto bail_out;
+
 	/* Some devices don't initialise properly. In particular
 	 * the packet filter is not reset. There are devices that
 	 * don't do reset all the way. So the packet filter should
@@ -317,13 +337,10 @@ skip:
 	 */
 	usbnet_cdc_update_filter(dev);
 
-	return 0;
-
-bad_desc:
-	dev_info(&dev->udev->dev, "bad CDC descriptors\n");
-	return -ENODEV;
+bail_out:
+	return rv;
 }
-EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
+EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind);
 
 void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
@@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data)
 			< sizeof(struct cdc_state)));
 
-	status = usbnet_generic_cdc_bind(dev, intf);
+	status = usbnet_ether_cdc_bind(dev, intf);
 	if (status < 0)
 		return status;
 
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7dffa5624ea6..97116379db5f 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -206,6 +206,7 @@ struct cdc_state {
 };
 
 extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *);
+extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf);
 extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_status(struct usbnet *, struct urb *);
-- 
cgit v1.2.3


From 12e8b570e732eaa5eae3a2895ba3fbcf91bde2b4 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Mon, 22 May 2017 20:13:07 +0200
Subject: mlx5: fix bug reading rss_hash_type from CQE

Masks for extracting part of the Completion Queue Entry (CQE)
field rss_hash_type was swapped, namely CQE_RSS_HTYPE_IP and
CQE_RSS_HTYPE_L4.

The bug resulted in setting skb->l4_hash, even-though the
rss_hash_type indicated that hash was NOT computed over the
L4 (UDP or TCP) part of the packet.

Added comments from the datasheet, to make it more clear what
these masks are selecting.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dd9a263ed368..a940ec6a046c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -787,8 +787,14 @@ enum {
 };
 
 enum {
-	CQE_RSS_HTYPE_IP	= 0x3 << 6,
-	CQE_RSS_HTYPE_L4	= 0x3 << 2,
+	CQE_RSS_HTYPE_IP	= 0x3 << 2,
+	/* cqe->rss_hash_type[3:2] - IP destination selected for hash
+	 * (00 = none,  01 = IPv4, 10 = IPv6, 11 = Reserved)
+	 */
+	CQE_RSS_HTYPE_L4	= 0x3 << 6,
+	/* cqe->rss_hash_type[7:6] - L4 destination selected for hash
+	 * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI
+	 */
 };
 
 enum {
-- 
cgit v1.2.3


From 6f4dbd149d2a151b89d1a5bbf7530ee5546c7908 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 19 May 2017 11:33:16 +0200
Subject: libceph: use kbasename() and kill ceph_file_part()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/ceph_debug.h |  6 +++---
 net/ceph/ceph_common.c          | 13 -------------
 2 files changed, 3 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index aa2e19182d99..51c5bd64bd00 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -3,6 +3,8 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/string.h>
+
 #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 /*
@@ -12,12 +14,10 @@
  */
 
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
-extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)						\
 	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
-		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\
-		 __LINE__, ##__VA_ARGS__)
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
 # else
 /* faux printk call just to see any compiler warnings. */
 #  define dout(fmt, ...)	do {				\
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4fd02831beed..47e94b560ba0 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -56,19 +56,6 @@ static const struct kernel_param_ops param_ops_supported_features = {
 module_param_cb(supported_features, &param_ops_supported_features, NULL,
 		S_IRUGO);
 
-/*
- * find filename portion of a path (/foo/bar/baz -> baz)
- */
-const char *ceph_file_part(const char *s, int len)
-{
-	const char *e = s + len;
-
-	while (e != s && *(e-1) != '/')
-		e--;
-	return e;
-}
-EXPORT_SYMBOL(ceph_file_part);
-
 const char *ceph_msg_type_name(int type)
 {
 	switch (type) {
-- 
cgit v1.2.3


From 4d071c3238987325b9e50e33051a40d1cce311cc Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 23 May 2017 14:18:17 -0500
Subject: PCI/PM: Add needs_resume flag to avoid suspend complete optimization

Some drivers - like i915 - may not support the system suspend direct
complete optimization due to differences in their runtime and system
suspend sequence.  Add a flag that when set resumes the device before
calling the driver's system suspend handlers which effectively disables
the optimization.

Needed by a future patch fixing suspend/resume on i915.

Suggested by Rafael.

Signed-off-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/pci.c   | 3 ++-
 include/linux/pci.h | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5bba8e6..563901cd9c06 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev)
 
 	if (!pm_runtime_suspended(dev)
 	    || pci_target_state(pci_dev) != pci_dev->current_state
-	    || platform_pci_need_resume(pci_dev))
+	    || platform_pci_need_resume(pci_dev)
+	    || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME))
 		return false;
 
 	/*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..df7dd9021646 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,11 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
 	/* Do not use FLR even if device advertises PCI_AF_CAP */
 	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+	/*
+	 * Resume before calling the driver's system suspend hooks, disabling
+	 * the direct_complete optimization.
+	 */
+	PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit v1.2.3


From 4166a56aa8d5babe979d8e0834a741c9f015ad14 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 20 May 2017 23:42:50 +0200
Subject: ARM/dmaengine: pl08x: pass reasonable memcpy settings

We cannot use bits from configuration registers as API between
platforms and driver like this, abstract it out to two enums
and mimic the stuff passed as device tree data.

This is done to make it possible for the driver to generate the
ccfg word on-the-fly so we can support more PL08x derivatives.

Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/mach-lpc32xx/phy3250.c |   3 +
 arch/arm/mach-s3c64xx/pl080.c   |  28 +++------
 arch/arm/mach-spear/spear3xx.c  |  14 ++---
 arch/arm/mach-spear/spear6xx.c  |  14 ++---
 drivers/dma/amba-pl08x.c        | 131 ++++++++++++++++++++++++++++------------
 include/linux/amba/pl08x.h      |  30 +++++++--
 6 files changed, 138 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 6c52bd32610e..e48cc06c2aec 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -137,6 +137,9 @@ static void pl08x_put_signal(const struct pl08x_channel_data *cd, int ch)
 }
 
 static struct pl08x_platform_data pl08x_pd = {
+	/* Some reasonable memcpy defaults */
+	.memcpy_burst_size = PL08X_BURST_SZ_256,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
 	.slave_channels = &pl08x_slave_channels[0],
 	.num_slave_channels = ARRAY_SIZE(pl08x_slave_channels),
 	.get_xfer_signal = pl08x_get_signal,
diff --git a/arch/arm/mach-s3c64xx/pl080.c b/arch/arm/mach-s3c64xx/pl080.c
index 261820a855ec..66fc774b70ec 100644
--- a/arch/arm/mach-s3c64xx/pl080.c
+++ b/arch/arm/mach-s3c64xx/pl080.c
@@ -137,16 +137,10 @@ static const struct dma_slave_map s3c64xx_dma0_slave_map[] = {
 };
 
 struct pl08x_platform_data s3c64xx_dma0_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
-			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT |
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE |
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_4,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl08x_get_xfer_signal,
@@ -238,16 +232,10 @@ static const struct dma_slave_map s3c64xx_dma1_slave_map[] = {
 };
 
 struct pl08x_platform_data s3c64xx_dma1_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
-			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT |
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE |
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_4,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl08x_get_xfer_signal,
diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c
index 23394ac76cf2..8537fcffe5a8 100644
--- a/arch/arm/mach-spear/spear3xx.c
+++ b/arch/arm/mach-spear/spear3xx.c
@@ -44,16 +44,10 @@ struct pl022_ssp_controller pl022_plat_data = {
 
 /* dmac device registration */
 struct pl08x_platform_data pl080_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | \
-			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT | \
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE | \
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_16,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl080_get_signal,
diff --git a/arch/arm/mach-spear/spear6xx.c b/arch/arm/mach-spear/spear6xx.c
index ccf3573b831c..c5fc110134ba 100644
--- a/arch/arm/mach-spear/spear6xx.c
+++ b/arch/arm/mach-spear/spear6xx.c
@@ -322,16 +322,10 @@ static struct pl08x_channel_data spear600_dma_info[] = {
 };
 
 static struct pl08x_platform_data spear6xx_pl080_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | \
-			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT | \
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE | \
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_16,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl080_get_signal,
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 6bb8813ca275..d3d660a36ad7 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1433,6 +1433,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
 	struct pl08x_sg *dsg;
+	u32 cctl = 0;
 	int ret;
 
 	txd = pl08x_get_txd(plchan);
@@ -1455,15 +1456,83 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 
 	/* Set platform data for m2m */
 	txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
-	txd->cctl = pl08x->pd->memcpy_channel.cctl_memcpy &
-			~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
+
+	/* Conjure cctl */
+	switch (pl08x->pd->memcpy_burst_size) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal burst size for memcpy, set to 1\n");
+		/* Fall through */
+	case PL08X_BURST_SZ_1:
+		cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_4:
+		cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_8:
+		cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_16:
+		cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_32:
+		cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_64:
+		cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_128:
+		cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_256:
+		cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	}
+
+	switch (pl08x->pd->memcpy_bus_width) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case PL08X_BUS_WIDTH_8_BITS:
+		cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_16_BITS:
+		cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_32_BITS:
+		cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	}
+
+	/* Protection flags */
+	if (pl08x->pd->memcpy_prot_buff)
+		cctl |= PL080_CONTROL_PROT_BUFF;
+	if (pl08x->pd->memcpy_prot_cache)
+		cctl |= PL080_CONTROL_PROT_CACHE;
+
+	/* We are the kernel, so we are in privileged mode */
+	cctl |= PL080_CONTROL_PROT_SYS;
 
 	/* Both to be incremented or the code will break */
-	txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+	cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
 
 	if (pl08x->vd->dualmaster)
-		txd->cctl |= pl08x_select_bus(pl08x->mem_buses,
-					      pl08x->mem_buses);
+		cctl |= pl08x_select_bus(pl08x->mem_buses,
+					 pl08x->mem_buses);
+
+	txd->cctl = cctl;
 
 	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
 	if (!ret) {
@@ -1925,9 +1994,16 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 			chan->signal = i;
 			pl08x_dma_slave_init(chan);
 		} else {
-			chan->cd = &pl08x->pd->memcpy_channel;
+			chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL);
+			if (!chan->cd) {
+				kfree(chan);
+				return -ENOMEM;
+			}
+			chan->cd->bus_id = "memcpy";
+			chan->cd->periph_buses = pl08x->pd->mem_buses;
 			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
 			if (!chan->name) {
+				kfree(chan->cd);
 				kfree(chan);
 				return -ENOMEM;
 			}
@@ -2099,7 +2175,6 @@ static int pl08x_of_probe(struct amba_device *adev,
 {
 	struct pl08x_platform_data *pd;
 	struct pl08x_channel_data *chanp = NULL;
-	u32 cctl_memcpy = 0;
 	u32 val;
 	int ret;
 	int i;
@@ -2139,36 +2214,28 @@ static int pl08x_of_probe(struct amba_device *adev,
 		dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n");
 		/* Fall through */
 	case 1:
-		cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_1;
 		break;
 	case 4:
-		cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_4;
 		break;
 	case 8:
-		cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_8;
 		break;
 	case 16:
-		cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_16;
 		break;
 	case 32:
-		cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_32;
 		break;
 	case 64:
-		cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_64;
 		break;
 	case 128:
-		cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_128;
 		break;
 	case 256:
-		cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_256;
 		break;
 	}
 
@@ -2182,28 +2249,16 @@ static int pl08x_of_probe(struct amba_device *adev,
 		dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n");
 		/* Fall through */
 	case 8:
-		cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			       PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS;
 		break;
 	case 16:
-		cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			       PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS;
 		break;
 	case 32:
-		cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			       PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS;
 		break;
 	}
 
-	/* This is currently the only thing making sense */
-	cctl_memcpy |= PL080_CONTROL_PROT_SYS;
-
-	/* Set up memcpy channel */
-	pd->memcpy_channel.bus_id = "memcpy";
-	pd->memcpy_channel.cctl_memcpy = cctl_memcpy;
-	/* Use the buses that can access memory, obviously */
-	pd->memcpy_channel.periph_buses = pd->mem_buses;
-
 	/*
 	 * Allocate channel data for all possible slave channels (one
 	 * for each possible signal), channels will then be allocated
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 5308eae9ce35..79d1bcee738d 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -47,8 +47,6 @@ enum {
  * devices with static assignments
  * @muxval: a number usually used to poke into some mux regiser to
  * mux in the signal to this channel
- * @cctl_memcpy: options for the channel control register for memcpy
- *  *** not used for slave channels ***
  * @addr: source/target address in physical memory for this DMA channel,
  * can be the address of a FIFO register for burst requests for example.
  * This can be left undefined if the PrimeCell API is used for configuring
@@ -63,12 +61,28 @@ struct pl08x_channel_data {
 	int min_signal;
 	int max_signal;
 	u32 muxval;
-	u32 cctl_memcpy;
 	dma_addr_t addr;
 	bool single;
 	u8 periph_buses;
 };
 
+enum pl08x_burst_size {
+	PL08X_BURST_SZ_1,
+	PL08X_BURST_SZ_4,
+	PL08X_BURST_SZ_8,
+	PL08X_BURST_SZ_16,
+	PL08X_BURST_SZ_32,
+	PL08X_BURST_SZ_64,
+	PL08X_BURST_SZ_128,
+	PL08X_BURST_SZ_256,
+};
+
+enum pl08x_bus_width {
+	PL08X_BUS_WIDTH_8_BITS,
+	PL08X_BUS_WIDTH_16_BITS,
+	PL08X_BUS_WIDTH_32_BITS,
+};
+
 /**
  * struct pl08x_platform_data - the platform configuration for the PL08x
  * PrimeCells.
@@ -76,6 +90,11 @@ struct pl08x_channel_data {
  * platform, all inclusive, including multiplexed channels. The available
  * physical channels will be multiplexed around these signals as they are
  * requested, just enumerate all possible channels.
+ * @num_slave_channels: number of elements in the slave channel array
+ * @memcpy_burst_size: the appropriate burst size for memcpy operations
+ * @memcpy_bus_width: memory bus width
+ * @memcpy_prot_buff: whether memcpy DMA is bufferable
+ * @memcpy_prot_cache: whether memcpy DMA is cacheable
  * @get_xfer_signal: request a physical signal to be used for a DMA transfer
  * immediately: if there is some multiplexing or similar blocking the use
  * of the channel the transfer can be denied by returning less than zero,
@@ -90,7 +109,10 @@ struct pl08x_channel_data {
 struct pl08x_platform_data {
 	struct pl08x_channel_data *slave_channels;
 	unsigned int num_slave_channels;
-	struct pl08x_channel_data memcpy_channel;
+	enum pl08x_burst_size memcpy_burst_size;
+	enum pl08x_bus_width memcpy_bus_width;
+	bool memcpy_prot_buff;
+	bool memcpy_prot_cache;
 	int (*get_xfer_signal)(const struct pl08x_channel_data *);
 	void (*put_xfer_signal)(const struct pl08x_channel_data *, int);
 	u8 lli_buses;
-- 
cgit v1.2.3


From 1e1cfc7213a37131a53e7dfada75dce77b8e043d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 20 May 2017 23:42:53 +0200
Subject: dmaengine: pl08x: Add support for Faraday Technology FTDMAC020

After reading the specs for the Faraday Technology FTDMAC020 found
in the Gemini platform, it becomes pretty evident that this is just
another PL08x derivative, and should be handled like such by simply
extending the existing PL08x driver to handle the quirks in this
hardware.

This patch makes memcpy work and has been tested on the Gemini and
also regression-tested on the Nomadik NHK15 using dmatest with
10 threads per channel without a hinch for hours.

I have not implemented slave DMA in those codepaths, because this
device (Gemini) does not use slave DMA, and it seems like devices
using FTDMAC020 for device DMA have a slightly different register
layout so some real hardware is needed to proceed with this. I
left some FIXME etc in the code for this.

I had to do some refactorings of some helper functions, but I have
not split those into separate patches because these refactorings
do not make much sense without the increased complexity of handling
the FTDMAC020.

The DMA test would hang the platform on me on the Gemini after a
few thousand iterations, however after turning of the caches the
problem immediately disappeared and I could run the DMA engine
with 10 threads pers physical channel for days in a row without
a crash. I think there is no problem with the DMA driver: instead
it is something fishy in the FA526 cache handling code that get
pretty heavily exercised by the DMA engine and we need to go and
fix that instead.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/Kconfig        |   6 +-
 drivers/dma/amba-pl08x.c   | 767 ++++++++++++++++++++++++++++++++++++---------
 include/linux/amba/pl080.h |  83 ++++-
 3 files changed, 698 insertions(+), 158 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 24e8597b2c3e..67ab15aa98c3 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -62,8 +62,10 @@ config AMBA_PL08X
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-	  Platform has a PL08x DMAC device
-	  which can provide DMA engine support
+	  Say yes if your platform has a PL08x DMAC device which can
+	  provide DMA engine support. This includes the original ARM
+	  PL080 and PL081, Samsungs PL080 derivative and Faraday
+	  Technology's FTDMAC020 PL080 derivative.
 
 config AMCC_PPC440SPE_ADMA
 	tristate "AMCC PPC440SPe ADMA support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 6ddb029dac50..13cc95c0474c 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1,9 +1,10 @@
 /*
  * Copyright (c) 2006 ARM Ltd.
  * Copyright (c) 2010 ST-Ericsson SA
+ * Copyirght (c) 2017 Linaro Ltd.
  *
  * Author: Peter Pearse <peter.pearse@arm.com>
- * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -110,11 +111,12 @@ struct pl08x_driver_data;
  * @channels: the number of channels available in this variant
  * @signals: the number of request signals available from the hardware
  * @dualmaster: whether this version supports dual AHB masters or not.
- * @nomadik: whether the channels have Nomadik security extension bits
- *	that need to be checked for permission before use and some registers are
- *	missing
- * @pl080s: whether this version is a PL080S, which has separate register and
- *	LLI word for transfer size.
+ * @nomadik: whether this variant is a ST Microelectronics Nomadik, where the
+ *	channels have Nomadik security extension bits that need to be checked
+ *	for permission before use and some registers are missing
+ * @pl080s: whether this variant is a Samsung PL080S, which has separate
+ *	register and LLI word for transfer size.
+ * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020
  * @max_transfer_size: the maximum single element transfer size for this
  *	PL08x variant.
  */
@@ -125,6 +127,7 @@ struct vendor_data {
 	bool dualmaster;
 	bool nomadik;
 	bool pl080s;
+	bool ftdmac020;
 	u32 max_transfer_size;
 };
 
@@ -148,19 +151,34 @@ struct pl08x_bus_data {
  * @id: physical index to this channel
  * @base: memory base address for this physical channel
  * @reg_config: configuration address for this physical channel
+ * @reg_control: control address for this physical channel
+ * @reg_src: transfer source address register
+ * @reg_dst: transfer destination address register
+ * @reg_lli: transfer LLI address register
+ * @reg_busy: if the variant has a special per-channel busy register,
+ * this contains a pointer to it
  * @lock: a lock to use when altering an instance of this struct
  * @serving: the virtual channel currently being served by this physical
  * channel
  * @locked: channel unavailable for the system, e.g. dedicated to secure
  * world
+ * @ftdmac020: channel is on a FTDMAC020
+ * @pl080s: channel is on a PL08s
  */
 struct pl08x_phy_chan {
 	unsigned int id;
 	void __iomem *base;
 	void __iomem *reg_config;
+	void __iomem *reg_control;
+	void __iomem *reg_src;
+	void __iomem *reg_dst;
+	void __iomem *reg_lli;
+	void __iomem *reg_busy;
 	spinlock_t lock;
 	struct pl08x_dma_chan *serving;
 	bool locked;
+	bool ftdmac020;
+	bool pl080s;
 };
 
 /**
@@ -362,10 +380,24 @@ static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
 {
 	unsigned int val;
 
+	/* If we have a special busy register, take a shortcut */
+	if (ch->reg_busy) {
+		val = readl(ch->reg_busy);
+		return !!(val & BIT(ch->id));
+	}
 	val = readl(ch->reg_config);
 	return val & PL080_CONFIG_ACTIVE;
 }
 
+/*
+ * pl08x_write_lli() - Write an LLI into the DMA controller.
+ *
+ * The PL08x derivatives support linked lists, but the first item of the
+ * list containing the source, destination, control word and next LLI is
+ * ignored. Instead the driver has to write those values directly into the
+ * SRC, DST, LLI and control registers. On FTDMAC020 also the SIZE
+ * register need to be set up for the first transfer.
+ */
 static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
 		struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
 {
@@ -383,11 +415,112 @@ static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
 			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
 			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
 
-	writel_relaxed(lli[PL080_LLI_SRC], phychan->base + PL080_CH_SRC_ADDR);
-	writel_relaxed(lli[PL080_LLI_DST], phychan->base + PL080_CH_DST_ADDR);
-	writel_relaxed(lli[PL080_LLI_LLI], phychan->base + PL080_CH_LLI);
-	writel_relaxed(lli[PL080_LLI_CCTL], phychan->base + PL080_CH_CONTROL);
+	writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src);
+	writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst);
+	writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli);
 
+	/*
+	 * The FTMAC020 has a different layout in the CCTL word of the LLI
+	 * and the CCTL register which is split in CSR and SIZE registers.
+	 * Convert the LLI item CCTL into the proper values to write into
+	 * the CSR and SIZE registers.
+	 */
+	if (phychan->ftdmac020) {
+		u32 llictl = lli[PL080_LLI_CCTL];
+		u32 val = 0;
+
+		/* Write the transfer size (12 bits) to the size register */
+		writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK,
+			       phychan->base + FTDMAC020_CH_SIZE);
+		/*
+		 * Then write the control bits 28..16 to the control register
+		 * by shuffleing the bits around to where they are in the
+		 * main register. The mapping is as follows:
+		 * Bit 28: TC_MSK - mask on all except last LLI
+		 * Bit 27..25: SRC_WIDTH
+		 * Bit 24..22: DST_WIDTH
+		 * Bit 21..20: SRCAD_CTRL
+		 * Bit 19..17: DSTAD_CTRL
+		 * Bit 17: SRC_SEL
+		 * Bit 16: DST_SEL
+		 */
+		if (llictl & FTDMAC020_LLI_TC_MSK)
+			val |= FTDMAC020_CH_CSR_TC_MSK;
+		val |= ((llictl  & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+			(FTDMAC020_LLI_SRC_WIDTH_SHIFT -
+			 FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+			(FTDMAC020_LLI_DST_WIDTH_SHIFT -
+			 FTDMAC020_CH_CSR_DST_WIDTH_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_SRCAD_CTL_MSK) >>
+			(FTDMAC020_LLI_SRCAD_CTL_SHIFT -
+			 FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_DSTAD_CTL_MSK) >>
+			(FTDMAC020_LLI_DSTAD_CTL_SHIFT -
+			 FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT));
+		if (llictl & FTDMAC020_LLI_SRC_SEL)
+			val |= FTDMAC020_CH_CSR_SRC_SEL;
+		if (llictl & FTDMAC020_LLI_DST_SEL)
+			val |= FTDMAC020_CH_CSR_DST_SEL;
+
+		/*
+		 * Set up the bits that exist in the CSR but are not
+		 * part the LLI, i.e. only gets written to the control
+		 * register right here.
+		 *
+		 * FIXME: do not just handle memcpy, also handle slave DMA.
+		 */
+		switch (pl08x->pd->memcpy_burst_size) {
+		default:
+		case PL08X_BURST_SZ_1:
+			val |= PL080_BSIZE_1 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_4:
+			val |= PL080_BSIZE_4 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_8:
+			val |= PL080_BSIZE_8 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_16:
+			val |= PL080_BSIZE_16 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_32:
+			val |= PL080_BSIZE_32 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_64:
+			val |= PL080_BSIZE_64 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_128:
+			val |= PL080_BSIZE_128 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_256:
+			val |= PL080_BSIZE_256 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		}
+
+		/* Protection flags */
+		if (pl08x->pd->memcpy_prot_buff)
+			val |= FTDMAC020_CH_CSR_PROT2;
+		if (pl08x->pd->memcpy_prot_cache)
+			val |= FTDMAC020_CH_CSR_PROT3;
+		/* We are the kernel, so we are in privileged mode */
+		val |= FTDMAC020_CH_CSR_PROT1;
+
+		writel_relaxed(val, phychan->reg_control);
+	} else {
+		/* Bits are just identical */
+		writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control);
+	}
+
+	/* Second control word on the PL080s */
 	if (pl08x->vd->pl080s)
 		writel_relaxed(lli[PL080S_LLI_CCTL2],
 				phychan->base + PL080S_CH_CONTROL2);
@@ -425,11 +558,25 @@ static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
 		cpu_relax();
 
 	/* Do not access config register until channel shows as inactive */
-	val = readl(phychan->reg_config);
-	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
+	if (phychan->ftdmac020) {
+		val = readl(phychan->reg_config);
+		while (val & FTDMAC020_CH_CFG_BUSY)
+			val = readl(phychan->reg_config);
+
+		val = readl(phychan->reg_control);
+		while (val & FTDMAC020_CH_CSR_EN)
+			val = readl(phychan->reg_control);
+
+		writel(val | FTDMAC020_CH_CSR_EN,
+		       phychan->reg_control);
+	} else {
 		val = readl(phychan->reg_config);
+		while ((val & PL080_CONFIG_ACTIVE) ||
+		       (val & PL080_CONFIG_ENABLE))
+			val = readl(phychan->reg_config);
 
-	writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+		writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+	}
 }
 
 /*
@@ -447,6 +594,14 @@ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
 	u32 val;
 	int timeout;
 
+	if (ch->ftdmac020) {
+		/* Use the enable bit on the FTDMAC020 */
+		val = readl(ch->reg_control);
+		val &= ~FTDMAC020_CH_CSR_EN;
+		writel(val, ch->reg_control);
+		return;
+	}
+
 	/* Set the HALT bit and wait for the FIFO to drain */
 	val = readl(ch->reg_config);
 	val |= PL080_CONFIG_HALT;
@@ -466,6 +621,14 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
 {
 	u32 val;
 
+	/* Use the enable bit on the FTDMAC020 */
+	if (ch->ftdmac020) {
+		val = readl(ch->reg_control);
+		val |= FTDMAC020_CH_CSR_EN;
+		writel(val, ch->reg_control);
+		return;
+	}
+
 	/* Clear the HALT bit */
 	val = readl(ch->reg_config);
 	val &= ~PL080_CONFIG_HALT;
@@ -481,25 +644,68 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
 static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
 	struct pl08x_phy_chan *ch)
 {
-	u32 val = readl(ch->reg_config);
+	u32 val;
+
+	/* The layout for the FTDMAC020 is different */
+	if (ch->ftdmac020) {
+		/* Disable all interrupts */
+		val = readl(ch->reg_config);
+		val |= (FTDMAC020_CH_CFG_INT_ABT_MASK |
+			FTDMAC020_CH_CFG_INT_ERR_MASK |
+			FTDMAC020_CH_CFG_INT_TC_MASK);
+		writel(val, ch->reg_config);
+
+		/* Abort and disable channel */
+		val = readl(ch->reg_control);
+		val &= ~FTDMAC020_CH_CSR_EN;
+		val |= FTDMAC020_CH_CSR_ABT;
+		writel(val, ch->reg_control);
+
+		/* Clear ABT and ERR interrupt flags */
+		writel(BIT(ch->id) | BIT(ch->id + 16),
+		       pl08x->base + PL080_ERR_CLEAR);
+		writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
 
+		return;
+	}
+
+	val = readl(ch->reg_config);
 	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
 		 PL080_CONFIG_TC_IRQ_MASK);
-
 	writel(val, ch->reg_config);
 
 	writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
 	writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
 }
 
-static inline u32 get_bytes_in_cctl(u32 cctl)
+static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch)
 {
-	/* The source width defines the number of bytes */
-	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
+	u32 val;
+	u32 bytes;
+
+	if (ch->ftdmac020) {
+		bytes = readl(ch->base + FTDMAC020_CH_SIZE);
+
+		val = readl(ch->reg_control);
+		val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK;
+		val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT;
+	} else if (ch->pl080s) {
+		val = readl(ch->base + PL080S_CH_CONTROL2);
+		bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
 
-	cctl &= PL080_CONTROL_SWIDTH_MASK;
+		val = readl(ch->reg_control);
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	} else {
+		/* Plain PL08x */
+		val = readl(ch->reg_control);
+		bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	}
 
-	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	switch (val) {
 	case PL080_WIDTH_8BIT:
 		break;
 	case PL080_WIDTH_16BIT:
@@ -512,14 +718,35 @@ static inline u32 get_bytes_in_cctl(u32 cctl)
 	return bytes;
 }
 
-static inline u32 get_bytes_in_cctl_pl080s(u32 cctl, u32 cctl1)
+static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va)
 {
-	/* The source width defines the number of bytes */
-	u32 bytes = cctl1 & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+	u32 val;
+	u32 bytes;
+
+	if (ch->ftdmac020) {
+		val = llis_va[PL080_LLI_CCTL];
+		bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+		val = llis_va[PL080_LLI_CCTL];
+		val &= FTDMAC020_LLI_SRC_WIDTH_MSK;
+		val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+	} else if (ch->pl080s) {
+		val = llis_va[PL080S_LLI_CCTL2];
+		bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+		val = llis_va[PL080_LLI_CCTL];
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	} else {
+		/* Plain PL08x */
+		val = llis_va[PL080_LLI_CCTL];
+		bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
 
-	cctl &= PL080_CONTROL_SWIDTH_MASK;
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	}
 
-	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	switch (val) {
 	case PL080_WIDTH_8BIT:
 		break;
 	case PL080_WIDTH_16BIT:
@@ -554,15 +781,10 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 	 * Follow the LLIs to get the number of remaining
 	 * bytes in the currently active transaction.
 	 */
-	clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
+	clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2;
 
 	/* First get the remaining bytes in the active transfer */
-	if (pl08x->vd->pl080s)
-		bytes = get_bytes_in_cctl_pl080s(
-				readl(ch->base + PL080_CH_CONTROL),
-				readl(ch->base + PL080S_CH_CONTROL2));
-	else
-		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
+	bytes = get_bytes_in_phy_channel(ch);
 
 	if (!clli)
 		return bytes;
@@ -583,12 +805,7 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 	llis_va_limit = llis_va + llis_max_words;
 
 	for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
-		if (pl08x->vd->pl080s)
-			bytes += get_bytes_in_cctl_pl080s(
-						llis_va[PL080_LLI_CCTL],
-						llis_va[PL080S_LLI_CCTL2]);
-		else
-			bytes += get_bytes_in_cctl(llis_va[PL080_LLI_CCTL]);
+		bytes += get_bytes_in_lli(ch, llis_va);
 
 		/*
 		 * A LLI pointer going backward terminates the LLI list
@@ -748,9 +965,30 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
  * LLI handling
  */
 
-static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
+static inline unsigned int
+pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x,
+			u32 cctl,
+			bool source)
 {
-	switch (coded) {
+	u32 val;
+
+	if (pl08x->vd->ftdmac020) {
+		if (source)
+			val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+		else
+			val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+	} else {
+		if (source)
+			val = (cctl & PL080_CONTROL_SWIDTH_MASK) >>
+				PL080_CONTROL_SWIDTH_SHIFT;
+		else
+			val = (cctl & PL080_CONTROL_DWIDTH_MASK) >>
+				PL080_CONTROL_DWIDTH_SHIFT;
+	}
+
+	switch (val) {
 	case PL080_WIDTH_8BIT:
 		return 1;
 	case PL080_WIDTH_16BIT:
@@ -764,49 +1002,106 @@ static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
 	return 0;
 }
 
-static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
-				  size_t tsize)
+static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x,
+					 u32 cctl,
+					 u8 srcwidth, u8 dstwidth,
+					 size_t tsize)
 {
 	u32 retbits = cctl;
 
-	/* Remove all src, dst and transfer size bits */
-	retbits &= ~PL080_CONTROL_DWIDTH_MASK;
-	retbits &= ~PL080_CONTROL_SWIDTH_MASK;
-	retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+	/*
+	 * Remove all src, dst and transfer size bits, then set the
+	 * width and size according to the parameters. The bit offsets
+	 * are different in the FTDMAC020 so we need to accound for this.
+	 */
+	if (pl08x->vd->ftdmac020) {
+		retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK;
+		retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK;
+		retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+		switch (srcwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
 
-	/* Then set the bits according to the parameters */
-	switch (srcwidth) {
-	case 1:
-		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT;
-		break;
-	case 2:
-		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT;
-		break;
-	case 4:
-		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT;
-		break;
-	default:
-		BUG();
-		break;
-	}
+		switch (dstwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
 
-	switch (dstwidth) {
-	case 1:
-		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
-		break;
-	case 2:
-		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
-		break;
-	case 4:
-		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
-		break;
-	default:
-		BUG();
-		break;
+		tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+		retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT;
+	} else {
+		retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+		retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+		retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		switch (srcwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		switch (dstwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
+		retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
 	}
 
-	tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
-	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
 	return retbits;
 }
 
@@ -827,13 +1122,35 @@ struct pl08x_lli_build_data {
  * - prefers the destination bus if both available
  * - prefers bus with fixed address (i.e. peripheral)
  */
-static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd,
-	struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl)
+static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x,
+				    struct pl08x_lli_build_data *bd,
+				    struct pl08x_bus_data **mbus,
+				    struct pl08x_bus_data **sbus,
+				    u32 cctl)
 {
-	if (!(cctl & PL080_CONTROL_DST_INCR)) {
+	bool dst_incr;
+	bool src_incr;
+
+	/*
+	 * The FTDMAC020 only supports memory-to-memory transfer, so
+	 * source and destination always increase.
+	 */
+	if (pl08x->vd->ftdmac020) {
+		dst_incr = true;
+		src_incr = true;
+	} else {
+		dst_incr = !!(cctl & PL080_CONTROL_DST_INCR);
+		src_incr = !!(cctl & PL080_CONTROL_SRC_INCR);
+	}
+
+	/*
+	 * If either bus is not advancing, i.e. it is a peripheral, that
+	 * one becomes master
+	 */
+	if (!dst_incr) {
 		*mbus = &bd->dstbus;
 		*sbus = &bd->srcbus;
-	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+	} else if (!src_incr) {
 		*mbus = &bd->srcbus;
 		*sbus = &bd->dstbus;
 	} else {
@@ -871,10 +1188,16 @@ static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
 	if (pl08x->vd->pl080s)
 		llis_va[PL080S_LLI_CCTL2] = cctl2;
 
-	if (cctl & PL080_CONTROL_SRC_INCR)
+	if (pl08x->vd->ftdmac020) {
+		/* FIXME: only memcpy so far so both increase */
 		bd->srcbus.addr += len;
-	if (cctl & PL080_CONTROL_DST_INCR)
 		bd->dstbus.addr += len;
+	} else {
+		if (cctl & PL080_CONTROL_SRC_INCR)
+			bd->srcbus.addr += len;
+		if (cctl & PL080_CONTROL_DST_INCR)
+			bd->dstbus.addr += len;
+	}
 
 	BUG_ON(bd->remainder < len);
 
@@ -885,12 +1208,12 @@ static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
 			struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
 			int num_llis, size_t *total_bytes)
 {
-	*cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
+	*cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len);
 	pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
 	(*total_bytes) += len;
 }
 
-#ifdef VERBOSE_DEBUG
+#if 1
 static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
 			   const u32 *llis_va, int num_llis)
 {
@@ -955,14 +1278,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	cctl = txd->cctl;
 
 	/* Find maximum width of the source bus */
-	bd.srcbus.maxwidth =
-		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
-				       PL080_CONTROL_SWIDTH_SHIFT);
+	bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true);
 
 	/* Find maximum width of the destination bus */
-	bd.dstbus.maxwidth =
-		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
-				       PL080_CONTROL_DWIDTH_SHIFT);
+	bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false);
 
 	list_for_each_entry(dsg, &txd->dsg_list, node) {
 		total_bytes = 0;
@@ -974,7 +1293,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		bd.srcbus.buswidth = bd.srcbus.maxwidth;
 		bd.dstbus.buswidth = bd.dstbus.maxwidth;
 
-		pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
+		pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl);
 
 		dev_vdbg(&pl08x->adev->dev,
 			"src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n",
@@ -1011,8 +1330,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		 *   supported. Thus, we can't have scattered addresses.
 		 */
 		if (!bd.remainder) {
-			u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
-				PL080_CONFIG_FLOW_CONTROL_SHIFT;
+			u32 fc;
+
+			/* FTDMAC020 only does memory-to-memory */
+			if (pl08x->vd->ftdmac020)
+				fc = PL080_FLOW_MEM2MEM;
+			else
+				fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+					PL080_CONFIG_FLOW_CONTROL_SHIFT;
 			if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
 					(fc <= PL080_FLOW_SRC2DST_SRC))) {
 				dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
@@ -1029,8 +1354,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 				return 0;
 			}
 
-			cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
-					bd.dstbus.buswidth, 0);
+			cctl = pl08x_lli_control_bits(pl08x, cctl,
+					bd.srcbus.buswidth, bd.dstbus.buswidth,
+					0);
 			pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
 					0, cctl, 0);
 			break;
@@ -1109,8 +1435,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 					"size 0x%08zx (remainder 0x%08zx)\n",
 					__func__, lli_len, bd.remainder);
 
-				cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
-					bd.dstbus.buswidth, tsize);
+				cctl = pl08x_lli_control_bits(pl08x, cctl,
+					bd.srcbus.buswidth, bd.dstbus.buswidth,
+					tsize);
 				pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
 						lli_len, cctl, tsize);
 				total_bytes += lli_len;
@@ -1153,7 +1480,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		/* The final LLI terminates the LLI. */
 		last_lli[PL080_LLI_LLI] = 0;
 		/* The final LLI element shall also fire an interrupt. */
-		last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
+		if (pl08x->vd->ftdmac020)
+			last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK;
+		else
+			last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
 	}
 
 	pl08x_dump_lli(pl08x, llis_va, num_llis);
@@ -1319,14 +1649,25 @@ static const struct burst_table burst_sizes[] = {
  * will be routed to each port.  We try to have source and destination
  * on separate ports, but always respect the allowable settings.
  */
-static u32 pl08x_select_bus(u8 src, u8 dst)
+static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst)
 {
 	u32 cctl = 0;
+	u32 dst_ahb2;
+	u32 src_ahb2;
+
+	/* The FTDMAC020 use different bits to indicate src/dst bus */
+	if (ftdmac020) {
+		dst_ahb2 = FTDMAC020_LLI_DST_SEL;
+		src_ahb2 = FTDMAC020_LLI_SRC_SEL;
+	} else {
+		dst_ahb2 = PL080_CONTROL_DST_AHB2;
+		src_ahb2 = PL080_CONTROL_SRC_AHB2;
+	}
 
 	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
-		cctl |= PL080_CONTROL_DST_AHB2;
+		cctl |= dst_ahb2;
 	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
-		cctl |= PL080_CONTROL_SRC_AHB2;
+		cctl |= src_ahb2;
 
 	return cctl;
 }
@@ -1414,50 +1755,14 @@ static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan)
 {
 	struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
 
-	if (txd) {
+	if (txd)
 		INIT_LIST_HEAD(&txd->dsg_list);
-
-		/* Always enable error and terminal interrupts */
-		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
-			    PL080_CONFIG_TC_IRQ_MASK;
-	}
 	return txd;
 }
 
-/*
- * Initialize a descriptor to be used by memcpy submit
- */
-static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
-		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
-		size_t len, unsigned long flags)
+static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x)
 {
-	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
-	struct pl08x_driver_data *pl08x = plchan->host;
-	struct pl08x_txd *txd;
-	struct pl08x_sg *dsg;
 	u32 cctl = 0;
-	int ret;
-
-	txd = pl08x_get_txd(plchan);
-	if (!txd) {
-		dev_err(&pl08x->adev->dev,
-			"%s no memory for descriptor\n", __func__);
-		return NULL;
-	}
-
-	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
-	if (!dsg) {
-		pl08x_free_txd(pl08x, txd);
-		return NULL;
-	}
-	list_add_tail(&dsg->node, &txd->dsg_list);
-
-	dsg->src_addr = src;
-	dsg->dst_addr = dest;
-	dsg->len = len;
-
-	/* Set platform data for m2m */
-	txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
 
 	/* Conjure cctl */
 	switch (pl08x->pd->memcpy_burst_size) {
@@ -1531,10 +1836,95 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
 
 	if (pl08x->vd->dualmaster)
-		cctl |= pl08x_select_bus(pl08x->mem_buses,
+		cctl |= pl08x_select_bus(false,
+					 pl08x->mem_buses,
+					 pl08x->mem_buses);
+
+	return cctl;
+}
+
+static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+	u32 cctl = 0;
+
+	/* Conjure cctl */
+	switch (pl08x->pd->memcpy_bus_width) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case PL08X_BUS_WIDTH_8_BITS:
+		cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_16_BITS:
+		cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_32_BITS:
+		cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	}
+
+	/*
+	 * By default mask the TC IRQ on all LLIs, it will be unmasked on
+	 * the last LLI item by other code.
+	 */
+	cctl |= FTDMAC020_LLI_TC_MSK;
+
+	/*
+	 * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL
+	 * and FTDMAC020_LLI_DSTAD_CTL as zero
+	 */
+	if (pl08x->vd->dualmaster)
+		cctl |= pl08x_select_bus(true,
+					 pl08x->mem_buses,
 					 pl08x->mem_buses);
 
-	txd->cctl = cctl;
+	return cctl;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct pl08x_sg *dsg;
+	int ret;
+
+	txd = pl08x_get_txd(plchan);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no memory for descriptor\n", __func__);
+		return NULL;
+	}
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg) {
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+	if (pl08x->vd->ftdmac020) {
+		/* Writing CCFG zero ENABLES all interrupts */
+		txd->ccfg = 0;
+		txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x);
+	} else {
+		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+			PL080_CONFIG_TC_IRQ_MASK |
+			PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		txd->cctl = pl08x_memcpy_cctl(pl08x);
+	}
 
 	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
 	if (!ret) {
@@ -1598,7 +1988,7 @@ static struct pl08x_txd *pl08x_init_txd(
 		return NULL;
 	}
 
-	txd->cctl = cctl | pl08x_select_bus(src_buses, dst_buses);
+	txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses);
 
 	if (plchan->cfg.device_fc)
 		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
@@ -1607,7 +1997,9 @@ static struct pl08x_txd *pl08x_init_txd(
 		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
 			PL080_FLOW_PER2MEM;
 
-	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+	txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+		PL080_CONFIG_TC_IRQ_MASK |
+		tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
 
 	ret = pl08x_request_mux(plchan);
 	if (ret < 0) {
@@ -1884,6 +2276,11 @@ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
 	/* The Nomadik variant does not have the config register */
 	if (pl08x->vd->nomadik)
 		return;
+	/* The FTDMAC020 variant does this in another register */
+	if (pl08x->vd->ftdmac020) {
+		writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR);
+		return;
+	}
 	writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
 }
 
@@ -2310,7 +2707,7 @@ static inline int pl08x_of_probe(struct amba_device *adev,
 static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 {
 	struct pl08x_driver_data *pl08x;
-	const struct vendor_data *vd = id->data;
+	struct vendor_data *vd = id->data;
 	struct device_node *np = adev->dev.of_node;
 	u32 tsfr_size;
 	int ret = 0;
@@ -2336,6 +2733,34 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->adev = adev;
 	pl08x->vd = vd;
 
+	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (!pl08x->base) {
+		ret = -ENOMEM;
+		goto out_no_ioremap;
+	}
+
+	if (vd->ftdmac020) {
+		u32 val;
+
+		val = readl(pl08x->base + FTDMAC020_REVISION);
+		dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n",
+			 (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+		val = readl(pl08x->base + FTDMAC020_FEATURE);
+		dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, "
+			 "%s built-in bridge, %s, %s linked lists\n",
+			 (val >> 12) & 0x0f,
+			 (val & BIT(10)) ? "no" : "has",
+			 (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0",
+			 (val & BIT(8)) ? "supports" : "does not support");
+
+		/* Vendor data from feature register */
+		if (!(val & BIT(8)))
+			dev_warn(&pl08x->adev->dev,
+				 "linked lists not supported, required\n");
+		vd->channels = (val >> 12) & 0x0f;
+		vd->dualmaster = !!(val & BIT(9));
+	}
+
 	/* Initialize memcpy engine */
 	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
 	pl08x->memcpy.dev = &adev->dev;
@@ -2352,6 +2777,9 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
 	pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
 	pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	if (vd->ftdmac020)
+		pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
+
 
 	/*
 	 * Initialize slave engine, if the block has no signals, that means
@@ -2422,19 +2850,18 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 		goto out_no_lli_pool;
 	}
 
-	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
-	if (!pl08x->base) {
-		ret = -ENOMEM;
-		goto out_no_ioremap;
-	}
-
 	/* Turn on the PL08x */
 	pl08x_ensure_on(pl08x);
 
-	/* Attach the interrupt handler */
-	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	/* Clear any pending interrupts */
+	if (vd->ftdmac020)
+		/* This variant has error IRQs in bits 16-19 */
+		writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR);
+	else
+		writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
 	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
 
+	/* Attach the interrupt handler */
 	ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
 	if (ret) {
 		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
@@ -2455,7 +2882,25 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 
 		ch->id = i;
 		ch->base = pl08x->base + PL080_Cx_BASE(i);
-		ch->reg_config = ch->base + vd->config_offset;
+		if (vd->ftdmac020) {
+			/* FTDMA020 has a special channel busy register */
+			ch->reg_busy = ch->base + FTDMAC020_CH_BUSY;
+			ch->reg_config = ch->base + FTDMAC020_CH_CFG;
+			ch->reg_control = ch->base + FTDMAC020_CH_CSR;
+			ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR;
+			ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR;
+			ch->reg_lli = ch->base + FTDMAC020_CH_LLP;
+			ch->ftdmac020 = true;
+		} else {
+			ch->reg_config = ch->base + vd->config_offset;
+			ch->reg_control = ch->base + PL080_CH_CONTROL;
+			ch->reg_src = ch->base + PL080_CH_SRC_ADDR;
+			ch->reg_dst = ch->base + PL080_CH_DST_ADDR;
+			ch->reg_lli = ch->base + PL080_CH_LLI;
+		}
+		if (vd->pl080s)
+			ch->pl080s = true;
+
 		spin_lock_init(&ch->lock);
 
 		/*
@@ -2537,11 +2982,11 @@ out_no_memcpy:
 out_no_phychans:
 	free_irq(adev->irq[0], pl08x);
 out_no_irq:
-	iounmap(pl08x->base);
-out_no_ioremap:
 	dma_pool_destroy(pl08x->pool);
 out_no_lli_pool:
 out_no_platdata:
+	iounmap(pl08x->base);
+out_no_ioremap:
 	kfree(pl08x);
 out_no_pl08x:
 	amba_release_regions(adev);
@@ -2582,6 +3027,12 @@ static struct vendor_data vendor_pl081 = {
 	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
+static struct vendor_data vendor_ftdmac020 = {
+	.config_offset = PL080_CH_CONFIG,
+	.ftdmac020 = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
 static struct amba_id pl08x_ids[] = {
 	/* Samsung PL080S variant */
 	{
@@ -2607,6 +3058,12 @@ static struct amba_id pl08x_ids[] = {
 		.mask	= 0x00ffffff,
 		.data	= &vendor_nomadik,
 	},
+	/* Faraday Technology FTDMAC020 */
+	{
+		.id	= 0x0003b080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_ftdmac020,
+	},
 	{ 0, 0 },
 };
 
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 580b5323a717..10124c9f9db5 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -44,7 +44,14 @@
 
 #define PL080_SYNC				(0x34)
 
-/* Per channel configuration registers */
+/* The Faraday Technology FTDMAC020 variant registers */
+#define FTDMAC020_CH_BUSY			(0x20)
+/* Identical to PL080_CONFIG */
+#define FTDMAC020_CSR				(0x24)
+/* Identical to PL080_SYNC */
+#define FTDMAC020_SYNC				(0x2C)
+#define FTDMAC020_REVISION			(0x30)
+#define FTDMAC020_FEATURE			(0x34)
 
 /* Per channel configuration registers */
 #define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
@@ -55,6 +62,13 @@
 #define PL080_CH_CONFIG				(0x10)
 #define PL080S_CH_CONTROL2			(0x10)
 #define PL080S_CH_CONFIG			(0x14)
+/* The Faraday FTDMAC020 derivative shuffles the registers around */
+#define FTDMAC020_CH_CSR			(0x00)
+#define FTDMAC020_CH_CFG			(0x04)
+#define FTDMAC020_CH_SRC_ADDR			(0x08)
+#define FTDMAC020_CH_DST_ADDR			(0x0C)
+#define FTDMAC020_CH_LLP			(0x10)
+#define FTDMAC020_CH_SIZE			(0x14)
 
 #define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
 #define PL080_LLI_ADDR_SHIFT			(2)
@@ -119,6 +133,73 @@
 #define PL080_FLOW_PER2MEM_PER			(0x6)
 #define PL080_FLOW_SRC2DST_SRC			(0x7)
 
+#define FTDMAC020_CH_CSR_TC_MSK			BIT(31)
+/* Later versions have a threshold in bits 24..26,  */
+#define FTDMAC020_CH_CSR_FIFOTH_MSK		(0x7 << 24)
+#define FTDMAC020_CH_CSR_FIFOTH_SHIFT		(24)
+#define FTDMAC020_CH_CSR_CHPR1_MSK		(0x3 << 22)
+#define FTDMAC020_CH_CSR_PROT3			BIT(21)
+#define FTDMAC020_CH_CSR_PROT2			BIT(20)
+#define FTDMAC020_CH_CSR_PROT1			BIT(19)
+#define FTDMAC020_CH_CSR_SRC_SIZE_MSK		(0x7 << 16)
+#define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT		(16)
+#define FTDMAC020_CH_CSR_ABT			BIT(15)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK		(0x7 << 11)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT	(11)
+#define FTDMAC020_CH_CSR_DST_WIDTH_MSK		(0x7 << 8)
+#define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT	(8)
+#define FTDMAC020_CH_CSR_MODE			BIT(7)
+/* 00 = increase, 01 = decrease, 10 = fix */
+#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK		(0x3 << 5)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT	(5)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK		(0x3 << 3)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT	(3)
+#define FTDMAC020_CH_CSR_SRC_SEL		BIT(2)
+#define FTDMAC020_CH_CSR_DST_SEL		BIT(1)
+#define FTDMAC020_CH_CSR_EN			BIT(0)
+
+/* FIFO threshold setting */
+#define FTDMAC020_CH_CSR_FIFOTH_1		(0x0)
+#define FTDMAC020_CH_CSR_FIFOTH_2		(0x1)
+#define FTDMAC020_CH_CSR_FIFOTH_4		(0x2)
+#define FTDMAC020_CH_CSR_FIFOTH_8		(0x3)
+#define FTDMAC020_CH_CSR_FIFOTH_16		(0x4)
+/* The FTDMAC020 supports 64bit wide transfers */
+#define FTDMAC020_WIDTH_64BIT			(0x3)
+/* Address can be increased, decreased or fixed */
+#define FTDMAC020_CH_CSR_SRCAD_CTL_INC		(0x0)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_DEC		(0x1)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED	(0x2)
+
+#define FTDMAC020_CH_CFG_LLP_CNT_MASK		(0xf << 16)
+#define FTDMAC020_CH_CFG_LLP_CNT_SHIFT		(16)
+#define FTDMAC020_CH_CFG_BUSY			BIT(8)
+#define FTDMAC020_CH_CFG_INT_ABT_MASK		BIT(2)
+#define FTDMAC020_CH_CFG_INT_ERR_MASK		BIT(1)
+#define FTDMAC020_CH_CFG_INT_TC_MASK		BIT(0)
+
+/* Inside the LLIs, the applicable CSR fields are mapped differently */
+#define FTDMAC020_LLI_TC_MSK			BIT(28)
+#define FTDMAC020_LLI_SRC_WIDTH_MSK		(0x7 << 25)
+#define FTDMAC020_LLI_SRC_WIDTH_SHIFT		(25)
+#define FTDMAC020_LLI_DST_WIDTH_MSK		(0x7 << 22)
+#define FTDMAC020_LLI_DST_WIDTH_SHIFT		(22)
+#define FTDMAC020_LLI_SRCAD_CTL_MSK		(0x3 << 20)
+#define FTDMAC020_LLI_SRCAD_CTL_SHIFT		(20)
+#define FTDMAC020_LLI_DSTAD_CTL_MSK		(0x3 << 18)
+#define FTDMAC020_LLI_DSTAD_CTL_SHIFT		(18)
+#define FTDMAC020_LLI_SRC_SEL			BIT(17)
+#define FTDMAC020_LLI_DST_SEL			BIT(16)
+#define FTDMAC020_LLI_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT	(0)
+
+#define FTDMAC020_CFG_LLP_CNT_MASK		(0x0f << 16)
+#define FTDMAC020_CFG_LLP_CNT_SHIFT		(16)
+#define FTDMAC020_CFG_BUSY			BIT(8)
+#define FTDMAC020_CFG_INT_ABT_MSK		BIT(2)
+#define FTDMAC020_CFG_INT_ERR_MSK		BIT(1)
+#define FTDMAC020_CFG_INT_TC_MSK		BIT(0)
+
 /* DMA linked list chain structure */
 
 struct pl080_lli {
-- 
cgit v1.2.3


From fcc785417fba2dc81d2f6ba888caaff463f4f441 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 20 May 2017 23:42:54 +0200
Subject: dmaengine: pl08x: use GENMASK() to create bitmasks

This switches the arbitrary shifting of hex constants in
the pl080 header to use GENMASK().

Suggested-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/amba/pl080.h | 50 +++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 10124c9f9db5..ab036b6b1804 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -70,12 +70,12 @@
 #define FTDMAC020_CH_LLP			(0x10)
 #define FTDMAC020_CH_SIZE			(0x14)
 
-#define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
+#define PL080_LLI_ADDR_MASK			GENMASK(31, 2)
 #define PL080_LLI_ADDR_SHIFT			(2)
 #define PL080_LLI_LM_AHB2			BIT(0)
 
 #define PL080_CONTROL_TC_IRQ_EN			BIT(31)
-#define PL080_CONTROL_PROT_MASK			(0x7 << 28)
+#define PL080_CONTROL_PROT_MASK			GENMASK(30, 28)
 #define PL080_CONTROL_PROT_SHIFT		(28)
 #define PL080_CONTROL_PROT_CACHE		BIT(30)
 #define PL080_CONTROL_PROT_BUFF			BIT(29)
@@ -84,16 +84,16 @@
 #define PL080_CONTROL_SRC_INCR			BIT(26)
 #define PL080_CONTROL_DST_AHB2			BIT(25)
 #define PL080_CONTROL_SRC_AHB2			BIT(24)
-#define PL080_CONTROL_DWIDTH_MASK		(0x7 << 21)
+#define PL080_CONTROL_DWIDTH_MASK		GENMASK(23, 21)
 #define PL080_CONTROL_DWIDTH_SHIFT		(21)
-#define PL080_CONTROL_SWIDTH_MASK		(0x7 << 18)
+#define PL080_CONTROL_SWIDTH_MASK		GENMASK(20, 18)
 #define PL080_CONTROL_SWIDTH_SHIFT		(18)
-#define PL080_CONTROL_DB_SIZE_MASK		(0x7 << 15)
+#define PL080_CONTROL_DB_SIZE_MASK		GENMASK(17, 15)
 #define PL080_CONTROL_DB_SIZE_SHIFT		(15)
-#define PL080_CONTROL_SB_SIZE_MASK		(0x7 << 12)
+#define PL080_CONTROL_SB_SIZE_MASK		GENMASK(14, 12)
 #define PL080_CONTROL_SB_SIZE_SHIFT		(12)
-#define PL080_CONTROL_TRANSFER_SIZE_MASK	(0xfff << 0)
-#define PL080S_CONTROL_TRANSFER_SIZE_MASK	(0x1ffffff << 0)
+#define PL080_CONTROL_TRANSFER_SIZE_MASK	GENMASK(11, 0)
+#define PL080S_CONTROL_TRANSFER_SIZE_MASK	GENMASK(24, 0)
 #define PL080_CONTROL_TRANSFER_SIZE_SHIFT	(0)
 
 #define PL080_BSIZE_1				(0x0)
@@ -116,11 +116,11 @@
 #define PL080_CONFIG_LOCK			BIT(16)
 #define PL080_CONFIG_TC_IRQ_MASK		BIT(15)
 #define PL080_CONFIG_ERR_IRQ_MASK		BIT(14)
-#define PL080_CONFIG_FLOW_CONTROL_MASK		(0x7 << 11)
+#define PL080_CONFIG_FLOW_CONTROL_MASK		GENMASK(13, 11)
 #define PL080_CONFIG_FLOW_CONTROL_SHIFT		(11)
-#define PL080_CONFIG_DST_SEL_MASK		(0xf << 6)
+#define PL080_CONFIG_DST_SEL_MASK		GENMASK(9, 6)
 #define PL080_CONFIG_DST_SEL_SHIFT		(6)
-#define PL080_CONFIG_SRC_SEL_MASK		(0xf << 1)
+#define PL080_CONFIG_SRC_SEL_MASK		GENMASK(4, 1)
 #define PL080_CONFIG_SRC_SEL_SHIFT		(1)
 #define PL080_CONFIG_ENABLE			BIT(0)
 
@@ -135,24 +135,24 @@
 
 #define FTDMAC020_CH_CSR_TC_MSK			BIT(31)
 /* Later versions have a threshold in bits 24..26,  */
-#define FTDMAC020_CH_CSR_FIFOTH_MSK		(0x7 << 24)
+#define FTDMAC020_CH_CSR_FIFOTH_MSK		GENMASK(26, 24)
 #define FTDMAC020_CH_CSR_FIFOTH_SHIFT		(24)
-#define FTDMAC020_CH_CSR_CHPR1_MSK		(0x3 << 22)
+#define FTDMAC020_CH_CSR_CHPR1_MSK		GENMASK(23, 22)
 #define FTDMAC020_CH_CSR_PROT3			BIT(21)
 #define FTDMAC020_CH_CSR_PROT2			BIT(20)
 #define FTDMAC020_CH_CSR_PROT1			BIT(19)
-#define FTDMAC020_CH_CSR_SRC_SIZE_MSK		(0x7 << 16)
+#define FTDMAC020_CH_CSR_SRC_SIZE_MSK		GENMASK(18, 16)
 #define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT		(16)
 #define FTDMAC020_CH_CSR_ABT			BIT(15)
-#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK		(0x7 << 11)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK		GENMASK(13, 11)
 #define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT	(11)
-#define FTDMAC020_CH_CSR_DST_WIDTH_MSK		(0x7 << 8)
+#define FTDMAC020_CH_CSR_DST_WIDTH_MSK		GENMASK(10, 8)
 #define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT	(8)
 #define FTDMAC020_CH_CSR_MODE			BIT(7)
 /* 00 = increase, 01 = decrease, 10 = fix */
-#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK		(0x3 << 5)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK		GENMASK(6, 5)
 #define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT	(5)
-#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK		(0x3 << 3)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK		GENMASK(4, 3)
 #define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT	(3)
 #define FTDMAC020_CH_CSR_SRC_SEL		BIT(2)
 #define FTDMAC020_CH_CSR_DST_SEL		BIT(1)
@@ -171,7 +171,7 @@
 #define FTDMAC020_CH_CSR_SRCAD_CTL_DEC		(0x1)
 #define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED	(0x2)
 
-#define FTDMAC020_CH_CFG_LLP_CNT_MASK		(0xf << 16)
+#define FTDMAC020_CH_CFG_LLP_CNT_MASK		GENMASK(19, 16)
 #define FTDMAC020_CH_CFG_LLP_CNT_SHIFT		(16)
 #define FTDMAC020_CH_CFG_BUSY			BIT(8)
 #define FTDMAC020_CH_CFG_INT_ABT_MASK		BIT(2)
@@ -180,20 +180,20 @@
 
 /* Inside the LLIs, the applicable CSR fields are mapped differently */
 #define FTDMAC020_LLI_TC_MSK			BIT(28)
-#define FTDMAC020_LLI_SRC_WIDTH_MSK		(0x7 << 25)
+#define FTDMAC020_LLI_SRC_WIDTH_MSK		GENMASK(27, 25)
 #define FTDMAC020_LLI_SRC_WIDTH_SHIFT		(25)
-#define FTDMAC020_LLI_DST_WIDTH_MSK		(0x7 << 22)
+#define FTDMAC020_LLI_DST_WIDTH_MSK		GENMASK(24, 22)
 #define FTDMAC020_LLI_DST_WIDTH_SHIFT		(22)
-#define FTDMAC020_LLI_SRCAD_CTL_MSK		(0x3 << 20)
+#define FTDMAC020_LLI_SRCAD_CTL_MSK		GENMASK(21, 20)
 #define FTDMAC020_LLI_SRCAD_CTL_SHIFT		(20)
-#define FTDMAC020_LLI_DSTAD_CTL_MSK		(0x3 << 18)
+#define FTDMAC020_LLI_DSTAD_CTL_MSK		GENMASK(19, 18)
 #define FTDMAC020_LLI_DSTAD_CTL_SHIFT		(18)
 #define FTDMAC020_LLI_SRC_SEL			BIT(17)
 #define FTDMAC020_LLI_DST_SEL			BIT(16)
-#define FTDMAC020_LLI_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define FTDMAC020_LLI_TRANSFER_SIZE_MASK	GENMASK(11, 0)
 #define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT	(0)
 
-#define FTDMAC020_CFG_LLP_CNT_MASK		(0x0f << 16)
+#define FTDMAC020_CFG_LLP_CNT_MASK		GENMASK(19, 16)
 #define FTDMAC020_CFG_LLP_CNT_SHIFT		(16)
 #define FTDMAC020_CFG_BUSY			BIT(8)
 #define FTDMAC020_CFG_INT_ABT_MSK		BIT(2)
-- 
cgit v1.2.3


From 28232a4317be7ad615f0f1b69dc8583fd580a8e3 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Sat, 20 May 2017 14:12:34 +0200
Subject: KVM: arm/arm64: Fix isues with GICv2 on GICv3 migration

We have been a little loose with our intermediate VMCR representation
where we had a 'ctlr' field, but we failed to differentiate between the
GICv2 GICC_CTLR and ICC_CTLR_EL1 layouts, and therefore ended up mapping
the wrong bits into the individual fields of the ICH_VMCR_EL2 when
emulating a GICv2 on a GICv3 system.

Fix this by using explicit fields for the VMCR bits instead.

Cc: Eric Auger <eric.auger@redhat.com>
Reported-by: wanghaibin <wanghaibin.wang@huawei.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/vgic-sys-reg-v3.c   | 10 ++++----
 include/linux/irqchip/arm-gic-v3.h |  4 ++++
 include/linux/irqchip/arm-gic.h    | 28 ++++++++++++++++++++---
 virt/kvm/arm/vgic/vgic-mmio-v2.c   | 16 +++++++++++--
 virt/kvm/arm/vgic/vgic-v2.c        | 28 ++++++++++++++++++++---
 virt/kvm/arm/vgic/vgic-v3.c        | 47 ++++++++++++++++++++++++++------------
 virt/kvm/arm/vgic/vgic.h           | 12 ++++++----
 7 files changed, 114 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 79f37e37d367..6260b69e5622 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		 * Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
 		 * The vgic_set_vmcr() will convert to ICH_VMCR layout.
 		 */
-		vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
-		vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
+		vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
+		vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
 		vgic_set_vmcr(vcpu, &vmcr);
 	} else {
 		val = 0;
@@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		 * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
 		 * Extract it directly using ICC_CTLR_EL1 reg definitions.
 		 */
-		val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
-		val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
+		val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
+		val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
 
 		p->regval = val;
 	}
@@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		p->regval = 0;
 
 	vgic_get_vmcr(vcpu, &vmcr);
-	if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
+	if (!vmcr.cbpr) {
 		if (p->is_write) {
 			vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
 				     ICC_BPR1_EL1_SHIFT;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index fffb91202bc9..1fa293a37f4a 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -417,6 +417,10 @@
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
 
+#define ICH_VMCR_ACK_CTL_SHIFT		2
+#define ICH_VMCR_ACK_CTL_MASK		(1 << ICH_VMCR_ACK_CTL_SHIFT)
+#define ICH_VMCR_FIQ_EN_SHIFT		3
+#define ICH_VMCR_FIQ_EN_MASK		(1 << ICH_VMCR_FIQ_EN_SHIFT)
 #define ICH_VMCR_CBPR_SHIFT		4
 #define ICH_VMCR_CBPR_MASK		(1 << ICH_VMCR_CBPR_SHIFT)
 #define ICH_VMCR_EOIM_SHIFT		9
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index dc30f3d057eb..d3453ee072fc 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -25,7 +25,18 @@
 #define GICC_ENABLE			0x1
 #define GICC_INT_PRI_THRESHOLD		0xf0
 
-#define GIC_CPU_CTRL_EOImodeNS		(1 << 9)
+#define GIC_CPU_CTRL_EnableGrp0_SHIFT	0
+#define GIC_CPU_CTRL_EnableGrp0		(1 << GIC_CPU_CTRL_EnableGrp0_SHIFT)
+#define GIC_CPU_CTRL_EnableGrp1_SHIFT	1
+#define GIC_CPU_CTRL_EnableGrp1		(1 << GIC_CPU_CTRL_EnableGrp1_SHIFT)
+#define GIC_CPU_CTRL_AckCtl_SHIFT	2
+#define GIC_CPU_CTRL_AckCtl		(1 << GIC_CPU_CTRL_AckCtl_SHIFT)
+#define GIC_CPU_CTRL_FIQEn_SHIFT	3
+#define GIC_CPU_CTRL_FIQEn		(1 << GIC_CPU_CTRL_FIQEn_SHIFT)
+#define GIC_CPU_CTRL_CBPR_SHIFT		4
+#define GIC_CPU_CTRL_CBPR		(1 << GIC_CPU_CTRL_CBPR_SHIFT)
+#define GIC_CPU_CTRL_EOImodeNS_SHIFT	9
+#define GIC_CPU_CTRL_EOImodeNS		(1 << GIC_CPU_CTRL_EOImodeNS_SHIFT)
 
 #define GICC_IAR_INT_ID_MASK		0x3ff
 #define GICC_INT_SPURIOUS		1023
@@ -84,8 +95,19 @@
 #define GICH_LR_EOI			(1 << 19)
 #define GICH_LR_HW			(1 << 31)
 
-#define GICH_VMCR_CTRL_SHIFT		0
-#define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_ENABLE_GRP0_SHIFT	0
+#define GICH_VMCR_ENABLE_GRP0_MASK	(1 << GICH_VMCR_ENABLE_GRP0_SHIFT)
+#define GICH_VMCR_ENABLE_GRP1_SHIFT	1
+#define GICH_VMCR_ENABLE_GRP1_MASK	(1 << GICH_VMCR_ENABLE_GRP1_SHIFT)
+#define GICH_VMCR_ACK_CTL_SHIFT		2
+#define GICH_VMCR_ACK_CTL_MASK		(1 << GICH_VMCR_ACK_CTL_SHIFT)
+#define GICH_VMCR_FIQ_EN_SHIFT		3
+#define GICH_VMCR_FIQ_EN_MASK		(1 << GICH_VMCR_FIQ_EN_SHIFT)
+#define GICH_VMCR_CBPR_SHIFT		4
+#define GICH_VMCR_CBPR_MASK		(1 << GICH_VMCR_CBPR_SHIFT)
+#define GICH_VMCR_EOI_MODE_SHIFT	9
+#define GICH_VMCR_EOI_MODE_MASK		(1 << GICH_VMCR_EOI_MODE_SHIFT)
+
 #define GICH_VMCR_PRIMASK_SHIFT		27
 #define GICH_VMCR_PRIMASK_MASK		(0x1f << GICH_VMCR_PRIMASK_SHIFT)
 #define GICH_VMCR_BINPOINT_SHIFT	21
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 0a4283ed9aa7..63e0bbdcddcc 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -226,7 +226,13 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
 
 	switch (addr & 0xff) {
 	case GIC_CPU_CTRL:
-		val = vmcr.ctlr;
+		val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT;
+		val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT;
+		val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT;
+		val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT;
+		val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT;
+		val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT;
+
 		break;
 	case GIC_CPU_PRIMASK:
 		/*
@@ -267,7 +273,13 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
 
 	switch (addr & 0xff) {
 	case GIC_CPU_CTRL:
-		vmcr.ctlr = val;
+		vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0);
+		vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1);
+		vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl);
+		vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn);
+		vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR);
+		vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS);
+
 		break;
 	case GIC_CPU_PRIMASK:
 		/*
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 504b4bd0d651..e4187e52bb26 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -177,7 +177,18 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 	u32 vmcr;
 
-	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) &
+		GICH_VMCR_ENABLE_GRP0_MASK;
+	vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) &
+		GICH_VMCR_ENABLE_GRP1_MASK;
+	vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) &
+		GICH_VMCR_ACK_CTL_MASK;
+	vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) &
+		GICH_VMCR_FIQ_EN_MASK;
+	vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) &
+		GICH_VMCR_CBPR_MASK;
+	vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) &
+		GICH_VMCR_EOI_MODE_MASK;
 	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
 		GICH_VMCR_ALIAS_BINPOINT_MASK;
 	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
@@ -195,8 +206,19 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 
 	vmcr = cpu_if->vgic_vmcr;
 
-	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
-			GICH_VMCR_CTRL_SHIFT;
+	vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >>
+		GICH_VMCR_ENABLE_GRP0_SHIFT;
+	vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >>
+		GICH_VMCR_ENABLE_GRP1_SHIFT;
+	vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >>
+		GICH_VMCR_ACK_CTL_SHIFT;
+	vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >>
+		GICH_VMCR_FIQ_EN_SHIFT;
+	vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >>
+		GICH_VMCR_CBPR_SHIFT;
+	vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >>
+		GICH_VMCR_EOI_MODE_SHIFT;
+
 	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
 			GICH_VMCR_ALIAS_BINPOINT_SHIFT;
 	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 6fe3f003636a..030248e669f6 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -159,15 +159,24 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u32 vmcr;
 
-	/*
-	 * Ignore the FIQen bit, because GIC emulation always implies
-	 * SRE=1 which means the vFIQEn bit is also RES1.
-	 */
-	vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) <<
-		 ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
-	vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
+	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+		vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) &
+			ICH_VMCR_ACK_CTL_MASK;
+		vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) &
+			ICH_VMCR_FIQ_EN_MASK;
+	} else {
+		/*
+		 * When emulating GICv3 on GICv3 with SRE=1 on the
+		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
+		 */
+		vmcr = ICH_VMCR_FIQ_EN_MASK;
+	}
+
+	vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
+	vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
 	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
 	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
 	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
@@ -180,17 +189,27 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u32 vmcr;
 
 	vmcr = cpu_if->vgic_vmcr;
 
-	/*
-	 * Ignore the FIQen bit, because GIC emulation always implies
-	 * SRE=1 which means the vFIQEn bit is also RES1.
-	 */
-	vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) <<
-			ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
-	vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+		vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >>
+			ICH_VMCR_ACK_CTL_SHIFT;
+		vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >>
+			ICH_VMCR_FIQ_EN_SHIFT;
+	} else {
+		/*
+		 * When emulating GICv3 on GICv3 with SRE=1 on the
+		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
+		 */
+		vmcrp->fiqen = 1;
+		vmcrp->ackctl = 0;
+	}
+
+	vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+	vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT;
 	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
 	vmcrp->bpr  = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
 	vmcrp->pmr  = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index da83e4caa272..bba7fa22a7f7 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -111,14 +111,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
  * registers regardless of the hardware backed GIC used.
  */
 struct vgic_vmcr {
-	u32	ctlr;
+	u32	grpen0;
+	u32	grpen1;
+
+	u32	ackctl;
+	u32	fiqen;
+	u32	cbpr;
+	u32	eoim;
+
 	u32	abpr;
 	u32	bpr;
 	u32	pmr;  /* Priority mask field in the GICC_PMR and
 		       * ICC_PMR_EL1 priority field format */
-	/* Below member variable are valid only for GICv3 */
-	u32	grpen0;
-	u32	grpen1;
 };
 
 struct vgic_reg_attr {
-- 
cgit v1.2.3


From e73ad5ff2f76da25390e9607cb549691639330c3 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 22 May 2017 15:30:03 -0700
Subject: mm, x86/mm: Make the batched unmap TLB flush API more generic

try_to_unmap_flush() used to open-code a rather x86-centric flush
sequence: local_flush_tlb() + flush_tlb_others().  Rearrange the
code so that the arch (only x86 for now) provides
arch_tlbbatch_add_mm() and arch_tlbbatch_flush() and the core code
calls those functions instead.

I'll want this for x86 because, to enable address space ids, I can't
support the flush_tlb_others() mode used by exising
try_to_unmap_flush() implementation with good performance.  I can
support the new API fairly easily, though.

I imagine that other architectures may be in a similar position.
Architectures with strong remote flush primitives (arm64?) may have
even worse performance problems with flush_tlb_others() the way that
try_to_unmap_flush() uses it.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/19f25a8581f9fb77876b7ff3b001f89835e34ea3.1495492063.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/tlbbatch.h | 16 ++++++++++++++++
 arch/x86/include/asm/tlbflush.h |  8 ++++++++
 arch/x86/mm/tlb.c               | 17 +++++++++++++++++
 include/linux/mm_types_task.h   | 15 +++++++++++----
 mm/rmap.c                       | 16 ++--------------
 5 files changed, 54 insertions(+), 18 deletions(-)
 create mode 100644 arch/x86/include/asm/tlbbatch.h

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..01a6de16fb96
--- /dev/null
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -0,0 +1,16 @@
+#ifndef _ARCH_X86_TLBBATCH_H
+#define _ARCH_X86_TLBBATCH_H
+
+#include <linux/cpumask.h>
+
+#ifdef CONFIG_SMP
+struct arch_tlbflush_unmap_batch {
+	/*
+	 * Each bit set is a CPU that potentially has a TLB entry for one of
+	 * the PFNs being flushed..
+	 */
+	struct cpumask cpumask;
+};
+#endif
+
+#endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index b9db0f8fef55..8f6e2f87511b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -329,6 +329,14 @@ static inline void reset_lazy_tlbstate(void)
 	this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
 }
 
+static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+					struct mm_struct *mm)
+{
+	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+}
+
+extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4d303864b310..743e4c6b4529 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -395,6 +395,23 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 	}
 }
 
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+	int cpu = get_cpu();
+
+	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
+		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+		local_flush_tlb();
+		trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+	}
+
+	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
+		flush_tlb_others(&batch->cpumask, NULL, 0, TLB_FLUSH_ALL);
+	cpumask_clear(&batch->cpumask);
+
+	put_cpu();
+}
+
 static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
 			     size_t count, loff_t *ppos)
 {
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 136dfdf63ba1..fc412fbd80bd 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -14,6 +14,10 @@
 
 #include <asm/page.h>
 
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+#include <asm/tlbbatch.h>
+#endif
+
 #define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 #define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
 		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
@@ -67,12 +71,15 @@ struct page_frag {
 struct tlbflush_unmap_batch {
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/*
-	 * Each bit set is a CPU that potentially has a TLB entry for one of
-	 * the PFNs being flushed. See set_tlb_ubc_flush_pending().
+	 * The arch code makes the following promise: generic code can modify a
+	 * PTE, then call arch_tlbbatch_add_mm() (which internally provides all
+	 * needed barriers), then call arch_tlbbatch_flush(), and the entries
+	 * will be flushed on all CPUs by the time that arch_tlbbatch_flush()
+	 * returns.
 	 */
-	struct cpumask cpumask;
+	struct arch_tlbflush_unmap_batch arch;
 
-	/* True if any bit in cpumask is set */
+	/* True if a flush is needed. */
 	bool flush_required;
 
 	/*
diff --git a/mm/rmap.c b/mm/rmap.c
index d405f0e0ee96..130c238fe384 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -579,25 +579,13 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 void try_to_unmap_flush(void)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
-	int cpu;
 
 	if (!tlb_ubc->flush_required)
 		return;
 
-	cpu = get_cpu();
-
-	if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
-		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-		local_flush_tlb();
-		trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
-	}
-
-	if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
-		flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
-	cpumask_clear(&tlb_ubc->cpumask);
+	arch_tlbbatch_flush(&tlb_ubc->arch);
 	tlb_ubc->flush_required = false;
 	tlb_ubc->writable = false;
-	put_cpu();
 }
 
 /* Flush iff there are potentially writable TLB entries that can race with IO */
@@ -613,7 +601,7 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
-	cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
+	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
 	tlb_ubc->flush_required = true;
 
 	/*
-- 
cgit v1.2.3


From 71ebc9a3795818eab52e81bbcbdfae130ee35d9e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 16 May 2017 12:10:42 +0100
Subject: dma-buf/sync-file: Defer creation of sync_file->name

Constructing the name takes the majority of the time for allocating a
sync_file to wrap a fence, and the name is very rarely used (only via
the sync_file status user interface). To reduce the impact on the common
path (that of creating sync_file to pass around), defer the construction
of the name until it is first used.

v2: Update kerneldoc (kbuild test robot)
v3: sync_debug.c was peeking at the name
v4: Comment upon the potential race between two users of
sync_file_get_name() and claim that such a race is below the level of
notice. However, to prevent any future nuisance, use a global spinlock
to serialize the assignment of the name.
v5: Completely avoid the read/write race by only storing the name passed
in from the user inside sync_file->user_name and passing in a buffer to
dynamically construct the name otherwise.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170516111042.24719-1-chris@chris-wilson.co.uk
---
 drivers/dma-buf/sync_debug.c |  4 +++-
 drivers/dma-buf/sync_file.c  | 39 ++++++++++++++++++++++++++++++++-------
 include/linux/sync_file.h    | 11 +++++++++--
 3 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index a0d780ab68c3..82a6e7f6d37f 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -132,9 +132,11 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
 static void sync_print_sync_file(struct seq_file *s,
 				  struct sync_file *sync_file)
 {
+	char buf[128];
 	int i;
 
-	seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
+	seq_printf(s, "[%p] %s: %s\n", sync_file,
+		   sync_file_get_name(sync_file, buf, sizeof(buf)),
 		   sync_status_str(dma_fence_get_status(sync_file->fence)));
 
 	if (dma_fence_is_array(sync_file->fence)) {
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index dc89b1d484e8..545e2c5c4815 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -80,11 +80,6 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
 
 	sync_file->fence = dma_fence_get(fence);
 
-	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
-		 fence->ops->get_driver_name(fence),
-		 fence->ops->get_timeline_name(fence), fence->context,
-		 fence->seqno);
-
 	return sync_file;
 }
 EXPORT_SYMBOL(sync_file_create);
@@ -129,6 +124,36 @@ struct dma_fence *sync_file_get_fence(int fd)
 }
 EXPORT_SYMBOL(sync_file_get_fence);
 
+/**
+ * sync_file_get_name - get the name of the sync_file
+ * @sync_file:		sync_file to get the fence from
+ * @buf:		destination buffer to copy sync_file name into
+ * @len:		available size of destination buffer.
+ *
+ * Each sync_file may have a name assigned either by the user (when merging
+ * sync_files together) or created from the fence it contains. In the latter
+ * case construction of the name is deferred until use, and so requires
+ * sync_file_get_name().
+ *
+ * Returns: a string representing the name.
+ */
+char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
+{
+	if (sync_file->user_name[0]) {
+		strlcpy(buf, sync_file->user_name, len);
+	} else {
+		struct dma_fence *fence = sync_file->fence;
+
+		snprintf(buf, len, "%s-%s%llu-%d",
+			 fence->ops->get_driver_name(fence),
+			 fence->ops->get_timeline_name(fence),
+			 fence->context,
+			 fence->seqno);
+	}
+
+	return buf;
+}
+
 static int sync_file_set_fence(struct sync_file *sync_file,
 			       struct dma_fence **fences, int num_fences)
 {
@@ -266,7 +291,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		goto err;
 	}
 
-	strlcpy(sync_file->name, name, sizeof(sync_file->name));
+	strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
 	return sync_file;
 
 err:
@@ -413,7 +438,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 	}
 
 no_fences:
-	strlcpy(info.name, sync_file->name, sizeof(info.name));
+	sync_file_get_name(sync_file, info.name, sizeof(info.name));
 	info.status = dma_fence_is_signaled(sync_file->fence);
 	info.num_fences = num_fences;
 
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index d37beefdfbd5..5726107963b2 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -23,7 +23,6 @@
 /**
  * struct sync_file - sync file to export to the userspace
  * @file:		file representing this fence
- * @name:		name of sync_file.  Useful for debugging
  * @sync_file_list:	membership in global file list
  * @wq:			wait queue for fence signaling
  * @fence:		fence with the fences in the sync_file
@@ -31,7 +30,14 @@
  */
 struct sync_file {
 	struct file		*file;
-	char			name[32];
+	/**
+	 * @user_name:
+	 *
+	 * Name of the sync file provided by userspace, for merged fences.
+	 * Otherwise generated through driver callbacks (in which case the
+	 * entire array is 0).
+	 */
+	char			user_name[32];
 #ifdef CONFIG_DEBUG_FS
 	struct list_head	sync_file_list;
 #endif
@@ -46,5 +52,6 @@ struct sync_file {
 
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);
+char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len);
 
 #endif /* _LINUX_SYNC_H */
-- 
cgit v1.2.3


From 41c25707d21716826e3c1f60967f5550610ec1c9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 May 2017 12:03:48 -0400
Subject: cpuset: consider dying css as offline

In most cases, a cgroup controller don't care about the liftimes of
cgroups.  For the controller, a css becomes online when ->css_online()
is called on it and offline when ->css_offline() is called.

However, cpuset is special in that the user interface it exposes cares
whether certain cgroups exist or not.  Combined with the RCU delay
between cgroup removal and css offlining, this can lead to user
visible behavior oddities where operations which should succeed after
cgroup removals fail for some time period.  The effects of cgroup
removals are delayed when seen from userland.

This patch adds css_is_dying() which tests whether offline is pending
and updates is_cpuset_online() so that the function returns false also
while offline is pending.  This gets rid of the userland visible
delays.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Link: http://lkml.kernel.org/r/327ca1f5-7957-fbb9-9e5f-9ba149d40ba2@oracle.com
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 20 ++++++++++++++++++++
 kernel/cgroup/cpuset.c |  4 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed2573e149fa..710a005c6b7a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -343,6 +343,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 	return true;
 }
 
+/**
+ * css_is_dying - test whether the specified css is dying
+ * @css: target css
+ *
+ * Test whether @css is in the process of offlining or already offline.  In
+ * most cases, ->css_online() and ->css_offline() callbacks should be
+ * enough; however, the actual offline operations are RCU delayed and this
+ * test returns %true also when @css is scheduled to be offlined.
+ *
+ * This is useful, for example, when the use case requires synchronous
+ * behavior with respect to cgroup removal.  cgroup removal schedules css
+ * offlining but the css can seem alive while the operation is being
+ * delayed.  If the delay affects user visible semantics, this test can be
+ * used to resolve the situation.
+ */
+static inline bool css_is_dying(struct cgroup_subsys_state *css)
+{
+	return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
+}
+
 /**
  * css_put - put a css reference
  * @css: target css
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f6501f4f6040..ae643412948a 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -176,9 +176,9 @@ typedef enum {
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
-static inline bool is_cpuset_online(const struct cpuset *cs)
+static inline bool is_cpuset_online(struct cpuset *cs)
 {
-	return test_bit(CS_ONLINE, &cs->flags);
+	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
 }
 
 static inline int is_cpu_exclusive(const struct cpuset *cs)
-- 
cgit v1.2.3


From 9d7650c25498e4f51213fe48eddde5778434f375 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 23 May 2017 09:41:19 +0300
Subject: qed: Align DP_ERR style with other DP macros

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index c70ac13a97e6..ff590cb37a00 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -700,11 +700,13 @@ struct qed_common_ops {
 	(((value) >> (name ## _SHIFT)) & name ## _MASK)
 
 /* Debug print definitions */
-#define DP_ERR(cdev, fmt, ...)						     \
-		pr_err("[%s:%d(%s)]" fmt,				     \
-		       __func__, __LINE__,				     \
-		       DP_NAME(cdev) ? DP_NAME(cdev) : "",		     \
-		       ## __VA_ARGS__)					     \
+#define DP_ERR(cdev, fmt, ...)					\
+	do {							\
+		pr_err("[%s:%d(%s)]" fmt,			\
+		       __func__, __LINE__,			\
+		       DP_NAME(cdev) ? DP_NAME(cdev) : "",	\
+		       ## __VA_ARGS__);				\
+	} while (0)
 
 #define DP_NOTICE(cdev, fmt, ...)				      \
 	do {							      \
-- 
cgit v1.2.3


From ae33666ab89675968d77753d18452b1ef654c43a Mon Sep 17 00:00:00 2001
From: Tomer Tayar <Tomer.Tayar@cavium.com>
Date: Tue, 23 May 2017 09:41:26 +0300
Subject: qed: Provide MBI information in dev_info

Pass additional information about package installed on persistent memory
so that protocol drivers would be able to log it.

Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h  |  6 ++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c |  3 +++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 30 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 12 ++++++++++++
 include/linux/qed/qed_if.h                 | 17 +++++++++++++++++
 5 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 4755d0b33b90..802c162d8474 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -11782,6 +11782,12 @@ struct nvm_cfg1_glob {
 	u32 led_global_settings;
 	u32 generic_cont1;
 	u32 mbi_version;
+#define NVM_CFG1_GLOB_MBI_VERSION_0_MASK		0x000000FF
+#define NVM_CFG1_GLOB_MBI_VERSION_0_OFFSET		0
+#define NVM_CFG1_GLOB_MBI_VERSION_1_MASK		0x0000FF00
+#define NVM_CFG1_GLOB_MBI_VERSION_1_OFFSET		8
+#define NVM_CFG1_GLOB_MBI_VERSION_2_MASK		0x00FF0000
+#define NVM_CFG1_GLOB_MBI_VERSION_2_OFFSET		16
 	u32 mbi_date;
 	u32 misc_sig;
 	u32 device_capabilities;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 3043dcce125c..b5313c561fa2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -281,6 +281,9 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 			qed_mcp_get_mfw_ver(QED_LEADING_HWFN(cdev), ptt,
 					    &dev_info->mfw_rev, NULL);
 
+			qed_mcp_get_mbi_ver(QED_LEADING_HWFN(cdev), ptt,
+					    &dev_info->mbi_version);
+
 			qed_mcp_get_flash_size(QED_LEADING_HWFN(cdev), ptt,
 					       &dev_info->flash_size);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index b32e8190f3fb..fc49c75e6c4b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1523,6 +1523,36 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
+			struct qed_ptt *p_ptt, u32 *p_mbi_ver)
+{
+	u32 nvm_cfg_addr, nvm_cfg1_offset, mbi_ver_addr;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	/* Read the address of the nvm_cfg */
+	nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0);
+	if (!nvm_cfg_addr) {
+		DP_NOTICE(p_hwfn, "Shared memory not initialized\n");
+		return -EINVAL;
+	}
+
+	/* Read the offset of nvm_cfg1 */
+	nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4);
+
+	mbi_ver_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+		       offsetof(struct nvm_cfg1, glob) +
+		       offsetof(struct nvm_cfg1_glob, mbi_version);
+	*p_mbi_ver = qed_rd(p_hwfn, p_ptt,
+			    mbi_ver_addr) &
+		     (NVM_CFG1_GLOB_MBI_VERSION_0_MASK |
+		      NVM_CFG1_GLOB_MBI_VERSION_1_MASK |
+		      NVM_CFG1_GLOB_MBI_VERSION_2_MASK);
+
+	return 0;
+}
+
 int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
 {
 	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 3e5bffe3d4e2..40247593e772 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -255,6 +255,18 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
 			struct qed_ptt *p_ptt,
 			u32 *p_mfw_ver, u32 *p_running_bundle_id);
 
+/**
+ * @brief Get the MBI version value
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mbi_ver - A pointer to a variable to be filled with the MBI version.
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
+			struct qed_ptt *p_ptt, u32 *p_mbi_ver);
+
 /**
  * @brief Get media type value of the port.
  *
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ff590cb37a00..b00e6753b4f4 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -328,6 +328,14 @@ struct qed_dev_info {
 
 	/* MFW version */
 	u32		mfw_rev;
+#define QED_MFW_VERSION_0_MASK		0x000000FF
+#define QED_MFW_VERSION_0_OFFSET	0
+#define QED_MFW_VERSION_1_MASK		0x0000FF00
+#define QED_MFW_VERSION_1_OFFSET	8
+#define QED_MFW_VERSION_2_MASK		0x00FF0000
+#define QED_MFW_VERSION_2_OFFSET	16
+#define QED_MFW_VERSION_3_MASK		0xFF000000
+#define QED_MFW_VERSION_3_OFFSET	24
 
 	u32		flash_size;
 	u8		mf_mode;
@@ -337,6 +345,15 @@ struct qed_dev_info {
 
 	bool wol_support;
 
+	/* MBI version */
+	u32 mbi_version;
+#define QED_MBI_VERSION_0_MASK		0x000000FF
+#define QED_MBI_VERSION_0_OFFSET	0
+#define QED_MBI_VERSION_1_MASK		0x0000FF00
+#define QED_MBI_VERSION_1_OFFSET	8
+#define QED_MBI_VERSION_2_MASK		0x00FF0000
+#define QED_MBI_VERSION_2_OFFSET	16
+
 	enum qed_dev_type dev_type;
 
 	/* Output parameters for qede */
-- 
cgit v1.2.3


From 712c3cbf193fcadf0ba67da61432beb1a71e400b Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 23 May 2017 09:41:28 +0300
Subject: qed: Replace set_id() api with set_name()

Current API between qed and protocol modules allows passing an
additional private string - but it doesn't get utilized by qed
anywhere.

Clarify the API by removing it and renaming it 'set_name'.

CC: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h        | 1 -
 drivers/net/ethernet/qlogic/qed/qed_main.c   | 9 +++------
 drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++--
 drivers/scsi/qedf/qedf_main.c                | 2 +-
 drivers/scsi/qedi/qedi_main.c                | 2 +-
 include/linux/qed/qed_if.h                   | 4 +---
 6 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 2eb6031f0df1..e0becec17b09 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -638,7 +638,6 @@ struct qed_dev {
 
 	int				pcie_width;
 	int				pcie_speed;
-	u8				ver_str[VER_SIZE];
 
 	/* Add MF related configuration */
 	u8				mcp_rev;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b5313c561fa2..c5bb80b9afc1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -338,6 +338,7 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
 	if (!cdev)
 		goto err0;
 
+	cdev->drv_type = DRV_ID_DRV_TYPE_LINUX;
 	cdev->protocol = params->protocol;
 
 	if (params->is_vf)
@@ -1128,17 +1129,13 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
 	return 0;
 }
 
-static void qed_set_id(struct qed_dev *cdev, char name[NAME_SIZE],
-		       char ver_str[VER_SIZE])
+static void qed_set_name(struct qed_dev *cdev, char name[NAME_SIZE])
 {
 	int i;
 
 	memcpy(cdev->name, name, NAME_SIZE);
 	for_each_hwfn(cdev, i)
 		snprintf(cdev->hwfns[i].name, NAME_SIZE, "%s-%d", name, i);
-
-	memcpy(cdev->ver_str, ver_str, VER_SIZE);
-	cdev->drv_type = DRV_ID_DRV_TYPE_LINUX;
 }
 
 static u32 qed_sb_init(struct qed_dev *cdev,
@@ -1692,7 +1689,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.probe = &qed_probe,
 	.remove = &qed_remove,
 	.set_power_state = &qed_set_power_state,
-	.set_id = &qed_set_id,
+	.set_name = &qed_set_name,
 	.update_pf_params = &qed_update_pf_params,
 	.slowpath_start = &qed_slowpath_start,
 	.slowpath_stop = &qed_slowpath_stop,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index d496ba70ddb8..00c70625f8a4 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -259,7 +259,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event,
 		/* Notify qed of the name change */
 		if (!edev->ops || !edev->ops->common)
 			goto done;
-		edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede");
+		edev->ops->common->set_name(edev->cdev, edev->ndev->name);
 		break;
 	case NETDEV_CHANGEADDR:
 		edev = netdev_priv(ndev);
@@ -967,7 +967,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 		goto err4;
 	}
 
-	edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION);
+	edev->ops->common->set_name(cdev, edev->ndev->name);
 
 	/* PTP not supported on VFs */
 	if (!is_vf)
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index a5c97342fd5d..b97405ed6cae 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2954,7 +2954,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		   "WWPN=%016llx.\n", qedf->wwnn, qedf->wwpn);
 
 	sprintf(host_buf, "host_%d", host->host_no);
-	qed_ops->common->set_id(qedf->cdev, host_buf, QEDF_VERSION);
+	qed_ops->common->set_name(qedf->cdev, host_buf);
 
 
 	/* Set xid max values */
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 92775a8b74b1..073b3051bb8f 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1843,7 +1843,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 		  qedi->mac);
 
 	sprintf(host_buf, "host_%d", qedi->shost->host_no);
-	qedi_ops->common->set_id(qedi->cdev, host_buf, QEDI_MODULE_VERSION);
+	qedi_ops->common->set_name(qedi->cdev, host_buf);
 
 	qedi_ops->register_ops(qedi->cdev, &qedi_cb_ops, qedi);
 
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b00e6753b4f4..73c46d6d5727 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -520,9 +520,7 @@ struct qed_common_ops {
 	int		(*set_power_state)(struct qed_dev *cdev,
 					   pci_power_t state);
 
-	void		(*set_id)(struct qed_dev *cdev,
-				  char name[],
-				  char ver_str[]);
+	void (*set_name) (struct qed_dev *cdev, char name[]);
 
 	/* Client drivers need to make this call before slowpath_start.
 	 * PF params required for the call before slowpath_start is
-- 
cgit v1.2.3


From 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 23 May 2017 13:38:41 -0400
Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans

It appears that TCP checksum offloading has been broken for
Q-in-Q vlans.  The behavior was execerbated by the
series
    commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'")
that that enabled accleleration features on stacked vlans.

However, event without that series, it is possible to trigger
this issue.  It just requires a lot more specialized configuration.

The root cause is the interaction between how
netdev_intersect_features() works, the features actually set on
the vlan devices and HW having the ability to run checksum with
longer headers.

The issue starts when netdev_interesect_features() replaces
NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM,
if the HW advertises IP|IPV6 specific checksums.  This happens
for tagged and multi-tagged packets.   However, HW that enables
IP|IPV6 checksum offloading doesn't gurantee that packets with
arbitrarily long headers can be checksummed.

This patch disables IP|IPV6 checksums on the packet for multi-tagged
packets.

CC: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
CC: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8d5fcd6284ce..283dc2f5364d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
 static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 						    netdev_features_t features)
 {
-	if (skb_vlan_tagged_multi(skb))
-		features = netdev_intersect_features(features,
-						     NETIF_F_SG |
-						     NETIF_F_HIGHDMA |
-						     NETIF_F_FRAGLIST |
-						     NETIF_F_HW_CSUM |
-						     NETIF_F_HW_VLAN_CTAG_TX |
-						     NETIF_F_HW_VLAN_STAG_TX);
+	if (skb_vlan_tagged_multi(skb)) {
+		/* In the case of multi-tagged packets, use a direct mask
+		 * instead of using netdev_interesect_features(), to make
+		 * sure that only devices supporting NETIF_F_HW_CSUM will
+		 * have checksum offloading support.
+		 */
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
+			    NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX |
+			    NETIF_F_HW_VLAN_STAG_TX;
+	}
 
 	return features;
 }
-- 
cgit v1.2.3


From b8cb5a545c3dd8b975aad19ea020eabe0a888e8d Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Wed, 24 May 2017 18:24:07 -0400
Subject: ext4: fix quota charging for shared xattr blocks

ext4_xattr_block_set() calls dquot_alloc_block() to charge for an xattr
block when new references are made. However if dquot_initialize() hasn't
been called on an inode, request for charging is effectively ignored
because ext4_inode_info->i_dquot is not initialized yet.

Add dquot_initialize() to call paths that lead to ext4_xattr_block_set().

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/acl.c            |  4 ++++
 fs/ext4/super.c          |  3 +++
 fs/ext4/xattr.c          |  8 ++++++++
 fs/quota/dquot.c         | 16 ++++++++++++++++
 include/linux/quotaops.h |  6 ++++++
 5 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index fd389935ecd1..3ec0e46de95f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  */
 
+#include <linux/quotaops.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
@@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	handle_t *handle;
 	int error, retries = 0;
 
+	error = dquot_initialize(inode);
+	if (error)
+		return error;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
 				    ext4_jbd2_credits_xattr(inode));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8f05c72bc4ec..d37c81f327e7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1174,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 		return res;
 	}
 
+	res = dquot_initialize(inode);
+	if (res)
+		return res;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_MISC,
 			ext4_jbd2_credits_xattr(inode));
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8fb7ce14e6eb..5d3c2536641c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -888,6 +888,8 @@ inserted:
 			else {
 				u32 ref;
 
+				WARN_ON_ONCE(dquot_initialize_needed(inode));
+
 				/* The old block is released after updating
 				   the inode. */
 				error = dquot_alloc_block(inode,
@@ -954,6 +956,8 @@ inserted:
 			/* We need to allocate a new block */
 			ext4_fsblk_t goal, block;
 
+			WARN_ON_ONCE(dquot_initialize_needed(inode));
+
 			goal = ext4_group_first_block_no(sb,
 						EXT4_I(inode)->i_block_group);
 
@@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		return -EINVAL;
 	if (strlen(name) > 255)
 		return -ERANGE;
+
 	ext4_write_lock_xattr(inode, &no_expand);
 
 	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 	int error, retries = 0;
 	int credits = ext4_jbd2_credits_xattr(inode);
 
+	error = dquot_initialize(inode);
+	if (error)
+		return error;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
 	if (IS_ERR(handle)) {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ebf80c7739e1..48813aeaab80 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode)
 }
 EXPORT_SYMBOL(dquot_initialize);
 
+bool dquot_initialize_needed(struct inode *inode)
+{
+	struct dquot **dquots;
+	int i;
+
+	if (!dquot_active(inode))
+		return false;
+
+	dquots = i_dquot(inode);
+	for (i = 0; i < MAXQUOTAS; i++)
+		if (!dquots[i] && sb_has_quota_active(inode->i_sb, i))
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(dquot_initialize_needed);
+
 /*
  * Release all quotas referenced by inode.
  *
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 9c6f768b7d32..dda22f45fc1b 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -44,6 +44,7 @@ void inode_sub_rsv_space(struct inode *inode, qsize_t number);
 void inode_reclaim_rsv_space(struct inode *inode, qsize_t number);
 
 int dquot_initialize(struct inode *inode);
+bool dquot_initialize_needed(struct inode *inode);
 void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, struct kqid qid);
 static inline struct dquot *dqgrab(struct dquot *dquot)
@@ -207,6 +208,11 @@ static inline int dquot_initialize(struct inode *inode)
 	return 0;
 }
 
+static inline bool dquot_initialize_needed(struct inode *inode)
+{
+	return false;
+}
+
 static inline void dquot_drop(struct inode *inode)
 {
 }
-- 
cgit v1.2.3


From be035303182a1260803a1871065d7b1e67c9ebe9 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 23 May 2017 17:46:56 +0530
Subject: regulator: tps65917: Add support for SMPS12

App support for SMPS12 dual phase regulator.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/palmas-regulator.c | 18 +++++++++++++++---
 include/linux/mfd/palmas.h           |  2 ++
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 31ae5ee3a80d..8018a44ed4f7 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -263,6 +263,13 @@ static struct palmas_regs_info tps65917_regs_info[] = {
 		.ctrl_addr	= TPS65917_SMPS5_CTRL,
 		.sleep_id	= TPS65917_EXTERNAL_REQSTR_ID_SMPS5,
 	},
+	{
+		.name		= "SMPS12",
+		.sname		= "smps1-in",
+		.vsel_addr	= TPS65917_SMPS1_VOLTAGE,
+		.ctrl_addr	= TPS65917_SMPS1_CTRL,
+		.sleep_id	= TPS65917_EXTERNAL_REQSTR_ID_SMPS12,
+	},
 	{
 		.name		= "LDO1",
 		.sname		= "ldo1-in",
@@ -367,6 +374,7 @@ static struct palmas_sleep_requestor_info tps65917_sleep_req_info[] = {
 	EXTERNAL_REQUESTOR_TPS65917(SMPS3, 1, 2),
 	EXTERNAL_REQUESTOR_TPS65917(SMPS4, 1, 3),
 	EXTERNAL_REQUESTOR_TPS65917(SMPS5, 1, 4),
+	EXTERNAL_REQUESTOR_TPS65917(SMPS12, 1, 5),
 	EXTERNAL_REQUESTOR_TPS65917(LDO1, 2, 0),
 	EXTERNAL_REQUESTOR_TPS65917(LDO2, 2, 1),
 	EXTERNAL_REQUESTOR_TPS65917(LDO3, 2, 2),
@@ -1305,7 +1313,8 @@ static int tps65917_smps_registration(struct palmas_pmic *pmic,
 		 */
 		desc = &pmic->desc[id];
 		desc->n_linear_ranges = 3;
-		if ((id == TPS65917_REG_SMPS2) && pmic->smps12)
+		if ((id == TPS65917_REG_SMPS2 || id == TPS65917_REG_SMPS1) &&
+		    pmic->smps12)
 			continue;
 
 		/* Initialise sleep/init values from platform data */
@@ -1427,6 +1436,7 @@ static struct of_regulator_match tps65917_matches[] = {
 	{ .name = "smps3", },
 	{ .name = "smps4", },
 	{ .name = "smps5", },
+	{ .name = "smps12",},
 	{ .name = "ldo1", },
 	{ .name = "ldo2", },
 	{ .name = "ldo3", },
@@ -1455,7 +1465,7 @@ static struct palmas_pmic_driver_data palmas_ddata = {
 
 static struct palmas_pmic_driver_data tps65917_ddata = {
 	.smps_start = TPS65917_REG_SMPS1,
-	.smps_end = TPS65917_REG_SMPS5,
+	.smps_end = TPS65917_REG_SMPS12,
 	.ldo_begin = TPS65917_REG_LDO1,
 	.ldo_end = TPS65917_REG_LDO5,
 	.max_reg = TPS65917_NUM_REGS,
@@ -1643,8 +1653,10 @@ static int palmas_regulators_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (reg & PALMAS_SMPS_CTRL_SMPS12_SMPS123_EN)
+	if (reg & PALMAS_SMPS_CTRL_SMPS12_SMPS123_EN) {
 		pmic->smps123 = 1;
+		pmic->smps12 = 1;
+	}
 
 	if (reg & PALMAS_SMPS_CTRL_SMPS45_SMPS457_EN)
 		pmic->smps457 = 1;
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index 5c9a1d44c125..6dec43826303 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -250,6 +250,7 @@ enum tps65917_regulators {
 	TPS65917_REG_SMPS3,
 	TPS65917_REG_SMPS4,
 	TPS65917_REG_SMPS5,
+	TPS65917_REG_SMPS12,
 	/* LDO regulators */
 	TPS65917_REG_LDO1,
 	TPS65917_REG_LDO2,
@@ -317,6 +318,7 @@ enum tps65917_external_requestor_id {
 	TPS65917_EXTERNAL_REQSTR_ID_SMPS3,
 	TPS65917_EXTERNAL_REQSTR_ID_SMPS4,
 	TPS65917_EXTERNAL_REQSTR_ID_SMPS5,
+	TPS65917_EXTERNAL_REQSTR_ID_SMPS12,
 	TPS65917_EXTERNAL_REQSTR_ID_LDO1,
 	TPS65917_EXTERNAL_REQSTR_ID_LDO2,
 	TPS65917_EXTERNAL_REQSTR_ID_LDO3,
-- 
cgit v1.2.3


From 610387d162eb1beb6eb2009af5175dc6b44b8da6 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:42:32 +0200
Subject: misc: apds990x: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/apds990x.c                |  2 +-
 include/linux/i2c/apds990x.h           | 79 ----------------------------------
 include/linux/platform_data/apds990x.h | 79 ++++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 80 deletions(-)
 delete mode 100644 include/linux/i2c/apds990x.h
 create mode 100644 include/linux/platform_data/apds990x.h

(limited to 'include/linux')

diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c
index dfb72ecfa604..c341164edaad 100644
--- a/drivers/misc/apds990x.c
+++ b/drivers/misc/apds990x.c
@@ -32,7 +32,7 @@
 #include <linux/delay.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
-#include <linux/i2c/apds990x.h>
+#include <linux/platform_data/apds990x.h>
 
 /* Register map */
 #define APDS990X_ENABLE	 0x00 /* Enable of states and interrupts */
diff --git a/include/linux/i2c/apds990x.h b/include/linux/i2c/apds990x.h
deleted file mode 100644
index d186fcc5d257..000000000000
--- a/include/linux/i2c/apds990x.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * This file is part of the APDS990x sensor driver.
- * Chip is combined proximity and ambient light sensor.
- *
- * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
- *
- * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#ifndef __APDS990X_H__
-#define __APDS990X_H__
-
-
-#define APDS_IRLED_CURR_12mA	0x3
-#define APDS_IRLED_CURR_25mA	0x2
-#define APDS_IRLED_CURR_50mA	0x1
-#define APDS_IRLED_CURR_100mA	0x0
-
-/**
- * struct apds990x_chip_factors - defines effect of the cover window
- * @ga: Total glass attenuation
- * @cf1: clear channel factor 1 for raw to lux conversion
- * @irf1: IR channel factor 1 for raw to lux conversion
- * @cf2: clear channel factor 2 for raw to lux conversion
- * @irf2: IR channel factor 2 for raw to lux conversion
- * @df: device factor for conversion formulas
- *
- * Structure for tuning ALS calculation to match with environment.
- * Values depend on the material above the sensor and the sensor
- * itself. If the GA is zero, driver will use uncovered sensor default values
- * format: decimal value * APDS_PARAM_SCALE except df which is plain integer.
- */
-#define APDS_PARAM_SCALE 4096
-struct apds990x_chip_factors {
-	int ga;
-	int cf1;
-	int irf1;
-	int cf2;
-	int irf2;
-	int df;
-};
-
-/**
- * struct apds990x_platform_data - platform data for apsd990x.c driver
- * @cf: chip factor data
- * @pddrive: IR-led driving current
- * @ppcount: number of IR pulses used for proximity estimation
- * @setup_resources: interrupt line setup call back function
- * @release_resources: interrupt line release call back function
- *
- * Proximity detection result depends heavily on correct ppcount, pdrive
- * and cover window.
- *
- */
-
-struct apds990x_platform_data {
-	struct apds990x_chip_factors cf;
-	u8     pdrive;
-	u8     ppcount;
-	int    (*setup_resources)(void);
-	int    (*release_resources)(void);
-};
-
-#endif
diff --git a/include/linux/platform_data/apds990x.h b/include/linux/platform_data/apds990x.h
new file mode 100644
index 000000000000..d186fcc5d257
--- /dev/null
+++ b/include/linux/platform_data/apds990x.h
@@ -0,0 +1,79 @@
+/*
+ * This file is part of the APDS990x sensor driver.
+ * Chip is combined proximity and ambient light sensor.
+ *
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __APDS990X_H__
+#define __APDS990X_H__
+
+
+#define APDS_IRLED_CURR_12mA	0x3
+#define APDS_IRLED_CURR_25mA	0x2
+#define APDS_IRLED_CURR_50mA	0x1
+#define APDS_IRLED_CURR_100mA	0x0
+
+/**
+ * struct apds990x_chip_factors - defines effect of the cover window
+ * @ga: Total glass attenuation
+ * @cf1: clear channel factor 1 for raw to lux conversion
+ * @irf1: IR channel factor 1 for raw to lux conversion
+ * @cf2: clear channel factor 2 for raw to lux conversion
+ * @irf2: IR channel factor 2 for raw to lux conversion
+ * @df: device factor for conversion formulas
+ *
+ * Structure for tuning ALS calculation to match with environment.
+ * Values depend on the material above the sensor and the sensor
+ * itself. If the GA is zero, driver will use uncovered sensor default values
+ * format: decimal value * APDS_PARAM_SCALE except df which is plain integer.
+ */
+#define APDS_PARAM_SCALE 4096
+struct apds990x_chip_factors {
+	int ga;
+	int cf1;
+	int irf1;
+	int cf2;
+	int irf2;
+	int df;
+};
+
+/**
+ * struct apds990x_platform_data - platform data for apsd990x.c driver
+ * @cf: chip factor data
+ * @pddrive: IR-led driving current
+ * @ppcount: number of IR pulses used for proximity estimation
+ * @setup_resources: interrupt line setup call back function
+ * @release_resources: interrupt line release call back function
+ *
+ * Proximity detection result depends heavily on correct ppcount, pdrive
+ * and cover window.
+ *
+ */
+
+struct apds990x_platform_data {
+	struct apds990x_chip_factors cf;
+	u8     pdrive;
+	u8     ppcount;
+	int    (*setup_resources)(void);
+	int    (*release_resources)(void);
+};
+
+#endif
-- 
cgit v1.2.3


From 7ae5f10a9fc1ae2f001ab3be5be84a5d0a89f918 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:42:33 +0200
Subject: misc: bh1770glc: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/bh1770glc.c                |  2 +-
 include/linux/i2c/bh1770glc.h           | 53 ---------------------------------
 include/linux/platform_data/bh1770glc.h | 53 +++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 54 deletions(-)
 delete mode 100644 include/linux/i2c/bh1770glc.h
 create mode 100644 include/linux/platform_data/bh1770glc.h

(limited to 'include/linux')

diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c
index 845466e45b95..38fcfe219d1c 100644
--- a/drivers/misc/bh1770glc.c
+++ b/drivers/misc/bh1770glc.c
@@ -27,7 +27,7 @@
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
-#include <linux/i2c/bh1770glc.h>
+#include <linux/platform_data/bh1770glc.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/workqueue.h>
diff --git a/include/linux/i2c/bh1770glc.h b/include/linux/i2c/bh1770glc.h
deleted file mode 100644
index 8b5e2df36c72..000000000000
--- a/include/linux/i2c/bh1770glc.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver.
- * Chip is combined proximity and ambient light sensor.
- *
- * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
- *
- * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#ifndef __BH1770_H__
-#define __BH1770_H__
-
-/**
- * struct bh1770_platform_data - platform data for bh1770glc driver
- * @led_def_curr: IR led driving current.
- * @glass_attenuation: Attenuation factor for covering window.
- * @setup_resources: Call back for interrupt line setup function
- * @release_resources: Call back for interrupte line release function
- *
- * Example of glass attenuation: 16384 * 385 / 100 means attenuation factor
- * of 3.85. i.e. light_above_sensor = light_above_cover_window / 3.85
- */
-
-struct bh1770_platform_data {
-#define BH1770_LED_5mA	0
-#define BH1770_LED_10mA	1
-#define BH1770_LED_20mA	2
-#define BH1770_LED_50mA	3
-#define BH1770_LED_100mA 4
-#define BH1770_LED_150mA 5
-#define BH1770_LED_200mA 6
-	__u8 led_def_curr;
-#define BH1770_NEUTRAL_GA 16384 /* 16384 / 16384 = 1 */
-	__u32 glass_attenuation;
-	int (*setup_resources)(void);
-	int (*release_resources)(void);
-};
-#endif
diff --git a/include/linux/platform_data/bh1770glc.h b/include/linux/platform_data/bh1770glc.h
new file mode 100644
index 000000000000..8b5e2df36c72
--- /dev/null
+++ b/include/linux/platform_data/bh1770glc.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver.
+ * Chip is combined proximity and ambient light sensor.
+ *
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __BH1770_H__
+#define __BH1770_H__
+
+/**
+ * struct bh1770_platform_data - platform data for bh1770glc driver
+ * @led_def_curr: IR led driving current.
+ * @glass_attenuation: Attenuation factor for covering window.
+ * @setup_resources: Call back for interrupt line setup function
+ * @release_resources: Call back for interrupte line release function
+ *
+ * Example of glass attenuation: 16384 * 385 / 100 means attenuation factor
+ * of 3.85. i.e. light_above_sensor = light_above_cover_window / 3.85
+ */
+
+struct bh1770_platform_data {
+#define BH1770_LED_5mA	0
+#define BH1770_LED_10mA	1
+#define BH1770_LED_20mA	2
+#define BH1770_LED_50mA	3
+#define BH1770_LED_100mA 4
+#define BH1770_LED_150mA 5
+#define BH1770_LED_200mA 6
+	__u8 led_def_curr;
+#define BH1770_NEUTRAL_GA 16384 /* 16384 / 16384 = 1 */
+	__u32 glass_attenuation;
+	int (*setup_resources)(void);
+	int (*release_resources)(void);
+};
+#endif
-- 
cgit v1.2.3


From f36776fafbaa0094390dd4e7e3e29805e0b82730 Mon Sep 17 00:00:00 2001
From: Peter Rajnoha <prajnoha@redhat.com>
Date: Tue, 9 May 2017 15:22:30 +0200
Subject: kobject: support passing in variables for synthetic uevents

This patch makes it possible to pass additional arguments in addition
to uevent action name when writing /sys/.../uevent attribute. These
additional arguments are then inserted into generated synthetic uevent
as additional environment variables.

Before, we were not able to pass any additional uevent environment
variables for synthetic uevents. This made it hard to identify such uevents
properly in userspace to make proper distinction between genuine uevents
originating from kernel and synthetic uevents triggered from userspace.
Also, it was not possible to pass any additional information which would
make it possible to optimize and change the way the synthetic uevents are
processed back in userspace based on the originating environment of the
triggering action in userspace. With the extra additional variables, we are
able to pass through this extra information needed and also it makes it
possible to synchronize with such synthetic uevents as they can be clearly
identified back in userspace.

The format for writing the uevent attribute is following:

    ACTION [UUID [KEY=VALUE ...]

There's no change in how "ACTION" is recognized - it stays the same
("add", "change", "remove"). The "ACTION" is the only argument required
to generate synthetic uevent, the rest of arguments, that this patch
adds support for, are optional.

The "UUID" is considered as transaction identifier so it's possible to
use the same UUID value for one or more synthetic uevents in which case
we logically group these uevents together for any userspace listeners.
The "UUID" is expected to be in "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
format where "x" is a hex digit. The value appears in uevent as
"SYNTH_UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" environment variable.

The "KEY=VALUE" pairs can contain alphanumeric characters only. It's
possible to define zero or more more pairs - each pair is then delimited
by a space character " ". Each pair appears in synthetic uevents as
"SYNTH_ARG_KEY=VALUE" environment variable. That means the KEY name gains
"SYNTH_ARG_" prefix to avoid possible collisions with existing variables.
To pass the "KEY=VALUE" pairs, it's also required to pass in the "UUID"
part for the synthetic uevent first.

If "UUID" is not passed in, the generated synthetic uevent gains
"SYNTH_UUID=0" environment variable automatically so it's possible to
identify this situation in userspace when reading generated uevent and so
we can still make a difference between genuine and synthetic uevents.

Signed-off-by: Peter Rajnoha <prajnoha@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-uevent |  47 ++++++++++
 drivers/base/bus.c                     |  10 +-
 drivers/base/core.c                    |   7 +-
 include/linux/kobject.h                |   4 +-
 kernel/module.c                        |   5 +-
 lib/kobject_uevent.c                   | 167 ++++++++++++++++++++++++++++++---
 6 files changed, 207 insertions(+), 33 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-uevent

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-uevent b/Documentation/ABI/testing/sysfs-uevent
new file mode 100644
index 000000000000..d7ac99072a16
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-uevent
@@ -0,0 +1,47 @@
+What:           /sys/.../uevent
+Date:           May 2017
+KernelVersion:  4.12
+Contact:        Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+                Enable passing additional variables for synthetic uevents that
+                are generated by writing /sys/.../uevent file.
+
+                Recognized extended format is ACTION [UUID [KEY=VALUE ...].
+
+                The ACTION is compulsory - it is the name of the uevent action
+                ("add", "change", "remove"). There is no change compared to
+                previous functionality here. The rest of the extended format
+                is optional.
+
+                You need to pass UUID first before any KEY=VALUE pairs.
+                The UUID must be in "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+                format where 'x' is a hex digit. The UUID is considered to be
+                a transaction identifier so it's possible to use the same UUID
+                value for one or more synthetic uevents in which case we
+                logically group these uevents together for any userspace
+                listeners. The UUID value appears in uevent as
+                "SYNTH_UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" environment
+                variable.
+
+                If UUID is not passed in, the generated synthetic uevent gains
+                "SYNTH_UUID=0" environment variable automatically.
+
+                The KEY=VALUE pairs can contain alphanumeric characters only.
+                It's possible to define zero or more pairs - each pair is then
+                delimited by a space character ' '. Each pair appears in
+                synthetic uevent as "SYNTH_ARG_KEY=VALUE". That means the KEY
+                name gains "SYNTH_ARG_" prefix to avoid possible collisions
+                with existing variables.
+
+                Example of valid sequence written to the uevent file:
+
+                    add fe4d7c9d-b8c6-4a70-9ef1-3d8a58d18eed A=1 B=abc
+
+                This generates synthetic uevent including these variables:
+
+                    ACTION=add
+                    SYNTH_ARG_A=1
+                    SYNTH_ARG_B=abc
+                    SYNTH_UUID=fe4d7c9d-b8c6-4a70-9ef1-3d8a58d18eed
+Users:
+                udev, userspace tools generating synthetic uevents
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 6470eb8088f4..f945f2f0ee06 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -648,10 +648,7 @@ static void remove_probe_files(struct bus_type *bus)
 static ssize_t uevent_store(struct device_driver *drv, const char *buf,
 			    size_t count)
 {
-	enum kobject_action action;
-
-	if (kobject_action_type(buf, count, &action) == 0)
-		kobject_uevent(&drv->p->kobj, action);
+	kobject_synth_uevent(&drv->p->kobj, buf, count);
 	return count;
 }
 static DRIVER_ATTR_WO(uevent);
@@ -868,10 +865,7 @@ static void klist_devices_put(struct klist_node *n)
 static ssize_t bus_uevent_store(struct bus_type *bus,
 				const char *buf, size_t count)
 {
-	enum kobject_action action;
-
-	if (kobject_action_type(buf, count, &action) == 0)
-		kobject_uevent(&bus->p->subsys.kobj, action);
+	kobject_synth_uevent(&bus->p->subsys.kobj, buf, count);
 	return count;
 }
 static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index bbecaf9293be..6564339d7f59 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -981,12 +981,9 @@ out:
 static ssize_t uevent_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	enum kobject_action action;
+	if (kobject_synth_uevent(&dev->kobj, buf, count))
+		dev_err(dev, "uevent: failed to send synthetic uevent\n");
 
-	if (kobject_action_type(buf, count, &action) == 0)
-		kobject_uevent(&dev->kobj, action);
-	else
-		dev_err(dev, "uevent: unknown action-string\n");
 	return count;
 }
 static DEVICE_ATTR_RW(uevent);
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index ca85cb80e99a..eeab34b0f589 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -217,11 +217,9 @@ extern struct kobject *firmware_kobj;
 int kobject_uevent(struct kobject *kobj, enum kobject_action action);
 int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 			char *envp[]);
+int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count);
 
 __printf(2, 3)
 int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...);
 
-int kobject_action_type(const char *buf, size_t count,
-			enum kobject_action *type);
-
 #endif /* _KOBJECT_H_ */
diff --git a/kernel/module.c b/kernel/module.c
index 4a3665f8f837..d7eb41d772c4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1202,10 +1202,7 @@ static ssize_t store_uevent(struct module_attribute *mattr,
 			    struct module_kobject *mk,
 			    const char *buffer, size_t count)
 {
-	enum kobject_action action;
-
-	if (kobject_action_type(buffer, count, &action) == 0)
-		kobject_uevent(&mk->kobj, action);
+	kobject_synth_uevent(&mk->kobj, buffer, count);
 	return count;
 }
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9a2b811966eb..719c155fce20 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -23,6 +23,8 @@
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/uuid.h>
+#include <linux/ctype.h>
 #include <net/sock.h>
 #include <net/net_namespace.h>
 
@@ -52,19 +54,13 @@ static const char *kobject_actions[] = {
 	[KOBJ_OFFLINE] =	"offline",
 };
 
-/**
- * kobject_action_type - translate action string to numeric type
- *
- * @buf: buffer containing the action string, newline is ignored
- * @count: length of buffer
- * @type: pointer to the location to store the action type
- *
- * Returns 0 if the action string was recognized.
- */
-int kobject_action_type(const char *buf, size_t count,
-			enum kobject_action *type)
+static int kobject_action_type(const char *buf, size_t count,
+			       enum kobject_action *type,
+			       const char **args)
 {
 	enum kobject_action action;
+	size_t count_first;
+	const char *args_start;
 	int ret = -EINVAL;
 
 	if (count && (buf[count-1] == '\n' || buf[count-1] == '\0'))
@@ -73,11 +69,20 @@ int kobject_action_type(const char *buf, size_t count,
 	if (!count)
 		goto out;
 
+	args_start = strnchr(buf, count, ' ');
+	if (args_start) {
+		count_first = args_start - buf;
+		args_start = args_start + 1;
+	} else
+		count_first = count;
+
 	for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) {
-		if (strncmp(kobject_actions[action], buf, count) != 0)
+		if (strncmp(kobject_actions[action], buf, count_first) != 0)
 			continue;
-		if (kobject_actions[action][count] != '\0')
+		if (kobject_actions[action][count_first] != '\0')
 			continue;
+		if (args)
+			*args = args_start;
 		*type = action;
 		ret = 0;
 		break;
@@ -86,6 +91,142 @@ out:
 	return ret;
 }
 
+static const char *action_arg_word_end(const char *buf, const char *buf_end,
+				       char delim)
+{
+	const char *next = buf;
+
+	while (next <= buf_end && *next != delim)
+		if (!isalnum(*next++))
+			return NULL;
+
+	if (next == buf)
+		return NULL;
+
+	return next;
+}
+
+static int kobject_action_args(const char *buf, size_t count,
+			       struct kobj_uevent_env **ret_env)
+{
+	struct kobj_uevent_env *env = NULL;
+	const char *next, *buf_end, *key;
+	int key_len;
+	int r = -EINVAL;
+
+	if (count && (buf[count - 1] == '\n' || buf[count - 1] == '\0'))
+		count--;
+
+	if (!count)
+		return -EINVAL;
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL);
+	if (!env)
+		return -ENOMEM;
+
+	/* first arg is UUID */
+	if (count < UUID_STRING_LEN || !uuid_is_valid(buf) ||
+	    add_uevent_var(env, "SYNTH_UUID=%.*s", UUID_STRING_LEN, buf))
+		goto out;
+
+	/*
+	 * the rest are custom environment variables in KEY=VALUE
+	 * format with ' ' delimiter between each KEY=VALUE pair
+	 */
+	next = buf + UUID_STRING_LEN;
+	buf_end = buf + count - 1;
+
+	while (next <= buf_end) {
+		if (*next != ' ')
+			goto out;
+
+		/* skip the ' ', key must follow */
+		key = ++next;
+		if (key > buf_end)
+			goto out;
+
+		buf = next;
+		next = action_arg_word_end(buf, buf_end, '=');
+		if (!next || next > buf_end || *next != '=')
+			goto out;
+		key_len = next - buf;
+
+		/* skip the '=', value must follow */
+		if (++next > buf_end)
+			goto out;
+
+		buf = next;
+		next = action_arg_word_end(buf, buf_end, ' ');
+		if (!next)
+			goto out;
+
+		if (add_uevent_var(env, "SYNTH_ARG_%.*s=%.*s",
+				   key_len, key, (int) (next - buf), buf))
+			goto out;
+	}
+
+	r = 0;
+out:
+	if (r)
+		kfree(env);
+	else
+		*ret_env = env;
+	return r;
+}
+
+/**
+ * kobject_synth_uevent - send synthetic uevent with arguments
+ *
+ * @kobj: struct kobject for which synthetic uevent is to be generated
+ * @buf: buffer containing action type and action args, newline is ignored
+ * @count: length of buffer
+ *
+ * Returns 0 if kobject_synthetic_uevent() is completed with success or the
+ * corresponding error when it fails.
+ */
+int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count)
+{
+	char *no_uuid_envp[] = { "SYNTH_UUID=0", NULL };
+	enum kobject_action action;
+	const char *action_args;
+	struct kobj_uevent_env *env;
+	const char *msg = NULL, *devpath;
+	int r;
+
+	r = kobject_action_type(buf, count, &action, &action_args);
+	if (r) {
+		msg = "unknown uevent action string\n";
+		goto out;
+	}
+
+	if (!action_args) {
+		r = kobject_uevent_env(kobj, action, no_uuid_envp);
+		goto out;
+	}
+
+	r = kobject_action_args(action_args,
+				count - (action_args - buf), &env);
+	if (r == -EINVAL) {
+		msg = "incorrect uevent action arguments\n";
+		goto out;
+	}
+
+	if (r)
+		goto out;
+
+	r = kobject_uevent_env(kobj, action, env->envp);
+	kfree(env);
+out:
+	if (r) {
+		devpath = kobject_get_path(kobj, GFP_KERNEL);
+		printk(KERN_WARNING "synth uevent: %s: %s",
+		       devpath ?: "unknown device",
+		       msg ?: "failed to send uevent");
+		kfree(devpath);
+	}
+	return r;
+}
+
 #ifdef CONFIG_NET
 static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 {
-- 
cgit v1.2.3


From 89cf2a20c3f13dbb4c15a0c6d2e390e700992173 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nick.desaulniers@gmail.com>
Date: Sun, 21 May 2017 01:58:07 -0700
Subject: sysfs: remove signedness from sysfs_get_dirent

sysfs_get_dirent is usually invoked with a string literal, which
have the type char[].  While the toplevel Makefile
disables -Wpointer-sign, other Makefiles like

arch/x86/boot/compressed/Makefile

redefine KBUILD_CFLAGS. Fixes the warning:

In file included from arch/x86/boot/compressed/kaslr.c:17:
In file included from ./include/linux/module.h:17:
In file included from ./include/linux/kobject.h:21:
./include/linux/sysfs.h:517:37: warning: passing 'const unsigned char *'
to parameter of
      type 'const char *' converts between pointers to integer types
with different sign
      [-Wpointer-sign]
        return kernfs_find_and_get(parent, name);
                                           ^~~~
./include/linux/kernfs.h:462:57: note: passing argument to parameter
'name' here
kernfs_find_and_get(struct kernfs_node *kn, const char *name)
                                                        ^

Signed-off-by: Nick Desaulniers <nick.desaulniers@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c6f0f0d0e17e..aa02c328dff5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -512,7 +512,7 @@ static inline void sysfs_notify_dirent(struct kernfs_node *kn)
 }
 
 static inline struct kernfs_node *sysfs_get_dirent(struct kernfs_node *parent,
-						   const unsigned char *name)
+						   const char *name)
 {
 	return kernfs_find_and_get(parent, name);
 }
-- 
cgit v1.2.3


From 614d0d77b49a9b131e58b77473698ab5b2c525b7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 May 2017 01:05:09 +0200
Subject: bpf: add various verifier test cases

This patch adds various verifier test cases:

1) A test case for the pruning issue when tracking alignment
   is used.
2) Various PTR_TO_MAP_VALUE_OR_NULL tests to make sure pointer
   arithmetic turns such register into UNKNOWN_VALUE type.
3) Test cases for the special treatment of LD_ABS/LD_IND to
   make sure verifier doesn't break calling convention here.
   Latter is needed, since f.e. arm64 JIT uses r1 - r5 for
   storing temporary data, so they really must be marked as
   NOT_INIT.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h                      |  10 ++
 tools/include/linux/filter.h                |  10 ++
 tools/testing/selftests/bpf/test_verifier.c | 239 +++++++++++++++++++++++++++-
 3 files changed, 255 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 56197f82af45..62d948f80730 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -272,6 +272,16 @@ struct bpf_prog_aux;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 390d7c9685fd..4ce25d43e8e3 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -208,6 +208,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3773562056da..cabb19b1e371 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -49,6 +49,7 @@
 #define MAX_NR_MAPS	4
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
 struct bpf_test {
 	const char *descr;
@@ -2614,6 +2615,30 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test17 (pruning, alignment)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+			BPF_JMP_A(-6),
+		},
+		.errstr = "misaligned packet access off 2+15+-4 size 4",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
 	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
@@ -3340,6 +3365,70 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
 	{
 		"invalid memory access with multiple map_lookup_elem calls",
 		.insns = {
@@ -4937,7 +5026,149 @@ static struct bpf_test tests[] = {
 		.fixup_map_in_map = { 3 },
 		.errstr = "R1 type=map_value_or_null expected=map_ptr",
 		.result = REJECT,
-	}
+	},
+	{
+		"ld_abs: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"ld_ind: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -5059,9 +5290,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	do_test_fixup(test, prog, map_fds);
 
-	fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				   prog, prog_len, "GPL", 0, bpf_vlog,
-				   sizeof(bpf_vlog));
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog));
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
-- 
cgit v1.2.3


From 8f553c498e1772cccb39a114da4a498d22992758 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:12 +0200
Subject: cpu/hotplug: Provide cpus_read|write_[un]lock()

The counting 'rwsem' hackery of get|put_online_cpus() is going to be
replaced by percpu rwsem.

Rename the functions to make it clear that it's locking and not some
refcount style interface. These new functions will be used for the
preparatory patches which make the code ready for the percpu rwsem
conversion.

Rename all instances in the cpu hotplug code while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081547.080397752@linutronix.de
---
 include/linux/cpu.h | 34 +++++++++++++++++++---------------
 kernel/cpu.c        | 36 ++++++++++++++++++------------------
 2 files changed, 37 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f92081234afd..055876003914 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -99,26 +99,30 @@ static inline void cpu_maps_update_done(void)
 extern struct bus_type cpu_subsys;
 
 #ifdef CONFIG_HOTPLUG_CPU
-/* Stop CPUs going up and down. */
-
-extern void cpu_hotplug_begin(void);
-extern void cpu_hotplug_done(void);
-extern void get_online_cpus(void);
-extern void put_online_cpus(void);
+extern void cpus_write_lock(void);
+extern void cpus_write_unlock(void);
+extern void cpus_read_lock(void);
+extern void cpus_read_unlock(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
 void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
-#else		/* CONFIG_HOTPLUG_CPU */
-
-static inline void cpu_hotplug_begin(void) {}
-static inline void cpu_hotplug_done(void) {}
-#define get_online_cpus()	do { } while (0)
-#define put_online_cpus()	do { } while (0)
-#define cpu_hotplug_disable()	do { } while (0)
-#define cpu_hotplug_enable()	do { } while (0)
-#endif		/* CONFIG_HOTPLUG_CPU */
+#else /* CONFIG_HOTPLUG_CPU */
+
+static inline void cpus_write_lock(void) { }
+static inline void cpus_write_unlock(void) { }
+static inline void cpus_read_lock(void) { }
+static inline void cpus_read_unlock(void) { }
+static inline void cpu_hotplug_disable(void) { }
+static inline void cpu_hotplug_enable(void) { }
+#endif	/* !CONFIG_HOTPLUG_CPU */
+
+/* Wrappers which go away once all code is converted */
+static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
+static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
+static inline void get_online_cpus(void) { cpus_read_lock(); }
+static inline void put_online_cpus(void) { cpus_read_unlock(); }
 
 #ifdef CONFIG_PM_SLEEP_SMP
 extern int freeze_secondary_cpus(int primary);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9ae6fbe5b5cf..d3221ae5b474 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -235,7 +235,7 @@ static struct {
 #define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
 
 
-void get_online_cpus(void)
+void cpus_read_lock(void)
 {
 	might_sleep();
 	if (cpu_hotplug.active_writer == current)
@@ -245,9 +245,9 @@ void get_online_cpus(void)
 	atomic_inc(&cpu_hotplug.refcount);
 	mutex_unlock(&cpu_hotplug.lock);
 }
-EXPORT_SYMBOL_GPL(get_online_cpus);
+EXPORT_SYMBOL_GPL(cpus_read_lock);
 
-void put_online_cpus(void)
+void cpus_read_unlock(void)
 {
 	int refcount;
 
@@ -264,7 +264,7 @@ void put_online_cpus(void)
 	cpuhp_lock_release();
 
 }
-EXPORT_SYMBOL_GPL(put_online_cpus);
+EXPORT_SYMBOL_GPL(cpus_read_unlock);
 
 /*
  * This ensures that the hotplug operation can begin only when the
@@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
  * get_online_cpus() not an api which is called all that often.
  *
  */
-void cpu_hotplug_begin(void)
+void cpus_write_lock(void)
 {
 	DEFINE_WAIT(wait);
 
@@ -306,7 +306,7 @@ void cpu_hotplug_begin(void)
 	finish_wait(&cpu_hotplug.wq, &wait);
 }
 
-void cpu_hotplug_done(void)
+void cpus_write_unlock(void)
 {
 	cpu_hotplug.active_writer = NULL;
 	mutex_unlock(&cpu_hotplug.lock);
@@ -773,7 +773,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	if (!cpu_present(cpu))
 		return -EINVAL;
 
-	cpu_hotplug_begin();
+	cpus_write_lock();
 
 	cpuhp_tasks_frozen = tasks_frozen;
 
@@ -811,7 +811,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	}
 
 out:
-	cpu_hotplug_done();
+	cpus_write_unlock();
 	return ret;
 }
 
@@ -893,7 +893,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 	struct task_struct *idle;
 	int ret = 0;
 
-	cpu_hotplug_begin();
+	cpus_write_lock();
 
 	if (!cpu_present(cpu)) {
 		ret = -EINVAL;
@@ -941,7 +941,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 	target = min((int)target, CPUHP_BRINGUP_CPU);
 	ret = cpuhp_up_callbacks(cpu, st, target);
 out:
-	cpu_hotplug_done();
+	cpus_write_unlock();
 	return ret;
 }
 
@@ -1424,7 +1424,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
 	if (sp->multi_instance == false)
 		return -EINVAL;
 
-	get_online_cpus();
+	cpus_read_lock();
 	mutex_lock(&cpuhp_state_mutex);
 
 	if (!invoke || !sp->startup.multi)
@@ -1453,7 +1453,7 @@ add_node:
 	hlist_add_head(node, &sp->list);
 unlock:
 	mutex_unlock(&cpuhp_state_mutex);
-	put_online_cpus();
+	cpus_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
@@ -1486,7 +1486,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
 	if (cpuhp_cb_check(state) || !name)
 		return -EINVAL;
 
-	get_online_cpus();
+	cpus_read_lock();
 	mutex_lock(&cpuhp_state_mutex);
 
 	ret = cpuhp_store_callbacks(state, name, startup, teardown,
@@ -1522,7 +1522,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
 	}
 out:
 	mutex_unlock(&cpuhp_state_mutex);
-	put_online_cpus();
+	cpus_read_unlock();
 	/*
 	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
 	 * dynamically allocated state in case of success.
@@ -1544,7 +1544,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
 	if (!sp->multi_instance)
 		return -EINVAL;
 
-	get_online_cpus();
+	cpus_read_lock();
 	mutex_lock(&cpuhp_state_mutex);
 
 	if (!invoke || !cpuhp_get_teardown_cb(state))
@@ -1565,7 +1565,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
 remove:
 	hlist_del(node);
 	mutex_unlock(&cpuhp_state_mutex);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return 0;
 }
@@ -1587,7 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 
 	BUG_ON(cpuhp_cb_check(state));
 
-	get_online_cpus();
+	cpus_read_lock();
 
 	mutex_lock(&cpuhp_state_mutex);
 	if (sp->multi_instance) {
@@ -1615,7 +1615,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 remove:
 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
 	mutex_unlock(&cpuhp_state_mutex);
-	put_online_cpus();
+	cpus_read_unlock();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);
 
-- 
cgit v1.2.3


From ade3f680a76b474d9f5375a9b1d100ee787bf469 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:13 +0200
Subject: cpu/hotplug: Provide lockdep_assert_cpus_held()

Provide a stub function which can be used in places where existing
get_online_cpus() calls are moved to call sites.

This stub is going to be filled by the final conversion of the hotplug
locking mechanism to a percpu rwsem.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081547.161282442@linutronix.de
---
 include/linux/cpu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 055876003914..af4d660798e5 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -103,6 +103,7 @@ extern void cpus_write_lock(void);
 extern void cpus_write_unlock(void);
 extern void cpus_read_lock(void);
 extern void cpus_read_unlock(void);
+static inline void lockdep_assert_cpus_held(void) { }
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
 void clear_tasks_mm_cpumask(int cpu);
@@ -114,6 +115,7 @@ static inline void cpus_write_lock(void) { }
 static inline void cpus_write_unlock(void) { }
 static inline void cpus_read_lock(void) { }
 static inline void cpus_read_unlock(void) { }
+static inline void lockdep_assert_cpus_held(void) { }
 static inline void cpu_hotplug_disable(void) { }
 static inline void cpu_hotplug_enable(void) { }
 #endif	/* !CONFIG_HOTPLUG_CPU */
-- 
cgit v1.2.3


From 71def423fe3da0d40ad3427a4cd5f9edc53bff67 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 24 May 2017 10:15:14 +0200
Subject: cpu/hotplug: Provide cpuhp_setup/remove_state[_nocalls]_cpuslocked()

Some call sites of cpuhp_setup/remove_state[_nocalls]() are within a
cpus_read locked region.

cpuhp_setup/remove_state[_nocalls]() call cpus_read_lock() as well, which
is possible in the current implementation but prevents converting the
hotplug locking to a percpu rwsem.

Provide locked versions of the interfaces to avoid nested calls to
cpus_read_lock().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081547.239600868@linutronix.de
---
 include/linux/cpuhotplug.h | 29 ++++++++++++++++++++++++++++
 kernel/cpu.c               | 47 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 65 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f2a80377520..4fac564dde70 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -153,6 +153,11 @@ int __cpuhp_setup_state(enum cpuhp_state state,	const char *name, bool invoke,
 			int (*startup)(unsigned int cpu),
 			int (*teardown)(unsigned int cpu), bool multi_instance);
 
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
+				   bool invoke,
+				   int (*startup)(unsigned int cpu),
+				   int (*teardown)(unsigned int cpu),
+				   bool multi_instance);
 /**
  * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
  * @state:	The state for which the calls are installed
@@ -171,6 +176,15 @@ static inline int cpuhp_setup_state(enum cpuhp_state state,
 	return __cpuhp_setup_state(state, name, true, startup, teardown, false);
 }
 
+static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+					       const char *name,
+					       int (*startup)(unsigned int cpu),
+					       int (*teardown)(unsigned int cpu))
+{
+	return __cpuhp_setup_state_cpuslocked(state, name, true, startup,
+					      teardown, false);
+}
+
 /**
  * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
  *			       callbacks
@@ -191,6 +205,15 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
 				   false);
 }
 
+static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
+						     const char *name,
+						     int (*startup)(unsigned int cpu),
+						     int (*teardown)(unsigned int cpu))
+{
+	return __cpuhp_setup_state_cpuslocked(state, name, false, startup,
+					    teardown, false);
+}
+
 /**
  * cpuhp_setup_state_multi - Add callbacks for multi state
  * @state:	The state for which the calls are installed
@@ -250,6 +273,7 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
 }
 
 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
 
 /**
  * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
@@ -273,6 +297,11 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
 	__cpuhp_remove_state(state, false);
 }
 
+static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
+{
+	__cpuhp_remove_state_cpuslocked(state, false);
+}
+
 /**
  * cpuhp_remove_multi_state - Remove hotplug multi state callback
  * @state:	The state for which the calls are removed
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d3221ae5b474..dc27c5a28153 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1459,7 +1459,7 @@ unlock:
 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
 
 /**
- * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
  * @state:		The state to setup
  * @invoke:		If true, the startup function is invoked for cpus where
  *			cpu state >= @state
@@ -1468,25 +1468,27 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
  * @multi_instance:	State is set up for multiple instances which get
  *			added afterwards.
  *
+ * The caller needs to hold cpus read locked while calling this function.
  * Returns:
  *   On success:
  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
  *      0 for all other states
  *   On failure: proper (negative) error code
  */
-int __cpuhp_setup_state(enum cpuhp_state state,
-			const char *name, bool invoke,
-			int (*startup)(unsigned int cpu),
-			int (*teardown)(unsigned int cpu),
-			bool multi_instance)
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+				   const char *name, bool invoke,
+				   int (*startup)(unsigned int cpu),
+				   int (*teardown)(unsigned int cpu),
+				   bool multi_instance)
 {
 	int cpu, ret = 0;
 	bool dynstate;
 
+	lockdep_assert_cpus_held();
+
 	if (cpuhp_cb_check(state) || !name)
 		return -EINVAL;
 
-	cpus_read_lock();
 	mutex_lock(&cpuhp_state_mutex);
 
 	ret = cpuhp_store_callbacks(state, name, startup, teardown,
@@ -1522,7 +1524,6 @@ int __cpuhp_setup_state(enum cpuhp_state state,
 	}
 out:
 	mutex_unlock(&cpuhp_state_mutex);
-	cpus_read_unlock();
 	/*
 	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
 	 * dynamically allocated state in case of success.
@@ -1531,6 +1532,22 @@ out:
 		return state;
 	return ret;
 }
+EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
+
+int __cpuhp_setup_state(enum cpuhp_state state,
+			const char *name, bool invoke,
+			int (*startup)(unsigned int cpu),
+			int (*teardown)(unsigned int cpu),
+			bool multi_instance)
+{
+	int ret;
+
+	cpus_read_lock();
+	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
+					     teardown, multi_instance);
+	cpus_read_unlock();
+	return ret;
+}
 EXPORT_SYMBOL(__cpuhp_setup_state);
 
 int __cpuhp_state_remove_instance(enum cpuhp_state state,
@@ -1572,22 +1589,23 @@ remove:
 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
 
 /**
- * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
  * @state:	The state to remove
  * @invoke:	If true, the teardown function is invoked for cpus where
  *		cpu state >= @state
  *
+ * The caller needs to hold cpus read locked while calling this function.
  * The teardown callback is currently not allowed to fail. Think
  * about module removal!
  */
-void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
 {
 	struct cpuhp_step *sp = cpuhp_get_step(state);
 	int cpu;
 
 	BUG_ON(cpuhp_cb_check(state));
 
-	cpus_read_lock();
+	lockdep_assert_cpus_held();
 
 	mutex_lock(&cpuhp_state_mutex);
 	if (sp->multi_instance) {
@@ -1615,6 +1633,13 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 remove:
 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
 	mutex_unlock(&cpuhp_state_mutex);
+}
+EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+	cpus_read_lock();
+	__cpuhp_remove_state_cpuslocked(state, invoke);
 	cpus_read_unlock();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);
-- 
cgit v1.2.3


From 9805c6733349ea3ccd22cf75b8ebaabb5290e310 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:15 +0200
Subject: cpu/hotplug: Add __cpuhp_state_add_instance_cpuslocked()

Add cpuslocked() variants for the multi instance registration so this can
be called from a cpus_read_lock() protected region.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081547.321782217@linutronix.de
---
 include/linux/cpuhotplug.h |  9 +++++++++
 kernel/cpu.c               | 18 +++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4fac564dde70..df3d2719a796 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -240,6 +240,8 @@ static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
 
 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
 			       bool invoke);
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+					  struct hlist_node *node, bool invoke);
 
 /**
  * cpuhp_state_add_instance - Add an instance for a state and invoke startup
@@ -272,6 +274,13 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
 	return __cpuhp_state_add_instance(state, node, false);
 }
 
+static inline int
+cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
+					    struct hlist_node *node)
+{
+	return __cpuhp_state_add_instance_cpuslocked(state, node, false);
+}
+
 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index dc27c5a28153..e4389ac55b65 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1413,18 +1413,20 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
 	}
 }
 
-int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
-			       bool invoke)
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+					  struct hlist_node *node,
+					  bool invoke)
 {
 	struct cpuhp_step *sp;
 	int cpu;
 	int ret;
 
+	lockdep_assert_cpus_held();
+
 	sp = cpuhp_get_step(state);
 	if (sp->multi_instance == false)
 		return -EINVAL;
 
-	cpus_read_lock();
 	mutex_lock(&cpuhp_state_mutex);
 
 	if (!invoke || !sp->startup.multi)
@@ -1453,6 +1455,16 @@ add_node:
 	hlist_add_head(node, &sp->list);
 unlock:
 	mutex_unlock(&cpuhp_state_mutex);
+	return ret;
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+			       bool invoke)
+{
+	int ret;
+
+	cpus_read_lock();
+	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
 	cpus_read_unlock();
 	return ret;
 }
-- 
cgit v1.2.3


From fe5595c074005bd94f0c7d1644175941149f6768 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 24 May 2017 10:15:16 +0200
Subject: stop_machine: Provide stop_machine_cpuslocked()

Some call sites of stop_machine() are within a get_online_cpus() protected
region.

stop_machine() calls get_online_cpus() as well, which is possible in the
current implementation but prevents converting the hotplug locking to a
percpu rwsem.

Provide stop_machine_cpuslocked() to avoid nested calls to get_online_cpus().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081547.400700852@linutronix.de
---
 include/linux/stop_machine.h | 26 +++++++++++++++++++++++---
 kernel/stop_machine.c        | 11 +++++++----
 2 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 3cc9632dcc2a..3d60275e3ba9 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -116,15 +116,29 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
  * @fn() runs.
  *
  * This can be thought of as a very heavy write lock, equivalent to
- * grabbing every spinlock in the kernel. */
+ * grabbing every spinlock in the kernel.
+ *
+ * Protects against CPU hotplug.
+ */
 int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
+/**
+ * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Same as above. Must be called from with in a cpus_read_lock() protected
+ * region. Avoids nested calls to cpus_read_lock().
+ */
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+
 int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
 #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 
-static inline int stop_machine(cpu_stop_fn_t fn, void *data,
-				 const struct cpumask *cpus)
+static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+					  const struct cpumask *cpus)
 {
 	unsigned long flags;
 	int ret;
@@ -134,6 +148,12 @@ static inline int stop_machine(cpu_stop_fn_t fn, void *data,
 	return ret;
 }
 
+static inline int stop_machine(cpu_stop_fn_t fn, void *data,
+			       const struct cpumask *cpus)
+{
+	return stop_machine_cpuslocked(fn, data, cpus);
+}
+
 static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 						 const struct cpumask *cpus)
 {
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 1eb82661ecdb..b7591261652d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -552,7 +552,8 @@ static int __init cpu_stop_init(void)
 }
 early_initcall(cpu_stop_init);
 
-static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+			    const struct cpumask *cpus)
 {
 	struct multi_stop_data msdata = {
 		.fn = fn,
@@ -561,6 +562,8 @@ static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cp
 		.active_cpus = cpus,
 	};
 
+	lockdep_assert_cpus_held();
+
 	if (!stop_machine_initialized) {
 		/*
 		 * Handle the case where stop_machine() is called
@@ -590,9 +593,9 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 	int ret;
 
 	/* No CPUs can come up or down during this. */
-	get_online_cpus();
-	ret = __stop_machine(fn, data, cpus);
-	put_online_cpus();
+	cpus_read_lock();
+	ret = stop_machine_cpuslocked(fn, data, cpus);
+	cpus_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
-- 
cgit v1.2.3


From 9596695ee1e7eedd743c43811fe68299eb005b5c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:17 +0200
Subject: padata: Make padata_alloc() static

No users outside of padata.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-crypto@vger.kernel.org
Link: http://lkml.kernel.org/r/20170524081547.491457256@linutronix.de
---
 include/linux/padata.h |  3 ---
 kernel/padata.c        | 32 ++++++++++++++++----------------
 2 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 0f9e567d5e15..2f9c1f93b1ce 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -166,9 +166,6 @@ struct padata_instance {
 
 extern struct padata_instance *padata_alloc_possible(
 					struct workqueue_struct *wq);
-extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
-					    const struct cpumask *pcpumask,
-					    const struct cpumask *cbcpumask);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
 			      struct padata_priv *padata, int cb_cpu);
diff --git a/kernel/padata.c b/kernel/padata.c
index ac8f1e524836..0c708f648853 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -933,19 +933,6 @@ static struct kobj_type padata_attr_type = {
 	.release = padata_sysfs_release,
 };
 
-/**
- * padata_alloc_possible - Allocate and initialize padata instance.
- *                         Use the cpu_possible_mask for serial and
- *                         parallel workers.
- *
- * @wq: workqueue to use for the allocated padata instance
- */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
-{
-	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
-
 /**
  * padata_alloc - allocate and initialize a padata instance and specify
  *                cpumasks for serial and parallel workers.
@@ -954,9 +941,9 @@ EXPORT_SYMBOL(padata_alloc_possible);
  * @pcpumask: cpumask that will be used for padata parallelization
  * @cbcpumask: cpumask that will be used for padata serialization
  */
-struct padata_instance *padata_alloc(struct workqueue_struct *wq,
-				     const struct cpumask *pcpumask,
-				     const struct cpumask *cbcpumask)
+static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+					    const struct cpumask *pcpumask,
+					    const struct cpumask *cbcpumask)
 {
 	struct padata_instance *pinst;
 	struct parallel_data *pd = NULL;
@@ -1010,6 +997,19 @@ err:
 	return NULL;
 }
 
+/**
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ *                         Use the cpu_possible_mask for serial and
+ *                         parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ */
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+{
+	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc_possible);
+
 /**
  * padata_free - free a padata instance
  *
-- 
cgit v1.2.3


From 0b2c2a71e6f07fb67e6f72817d39910f64d2e258 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:32 +0200
Subject: PCI: Replace the racy recursion prevention

pci_call_probe() can called recursively when a physcial function is probed
and the probing creates virtual functions, which are populated via
pci_bus_add_device() which in turn can end up calling pci_call_probe()
again.

The code has an interesting way to prevent recursing into the workqueue
code.  That's accomplished by a check whether the current task runs already
on the numa node which is associated with the device.

While that works to prevent the recursion into the workqueue code, it's
racy versus normal execution as there is no guarantee that the node does
not vanish after the check.

There is another issue with this code. It dereferences cpumask_of_node()
unconditionally without checking whether the node is available.

Make the detection reliable by:

 - Mark a probed device as 'is_probed' in pci_call_probe()

 - Check in pci_call_probe for a virtual function. If it's a virtual
   function and the associated physical function device is marked
   'is_probed' then this is a recursive call, so the call can be invoked in
   the calling context.

 - Add a check whether the node is online before dereferencing it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-pci@vger.kernel.org
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081548.771457199@linutronix.de
---
 drivers/pci/pci-driver.c | 47 +++++++++++++++++++++++++----------------------
 include/linux/pci.h      |  1 +
 2 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 5bf92fd983e5..fe6be6382505 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -320,10 +320,19 @@ static long local_pci_probe(void *_ddi)
 	return 0;
 }
 
+static bool pci_physfn_is_probed(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_IOV
+	return dev->is_virtfn && dev->physfn->is_probed;
+#else
+	return false;
+#endif
+}
+
 static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
 			  const struct pci_device_id *id)
 {
-	int error, node;
+	int error, node, cpu;
 	struct drv_dev_and_id ddi = { drv, dev, id };
 
 	/*
@@ -332,33 +341,27 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
 	 * on the right node.
 	 */
 	node = dev_to_node(&dev->dev);
+	dev->is_probed = 1;
+
+	cpu_hotplug_disable();
 
 	/*
-	 * On NUMA systems, we are likely to call a PF probe function using
-	 * work_on_cpu().  If that probe calls pci_enable_sriov() (which
-	 * adds the VF devices via pci_bus_add_device()), we may re-enter
-	 * this function to call the VF probe function.  Calling
-	 * work_on_cpu() again will cause a lockdep warning.  Since VFs are
-	 * always on the same node as the PF, we can work around this by
-	 * avoiding work_on_cpu() when we're already on the correct node.
-	 *
-	 * Preemption is enabled, so it's theoretically unsafe to use
-	 * numa_node_id(), but even if we run the probe function on the
-	 * wrong node, it should be functionally correct.
+	 * Prevent nesting work_on_cpu() for the case where a Virtual Function
+	 * device is probed from work_on_cpu() of the Physical device.
 	 */
-	if (node >= 0 && node != numa_node_id()) {
-		int cpu;
-
-		cpu_hotplug_disable();
+	if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
+	    pci_physfn_is_probed(dev))
+		cpu = nr_cpu_ids;
+	else
 		cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
-		if (cpu < nr_cpu_ids)
-			error = work_on_cpu(cpu, local_pci_probe, &ddi);
-		else
-			error = local_pci_probe(&ddi);
-		cpu_hotplug_enable();
-	} else
+
+	if (cpu < nr_cpu_ids)
+		error = work_on_cpu(cpu, local_pci_probe, &ddi);
+	else
 		error = local_pci_probe(&ddi);
 
+	dev->is_probed = 0;
+	cpu_hotplug_enable();
 	return error;
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..5026f2ae86db 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -371,6 +371,7 @@ struct pci_dev {
 	unsigned int	irq_managed:1;
 	unsigned int	has_secondary_link:1;
 	unsigned int	non_compliant_bars:1;	/* broken BARs; ignore them */
+	unsigned int	is_probed:1;		/* device probing in progress */
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
-- 
cgit v1.2.3


From a63fbed776c7124ce9f606234267c3c095b2680e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:34 +0200
Subject: perf/tracing/cpuhotplug: Fix locking order

perf, tracing, kprobes and jump_labels have a gazillion of ways to create
dependency lock chains. Some of those involve nested invocations of
get_online_cpus().

The conversion of the hotplug locking to a percpu rwsem requires to avoid
such nested calls. sys_perf_event_open() protects most of the syscall logic
against cpu hotplug. This causes nested calls and lock inversions versus
ftrace and kprobes in various interesting ways.

It's impossible to move the hotplug locking to the outer end of all call
chains in the involved facilities, so the hotplug protection in
sys_perf_event_open() needs to be solved differently.

Introduce 'pmus_mutex' which protects a perf private online cpumask. This
mutex is taken when the mask is updated in the cpu hotplug callbacks and
can be taken in sys_perf_event_open() to protect the swhash setup/teardown
code and when the final judgement about a valid event has to be made.

[ tglx: Produced changelog and fixed the swhash interaction ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Link: http://lkml.kernel.org/r/20170524081548.930941109@linutronix.de
---
 include/linux/perf_event.h |   2 +
 kernel/events/core.c       | 106 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 78 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..7d6aa29094b2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -801,6 +801,8 @@ struct perf_cpu_context {
 
 	struct list_head		sched_cb_entry;
 	int				sched_cb_usage;
+
+	int				online;
 };
 
 struct perf_output_handle {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..b97cda4d1777 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
 
 /*
  * perf event paranoia level:
@@ -3812,14 +3813,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
-		/*
-		 * We could be clever and allow to attach a event to an
-		 * offline CPU and activate it when the CPU comes up, but
-		 * that's for later.
-		 */
-		if (!cpu_online(cpu))
-			return ERR_PTR(-ENODEV);
-
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 		get_ctx(ctx);
@@ -7703,7 +7696,8 @@ static int swevent_hlist_get_cpu(int cpu)
 	int err = 0;
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+	if (!swevent_hlist_deref(swhash) &&
+	    cpumask_test_cpu(cpu, perf_online_mask)) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -7724,7 +7718,7 @@ static int swevent_hlist_get(void)
 {
 	int err, cpu, failed_cpu;
 
-	get_online_cpus();
+	mutex_lock(&pmus_lock);
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
@@ -7732,8 +7726,7 @@ static int swevent_hlist_get(void)
 			goto fail;
 		}
 	}
-	put_online_cpus();
-
+	mutex_unlock(&pmus_lock);
 	return 0;
 fail:
 	for_each_possible_cpu(cpu) {
@@ -7741,8 +7734,7 @@ fail:
 			break;
 		swevent_hlist_put_cpu(cpu);
 	}
-
-	put_online_cpus();
+	mutex_unlock(&pmus_lock);
 	return err;
 }
 
@@ -8920,7 +8912,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 	pmu->hrtimer_interval_ms = timer;
 
 	/* update all cpuctx for this PMU */
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8929,7 +8921,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 		cpu_function_call(cpu,
 			(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&mux_interval_mutex);
 
 	return count;
@@ -9059,6 +9051,7 @@ skip_type:
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.pmu = pmu;
+		cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
 
 		__perf_mux_hrtimer_init(cpuctx, cpu);
 	}
@@ -9882,12 +9875,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_task;
 	}
 
-	get_online_cpus();
-
 	if (task) {
 		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
 		if (err)
-			goto err_cpus;
+			goto err_cred;
 
 		/*
 		 * Reuse ptrace permission checks for now.
@@ -10073,6 +10064,23 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_locked;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_locked;
+		}
+	}
+
+
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
 	 * because we need to serialize with concurrent event creation.
@@ -10162,8 +10170,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_task_struct(task);
 	}
 
-	put_online_cpus();
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -10197,8 +10203,6 @@ err_alloc:
 err_cred:
 	if (task)
 		mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
-	put_online_cpus();
 err_task:
 	if (task)
 		put_task_struct(task);
@@ -10253,6 +10257,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_unlock;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_unlock;
+		}
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
 		err = -EBUSY;
 		goto err_unlock;
@@ -10920,6 +10939,8 @@ static void __init perf_event_init_all_cpus(void)
 	struct swevent_htable *swhash;
 	int cpu;
 
+	zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
 	for_each_possible_cpu(cpu) {
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
@@ -10935,7 +10956,7 @@ static void __init perf_event_init_all_cpus(void)
 	}
 }
 
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -10948,7 +10969,6 @@ int perf_event_init_cpu(unsigned int cpu)
 		rcu_assign_pointer(swhash->swevent_hlist, hlist);
 	}
 	mutex_unlock(&swhash->hlist_mutex);
-	return 0;
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10966,19 +10986,22 @@ static void __perf_event_exit_context(void *__info)
 
 static void perf_event_exit_cpu_context(int cpu)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	struct pmu *pmu;
-	int idx;
 
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+	mutex_lock(&pmus_lock);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
 
 		mutex_lock(&ctx->mutex);
 		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+		cpuctx->online = 0;
 		mutex_unlock(&ctx->mutex);
 	}
-	srcu_read_unlock(&pmus_srcu, idx);
+	cpumask_clear_cpu(cpu, perf_online_mask);
+	mutex_unlock(&pmus_lock);
 }
 #else
 
@@ -10986,6 +11009,29 @@ static void perf_event_exit_cpu_context(int cpu) { }
 
 #endif
 
+int perf_event_init_cpu(unsigned int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	struct pmu *pmu;
+
+	perf_swevent_init_cpu(cpu);
+
+	mutex_lock(&pmus_lock);
+	cpumask_set_cpu(cpu, perf_online_mask);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+
+		mutex_lock(&ctx->mutex);
+		cpuctx->online = 1;
+		mutex_unlock(&ctx->mutex);
+	}
+	mutex_unlock(&pmus_lock);
+
+	return 0;
+}
+
 int perf_event_exit_cpu(unsigned int cpu)
 {
 	perf_event_exit_cpu_context(cpu);
-- 
cgit v1.2.3


From fc8dffd379ca5620664336eb895a426b42847558 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:40 +0200
Subject: cpu/hotplug: Convert hotplug locking to percpu rwsem

There are no more (known) nested calls to get_online_cpus() and all
observed lock ordering problems have been addressed.

Replace the magic nested 'rwsem' hackery with a percpu-rwsem.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081549.447014063@linutronix.de
---
 include/linux/cpu.h |   2 +-
 kernel/cpu.c        | 107 +++++++---------------------------------------------
 2 files changed, 14 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index af4d660798e5..ca73bc1563f4 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -103,7 +103,7 @@ extern void cpus_write_lock(void);
 extern void cpus_write_unlock(void);
 extern void cpus_read_lock(void);
 extern void cpus_read_unlock(void);
-static inline void lockdep_assert_cpus_held(void) { }
+extern void lockdep_assert_cpus_held(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
 void clear_tasks_mm_cpumask(int cpu);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 142d889d9f69..66836216ebae 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -27,6 +27,7 @@
 #include <linux/smpboot.h>
 #include <linux/relay.h>
 #include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -196,121 +197,41 @@ void cpu_maps_update_done(void)
 	mutex_unlock(&cpu_add_remove_lock);
 }
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	/* wait queue to wake up the active_writer */
-	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
-				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
-
+DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
 
 void cpus_read_lock(void)
 {
-	might_sleep();
-	if (cpu_hotplug.active_writer == current)
-		return;
-	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+	percpu_down_read(&cpu_hotplug_lock);
 }
 EXPORT_SYMBOL_GPL(cpus_read_lock);
 
 void cpus_read_unlock(void)
 {
-	int refcount;
-
-	if (cpu_hotplug.active_writer == current)
-		return;
-
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
-
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
-
-	cpuhp_lock_release();
-
+	percpu_up_read(&cpu_hotplug_lock);
 }
 EXPORT_SYMBOL_GPL(cpus_read_unlock);
 
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
 void cpus_write_lock(void)
 {
-	DEFINE_WAIT(wait);
-
-	cpu_hotplug.active_writer = current;
-	cpuhp_lock_acquire();
-
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+	percpu_down_write(&cpu_hotplug_lock);
 }
 
 void cpus_write_unlock(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
-	cpuhp_lock_release();
+	percpu_up_write(&cpu_hotplug_lock);
+}
+
+void lockdep_assert_cpus_held(void)
+{
+	percpu_rwsem_assert_held(&cpu_hotplug_lock);
 }
 
 /*
@@ -344,8 +265,6 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */
 
-/* Notifier wrappers for transitioning to state machine */
-
 static int bringup_wait_for_ap(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-- 
cgit v1.2.3


From 62ec05dd71b19f5be890a1992227cc7b2ac0adc4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:41 +0200
Subject: sched: Provide is_percpu_thread() helper

Provide a helper function for checking whether current task is a per cpu
thread.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170524081549.541649540@linutronix.de
---
 include/linux/sched.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b69fc650201..3dfa5f99d6ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1265,6 +1265,16 @@ extern struct pid *cad_pid;
 #define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
 #define used_math()				tsk_used_math(current)
 
+static inline bool is_percpu_thread(void)
+{
+#ifdef CONFIG_SMP
+	return (current->flags & PF_NO_SETAFFINITY) &&
+		(current->nr_cpus_allowed  == 1);
+#else
+	return true;
+#endif
+}
+
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
 #define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
-- 
cgit v1.2.3


From 6c364062bfed3c34490e85bea52ff6e2d4f0f281 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 22 May 2017 15:11:41 +0200
Subject: spi: core: Add support for registering SPI slave controllers

Add support for registering SPI slave controllers using the existing SPI
master framework:
  - SPI slave controllers must use spi_alloc_slave() instead of
    spi_alloc_master(), and should provide an additional callback
    "slave_abort" to abort an ongoing SPI transfer request,
  - SPI slave controllers are added to a new "spi_slave" device class,
  - SPI slave handlers can be bound to the SPI slave device represented
    by an SPI slave controller using a DT child node named "slave",
  - Alternatively, (un)binding an SPI slave handler to the SPI slave
    device represented by an SPI slave controller can be done by
    (un)registering the slave device through a sysfs virtual file named
    "slave".

From the point of view of an SPI slave protocol handler, an SPI slave
controller looks almost like an ordinary SPI master controller. The only
exception is that a transfer request will block on the remote SPI
master, and may be cancelled using spi_slave_abort().

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/Kconfig     |  14 +++-
 drivers/spi/Makefile    |   2 +
 drivers/spi/spi.c       | 179 +++++++++++++++++++++++++++++++++++++++++-------
 include/linux/spi/spi.h |  35 ++++++++--
 4 files changed, 201 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1761c9004fc1..df8ddec24b5d 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -784,6 +784,18 @@ config SPI_TLE62X0
 
 endif # SPI_MASTER
 
-# (slave support would go here)
+#
+# SLAVE side ... listening to other SPI masters
+#
+
+config SPI_SLAVE
+	bool "SPI slave protocol handlers"
+	help
+	  If your system has a slave-capable SPI controller, you can enable
+	  slave protocol handlers.
+
+if SPI_SLAVE
+
+endif # SPI_SLAVE
 
 endif # SPI
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index b375a7a89216..e50852c6fcb8 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -105,3 +105,5 @@ obj-$(CONFIG_SPI_XILINX)		+= spi-xilinx.o
 obj-$(CONFIG_SPI_XLP)			+= spi-xlp.o
 obj-$(CONFIG_SPI_XTENSA_XTFPGA)		+= spi-xtensa-xtfpga.o
 obj-$(CONFIG_SPI_ZYNQMP_GQSPI)		+= spi-zynqmp-gqspi.o
+
+# SPI slave protocol handlers
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 89254a55eb2e..6a8280bdc7a8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1535,15 +1535,6 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 	u32 value;
 	int rc;
 
-	/* Device address */
-	rc = of_property_read_u32(nc, "reg", &value);
-	if (rc) {
-		dev_err(&master->dev, "%s has no valid 'reg' property (%d)\n",
-			nc->full_name, rc);
-		return rc;
-	}
-	spi->chip_select = value;
-
 	/* Mode (clock phase/polarity/etc.) */
 	if (of_find_property(nc, "spi-cpha", NULL))
 		spi->mode |= SPI_CPHA;
@@ -1593,6 +1584,24 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 		}
 	}
 
+	if (spi_controller_is_slave(master)) {
+		if (strcmp(nc->name, "slave")) {
+			dev_err(&master->dev, "%s is not called 'slave'\n",
+				nc->full_name);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
+	/* Device address */
+	rc = of_property_read_u32(nc, "reg", &value);
+	if (rc) {
+		dev_err(&master->dev, "%s has no valid 'reg' property (%d)\n",
+			nc->full_name, rc);
+		return rc;
+	}
+	spi->chip_select = value;
+
 	/* Device speed */
 	rc = of_property_read_u32(nc, "spi-max-frequency", &value);
 	if (rc) {
@@ -1658,8 +1667,8 @@ err_out:
  * of_register_spi_devices() - Register child devices onto the SPI bus
  * @master:	Pointer to spi_master device
  *
- * Registers an spi_device for each child node of master node which has a 'reg'
- * property.
+ * Registers an spi_device for each child node of controller node which
+ * represents a valid SPI slave.
  */
 static void of_register_spi_devices(struct spi_master *master)
 {
@@ -1828,28 +1837,129 @@ static struct class spi_master_class = {
 	.dev_groups	= spi_master_groups,
 };
 
+#ifdef CONFIG_SPI_SLAVE
+/**
+ * spi_slave_abort - abort the ongoing transfer request on an SPI slave
+ *		     controller
+ * @spi: device used for the current transfer
+ */
+int spi_slave_abort(struct spi_device *spi)
+{
+	struct spi_master *master = spi->master;
+
+	if (spi_controller_is_slave(master) && master->slave_abort)
+		return master->slave_abort(master);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(spi_slave_abort);
+
+static int match_true(struct device *dev, void *data)
+{
+	return 1;
+}
+
+static ssize_t spi_slave_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct spi_master *ctlr = container_of(dev, struct spi_master, dev);
+	struct device *child;
+
+	child = device_find_child(&ctlr->dev, NULL, match_true);
+	return sprintf(buf, "%s\n",
+		       child ? to_spi_device(child)->modalias : NULL);
+}
+
+static ssize_t spi_slave_store(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
+{
+	struct spi_master *ctlr = container_of(dev, struct spi_master, dev);
+	struct spi_device *spi;
+	struct device *child;
+	char name[32];
+	int rc;
+
+	rc = sscanf(buf, "%31s", name);
+	if (rc != 1 || !name[0])
+		return -EINVAL;
+
+	child = device_find_child(&ctlr->dev, NULL, match_true);
+	if (child) {
+		/* Remove registered slave */
+		device_unregister(child);
+		put_device(child);
+	}
+
+	if (strcmp(name, "(null)")) {
+		/* Register new slave */
+		spi = spi_alloc_device(ctlr);
+		if (!spi)
+			return -ENOMEM;
+
+		strlcpy(spi->modalias, name, sizeof(spi->modalias));
+
+		rc = spi_add_device(spi);
+		if (rc) {
+			spi_dev_put(spi);
+			return rc;
+		}
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(slave, 0644, spi_slave_show, spi_slave_store);
+
+static struct attribute *spi_slave_attrs[] = {
+	&dev_attr_slave.attr,
+	NULL,
+};
+
+static const struct attribute_group spi_slave_group = {
+	.attrs = spi_slave_attrs,
+};
+
+static const struct attribute_group *spi_slave_groups[] = {
+	&spi_master_statistics_group,
+	&spi_slave_group,
+	NULL,
+};
+
+static struct class spi_slave_class = {
+	.name		= "spi_slave",
+	.owner		= THIS_MODULE,
+	.dev_release	= spi_master_release,
+	.dev_groups	= spi_slave_groups,
+};
+#else
+extern struct class spi_slave_class;	/* dummy */
+#endif
 
 /**
- * spi_alloc_master - allocate SPI master controller
+ * __spi_alloc_controller - allocate an SPI master or slave controller
  * @dev: the controller, possibly using the platform_bus
  * @size: how much zeroed driver-private data to allocate; the pointer to this
  *	memory is in the driver_data field of the returned device,
  *	accessible with spi_master_get_devdata().
+ * @slave: flag indicating whether to allocate an SPI master (false) or SPI
+ *	slave (true) controller
  * Context: can sleep
  *
- * This call is used only by SPI master controller drivers, which are the
+ * This call is used only by SPI controller drivers, which are the
  * only ones directly touching chip registers.  It's how they allocate
  * an spi_master structure, prior to calling spi_register_master().
  *
  * This must be called from context that can sleep.
  *
- * The caller is responsible for assigning the bus number and initializing
- * the master's methods before calling spi_register_master(); and (after errors
+ * The caller is responsible for assigning the bus number and initializing the
+ * controller's methods before calling spi_register_master(); and (after errors
  * adding the device) calling spi_master_put() to prevent a memory leak.
  *
- * Return: the SPI master structure on success, else NULL.
+ * Return: the SPI controller structure on success, else NULL.
  */
-struct spi_master *spi_alloc_master(struct device *dev, unsigned size)
+struct spi_master *__spi_alloc_controller(struct device *dev,
+					  unsigned int size, bool slave)
 {
 	struct spi_master	*master;
 
@@ -1863,14 +1973,18 @@ struct spi_master *spi_alloc_master(struct device *dev, unsigned size)
 	device_initialize(&master->dev);
 	master->bus_num = -1;
 	master->num_chipselect = 1;
-	master->dev.class = &spi_master_class;
+	master->slave = slave;
+	if (IS_ENABLED(CONFIG_SPI_SLAVE) && slave)
+		master->dev.class = &spi_slave_class;
+	else
+		master->dev.class = &spi_master_class;
 	master->dev.parent = dev;
 	pm_suspend_ignore_children(&master->dev, true);
 	spi_master_set_devdata(master, &master[1]);
 
 	return master;
 }
-EXPORT_SYMBOL_GPL(spi_alloc_master);
+EXPORT_SYMBOL_GPL(__spi_alloc_controller);
 
 #ifdef CONFIG_OF
 static int of_spi_register_master(struct spi_master *master)
@@ -1946,9 +2060,11 @@ int spi_register_master(struct spi_master *master)
 	if (!dev)
 		return -ENODEV;
 
-	status = of_spi_register_master(master);
-	if (status)
-		return status;
+	if (!spi_controller_is_slave(master)) {
+		status = of_spi_register_master(master);
+		if (status)
+			return status;
+	}
 
 	/* even if it's just one always-selected device, there must
 	 * be at least one chipselect
@@ -1985,8 +2101,9 @@ int spi_register_master(struct spi_master *master)
 	status = device_add(&master->dev);
 	if (status < 0)
 		goto done;
-	dev_dbg(dev, "registered master %s%s\n", dev_name(&master->dev),
-			dynamic ? " (dynamic)" : "");
+	dev_dbg(dev, "registered %s %s%s\n",
+			spi_controller_is_slave(master) ? "slave" : "master",
+			dev_name(&master->dev), dynamic ? " (dynamic)" : "");
 
 	/* If we're using a queued driver, start the queue */
 	if (master->transfer)
@@ -3159,6 +3276,9 @@ static struct spi_master *of_find_spi_master_by_node(struct device_node *node)
 
 	dev = class_find_device(&spi_master_class, NULL, node,
 				__spi_of_master_match);
+	if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE))
+		dev = class_find_device(&spi_slave_class, NULL, node,
+					__spi_of_master_match);
 	if (!dev)
 		return NULL;
 
@@ -3240,6 +3360,9 @@ static struct spi_master *acpi_spi_find_master_by_adev(struct acpi_device *adev)
 
 	dev = class_find_device(&spi_master_class, NULL, adev,
 				spi_acpi_master_match);
+	if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE))
+		dev = class_find_device(&spi_slave_class, NULL, adev,
+					spi_acpi_master_match);
 	if (!dev)
 		return NULL;
 
@@ -3312,6 +3435,12 @@ static int __init spi_init(void)
 	if (status < 0)
 		goto err2;
 
+	if (IS_ENABLED(CONFIG_SPI_SLAVE)) {
+		status = class_register(&spi_slave_class);
+		if (status < 0)
+			goto err3;
+	}
+
 	if (IS_ENABLED(CONFIG_OF_DYNAMIC))
 		WARN_ON(of_reconfig_notifier_register(&spi_of_notifier));
 	if (IS_ENABLED(CONFIG_ACPI))
@@ -3319,6 +3448,8 @@ static int __init spi_init(void)
 
 	return 0;
 
+err3:
+	class_unregister(&spi_master_class);
 err2:
 	bus_unregister(&spi_bus_type);
 err1:
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 935bd2854ff1..0a78745e5766 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -29,8 +29,8 @@ struct spi_transfer;
 struct spi_flash_read_message;
 
 /*
- * INTERFACES between SPI master-side drivers and SPI infrastructure.
- * (There's no SPI slave support for Linux yet...)
+ * INTERFACES between SPI master-side drivers and SPI slave protocol handlers,
+ * and SPI infrastructure.
  */
 extern struct bus_type spi_bus_type;
 
@@ -311,6 +311,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @min_speed_hz: Lowest supported transfer speed
  * @max_speed_hz: Highest supported transfer speed
  * @flags: other constraints relevant to this driver
+ * @slave: indicates that this is an SPI slave controller
  * @max_transfer_size: function that returns the max transfer size for
  *	a &spi_device; may be %NULL, so the default %SIZE_MAX will be used.
  * @max_message_size: function that returns the max message size for
@@ -374,6 +375,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @handle_err: the subsystem calls the driver to handle an error that occurs
  *		in the generic implementation of transfer_one_message().
  * @unprepare_message: undo any work done by prepare_message().
+ * @slave_abort: abort the ongoing transfer request on an SPI slave controller
  * @spi_flash_read: to support spi-controller hardwares that provide
  *                  accelerated interface to read from flash devices.
  * @spi_flash_can_dma: analogous to can_dma() interface, but for
@@ -447,6 +449,9 @@ struct spi_master {
 #define SPI_MASTER_MUST_TX      BIT(4)		/* requires tx */
 #define SPI_MASTER_GPIO_SS      BIT(5)		/* GPIO CS must select slave */
 
+	/* flag indicating this is an SPI slave controller */
+	bool			slave;
+
 	/*
 	 * on some hardware transfer / message size may be constrained
 	 * the limit may depend on device transfer settings
@@ -539,6 +544,7 @@ struct spi_master {
 			       struct spi_message *message);
 	int (*unprepare_message)(struct spi_master *master,
 				 struct spi_message *message);
+	int (*slave_abort)(struct spi_master *spi);
 	int (*spi_flash_read)(struct  spi_device *spi,
 			      struct spi_flash_read_message *msg);
 	bool (*spi_flash_can_dma)(struct spi_device *spi,
@@ -595,6 +601,11 @@ static inline void spi_master_put(struct spi_master *master)
 		put_device(&master->dev);
 }
 
+static inline bool spi_controller_is_slave(struct spi_master *ctlr)
+{
+	return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave;
+}
+
 /* PM calls that need to be issued by the driver */
 extern int spi_master_suspend(struct spi_master *master);
 extern int spi_master_resume(struct spi_master *master);
@@ -605,8 +616,23 @@ extern void spi_finalize_current_message(struct spi_master *master);
 extern void spi_finalize_current_transfer(struct spi_master *master);
 
 /* the spi driver core manages memory for the spi_master classdev */
-extern struct spi_master *
-spi_alloc_master(struct device *host, unsigned size);
+extern struct spi_master *__spi_alloc_controller(struct device *host,
+						 unsigned int size, bool slave);
+
+static inline struct spi_master *spi_alloc_master(struct device *host,
+						  unsigned int size)
+{
+	return __spi_alloc_controller(host, size, false);
+}
+
+static inline struct spi_master *spi_alloc_slave(struct device *host,
+						 unsigned int size)
+{
+	if (!IS_ENABLED(CONFIG_SPI_SLAVE))
+		return NULL;
+
+	return __spi_alloc_controller(host, size, true);
+}
 
 extern int spi_register_master(struct spi_master *master);
 extern int devm_spi_register_master(struct device *dev,
@@ -912,6 +938,7 @@ extern int spi_setup(struct spi_device *spi);
 extern int spi_async(struct spi_device *spi, struct spi_message *message);
 extern int spi_async_locked(struct spi_device *spi,
 			    struct spi_message *message);
+extern int spi_slave_abort(struct spi_device *spi);
 
 static inline size_t
 spi_max_message_size(struct spi_device *spi)
-- 
cgit v1.2.3


From cf9e4784f3bde3e4749163384f27450ddffe746c Mon Sep 17 00:00:00 2001
From: Hisashi Nakamura <hisashi.nakamura.ak@renesas.com>
Date: Mon, 22 May 2017 15:11:43 +0200
Subject: spi: sh-msiof: Add slave mode support

Add slave mode support to the MSIOF driver, in both PIO and DMA mode.

For now this only supports the transmission of messages with a size
that is known in advance.

Signed-off-by: Hisashi Nakamura <hisashi.nakamura.ak@renesas.com>
Signed-off-by: Hiromitsu Yamasaki <hiromitsu.yamasaki.ym@renesas.com>
[geert: Timeout handling cleanup, spi core integration, cancellation,
	rewording]
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/sh-msiof.txt |   2 +
 drivers/spi/spi-sh-msiof.c                         | 111 +++++++++++++++------
 include/linux/spi/sh_msiof.h                       |   6 ++
 3 files changed, 86 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt
index dc975064fa27..64ee489571c4 100644
--- a/Documentation/devicetree/bindings/spi/sh-msiof.txt
+++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt
@@ -38,6 +38,8 @@ Optional properties:
 			 specifiers, one for transmission, and one for
 			 reception.
 - dma-names            : Must contain a list of two DMA names, "tx" and "rx".
+- spi-slave            : Empty property indicating the SPI controller is used
+			 in slave mode.
 - renesas,dtdl         : delay sync signal (setup) in transmit mode.
 			 Must contain one of the following values:
 			 0   (no bit delay)
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 2ce15ca97782..c304c7167866 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -2,7 +2,8 @@
  * SuperH MSIOF SPI Master Interface
  *
  * Copyright (c) 2009 Magnus Damm
- * Copyright (C) 2014 Glider bvba
+ * Copyright (C) 2014 Renesas Electronics Corporation
+ * Copyright (C) 2014-2017 Glider bvba
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -33,7 +34,6 @@
 
 #include <asm/unaligned.h>
 
-
 struct sh_msiof_chipdata {
 	u16 tx_fifo_size;
 	u16 rx_fifo_size;
@@ -53,6 +53,7 @@ struct sh_msiof_spi_priv {
 	void *rx_dma_page;
 	dma_addr_t tx_dma_addr;
 	dma_addr_t rx_dma_addr;
+	bool slave_aborted;
 };
 
 #define TMDR1	0x00	/* Transmit Mode Register 1 */
@@ -337,7 +338,10 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p,
 	tmp |= !cs_high << MDR1_SYNCAC_SHIFT;
 	tmp |= lsb_first << MDR1_BITLSB_SHIFT;
 	tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p);
-	sh_msiof_write(p, TMDR1, tmp | MDR1_TRMD | TMDR1_PCON);
+	if (spi_controller_is_slave(p->master))
+		sh_msiof_write(p, TMDR1, tmp | TMDR1_PCON);
+	else
+		sh_msiof_write(p, TMDR1, tmp | MDR1_TRMD | TMDR1_PCON);
 	if (p->master->flags & SPI_MASTER_MUST_TX) {
 		/* These bits are reserved if RX needs TX */
 		tmp &= ~0x0000ffff;
@@ -564,17 +568,19 @@ static int sh_msiof_prepare_message(struct spi_master *master,
 
 static int sh_msiof_spi_start(struct sh_msiof_spi_priv *p, void *rx_buf)
 {
-	int ret;
+	bool slave = spi_controller_is_slave(p->master);
+	int ret = 0;
 
 	/* setup clock and rx/tx signals */
-	ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TSCKE);
+	if (!slave)
+		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TSCKE);
 	if (rx_buf && !ret)
 		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_RXE);
 	if (!ret)
 		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TXE);
 
 	/* start by setting frame bit */
-	if (!ret)
+	if (!ret && !slave)
 		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TFSE);
 
 	return ret;
@@ -582,20 +588,49 @@ static int sh_msiof_spi_start(struct sh_msiof_spi_priv *p, void *rx_buf)
 
 static int sh_msiof_spi_stop(struct sh_msiof_spi_priv *p, void *rx_buf)
 {
-	int ret;
+	bool slave = spi_controller_is_slave(p->master);
+	int ret = 0;
 
 	/* shut down frame, rx/tx and clock signals */
-	ret = sh_msiof_modify_ctr_wait(p, CTR_TFSE, 0);
+	if (!slave)
+		ret = sh_msiof_modify_ctr_wait(p, CTR_TFSE, 0);
 	if (!ret)
 		ret = sh_msiof_modify_ctr_wait(p, CTR_TXE, 0);
 	if (rx_buf && !ret)
 		ret = sh_msiof_modify_ctr_wait(p, CTR_RXE, 0);
-	if (!ret)
+	if (!ret && !slave)
 		ret = sh_msiof_modify_ctr_wait(p, CTR_TSCKE, 0);
 
 	return ret;
 }
 
+static int sh_msiof_slave_abort(struct spi_master *master)
+{
+	struct sh_msiof_spi_priv *p = spi_master_get_devdata(master);
+
+	p->slave_aborted = true;
+	complete(&p->done);
+	return 0;
+}
+
+static int sh_msiof_wait_for_completion(struct sh_msiof_spi_priv *p)
+{
+	if (spi_controller_is_slave(p->master)) {
+		if (wait_for_completion_interruptible(&p->done) ||
+		    p->slave_aborted) {
+			dev_dbg(&p->pdev->dev, "interrupted\n");
+			return -EINTR;
+		}
+	} else {
+		if (!wait_for_completion_timeout(&p->done, HZ)) {
+			dev_err(&p->pdev->dev, "timeout\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
 static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
 				  void (*tx_fifo)(struct sh_msiof_spi_priv *,
 						  const void *, int, int),
@@ -628,6 +663,7 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
 		tx_fifo(p, tx_buf, words, fifo_shift);
 
 	reinit_completion(&p->done);
+	p->slave_aborted = false;
 
 	ret = sh_msiof_spi_start(p, rx_buf);
 	if (ret) {
@@ -636,11 +672,9 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
 	}
 
 	/* wait for tx fifo to be emptied / rx fifo to be filled */
-	if (!wait_for_completion_timeout(&p->done, HZ)) {
-		dev_err(&p->pdev->dev, "PIO timeout\n");
-		ret = -ETIMEDOUT;
+	ret = sh_msiof_wait_for_completion(p);
+	if (ret)
 		goto stop_reset;
-	}
 
 	/* read rx fifo */
 	if (rx_buf)
@@ -732,6 +766,7 @@ static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx,
 	sh_msiof_write(p, IER, ier_bits);
 
 	reinit_completion(&p->done);
+	p->slave_aborted = false;
 
 	/* Now start DMA */
 	if (rx)
@@ -746,11 +781,9 @@ static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx,
 	}
 
 	/* wait for tx fifo to be emptied / rx fifo to be filled */
-	if (!wait_for_completion_timeout(&p->done, HZ)) {
-		dev_err(&p->pdev->dev, "DMA timeout\n");
-		ret = -ETIMEDOUT;
+	ret = sh_msiof_wait_for_completion(p);
+	if (ret)
 		goto stop_reset;
-	}
 
 	/* clear status bits */
 	sh_msiof_reset_str(p);
@@ -843,7 +876,8 @@ static int sh_msiof_transfer_one(struct spi_master *master,
 	int ret;
 
 	/* setup clocks (clock already enabled in chipselect()) */
-	sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz);
+	if (!spi_controller_is_slave(p->master))
+		sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz);
 
 	while (master->dma_tx && len > 15) {
 		/*
@@ -998,8 +1032,12 @@ static struct sh_msiof_spi_info *sh_msiof_spi_parse_dt(struct device *dev)
 	if (!info)
 		return NULL;
 
+	info->mode = of_property_read_bool(np, "spi-slave") ? MSIOF_SPI_SLAVE
+							    : MSIOF_SPI_MASTER;
+
 	/* Parse the MSIOF properties */
-	of_property_read_u32(np, "num-cs", &num_cs);
+	if (info->mode == MSIOF_SPI_MASTER)
+		of_property_read_u32(np, "num-cs", &num_cs);
 	of_property_read_u32(np, "renesas,tx-fifo-size",
 					&info->tx_fifo_override);
 	of_property_read_u32(np, "renesas,rx-fifo-size",
@@ -1159,34 +1197,40 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
 	struct spi_master *master;
 	const struct sh_msiof_chipdata *chipdata;
 	const struct of_device_id *of_id;
+	struct sh_msiof_spi_info *info;
 	struct sh_msiof_spi_priv *p;
 	int i;
 	int ret;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct sh_msiof_spi_priv));
-	if (master == NULL)
-		return -ENOMEM;
-
-	p = spi_master_get_devdata(master);
-
-	platform_set_drvdata(pdev, p);
-	p->master = master;
-
 	of_id = of_match_device(sh_msiof_match, &pdev->dev);
 	if (of_id) {
 		chipdata = of_id->data;
-		p->info = sh_msiof_spi_parse_dt(&pdev->dev);
+		info = sh_msiof_spi_parse_dt(&pdev->dev);
 	} else {
 		chipdata = (const void *)pdev->id_entry->driver_data;
-		p->info = dev_get_platdata(&pdev->dev);
+		info = dev_get_platdata(&pdev->dev);
 	}
 
-	if (!p->info) {
+	if (!info) {
 		dev_err(&pdev->dev, "failed to obtain device info\n");
-		ret = -ENXIO;
-		goto err1;
+		return -ENXIO;
 	}
 
+	if (info->mode == MSIOF_SPI_SLAVE)
+		master = spi_alloc_slave(&pdev->dev,
+					 sizeof(struct sh_msiof_spi_priv));
+	else
+		master = spi_alloc_master(&pdev->dev,
+					  sizeof(struct sh_msiof_spi_priv));
+	if (master == NULL)
+		return -ENOMEM;
+
+	p = spi_master_get_devdata(master);
+
+	platform_set_drvdata(pdev, p);
+	p->master = master;
+	p->info = info;
+
 	init_completion(&p->done);
 
 	p->clk = devm_clk_get(&pdev->dev, NULL);
@@ -1237,6 +1281,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
 	master->num_chipselect = p->info->num_chipselect;
 	master->setup = sh_msiof_spi_setup;
 	master->prepare_message = sh_msiof_prepare_message;
+	master->slave_abort = sh_msiof_slave_abort;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
 	master->auto_runtime_pm = true;
 	master->transfer_one = sh_msiof_transfer_one;
diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h
index b087a85f5f72..f74b581f242f 100644
--- a/include/linux/spi/sh_msiof.h
+++ b/include/linux/spi/sh_msiof.h
@@ -1,10 +1,16 @@
 #ifndef __SPI_SH_MSIOF_H__
 #define __SPI_SH_MSIOF_H__
 
+enum {
+	MSIOF_SPI_MASTER,
+	MSIOF_SPI_SLAVE,
+};
+
 struct sh_msiof_spi_info {
 	int tx_fifo_override;
 	int rx_fifo_override;
 	u16 num_chipselect;
+	int mode;
 	unsigned int dma_tx_id;
 	unsigned int dma_rx_id;
 	u32 dtdl;
-- 
cgit v1.2.3


From 83b4605b0c16cde5b00c8cf192408d51eab75402 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 20 May 2017 18:59:54 +0200
Subject: PCI/msi: fix the pci_alloc_irq_vectors_affinity stub

We need to return an error for any call that asks for MSI / MSI-X
vectors only, so that non-trivial fallback logic can work properly.

Also valid dev->irq and use the "correct" errno value based on feedback
from Linus.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Fixes: aff17164 ("PCI: Provide sensible IRQ vector alloc/free routines")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..fc2e832d7b9c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1342,9 +1342,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 			       unsigned int max_vecs, unsigned int flags,
 			       const struct irq_affinity *aff_desc)
 {
-	if (min_vecs > 1)
-		return -EINVAL;
-	return 1;
+	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
+		return 1;
+	return -ENOSPC;
 }
 
 static inline void pci_free_irq_vectors(struct pci_dev *dev)
-- 
cgit v1.2.3


From 1f51445af35e8477027d87ca015a10257b13f5a2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 26 May 2017 08:37:23 +0200
Subject: bridge: Export VLAN filtering state

It's useful for drivers supporting bridge offload to be able to query
the bridge's VLAN filtering state.

Currently, upon enslavement to a bridge master, the offloading driver
will only learn about the bridge's VLAN filtering state after the bridge
device was already linked with its slave.

Being able to query the bridge's VLAN filtering state allows such
drivers to forbid enslavement in case resource couldn't be allocated for
a VLAN-aware bridge and also choose the correct initialization routine
for the enslaved port, which is dependent on the bridge type.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 9 +++++++++
 net/bridge/br_if.c        | 2 +-
 net/bridge/br_mdb.c       | 4 ++--
 net/bridge/br_netlink.c   | 2 +-
 net/bridge/br_private.h   | 9 ---------
 net/bridge/br_vlan.c      | 8 ++++++++
 6 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 0c16866a7aac..d6cd103eb165 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -80,4 +80,13 @@ static inline bool br_multicast_has_querier_adjacent(struct net_device *dev,
 }
 #endif
 
+#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
+bool br_vlan_enabled(const struct net_device *dev);
+#else
+static inline bool br_vlan_enabled(const struct net_device *dev)
+{
+	return false;
+}
+#endif
+
 #endif
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 7f8d05cf9065..f3aef22931ab 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -138,7 +138,7 @@ void br_manage_promisc(struct net_bridge *br)
 	/* If vlan filtering is disabled or bridge interface is placed
 	 * into promiscuous mode, place all ports in promiscuous mode.
 	 */
-	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
+	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
 		set_all = true;
 
 	list_for_each_entry(p, &br->port_list, list) {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b0845480a3ae..09dcdb9c0f3c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -599,7 +599,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
-	if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
 			err = __br_mdb_add(net, br, entry);
@@ -694,7 +694,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
-	if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
 			err = __br_mdb_del(br, entry);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 574f78824d8a..1e63ec466d7c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1251,7 +1251,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
 	u32 stp_enabled = br->stp_enabled;
 	u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
-	u8 vlan_enabled = br_vlan_enabled(br);
+	u8 vlan_enabled = br_vlan_enabled(br->dev);
 	u64 clockval;
 
 	clockval = br_timer_value(&br->hello_timer);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0d177280aa84..20626927f433 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -854,10 +854,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
 	return vg->pvid;
 }
 
-static inline int br_vlan_enabled(struct net_bridge *br)
-{
-	return br->vlan_enabled;
-}
 #else
 static inline bool br_allowed_ingress(const struct net_bridge *br,
 				      struct net_bridge_vlan_group *vg,
@@ -945,11 +941,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
 	return 0;
 }
 
-static inline int br_vlan_enabled(struct net_bridge *br)
-{
-	return 0;
-}
-
 static inline int __br_vlan_filter_toggle(struct net_bridge *br,
 					  unsigned long val)
 {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index b838213c408e..26a1a56639b2 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -706,6 +706,14 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
 	return __br_vlan_filter_toggle(br, val);
 }
 
+bool br_vlan_enabled(const struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return !!br->vlan_enabled;
+}
+EXPORT_SYMBOL_GPL(br_vlan_enabled);
+
 int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
 {
 	int err = 0;
-- 
cgit v1.2.3


From 9341b988e606f951df57d15569a425c6c74b945e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 26 May 2017 08:37:24 +0200
Subject: bridge: Export multicast enabled state

During enslavement to a bridge, after the CHANGEUPPER is sent, the
multicast enabled state of the bridge isn't propagated down to the
offloading driver unless it's changed.

This patch allows such drivers to query the multicast enabled state from
the bridge, so that they'll be able to correctly configure their flood
tables during port enslavement.

In case multicast is disabled, unregistered multicast packets can be
treated as broadcast and be flooded through all the bridge ports.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 5 +++++
 net/bridge/br_multicast.c | 8 ++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index d6cd103eb165..3cd18ac0697f 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -62,6 +62,7 @@ int br_multicast_list_adjacent(struct net_device *dev,
 			       struct list_head *br_ip_list);
 bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
 bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
+bool br_multicast_enabled(const struct net_device *dev);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
 					     struct list_head *br_ip_list)
@@ -78,6 +79,10 @@ static inline bool br_multicast_has_querier_adjacent(struct net_device *dev,
 {
 	return false;
 }
+static inline bool br_multicast_enabled(const struct net_device *dev)
+{
+	return false;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index faa7261a992f..8dc5c8d69bcd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2176,6 +2176,14 @@ unlock:
 	return err;
 }
 
+bool br_multicast_enabled(const struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return !br->multicast_disabled;
+}
+EXPORT_SYMBOL_GPL(br_multicast_enabled);
+
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
 {
 	unsigned long max_delay;
-- 
cgit v1.2.3


From d3ba5a9a345b1243276f8a982e1bce557c2504fd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 May 2017 12:03:11 +0300
Subject: posix-timers: Make posix_clocks immutable

There are no more modular users providing a posix clock. The register
function is now pointless so the posix clock array can be initialized
statically at compile time and the array including the various k_clock
structs can be marked 'const'.

Inspired by changes in the Grsecurity patch set, but done proper.

[ tglx: Massaged changelog and fixed the POSIX_TIMER=n case ]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Travis <mike.travis@hpe.com>
Cc: Dimitri Sivanich <sivanich@hpe.com>
Link: http://lkml.kernel.org/r/20170526090311.3377-3-hch@lst.de
---
 include/linux/posix-timers.h   |   9 +-
 kernel/time/alarmtimer.c       |  89 ++++++++++---------
 kernel/time/posix-clock.c      |   2 +-
 kernel/time/posix-cpu-timers.c |  34 +++-----
 kernel/time/posix-timers.c     | 191 +++++++++++++++++++----------------------
 5 files changed, 151 insertions(+), 174 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 8c1e43ab14a9..b313ef2e7385 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -105,10 +105,11 @@ struct k_clock {
 			   struct itimerspec64 *cur_setting);
 };
 
-extern struct k_clock clock_posix_cpu;
-extern struct k_clock clock_posix_dynamic;
-
-void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
+extern const struct k_clock clock_posix_cpu;
+extern const struct k_clock clock_posix_dynamic;
+extern const struct k_clock clock_process;
+extern const struct k_clock clock_thread;
+extern const struct k_clock alarm_clock;
 
 /* function to call to trigger timer event */
 int posix_timer_event(struct k_itimer *timr, int si_private);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5cb5b0008d97..4f4cc3509b30 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -307,38 +307,6 @@ static int alarmtimer_resume(struct device *dev)
 }
 #endif
 
-static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
-{
-	struct alarm_base *base;
-	unsigned long flags;
-	ktime_t delta;
-
-	switch(type) {
-	case ALARM_REALTIME:
-		base = &alarm_bases[ALARM_REALTIME];
-		type = ALARM_REALTIME_FREEZER;
-		break;
-	case ALARM_BOOTTIME:
-		base = &alarm_bases[ALARM_BOOTTIME];
-		type = ALARM_BOOTTIME_FREEZER;
-		break;
-	default:
-		WARN_ONCE(1, "Invalid alarm type: %d\n", type);
-		return;
-	}
-
-	delta = ktime_sub(absexp, base->gettime());
-
-	spin_lock_irqsave(&freezer_delta_lock, flags);
-	if (!freezer_delta || (delta < freezer_delta)) {
-		freezer_delta = delta;
-		freezer_expires = absexp;
-		freezer_alarmtype = type;
-	}
-	spin_unlock_irqrestore(&freezer_delta_lock, flags);
-}
-
-
 /**
  * alarm_init - Initialize an alarm structure
  * @alarm: ptr to alarm to be initialized
@@ -488,6 +456,38 @@ u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
 }
 EXPORT_SYMBOL_GPL(alarm_forward_now);
 
+#ifdef CONFIG_POSIX_TIMERS
+
+static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
+{
+	struct alarm_base *base;
+	unsigned long flags;
+	ktime_t delta;
+
+	switch(type) {
+	case ALARM_REALTIME:
+		base = &alarm_bases[ALARM_REALTIME];
+		type = ALARM_REALTIME_FREEZER;
+		break;
+	case ALARM_BOOTTIME:
+		base = &alarm_bases[ALARM_BOOTTIME];
+		type = ALARM_BOOTTIME_FREEZER;
+		break;
+	default:
+		WARN_ONCE(1, "Invalid alarm type: %d\n", type);
+		return;
+	}
+
+	delta = ktime_sub(absexp, base->gettime());
+
+	spin_lock_irqsave(&freezer_delta_lock, flags);
+	if (!freezer_delta || (delta < freezer_delta)) {
+		freezer_delta = delta;
+		freezer_expires = absexp;
+		freezer_alarmtype = type;
+	}
+	spin_unlock_irqrestore(&freezer_delta_lock, flags);
+}
 
 /**
  * clock2alarm - helper that converts from clockid to alarmtypes
@@ -846,6 +846,17 @@ out:
 	return ret;
 }
 
+const struct k_clock alarm_clock = {
+	.clock_getres	= alarm_clock_getres,
+	.clock_get	= alarm_clock_get,
+	.timer_create	= alarm_timer_create,
+	.timer_set	= alarm_timer_set,
+	.timer_del	= alarm_timer_del,
+	.timer_get	= alarm_timer_get,
+	.nsleep		= alarm_timer_nsleep,
+};
+#endif /* CONFIG_POSIX_TIMERS */
+
 
 /* Suspend hook structures */
 static const struct dev_pm_ops alarmtimer_pm_ops = {
@@ -871,23 +882,9 @@ static int __init alarmtimer_init(void)
 	struct platform_device *pdev;
 	int error = 0;
 	int i;
-	struct k_clock alarm_clock = {
-		.clock_getres	= alarm_clock_getres,
-		.clock_get	= alarm_clock_get,
-		.timer_create	= alarm_timer_create,
-		.timer_set	= alarm_timer_set,
-		.timer_del	= alarm_timer_del,
-		.timer_get	= alarm_timer_get,
-		.nsleep		= alarm_timer_nsleep,
-	};
 
 	alarmtimer_rtc_timer_init();
 
-	if (IS_ENABLED(CONFIG_POSIX_TIMERS)) {
-		posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
-		posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
-	}
-
 	/* Initialize alarm bases */
 	alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
 	alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real;
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 31d588d37a17..7e453005e078 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -434,7 +434,7 @@ static int pc_timer_settime(struct k_itimer *kit, int flags,
 	return err;
 }
 
-struct k_clock clock_posix_dynamic = {
+const struct k_clock clock_posix_dynamic = {
 	.clock_getres	= pc_clock_getres,
 	.clock_set	= pc_clock_settime,
 	.clock_get	= pc_clock_gettime,
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 1370f067fb51..1a522b39f19d 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1413,7 +1413,7 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
 	return posix_cpu_timer_create(timer);
 }
 
-struct k_clock clock_posix_cpu = {
+const struct k_clock clock_posix_cpu = {
 	.clock_getres	= posix_cpu_clock_getres,
 	.clock_set	= posix_cpu_clock_set,
 	.clock_get	= posix_cpu_clock_get,
@@ -1425,24 +1425,16 @@ struct k_clock clock_posix_cpu = {
 	.timer_get	= posix_cpu_timer_get,
 };
 
-static __init int init_posix_cpu_timers(void)
-{
-	struct k_clock process = {
-		.clock_getres	= process_cpu_clock_getres,
-		.clock_get	= process_cpu_clock_get,
-		.timer_create	= process_cpu_timer_create,
-		.nsleep		= process_cpu_nsleep,
-		.nsleep_restart	= process_cpu_nsleep_restart,
-	};
-	struct k_clock thread = {
-		.clock_getres	= thread_cpu_clock_getres,
-		.clock_get	= thread_cpu_clock_get,
-		.timer_create	= thread_cpu_timer_create,
-	};
-
-	posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
-	posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+const struct k_clock clock_process = {
+	.clock_getres	= process_cpu_clock_getres,
+	.clock_get	= process_cpu_clock_get,
+	.timer_create	= process_cpu_timer_create,
+	.nsleep		= process_cpu_nsleep,
+	.nsleep_restart	= process_cpu_nsleep_restart,
+};
 
-	return 0;
-}
-__initcall(init_posix_cpu_timers);
+const struct k_clock clock_thread = {
+	.clock_getres	= thread_cpu_clock_getres,
+	.clock_get	= thread_cpu_clock_get,
+	.timer_create	= thread_cpu_timer_create,
+};
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 4d7b2ce09c27..0c0cccfa3586 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -125,8 +125,6 @@ static DEFINE_SPINLOCK(hash_lock);
  *	    which we beg off on and pass to do_sys_settimeofday().
  */
 
-static struct k_clock posix_clocks[MAX_CLOCKS];
-
 /*
  * These ones are defined below.
  */
@@ -280,74 +278,87 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
 	return 0;
 }
 
+
+static const struct k_clock clock_realtime = {
+	.clock_getres	= posix_get_hrtimer_res,
+	.clock_get	= posix_clock_realtime_get,
+	.clock_set	= posix_clock_realtime_set,
+	.clock_adj	= posix_clock_realtime_adj,
+	.nsleep		= common_nsleep,
+	.nsleep_restart	= hrtimer_nanosleep_restart,
+	.timer_create	= common_timer_create,
+	.timer_set	= common_timer_set,
+	.timer_get	= common_timer_get,
+	.timer_del	= common_timer_del,
+};
+
+static const struct k_clock clock_monotonic = {
+	.clock_getres	= posix_get_hrtimer_res,
+	.clock_get	= posix_ktime_get_ts,
+	.nsleep		= common_nsleep,
+	.nsleep_restart	= hrtimer_nanosleep_restart,
+	.timer_create	= common_timer_create,
+	.timer_set	= common_timer_set,
+	.timer_get	= common_timer_get,
+	.timer_del	= common_timer_del,
+};
+
+static const struct k_clock clock_monotonic_raw = {
+	.clock_getres	= posix_get_hrtimer_res,
+	.clock_get	= posix_get_monotonic_raw,
+};
+
+static const struct k_clock clock_realtime_coarse = {
+	.clock_getres	= posix_get_coarse_res,
+	.clock_get	= posix_get_realtime_coarse,
+};
+
+static const struct k_clock clock_monotonic_coarse = {
+	.clock_getres	= posix_get_coarse_res,
+	.clock_get	= posix_get_monotonic_coarse,
+};
+
+static const struct k_clock clock_tai = {
+	.clock_getres	= posix_get_hrtimer_res,
+	.clock_get	= posix_get_tai,
+	.nsleep		= common_nsleep,
+	.nsleep_restart	= hrtimer_nanosleep_restart,
+	.timer_create	= common_timer_create,
+	.timer_set	= common_timer_set,
+	.timer_get	= common_timer_get,
+	.timer_del	= common_timer_del,
+};
+
+static const struct k_clock clock_boottime = {
+	.clock_getres	= posix_get_hrtimer_res,
+	.clock_get	= posix_get_boottime,
+	.nsleep		= common_nsleep,
+	.nsleep_restart	= hrtimer_nanosleep_restart,
+	.timer_create	= common_timer_create,
+	.timer_set	= common_timer_set,
+	.timer_get	= common_timer_get,
+	.timer_del	= common_timer_del,
+};
+
+static const struct k_clock * const posix_clocks[] = {
+	[CLOCK_REALTIME]		= &clock_realtime,
+	[CLOCK_MONOTONIC]		= &clock_monotonic,
+	[CLOCK_PROCESS_CPUTIME_ID]	= &clock_process,
+	[CLOCK_THREAD_CPUTIME_ID]	= &clock_thread,
+	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
+	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
+	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
+	[CLOCK_BOOTTIME]		= &clock_boottime,
+	[CLOCK_REALTIME_ALARM]		= &alarm_clock,
+	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,
+	[CLOCK_TAI]			= &clock_tai,
+};
+
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
 static __init int init_posix_timers(void)
 {
-	struct k_clock clock_realtime = {
-		.clock_getres	= posix_get_hrtimer_res,
-		.clock_get	= posix_clock_realtime_get,
-		.clock_set	= posix_clock_realtime_set,
-		.clock_adj	= posix_clock_realtime_adj,
-		.nsleep		= common_nsleep,
-		.nsleep_restart	= hrtimer_nanosleep_restart,
-		.timer_create	= common_timer_create,
-		.timer_set	= common_timer_set,
-		.timer_get	= common_timer_get,
-		.timer_del	= common_timer_del,
-	};
-	struct k_clock clock_monotonic = {
-		.clock_getres	= posix_get_hrtimer_res,
-		.clock_get	= posix_ktime_get_ts,
-		.nsleep		= common_nsleep,
-		.nsleep_restart	= hrtimer_nanosleep_restart,
-		.timer_create	= common_timer_create,
-		.timer_set	= common_timer_set,
-		.timer_get	= common_timer_get,
-		.timer_del	= common_timer_del,
-	};
-	struct k_clock clock_monotonic_raw = {
-		.clock_getres	= posix_get_hrtimer_res,
-		.clock_get	= posix_get_monotonic_raw,
-	};
-	struct k_clock clock_realtime_coarse = {
-		.clock_getres	= posix_get_coarse_res,
-		.clock_get	= posix_get_realtime_coarse,
-	};
-	struct k_clock clock_monotonic_coarse = {
-		.clock_getres	= posix_get_coarse_res,
-		.clock_get	= posix_get_monotonic_coarse,
-	};
-	struct k_clock clock_tai = {
-		.clock_getres	= posix_get_hrtimer_res,
-		.clock_get	= posix_get_tai,
-		.nsleep		= common_nsleep,
-		.nsleep_restart	= hrtimer_nanosleep_restart,
-		.timer_create	= common_timer_create,
-		.timer_set	= common_timer_set,
-		.timer_get	= common_timer_get,
-		.timer_del	= common_timer_del,
-	};
-	struct k_clock clock_boottime = {
-		.clock_getres	= posix_get_hrtimer_res,
-		.clock_get	= posix_get_boottime,
-		.nsleep		= common_nsleep,
-		.nsleep_restart	= hrtimer_nanosleep_restart,
-		.timer_create	= common_timer_create,
-		.timer_set	= common_timer_set,
-		.timer_get	= common_timer_get,
-		.timer_del	= common_timer_del,
-	};
-
-	posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
-	posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
-	posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
-	posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
-	posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
-	posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
-	posix_timers_register_clock(CLOCK_TAI, &clock_tai);
-
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
 					NULL);
@@ -521,30 +532,6 @@ static struct pid *good_sigevent(sigevent_t * event)
 	return task_pid(rtn);
 }
 
-void posix_timers_register_clock(const clockid_t clock_id,
-				 struct k_clock *new_clock)
-{
-	if ((unsigned) clock_id >= MAX_CLOCKS) {
-		printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
-		       clock_id);
-		return;
-	}
-
-	if (!new_clock->clock_get) {
-		printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
-		       clock_id);
-		return;
-	}
-	if (!new_clock->clock_getres) {
-		printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
-		       clock_id);
-		return;
-	}
-
-	posix_clocks[clock_id] = *new_clock;
-}
-EXPORT_SYMBOL_GPL(posix_timers_register_clock);
-
 static struct k_itimer * alloc_posix_timer(void)
 {
 	struct k_itimer *tmr;
@@ -581,15 +568,15 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
 }
 
-static struct k_clock *clockid_to_kclock(const clockid_t id)
+static const struct k_clock *clockid_to_kclock(const clockid_t id)
 {
 	if (id < 0)
 		return (id & CLOCKFD_MASK) == CLOCKFD ?
 			&clock_posix_dynamic : &clock_posix_cpu;
 
-	if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
+	if (id >= ARRAY_SIZE(posix_clocks) || !posix_clocks[id])
 		return NULL;
-	return &posix_clocks[id];
+	return posix_clocks[id];
 }
 
 static int common_timer_create(struct k_itimer *new_timer)
@@ -604,7 +591,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
 		struct sigevent __user *, timer_event_spec,
 		timer_t __user *, created_timer_id)
 {
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct k_itimer *new_timer;
 	int error, new_timer_id;
 	sigevent_t event;
@@ -781,7 +768,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	struct itimerspec64 cur_setting64;
 	struct itimerspec cur_setting;
 	struct k_itimer *timr;
-	struct k_clock *kc;
+	const struct k_clock *kc;
 	unsigned long flags;
 	int ret = 0;
 
@@ -890,7 +877,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 	struct itimerspec new_spec, old_spec;
 	struct k_itimer *timr;
 	unsigned long flag;
-	struct k_clock *kc;
+	const struct k_clock *kc;
 	int error = 0;
 
 	if (!new_setting)
@@ -939,7 +926,7 @@ static int common_timer_del(struct k_itimer *timer)
 
 static inline int timer_delete_hook(struct k_itimer *timer)
 {
-	struct k_clock *kc = clockid_to_kclock(timer->it_clock);
+	const struct k_clock *kc = clockid_to_kclock(timer->it_clock);
 
 	if (WARN_ON_ONCE(!kc || !kc->timer_del))
 		return -EINVAL;
@@ -1018,7 +1005,7 @@ void exit_itimers(struct signal_struct *sig)
 SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
 		const struct timespec __user *, tp)
 {
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 new_tp64;
 	struct timespec new_tp;
 
@@ -1035,7 +1022,7 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
 SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
 		struct timespec __user *,tp)
 {
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 kernel_tp64;
 	struct timespec kernel_tp;
 	int error;
@@ -1055,7 +1042,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 		struct timex __user *, utx)
 {
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timex ktx;
 	int err;
 
@@ -1078,7 +1065,7 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 		struct timespec __user *, tp)
 {
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 rtn_tp64;
 	struct timespec rtn_tp;
 	int error;
@@ -1110,7 +1097,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 		const struct timespec __user *, rqtp,
 		struct timespec __user *, rmtp)
 {
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 t64;
 	struct timespec t;
 
@@ -1136,7 +1123,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 long clock_nanosleep_restart(struct restart_block *restart_block)
 {
 	clockid_t which_clock = restart_block->nanosleep.clockid;
-	struct k_clock *kc = clockid_to_kclock(which_clock);
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
 
 	if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
 		return -EINVAL;
-- 
cgit v1.2.3


From a75d30c772078546ac00399a94ecdc82df1a4d72 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 27 May 2017 06:07:19 -0400
Subject: fs/locks: pass kernel struct flock to fcntl_getlk/setlk

This will make it easier to implement a sane compat fcntl syscall.

[ jlayton: fix undeclared identifiers in 32-bit fcntl64 syscall handler ]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c         | 27 +++++++++++++++----
 fs/locks.c         | 79 +++++++++++++++---------------------------------------
 include/linux/fs.h |  8 +++---
 3 files changed, 48 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..ed4283d500a3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -246,6 +246,8 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
+	void __user *argp = (void __user *)arg;
+	struct flock flock;
 	long err = -EINVAL;
 
 	switch (cmd) {
@@ -273,7 +275,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_OFD_GETLK:
 #endif
 	case F_GETLK:
-		err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			return -EFAULT;
+		err = fcntl_getlk(filp, cmd, &flock);
+		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
+			return -EFAULT;
 		break;
 #if BITS_PER_LONG != 32
 	/* 32-bit arches must use fcntl64() */
@@ -283,7 +289,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		/* Fallthrough */
 	case F_SETLK:
 	case F_SETLKW:
-		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			return -EFAULT;
+		err = fcntl_setlk(fd, filp, cmd, &flock);
 		break;
 	case F_GETOWN:
 		/*
@@ -383,7 +391,9 @@ out:
 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 		unsigned long, arg)
 {	
+	void __user *argp = (void __user *)arg;
 	struct fd f = fdget_raw(fd);
+	struct flock64 flock;
 	long err = -EBADF;
 
 	if (!f.file)
@@ -401,14 +411,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 	switch (cmd) {
 	case F_GETLK64:
 	case F_OFD_GETLK:
-		err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
+		err = -EFAULT;
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			break;
+		err = fcntl_getlk64(f.file, cmd, &flock);
+		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
+			err = -EFAULT;
 		break;
 	case F_SETLK64:
 	case F_SETLKW64:
 	case F_OFD_SETLK:
 	case F_OFD_SETLKW:
-		err = fcntl_setlk64(fd, f.file, cmd,
-				(struct flock64 __user *) arg);
+		err = -EFAULT;
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			break;
+		err = fcntl_setlk64(fd, f.file, cmd, &flock);
 		break;
 	default:
 		err = do_fcntl(fd, cmd, arg, f.file);
diff --git a/fs/locks.c b/fs/locks.c
index 4a4543a7f9c1..afefeb4ad6de 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2084,26 +2084,22 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
-int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
+int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
 {
 	struct file_lock file_lock;
-	struct flock flock;
 	int error;
 
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
 	error = -EINVAL;
-	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
 		goto out;
 
-	error = flock_to_posix_lock(filp, &file_lock, &flock);
+	error = flock_to_posix_lock(filp, &file_lock, flock);
 	if (error)
 		goto out;
 
 	if (cmd == F_OFD_GETLK) {
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_GETLK;
@@ -2115,15 +2111,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 	if (error)
 		goto out;
  
-	flock.l_type = file_lock.fl_type;
+	flock->l_type = file_lock.fl_type;
 	if (file_lock.fl_type != F_UNLCK) {
-		error = posix_lock_to_flock(&flock, &file_lock);
+		error = posix_lock_to_flock(flock, &file_lock);
 		if (error)
 			goto rel_priv;
 	}
-	error = -EFAULT;
-	if (!copy_to_user(l, &flock, sizeof(flock)))
-		error = 0;
 rel_priv:
 	locks_release_private(&file_lock);
 out:
@@ -2216,26 +2209,16 @@ check_fmode_for_setlk(struct file_lock *fl)
  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
-		struct flock __user *l)
+		struct flock *flock)
 {
 	struct file_lock *file_lock = locks_alloc_lock();
-	struct flock flock;
-	struct inode *inode;
+	struct inode *inode = locks_inode(filp);
 	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
 		return -ENOLCK;
 
-	inode = locks_inode(filp);
-
-	/*
-	 * This might block, so we do it before checking the inode.
-	 */
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2244,7 +2227,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-	error = flock_to_posix_lock(filp, file_lock, &flock);
+	error = flock_to_posix_lock(filp, file_lock, flock);
 	if (error)
 		goto out;
 
@@ -2259,7 +2242,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case F_OFD_SETLK:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLK;
@@ -2268,7 +2251,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		break;
 	case F_OFD_SETLKW:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLKW;
@@ -2313,26 +2296,22 @@ out:
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
-int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
+int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
 {
 	struct file_lock file_lock;
-	struct flock64 flock;
 	int error;
 
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
 	error = -EINVAL;
-	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
 		goto out;
 
-	error = flock64_to_posix_lock(filp, &file_lock, &flock);
+	error = flock64_to_posix_lock(filp, &file_lock, flock);
 	if (error)
 		goto out;
 
 	if (cmd == F_OFD_GETLK) {
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_GETLK64;
@@ -2344,13 +2323,9 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 	if (error)
 		goto out;
 
-	flock.l_type = file_lock.fl_type;
+	flock->l_type = file_lock.fl_type;
 	if (file_lock.fl_type != F_UNLCK)
-		posix_lock_to_flock64(&flock, &file_lock);
-
-	error = -EFAULT;
-	if (!copy_to_user(l, &flock, sizeof(flock)))
-		error = 0;
+		posix_lock_to_flock64(flock, &file_lock);
 
 	locks_release_private(&file_lock);
 out:
@@ -2361,26 +2336,16 @@ out:
  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
-		struct flock64 __user *l)
+		struct flock64 *flock)
 {
 	struct file_lock *file_lock = locks_alloc_lock();
-	struct flock64 flock;
-	struct inode *inode;
+	struct inode *inode = locks_inode(filp);
 	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
 		return -ENOLCK;
 
-	/*
-	 * This might block, so we do it before checking the inode.
-	 */
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
-
-	inode = locks_inode(filp);
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2389,7 +2354,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-	error = flock64_to_posix_lock(filp, file_lock, &flock);
+	error = flock64_to_posix_lock(filp, file_lock, flock);
 	if (error)
 		goto out;
 
@@ -2404,7 +2369,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case F_OFD_SETLK:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLK64;
@@ -2413,7 +2378,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		break;
 	case F_OFD_SETLKW:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLKW64;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..aa4affb38c39 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1038,14 +1038,14 @@ static inline struct inode *locks_inode(const struct file *f)
 }
 
 #ifdef CONFIG_FILE_LOCKING
-extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
+extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
-			struct flock __user *);
+			struct flock *);
 
 #if BITS_PER_LONG == 32
-extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
+extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *);
 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
-			struct flock64 __user *);
+			struct flock64 *);
 #endif
 
 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
-- 
cgit v1.2.3


From f822798e3ced63427d57d128ee8d118126455f84 Mon Sep 17 00:00:00 2001
From: Dimitri Sivanich <sivanich@hpe.com>
Date: Fri, 26 May 2017 23:30:52 +0200
Subject: posix-timers: Remove mmtimer leftovers

After removing mmtimer, the mmtimer struct can be removed from the k_itimer
struct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Russ Anderson <rja@sgi.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Mike Travis <mike.travis@hpe.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170526130534.GE30788@hpe.com
---
 include/linux/posix-timers.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index b313ef2e7385..34e893a75771 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -72,12 +72,6 @@ struct k_itimer {
 			ktime_t interval;
 		} real;
 		struct cpu_timer_list cpu;
-		struct {
-			unsigned int clock;
-			unsigned int node;
-			unsigned long incr;
-			unsigned long expires;
-		} mmtimer;
 		struct {
 			struct alarm alarmtimer;
 			ktime_t interval;
-- 
cgit v1.2.3


From 613763a1f056211522bac77ff39f25706e678fdd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 May 2017 22:04:29 -0400
Subject: take compat_sys_old_getrlimit() to native syscall

... and sanitize the ifdefs in there

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/include/asm/compat.h |  1 -
 arch/s390/include/asm/compat.h    |  1 -
 arch/x86/include/asm/compat.h     |  1 -
 include/linux/syscalls.h          |  2 +-
 kernel/compat.c                   | 29 -----------------------------
 kernel/sys.c                      | 24 ++++++++++++++++++++++++
 6 files changed, 25 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 4f2df589ec1d..f256e1d14a14 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -109,7 +109,6 @@ struct compat_statfs {
 	int		f_spare[4];
 };
 
-#define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
 #define COMPAT_RLIM_INFINITY		0xffffffff
 
 typedef u32		compat_old_sigset_t;
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 0ddd37e6c29d..b9300f8aee10 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -178,7 +178,6 @@ struct compat_statfs64 {
 	u32		f_spare[4];
 };
 
-#define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
 #define COMPAT_RLIM_INFINITY		0xffffffff
 
 typedef u32		compat_old_sigset_t;	/* at least 32 bits */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 24118c0b4640..5343c19814b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -116,7 +116,6 @@ struct compat_statfs {
 	int		f_spare[4];
 };
 
-#define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
 #define COMPAT_RLIM_INFINITY		0xffffffff
 
 typedef u32		compat_old_sigset_t;	/* at least 32 bits */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 980c3c9b06f8..3cb15ea48aee 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -650,7 +650,7 @@ asmlinkage long sys_olduname(struct oldold_utsname __user *);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
-#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
+#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
diff --git a/kernel/compat.c b/kernel/compat.c
index 933bcb31ae10..860f674fa556 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -468,35 +468,6 @@ COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource,
 	return do_prlimit(current, resource, &r, NULL);
 }
 
-#ifdef COMPAT_RLIM_OLD_INFINITY
-
-COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
-		       struct compat_rlimit __user *, rlim)
-{
-	struct rlimit r;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_old_getrlimit(resource, (struct rlimit __user *)&r);
-	set_fs(old_fs);
-
-	if (!ret) {
-		if (r.rlim_cur > COMPAT_RLIM_OLD_INFINITY)
-			r.rlim_cur = COMPAT_RLIM_INFINITY;
-		if (r.rlim_max > COMPAT_RLIM_OLD_INFINITY)
-			r.rlim_max = COMPAT_RLIM_INFINITY;
-
-		if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) ||
-		    __put_user(r.rlim_cur, &rlim->rlim_cur) ||
-		    __put_user(r.rlim_max, &rlim->rlim_max))
-			return -EFAULT;
-	}
-	return ret;
-}
-
-#endif
-
 COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
 		       struct compat_rlimit __user *, rlim)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index 8a94b4eabcaa..3778a8a417b6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1328,6 +1328,30 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
+		       struct compat_rlimit __user *, rlim)
+{
+	struct rlimit r;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+
+	task_lock(current->group_leader);
+	r = current->signal->rlim[resource];
+	task_unlock(current->group_leader);
+	if (r.rlim_cur > 0x7FFFFFFF)
+		r.rlim_cur = 0x7FFFFFFF;
+	if (r.rlim_max > 0x7FFFFFFF)
+		r.rlim_max = 0x7FFFFFFF;
+
+	if (put_user(r.rlim_cur, &rlim->rlim_cur) ||
+	    put_user(r.rlim_max, &rlim->rlim_max))
+		return -EFAULT;
+	return 0;
+}
+#endif
+
 #endif
 
 static inline bool rlim64_is_infinity(__u64 rlim64)
-- 
cgit v1.2.3


From 3d3ea5af5c0b382bc9d9aed378fd814fb5d4a011 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Sat, 27 May 2017 10:14:34 -0400
Subject: rtnl: Add support for netdev event to link messages

When netdev events happen, a rtnetlink_event() handler will send
messages for every event in it's white list.  These messages contain
current information about a particular device, but they do not include
the iformation about which event just happened.  So, it is impossible
to tell what just happend for these events.

This patch adds a new extension to RTM_NEWLINK message called IFLA_EVENT
that would have an encoding of event that triggered this
message.  This would allow the the message consumer to easily determine
if it needs to perform certain actions.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h    |  3 +-
 include/uapi/linux/if_link.h | 11 ++++++++
 net/core/dev.c               |  2 +-
 net/core/rtnetlink.c         | 65 ++++++++++++++++++++++++++++++++++++++------
 4 files changed, 70 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 57e54847b0b9..dea59c8eec54 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -18,7 +18,8 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
 struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
-				       unsigned change, gfp_t flags);
+				       unsigned change, u32 event,
+				       gfp_t flags);
 void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
 		       gfp_t flags);
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 15ac20382aba..8ed679fe603f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -157,6 +157,7 @@ enum {
 	IFLA_GSO_MAX_SIZE,
 	IFLA_PAD,
 	IFLA_XDP,
+	IFLA_EVENT,
 	__IFLA_MAX
 };
 
@@ -911,4 +912,14 @@ enum {
 
 #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
 
+enum {
+	IFLA_EVENT_NONE,
+	IFLA_EVENT_REBOOT,		/* internal reset / reboot */
+	IFLA_EVENT_FEATURES,		/* change in offload features */
+	IFLA_EVENT_BONDING_FAILOVER,	/* change in active slave */
+	IFLA_EVENT_NOTIFY_PEERS,	/* re-sent grat. arp/ndisc */
+	IFLA_EVENT_IGMP_RESEND,		/* re-sent IGMP JOIN */
+	IFLA_EVENT_BONDING_OPTIONS,	/* change in bonding options */
+};
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 3d98fbf4cbb0..06e0a7492df8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7084,7 +7084,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
+			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 						     GFP_KERNEL);
 
 		/*
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 64953af4a3b1..9da53e43750c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -941,6 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + rtnl_xdp_size() /* IFLA_XDP */
+	       + nla_total_size(4)  /* IFLA_EVENT */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
 
 }
@@ -1282,9 +1283,40 @@ err_cancel:
 	return err;
 }
 
+static u32 rtnl_get_event(unsigned long event)
+{
+	u32 rtnl_event_type = IFLA_EVENT_NONE;
+
+	switch (event) {
+	case NETDEV_REBOOT:
+		rtnl_event_type = IFLA_EVENT_REBOOT;
+		break;
+	case NETDEV_FEAT_CHANGE:
+		rtnl_event_type = IFLA_EVENT_FEATURES;
+		break;
+	case NETDEV_BONDING_FAILOVER:
+		rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER;
+		break;
+	case NETDEV_NOTIFY_PEERS:
+		rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS;
+		break;
+	case NETDEV_RESEND_IGMP:
+		rtnl_event_type = IFLA_EVENT_IGMP_RESEND;
+		break;
+	case NETDEV_CHANGEINFODATA:
+		rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS;
+		break;
+	default:
+		break;
+	}
+
+	return rtnl_event_type;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
-			    unsigned int flags, u32 ext_filter_mask)
+			    unsigned int flags, u32 ext_filter_mask,
+			    u32 event)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
@@ -1333,6 +1365,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 		goto nla_put_failure;
 
+	if (event != IFLA_EVENT_NONE) {
+		if (nla_put_u32(skb, IFLA_EVENT, event))
+			goto nla_put_failure;
+	}
+
 	if (rtnl_fill_link_ifmap(skb, dev))
 		goto nla_put_failure;
 
@@ -1467,6 +1504,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINK_NETNSID]	= { .type = NLA_S32 },
 	[IFLA_PROTO_DOWN]	= { .type = NLA_U8 },
 	[IFLA_XDP]		= { .type = NLA_NESTED },
+	[IFLA_EVENT]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1626,7 +1664,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 					       NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, 0,
 					       flags,
-					       ext_filter_mask);
+					       ext_filter_mask, 0);
 
 			if (err < 0) {
 				if (likely(skb->len))
@@ -2736,7 +2774,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -ENOBUFS;
 
 	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
-			       nlh->nlmsg_seq, 0, 0, ext_filter_mask);
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
@@ -2808,7 +2846,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 }
 
 struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
-				       unsigned int change, gfp_t flags)
+				       unsigned int change,
+				       u32 event, gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
@@ -2819,7 +2858,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 	if (skb == NULL)
 		goto errout;
 
-	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
+	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -2840,18 +2879,25 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
 	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
-		  gfp_t flags)
+static void rtmsg_ifinfo_event(int type, struct net_device *dev,
+			       unsigned int change, u32 event,
+			       gfp_t flags)
 {
 	struct sk_buff *skb;
 
 	if (dev->reg_state != NETREG_REGISTERED)
 		return;
 
-	skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
+	skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
 	if (skb)
 		rtmsg_ifinfo_send(skb, dev, flags);
 }
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		  gfp_t flags)
+{
+	rtmsg_ifinfo_event(type, dev, change, IFLA_EVENT_NONE, flags);
+}
 EXPORT_SYMBOL(rtmsg_ifinfo);
 
 static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
@@ -4168,7 +4214,8 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_RESEND_IGMP:
 	case NETDEV_CHANGEINFODATA:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
+		rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
+				   GFP_KERNEL);
 		break;
 	default:
 		break;
-- 
cgit v1.2.3


From a3995460491d4570af8e99ad34ddf6d1948254d9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 27 May 2017 10:42:25 -0700
Subject: net: phy: Relax error checking on sysfs_create_link()

Some Ethernet drivers will attach/connect to a PHY device before calling
register_netdevice() which is responsible for calling netdev_register_kobject()
which would do the network device's kobject initialization. In such a case,
sysfs_create_link() would return -ENOENT because the network device's kobject
is not ready yet, and we would fail to connect to the PHY device.

In order to keep things simple and symetrical, we just take the success path as
indicative of the ability to access the network device's kobject, and create
the second link if that's the case.

Fixes: 5568363f0cb3 ("net: phy: Create sysfs reciprocal links for attached_dev/phydev")
Reported-by: Woojung Hung <Woojung.Huh@microchip.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 30 ++++++++++++++++++++++--------
 include/linux/phy.h          |  2 ++
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index f84414b8f2ee..37a1e98908e3 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -960,15 +960,27 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 
 	phydev->attached_dev = dev;
 	dev->phydev = phydev;
+
+	/* Some Ethernet drivers try to connect to a PHY device before
+	 * calling register_netdevice() -> netdev_register_kobject() and
+	 * does the dev->dev.kobj initialization. Here we only check for
+	 * success which indicates that the network device kobject is
+	 * ready. Once we do that we still need to keep track of whether
+	 * links were successfully set up or not for phy_detach() to
+	 * remove them accordingly.
+	 */
+	phydev->sysfs_links = false;
+
 	err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
 				"attached_dev");
-	if (err)
-		goto error;
+	if (!err) {
+		err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj,
+					"phydev");
+		if (err)
+			goto error;
 
-	err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj,
-				"phydev");
-	if (err)
-		goto error;
+		phydev->sysfs_links = true;
+	}
 
 	phydev->dev_flags = flags;
 
@@ -1059,8 +1071,10 @@ void phy_detach(struct phy_device *phydev)
 	struct mii_bus *bus;
 	int i;
 
-	sysfs_remove_link(&dev->dev.kobj, "phydev");
-	sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev");
+	if (phydev->sysfs_links) {
+		sysfs_remove_link(&dev->dev.kobj, "phydev");
+		sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev");
+	}
 	phydev->attached_dev->phydev = NULL;
 	phydev->attached_dev = NULL;
 	phy_suspend(phydev);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5a808a26e4cf..58f1b45a4c44 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -363,6 +363,7 @@ struct phy_c45_device_ids {
  * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
  * has_fixups: Set to true if this phy has fixups/quirks.
  * suspended: Set to true if this phy has been suspended successfully.
+ * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
  * state: state of the PHY for management purposes
  * dev_flags: Device-specific flags used by the PHY driver.
  * link_timeout: The number of timer firings to wait before the
@@ -399,6 +400,7 @@ struct phy_device {
 	bool is_pseudo_fixed_link;
 	bool has_fixups;
 	bool suspended;
+	bool sysfs_links;
 
 	enum phy_state state;
 
-- 
cgit v1.2.3


From 3e08b2df12983159ea2d9a1aa2b2bc601093b3cb Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:12 +0530
Subject: thermal: cpu_cooling: remove cpufreq_cooling_get_level()

There is only one user of cpufreq_cooling_get_level() and that already
has pointer to the cpufreq_cdev structure. It can directly call
get_level() instead and we can get rid of cpufreq_cooling_get_level().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 33 +--------------------------------
 include/linux/cpu_cooling.h   |  6 ------
 2 files changed, 1 insertion(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index f1e784c22c5a..1f4b6a719d05 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -135,37 +135,6 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 	return THERMAL_CSTATE_INVALID;
 }
 
-/**
- * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
- * @cpu: cpu for which the level is required
- * @freq: the frequency of interest
- *
- * This function will match the cooling level corresponding to the
- * requested @freq and return it.
- *
- * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
- * otherwise.
- */
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
-	struct cpufreq_cooling_device *cpufreq_cdev;
-
-	mutex_lock(&cooling_list_lock);
-	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
-		if (cpumask_test_cpu(cpu, &cpufreq_cdev->allowed_cpus)) {
-			unsigned long level = get_level(cpufreq_cdev, freq);
-
-			mutex_unlock(&cooling_list_lock);
-			return level;
-		}
-	}
-	mutex_unlock(&cooling_list_lock);
-
-	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
-	return THERMAL_CSTATE_INVALID;
-}
-EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
-
 /**
  * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  * @nb:	struct notifier_block * with callback info.
@@ -697,7 +666,7 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 	normalised_power = (dyn_power * 100) / last_load;
 	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
 
-	*state = cpufreq_cooling_get_level(cpu, target_freq);
+	*state = get_level(cpufreq_cdev, target_freq);
 	if (*state == THERMAL_CSTATE_INVALID) {
 		dev_err_ratelimited(&cdev->device,
 				    "Failed to convert %dKHz for cpu %d into a cdev state\n",
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index c156f5082758..96c5e4c2f9c8 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -82,7 +82,6 @@ of_cpufreq_power_cooling_register(struct device_node *np,
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
 
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq);
 #else /* !CONFIG_CPU_THERMAL */
 static inline struct thermal_cooling_device *
 cpufreq_cooling_register(const struct cpumask *clip_cpus)
@@ -117,11 +116,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
 	return;
 }
-static inline
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
-	return THERMAL_CSTATE_INVALID;
-}
 #endif	/* CONFIG_CPU_THERMAL */
 
 #endif /* __CPU_COOLING_H__ */
-- 
cgit v1.2.3


From 4d753aa7b6279e4b7d338947a434689962f430d1 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:14 +0530
Subject: thermal: cpu_cooling: use cpufreq_policy to register cooling device

The CPU cooling driver uses the cpufreq policy, to get clip_cpus, the
frequency table, etc. Most of the callers of CPU cooling driver's
registration routines have the cpufreq policy with them, but they only
pass the policy->related_cpus cpumask. The __cpufreq_cooling_register()
routine then gets the policy by itself and uses it.

It would be much better if the callers can pass the policy instead
directly. This also fixes a basic design flaw, where the policy can be
freed while the CPU cooling driver is still active.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/cpufreq/arm_big_little.c                   |  2 +-
 drivers/cpufreq/cpufreq-dt.c                       |  2 +-
 drivers/cpufreq/dbx500-cpufreq.c                   |  2 +-
 drivers/cpufreq/mt8173-cpufreq.c                   |  4 +-
 drivers/cpufreq/qoriq-cpufreq.c                    |  3 +-
 drivers/thermal/cpu_cooling.c                      | 61 ++++++++--------------
 drivers/thermal/imx_thermal.c                      | 22 ++++++--
 drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 22 +++++---
 include/linux/cpu_cooling.h                        | 26 ++++-----
 9 files changed, 74 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 418042201e6d..ea6d62547b10 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -540,7 +540,7 @@ static void bL_cpufreq_ready(struct cpufreq_policy *policy)
 				     &power_coefficient);
 
 		cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
-				policy->related_cpus, power_coefficient, NULL);
+				policy, power_coefficient, NULL);
 		if (IS_ERR(cdev[cur_cluster])) {
 			dev_err(cpu_dev,
 				"running cpufreq without cooling device: %ld\n",
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index c943787d761e..fef3c2160691 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -326,7 +326,7 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
 				     &power_coefficient);
 
 		priv->cdev = of_cpufreq_power_cooling_register(np,
-				policy->related_cpus, power_coefficient, NULL);
+				policy, power_coefficient, NULL);
 		if (IS_ERR(priv->cdev)) {
 			dev_err(priv->cpu_dev,
 				"running cpufreq without cooling device: %ld\n",
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 3575b82210ba..4ee0431579c1 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -43,7 +43,7 @@ static int dbx500_cpufreq_exit(struct cpufreq_policy *policy)
 
 static void dbx500_cpufreq_ready(struct cpufreq_policy *policy)
 {
-	cdev = cpufreq_cooling_register(policy->cpus);
+	cdev = cpufreq_cooling_register(policy);
 	if (IS_ERR(cdev))
 		pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev));
 	else
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index fd1886faf33a..f9f00fb4bc3a 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -320,9 +320,7 @@ static void mtk_cpufreq_ready(struct cpufreq_policy *policy)
 		of_property_read_u32(np, DYNAMIC_POWER, &capacitance);
 
 		info->cdev = of_cpufreq_power_cooling_register(np,
-						policy->related_cpus,
-						capacitance,
-						NULL);
+						policy, capacitance, NULL);
 
 		if (IS_ERR(info->cdev)) {
 			dev_err(info->cpu_dev,
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index e2ea433a5f9c..4ada55b8856e 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -278,8 +278,7 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
 	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
 
 	if (of_find_property(np, "#cooling-cells", NULL)) {
-		cpud->cdev = of_cpufreq_cooling_register(np,
-							 policy->related_cpus);
+		cpud->cdev = of_cpufreq_cooling_register(np, policy);
 
 		if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
 			pr_err("cpu%d is not running as cooling device: %ld\n",
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 002b48dc6bea..58e58065b650 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -717,7 +717,7 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
 /**
  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  * @np: a valid struct device_node to the cooling device device tree node
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  * Normally this should be same as cpufreq policy->related_cpus.
  * @capacitance: dynamic power coefficient for these cpus
  * @plat_static_func: function to calculate the static power consumed by these
@@ -733,45 +733,34 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  */
 static struct thermal_cooling_device *
 __cpufreq_cooling_register(struct device_node *np,
-			const struct cpumask *clip_cpus, u32 capacitance,
+			struct cpufreq_policy *policy, u32 capacitance,
 			get_static_t plat_static_func)
 {
-	struct cpufreq_policy *policy;
 	struct thermal_cooling_device *cdev;
 	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
 	struct cpufreq_frequency_table *pos, *table;
-	cpumask_var_t temp_mask;
 	unsigned int freq, i, num_cpus;
 	int ret;
 	struct thermal_cooling_device_ops *cooling_ops;
 	bool first;
 
-	if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL))
-		return ERR_PTR(-ENOMEM);
-
-	cpumask_and(temp_mask, clip_cpus, cpu_online_mask);
-	policy = cpufreq_cpu_get(cpumask_first(temp_mask));
-	if (!policy) {
-		pr_debug("%s: CPUFreq policy not found\n", __func__);
-		cdev = ERR_PTR(-EPROBE_DEFER);
-		goto free_cpumask;
+	if (IS_ERR_OR_NULL(policy)) {
+		pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
+		return ERR_PTR(-EINVAL);
 	}
 
 	table = policy->freq_table;
 	if (!table) {
 		pr_debug("%s: CPUFreq table not found\n", __func__);
-		cdev = ERR_PTR(-ENODEV);
-		goto put_policy;
+		return ERR_PTR(-ENODEV);
 	}
 
 	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
-	if (!cpufreq_cdev) {
-		cdev = ERR_PTR(-ENOMEM);
-		goto put_policy;
-	}
+	if (!cpufreq_cdev)
+		return ERR_PTR(-ENOMEM);
 
-	num_cpus = cpumask_weight(clip_cpus);
+	num_cpus = cpumask_weight(policy->related_cpus);
 	cpufreq_cdev->time_in_idle = kcalloc(num_cpus,
 					    sizeof(*cpufreq_cdev->time_in_idle),
 					    GFP_KERNEL);
@@ -802,7 +791,7 @@ __cpufreq_cooling_register(struct device_node *np,
 	/* max_level is an index, not a counter */
 	cpufreq_cdev->max_level--;
 
-	cpumask_copy(&cpufreq_cdev->allowed_cpus, clip_cpus);
+	cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus);
 
 	if (capacitance) {
 		cpufreq_cdev->plat_get_static_power = plat_static_func;
@@ -858,7 +847,7 @@ __cpufreq_cooling_register(struct device_node *np,
 		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
 					  CPUFREQ_POLICY_NOTIFIER);
 
-	goto put_policy;
+	return cdev;
 
 remove_ida:
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
@@ -872,16 +861,12 @@ free_time_in_idle:
 	kfree(cpufreq_cdev->time_in_idle);
 free_cdev:
 	kfree(cpufreq_cdev);
-put_policy:
-	cpufreq_cpu_put(policy);
-free_cpumask:
-	free_cpumask_var(temp_mask);
 	return cdev;
 }
 
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -891,16 +876,16 @@ free_cpumask:
  * on failure, it returns a corresponding ERR_PTR().
  */
 struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
+	return __cpufreq_cooling_register(NULL, policy, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
 
 /**
  * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  * @np: a valid struct device_node to the cooling device device tree node
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -912,18 +897,18 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  */
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
+	return __cpufreq_cooling_register(np, policy, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
 
 /**
  * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
- * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @policy:		cpufreq policy
  * @capacitance:	dynamic power coefficient for these cpus
  * @plat_static_func:	function to calculate the static power consumed by these
  *			cpus (optional)
@@ -943,10 +928,10 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  * on failure, it returns a corresponding ERR_PTR().
  */
 struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance,
 			       get_static_t plat_static_func)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+	return __cpufreq_cooling_register(NULL, policy, capacitance,
 				plat_static_func);
 }
 EXPORT_SYMBOL(cpufreq_power_cooling_register);
@@ -954,7 +939,7 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register);
 /**
  * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  * @np:	a valid struct device_node to the cooling device device tree node
- * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy
  * @capacitance:	dynamic power coefficient for these cpus
  * @plat_static_func:	function to calculate the static power consumed by these
  *			cpus (optional)
@@ -976,14 +961,14 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register);
  */
 struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+	return __cpufreq_cooling_register(np, policy, capacitance,
 				plat_static_func);
 }
 EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index fb648a45754e..f7ec39f46ee4 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/cpufreq.h>
 #include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -88,6 +89,7 @@ static struct thermal_soc_data thermal_imx6sx_data = {
 };
 
 struct imx_thermal_data {
+	struct cpufreq_policy *policy;
 	struct thermal_zone_device *tz;
 	struct thermal_cooling_device *cdev;
 	enum thermal_device_mode mode;
@@ -525,13 +527,18 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
 
-	data->cdev = cpufreq_cooling_register(cpu_present_mask);
+	data->policy = cpufreq_cpu_get(0);
+	if (!data->policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return -EPROBE_DEFER;
+	}
+
+	data->cdev = cpufreq_cooling_register(data->policy);
 	if (IS_ERR(data->cdev)) {
 		ret = PTR_ERR(data->cdev);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"failed to register cpufreq cooling device: %d\n",
-				ret);
+		dev_err(&pdev->dev,
+			"failed to register cpufreq cooling device: %d\n", ret);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -542,6 +549,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev,
 				"failed to get thermal clk: %d\n", ret);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -556,6 +564,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -571,6 +580,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			"failed to register thermal zone device %d\n", ret);
 		clk_disable_unprepare(data->thermal_clk);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -599,6 +609,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 		clk_disable_unprepare(data->thermal_clk);
 		thermal_zone_device_unregister(data->tz);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -620,6 +631,7 @@ static int imx_thermal_remove(struct platform_device *pdev)
 
 	thermal_zone_device_unregister(data->tz);
 	cpufreq_cooling_unregister(data->cdev);
+	cpufreq_cpu_put(data->policy);
 
 	return 0;
 }
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 02790f69e26c..c211a8e4a210 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -28,6 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/workqueue.h>
 #include <linux/thermal.h>
+#include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
 #include <linux/of.h>
@@ -37,6 +38,7 @@
 
 /* common data structures */
 struct ti_thermal_data {
+	struct cpufreq_policy *policy;
 	struct thermal_zone_device *ti_thermal;
 	struct thermal_zone_device *pcb_tz;
 	struct thermal_cooling_device *cool_dev;
@@ -247,15 +249,19 @@ int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
 	if (!data)
 		return -EINVAL;
 
+	data->policy = cpufreq_cpu_get(0);
+	if (!data->policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return -EPROBE_DEFER;
+	}
+
 	/* Register cooling device */
-	data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
+	data->cool_dev = cpufreq_cooling_register(data->policy);
 	if (IS_ERR(data->cool_dev)) {
 		int ret = PTR_ERR(data->cool_dev);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(bgp->dev,
-				"Failed to register cpu cooling device %d\n",
-				ret);
+		dev_err(bgp->dev, "Failed to register cpu cooling device %d\n",
+			ret);
+		cpufreq_cpu_put(data->policy);
 
 		return ret;
 	}
@@ -270,8 +276,10 @@ int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id)
 
 	data = ti_bandgap_get_sensor_data(bgp, id);
 
-	if (data)
+	if (data) {
 		cpufreq_cooling_unregister(data->cool_dev);
+		cpufreq_cpu_put(data->policy);
+	}
 
 	return 0;
 }
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index 96c5e4c2f9c8..d4292ebc5c8b 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -28,47 +28,49 @@
 #include <linux/thermal.h>
 #include <linux/cpumask.h>
 
+struct cpufreq_policy;
+
 typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
 			    unsigned long voltage, u32 *power);
 
 #ifdef CONFIG_CPU_THERMAL
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
  */
 struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus);
+cpufreq_cooling_register(struct cpufreq_policy *policy);
 
 struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
 			       u32 capacitance, get_static_t plat_static_func);
 
 /**
  * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
  * @np: a valid struct device_node to the cooling device device tree node.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
  */
 #ifdef CONFIG_THERMAL_OF
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus);
+			    struct cpufreq_policy *policy);
 
 struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func);
 #else
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
@@ -84,12 +86,12 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
 
 #else /* !CONFIG_CPU_THERMAL */
 static inline struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 static inline struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
 			       u32 capacitance, get_static_t plat_static_func)
 {
 	return NULL;
@@ -97,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
 
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
-- 
cgit v1.2.3


From 55d852931319d2e3ccde86cd426405231ce6c6ac Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:15 +0530
Subject: cpufreq: create cpufreq_table_count_valid_entries()

We need such a routine at two places already, lets create one.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/cpufreq/cpufreq_stats.c | 13 ++++---------
 drivers/thermal/cpu_cooling.c   | 22 +++++++++-------------
 include/linux/cpufreq.h         | 14 ++++++++++++++
 3 files changed, 27 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index f570ead62454..9c3d319dc129 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -170,11 +170,10 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	unsigned int i = 0, count = 0, ret = -ENOMEM;
 	struct cpufreq_stats *stats;
 	unsigned int alloc_size;
-	struct cpufreq_frequency_table *pos, *table;
+	struct cpufreq_frequency_table *pos;
 
-	/* We need cpufreq table for creating stats table */
-	table = policy->freq_table;
-	if (unlikely(!table))
+	count = cpufreq_table_count_valid_entries(policy);
+	if (!count)
 		return;
 
 	/* stats already initialized */
@@ -185,10 +184,6 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	if (!stats)
 		return;
 
-	/* Find total allocation size */
-	cpufreq_for_each_valid_entry(pos, table)
-		count++;
-
 	alloc_size = count * sizeof(int) + count * sizeof(u64);
 
 	alloc_size += count * count * sizeof(int);
@@ -205,7 +200,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	stats->max_state = count;
 
 	/* Find valid-unique entries */
-	cpufreq_for_each_valid_entry(pos, table)
+	cpufreq_for_each_valid_entry(pos, policy->freq_table)
 		if (freq_table_get_index(stats, pos->frequency) == -1)
 			stats->freq_table[i++] = pos->frequency;
 
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 58e58065b650..55ff45c1e917 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -739,7 +739,6 @@ __cpufreq_cooling_register(struct device_node *np,
 	struct thermal_cooling_device *cdev;
 	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
-	struct cpufreq_frequency_table *pos, *table;
 	unsigned int freq, i, num_cpus;
 	int ret;
 	struct thermal_cooling_device_ops *cooling_ops;
@@ -750,9 +749,10 @@ __cpufreq_cooling_register(struct device_node *np,
 		return ERR_PTR(-EINVAL);
 	}
 
-	table = policy->freq_table;
-	if (!table) {
-		pr_debug("%s: CPUFreq table not found\n", __func__);
+	i = cpufreq_table_count_valid_entries(policy);
+	if (!i) {
+		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
+			 __func__);
 		return ERR_PTR(-ENODEV);
 	}
 
@@ -777,20 +777,16 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_time_in_idle;
 	}
 
-	/* Find max levels */
-	cpufreq_for_each_valid_entry(pos, table)
-		cpufreq_cdev->max_level++;
+	/* max_level is an index, not a counter */
+	cpufreq_cdev->max_level = i - 1;
 
-	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) *
-					  cpufreq_cdev->max_level, GFP_KERNEL);
+	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i,
+					  GFP_KERNEL);
 	if (!cpufreq_cdev->freq_table) {
 		cdev = ERR_PTR(-ENOMEM);
 		goto free_time_in_idle_timestamp;
 	}
 
-	/* max_level is an index, not a counter */
-	cpufreq_cdev->max_level--;
-
 	cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus);
 
 	if (capacitance) {
@@ -816,7 +812,7 @@ __cpufreq_cooling_register(struct device_node *np,
 
 	/* Fill freq-table in descending order of frequencies */
 	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
-		freq = find_next_max(table, freq);
+		freq = find_next_max(policy->freq_table, freq);
 		cpufreq_cdev->freq_table[i] = freq;
 
 		/* Warn for duplicate entries */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a5ce0bbeadb5..eb9abfadaeac 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 		return -EINVAL;
 	}
 }
+
+static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *pos;
+	int count = 0;
+
+	if (unlikely(!policy->freq_table))
+		return 0;
+
+	cpufreq_for_each_valid_entry(pos, policy->freq_table)
+		count++;
+
+	return count;
+}
 #else
 static inline int cpufreq_boost_trigger_state(int state)
 {
-- 
cgit v1.2.3


From 0aa5e49c6845ecd82531341085f367767c9f419a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 5 Apr 2017 09:49:19 -0700
Subject: compiler: Add __designated_init annotation

This allows structure annotations for requiring designated initialization
in GCC 5.1.0 and later:
https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html

The structure randomization layout plugin will be using this to help
identify structures that need this form of initialization.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler-gcc.h | 8 ++++++++
 include/linux/compiler.h     | 4 ++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0efef9cf014f..386caf6771ed 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -294,6 +294,14 @@
 #define __no_sanitize_address __attribute__((no_sanitize_address))
 #endif
 
+#if GCC_VERSION >= 50100
+/*
+ * Mark structures as requiring designated initializers.
+ * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
+ */
+#define __designated_init __attribute__((designated_init))
+#endif
+
 #endif	/* gcc version >= 40000 specific checks */
 
 #if !defined(__noclone)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f8110051188f..80a1dea36cbe 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -440,6 +440,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __attribute_const__	/* unimplemented */
 #endif
 
+#ifndef __designated_init
+# define __designated_init
+#endif
+
 #ifndef __latent_entropy
 # define __latent_entropy
 #endif
-- 
cgit v1.2.3


From 05f479bf7d239f01ff6546f2bdeb14ad0fe65601 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 23 May 2017 15:47:29 +0100
Subject: gpio: Add new flags to control sleep status of GPIOs

Add new flags to allow users to specify that they are not concerned with
the status of GPIOs whilst in a sleep/low power state.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c       |  3 +++
 drivers/gpio/gpiolib.c          | 12 ++++++++++++
 drivers/gpio/gpiolib.h          |  1 +
 include/dt-bindings/gpio/gpio.h |  4 ++++
 include/linux/gpio/driver.h     |  3 +++
 include/linux/gpio/machine.h    |  2 ++
 include/linux/of_gpio.h         |  1 +
 7 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index b13b7c7c335f..e2abf0eabaf8 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -153,6 +153,9 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 			*flags |= GPIO_OPEN_SOURCE;
 	}
 
+	if (of_flags & OF_GPIO_SLEEP_MAY_LOOSE_VALUE)
+		*flags |= GPIO_SLEEP_MAY_LOOSE_VALUE;
+
 	return desc;
 }
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 995ca9cf7dbf..c81c42093ff6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2869,6 +2869,16 @@ bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset)
 }
 EXPORT_SYMBOL_GPL(gpiochip_line_is_open_source);
 
+bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset)
+{
+	if (offset >= chip->ngpio)
+		return false;
+
+	return !test_bit(FLAG_SLEEP_MAY_LOOSE_VALUE,
+			 &chip->gpiodev->descs[offset].flags);
+}
+EXPORT_SYMBOL_GPL(gpiochip_line_is_persistent);
+
 /**
  * gpiod_get_raw_value_cansleep() - return a gpio's raw value
  * @desc: gpio whose value will be returned
@@ -3225,6 +3235,8 @@ static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
 		set_bit(FLAG_OPEN_DRAIN, &desc->flags);
 	if (lflags & GPIO_OPEN_SOURCE)
 		set_bit(FLAG_OPEN_SOURCE, &desc->flags);
+	if (lflags & GPIO_SLEEP_MAY_LOOSE_VALUE)
+		set_bit(FLAG_SLEEP_MAY_LOOSE_VALUE, &desc->flags);
 
 	/* No particular flag request, return here... */
 	if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 2495b7ee1b42..f74b8a763242 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -190,6 +190,7 @@ struct gpio_desc {
 #define FLAG_OPEN_SOURCE 8	/* Gpio is open source type */
 #define FLAG_USED_AS_IRQ 9	/* GPIO is connected to an IRQ */
 #define FLAG_IS_HOGGED	11	/* GPIO is hogged */
+#define FLAG_SLEEP_MAY_LOOSE_VALUE 12	/* GPIO may loose value in sleep */
 
 	/* Connection label */
 	const char		*label;
diff --git a/include/dt-bindings/gpio/gpio.h b/include/dt-bindings/gpio/gpio.h
index b4f54da694eb..c5074584561d 100644
--- a/include/dt-bindings/gpio/gpio.h
+++ b/include/dt-bindings/gpio/gpio.h
@@ -28,4 +28,8 @@
 #define GPIO_OPEN_DRAIN (GPIO_SINGLE_ENDED | GPIO_LINE_OPEN_DRAIN)
 #define GPIO_OPEN_SOURCE (GPIO_SINGLE_ENDED | GPIO_LINE_OPEN_SOURCE)
 
+/* Bit 3 express GPIO suspend/resume persistence */
+#define GPIO_SLEEP_MAINTAIN_VALUE 0
+#define GPIO_SLEEP_MAY_LOOSE_VALUE 8
+
 #endif
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 393582867afd..af20369ec8e7 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -213,6 +213,9 @@ bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset);
 bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset);
 bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset);
 
+/* Sleep persistence inquiry for drivers */
+bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset);
+
 /* get driver data */
 void *gpiochip_get_data(struct gpio_chip *chip);
 
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index c0d712d22b07..13adadf53c09 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -9,6 +9,8 @@ enum gpio_lookup_flags {
 	GPIO_ACTIVE_LOW = (1 << 0),
 	GPIO_OPEN_DRAIN = (1 << 1),
 	GPIO_OPEN_SOURCE = (1 << 2),
+	GPIO_SLEEP_MAINTAIN_VALUE = (0 << 3),
+	GPIO_SLEEP_MAY_LOOSE_VALUE = (1 << 3),
 };
 
 /**
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 1e089d5a182b..ca10f43564de 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -31,6 +31,7 @@ enum of_gpio_flags {
 	OF_GPIO_ACTIVE_LOW = 0x1,
 	OF_GPIO_SINGLE_ENDED = 0x2,
 	OF_GPIO_OPEN_DRAIN = 0x4,
+	OF_GPIO_SLEEP_MAY_LOOSE_VALUE = 0x8,
 };
 
 #ifdef CONFIG_OF_GPIO
-- 
cgit v1.2.3


From 25e3ef894eef419ee239da42edc6c1f8a4f1cfb5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 23 May 2017 20:03:24 +0300
Subject: gpio: acpi: Split out acpi_gpio_get_irq_resource() helper

The helper does retrieve pointer to struct acpi_resource_gpio from
struct acpi_resource if it represents GpioInt() resource.

It will be used by PNP code later on.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 23 ++++++++++++++++++-----
 include/linux/acpi.h        |  7 +++++++
 2 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index e431222edc2b..6bea176b066c 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -165,6 +165,23 @@ static void acpi_gpio_chip_dh(acpi_handle handle, void *data)
 	/* The address of this function is used as a key. */
 }
 
+bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
+				struct acpi_resource_gpio **agpio)
+{
+	struct acpi_resource_gpio *gpio;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
+		return false;
+
+	gpio = &ares->data.gpio;
+	if (gpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_INT)
+		return false;
+
+	*agpio = gpio;
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_gpio_get_irq_resource);
+
 static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 						   void *context)
 {
@@ -178,11 +195,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 	unsigned long irqflags;
 	int ret, pin, irq;
 
-	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
-		return AE_OK;
-
-	agpio = &ares->data.gpio;
-	if (agpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_INT)
+	if (!acpi_gpio_get_irq_resource(ares, &agpio))
 		return AE_OK;
 
 	handle = ACPI_HANDLE(chip->parent);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 137e4a3d89c5..d5aa3c42f64d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -964,6 +964,8 @@ int devm_acpi_dev_add_driver_gpios(struct device *dev,
 				   const struct acpi_gpio_mapping *gpios);
 void devm_acpi_dev_remove_driver_gpios(struct device *dev);
 
+bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
+				struct acpi_resource_gpio **agpio);
 int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index);
 #else
 static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev,
@@ -980,6 +982,11 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev,
 }
 static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {}
 
+static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
+					      struct acpi_resource_gpio **agpio)
+{
+	return false;
+}
 static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
 {
 	return -ENXIO;
-- 
cgit v1.2.3


From c1a4634013141b96324c647b45356e16f1fff781 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 23:57:27 +0200
Subject: gpio: adp5588: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/blackfin/mach-bf537/boards/stamp.c |   2 +-
 drivers/gpio/gpio-adp5588.c             |   2 +-
 drivers/input/keyboard/adp5588-keys.c   |   2 +-
 include/linux/i2c/adp5588.h             | 172 --------------------------------
 include/linux/platform_data/adp5588.h   | 172 ++++++++++++++++++++++++++++++++
 5 files changed, 175 insertions(+), 175 deletions(-)
 delete mode 100644 include/linux/i2c/adp5588.h
 create mode 100644 include/linux/platform_data/adp5588.h

(limited to 'include/linux')

diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index eaec7b4832a2..24985e658c19 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -22,7 +22,7 @@
 #include <linux/usb/isp1362.h>
 #endif
 #include <linux/i2c.h>
-#include <linux/i2c/adp5588.h>
+#include <linux/platform_data/adp5588.h>
 #include <linux/etherdevice.h>
 #include <linux/ata_platform.h>
 #include <linux/irq.h>
diff --git a/drivers/gpio/gpio-adp5588.c b/drivers/gpio/gpio-adp5588.c
index c0f718b12317..e717f8dc3966 100644
--- a/drivers/gpio/gpio-adp5588.c
+++ b/drivers/gpio/gpio-adp5588.c
@@ -16,7 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 
-#include <linux/i2c/adp5588.h>
+#include <linux/platform_data/adp5588.h>
 
 #define DRV_NAME	"adp5588-gpio"
 
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index 53fe9a3fb620..f9d273c8b306 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -20,7 +20,7 @@
 #include <linux/gpio.h>
 #include <linux/slab.h>
 
-#include <linux/i2c/adp5588.h>
+#include <linux/platform_data/adp5588.h>
 
 /* Key Event Register xy */
 #define KEY_EV_PRESSED		(1 << 7)
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
deleted file mode 100644
index c2153049cfbd..000000000000
--- a/include/linux/i2c/adp5588.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller
- *
- * Copyright 2009-2010 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef _ADP5588_H
-#define _ADP5588_H
-
-#define DEV_ID 0x00		/* Device ID */
-#define CFG 0x01		/* Configuration Register1 */
-#define INT_STAT 0x02		/* Interrupt Status Register */
-#define KEY_LCK_EC_STAT 0x03	/* Key Lock and Event Counter Register */
-#define Key_EVENTA 0x04		/* Key Event Register A */
-#define Key_EVENTB 0x05		/* Key Event Register B */
-#define Key_EVENTC 0x06		/* Key Event Register C */
-#define Key_EVENTD 0x07		/* Key Event Register D */
-#define Key_EVENTE 0x08		/* Key Event Register E */
-#define Key_EVENTF 0x09		/* Key Event Register F */
-#define Key_EVENTG 0x0A		/* Key Event Register G */
-#define Key_EVENTH 0x0B		/* Key Event Register H */
-#define Key_EVENTI 0x0C		/* Key Event Register I */
-#define Key_EVENTJ 0x0D		/* Key Event Register J */
-#define KP_LCK_TMR 0x0E		/* Keypad Lock1 to Lock2 Timer */
-#define UNLOCK1 0x0F		/* Unlock Key1 */
-#define UNLOCK2 0x10		/* Unlock Key2 */
-#define GPIO_INT_STAT1 0x11	/* GPIO Interrupt Status */
-#define GPIO_INT_STAT2 0x12	/* GPIO Interrupt Status */
-#define GPIO_INT_STAT3 0x13	/* GPIO Interrupt Status */
-#define GPIO_DAT_STAT1 0x14	/* GPIO Data Status, Read twice to clear */
-#define GPIO_DAT_STAT2 0x15	/* GPIO Data Status, Read twice to clear */
-#define GPIO_DAT_STAT3 0x16	/* GPIO Data Status, Read twice to clear */
-#define GPIO_DAT_OUT1 0x17	/* GPIO DATA OUT */
-#define GPIO_DAT_OUT2 0x18	/* GPIO DATA OUT */
-#define GPIO_DAT_OUT3 0x19	/* GPIO DATA OUT */
-#define GPIO_INT_EN1 0x1A	/* GPIO Interrupt Enable */
-#define GPIO_INT_EN2 0x1B	/* GPIO Interrupt Enable */
-#define GPIO_INT_EN3 0x1C	/* GPIO Interrupt Enable */
-#define KP_GPIO1 0x1D		/* Keypad or GPIO Selection */
-#define KP_GPIO2 0x1E		/* Keypad or GPIO Selection */
-#define KP_GPIO3 0x1F		/* Keypad or GPIO Selection */
-#define GPI_EM1 0x20		/* GPI Event Mode 1 */
-#define GPI_EM2 0x21		/* GPI Event Mode 2 */
-#define GPI_EM3 0x22		/* GPI Event Mode 3 */
-#define GPIO_DIR1 0x23		/* GPIO Data Direction */
-#define GPIO_DIR2 0x24		/* GPIO Data Direction */
-#define GPIO_DIR3 0x25		/* GPIO Data Direction */
-#define GPIO_INT_LVL1 0x26	/* GPIO Edge/Level Detect */
-#define GPIO_INT_LVL2 0x27	/* GPIO Edge/Level Detect */
-#define GPIO_INT_LVL3 0x28	/* GPIO Edge/Level Detect */
-#define Debounce_DIS1 0x29	/* Debounce Disable */
-#define Debounce_DIS2 0x2A	/* Debounce Disable */
-#define Debounce_DIS3 0x2B	/* Debounce Disable */
-#define GPIO_PULL1 0x2C		/* GPIO Pull Disable */
-#define GPIO_PULL2 0x2D		/* GPIO Pull Disable */
-#define GPIO_PULL3 0x2E		/* GPIO Pull Disable */
-#define CMP_CFG_STAT 0x30	/* Comparator Configuration and Status Register */
-#define CMP_CONFG_SENS1 0x31	/* Sensor1 Comparator Configuration Register */
-#define CMP_CONFG_SENS2 0x32	/* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */
-#define CMP1_LVL2_TRIP 0x33	/* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */
-#define CMP1_LVL2_HYS 0x34	/* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */
-#define CMP1_LVL3_TRIP 0x35	/* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */
-#define CMP1_LVL3_HYS 0x36	/* Sensor 2 Comparator Configuration Register */
-#define CMP2_LVL2_TRIP 0x37	/* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */
-#define CMP2_LVL2_HYS 0x38	/* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */
-#define CMP2_LVL3_TRIP 0x39	/* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */
-#define CMP2_LVL3_HYS 0x3A	/* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */
-#define CMP1_ADC_DAT_R1 0x3B	/* Comparator 1 ADC data Register1 */
-#define CMP1_ADC_DAT_R2 0x3C	/* Comparator 1 ADC data Register2 */
-#define CMP2_ADC_DAT_R1 0x3D	/* Comparator 2 ADC data Register1 */
-#define CMP2_ADC_DAT_R2 0x3E	/* Comparator 2 ADC data Register2 */
-
-#define ADP5588_DEVICE_ID_MASK	0xF
-
- /* Configuration Register1 */
-#define ADP5588_AUTO_INC	(1 << 7)
-#define ADP5588_GPIEM_CFG	(1 << 6)
-#define ADP5588_OVR_FLOW_M	(1 << 5)
-#define ADP5588_INT_CFG		(1 << 4)
-#define ADP5588_OVR_FLOW_IEN	(1 << 3)
-#define ADP5588_K_LCK_IM	(1 << 2)
-#define ADP5588_GPI_IEN		(1 << 1)
-#define ADP5588_KE_IEN		(1 << 0)
-
-/* Interrupt Status Register */
-#define ADP5588_CMP2_INT	(1 << 5)
-#define ADP5588_CMP1_INT	(1 << 4)
-#define ADP5588_OVR_FLOW_INT	(1 << 3)
-#define ADP5588_K_LCK_INT	(1 << 2)
-#define ADP5588_GPI_INT		(1 << 1)
-#define ADP5588_KE_INT		(1 << 0)
-
-/* Key Lock and Event Counter Register */
-#define ADP5588_K_LCK_EN	(1 << 6)
-#define ADP5588_LCK21		0x30
-#define ADP5588_KEC		0xF
-
-#define ADP5588_MAXGPIO		18
-#define ADP5588_BANK(offs)	((offs) >> 3)
-#define ADP5588_BIT(offs)	(1u << ((offs) & 0x7))
-
-/* Put one of these structures in i2c_board_info platform_data */
-
-#define ADP5588_KEYMAPSIZE	80
-
-#define GPI_PIN_ROW0 97
-#define GPI_PIN_ROW1 98
-#define GPI_PIN_ROW2 99
-#define GPI_PIN_ROW3 100
-#define GPI_PIN_ROW4 101
-#define GPI_PIN_ROW5 102
-#define GPI_PIN_ROW6 103
-#define GPI_PIN_ROW7 104
-#define GPI_PIN_COL0 105
-#define GPI_PIN_COL1 106
-#define GPI_PIN_COL2 107
-#define GPI_PIN_COL3 108
-#define GPI_PIN_COL4 109
-#define GPI_PIN_COL5 110
-#define GPI_PIN_COL6 111
-#define GPI_PIN_COL7 112
-#define GPI_PIN_COL8 113
-#define GPI_PIN_COL9 114
-
-#define GPI_PIN_ROW_BASE GPI_PIN_ROW0
-#define GPI_PIN_ROW_END GPI_PIN_ROW7
-#define GPI_PIN_COL_BASE GPI_PIN_COL0
-#define GPI_PIN_COL_END GPI_PIN_COL9
-
-#define GPI_PIN_BASE GPI_PIN_ROW_BASE
-#define GPI_PIN_END GPI_PIN_COL_END
-
-#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1)
-
-struct adp5588_gpi_map {
-	unsigned short pin;
-	unsigned short sw_evt;
-};
-
-struct adp5588_kpad_platform_data {
-	int rows;			/* Number of rows */
-	int cols;			/* Number of columns */
-	const unsigned short *keymap;	/* Pointer to keymap */
-	unsigned short keymapsize;	/* Keymap size */
-	unsigned repeat:1;		/* Enable key repeat */
-	unsigned en_keylock:1;		/* Enable Key Lock feature */
-	unsigned short unlock_key1;	/* Unlock Key 1 */
-	unsigned short unlock_key2;	/* Unlock Key 2 */
-	const struct adp5588_gpi_map *gpimap;
-	unsigned short gpimapsize;
-	const struct adp5588_gpio_platform_data *gpio_data;
-};
-
-struct i2c_client; /* forward declaration */
-
-struct adp5588_gpio_platform_data {
-	int gpio_start;		/* GPIO Chip base # */
-	const char *const *names;
-	unsigned irq_base;	/* interrupt base # */
-	unsigned pullup_dis_mask; /* Pull-Up Disable Mask */
-	int	(*setup)(struct i2c_client *client,
-				unsigned gpio, unsigned ngpio,
-				void *context);
-	int	(*teardown)(struct i2c_client *client,
-				unsigned gpio, unsigned ngpio,
-				void *context);
-	void	*context;
-};
-
-#endif
diff --git a/include/linux/platform_data/adp5588.h b/include/linux/platform_data/adp5588.h
new file mode 100644
index 000000000000..c2153049cfbd
--- /dev/null
+++ b/include/linux/platform_data/adp5588.h
@@ -0,0 +1,172 @@
+/*
+ * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009-2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _ADP5588_H
+#define _ADP5588_H
+
+#define DEV_ID 0x00		/* Device ID */
+#define CFG 0x01		/* Configuration Register1 */
+#define INT_STAT 0x02		/* Interrupt Status Register */
+#define KEY_LCK_EC_STAT 0x03	/* Key Lock and Event Counter Register */
+#define Key_EVENTA 0x04		/* Key Event Register A */
+#define Key_EVENTB 0x05		/* Key Event Register B */
+#define Key_EVENTC 0x06		/* Key Event Register C */
+#define Key_EVENTD 0x07		/* Key Event Register D */
+#define Key_EVENTE 0x08		/* Key Event Register E */
+#define Key_EVENTF 0x09		/* Key Event Register F */
+#define Key_EVENTG 0x0A		/* Key Event Register G */
+#define Key_EVENTH 0x0B		/* Key Event Register H */
+#define Key_EVENTI 0x0C		/* Key Event Register I */
+#define Key_EVENTJ 0x0D		/* Key Event Register J */
+#define KP_LCK_TMR 0x0E		/* Keypad Lock1 to Lock2 Timer */
+#define UNLOCK1 0x0F		/* Unlock Key1 */
+#define UNLOCK2 0x10		/* Unlock Key2 */
+#define GPIO_INT_STAT1 0x11	/* GPIO Interrupt Status */
+#define GPIO_INT_STAT2 0x12	/* GPIO Interrupt Status */
+#define GPIO_INT_STAT3 0x13	/* GPIO Interrupt Status */
+#define GPIO_DAT_STAT1 0x14	/* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT2 0x15	/* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_STAT3 0x16	/* GPIO Data Status, Read twice to clear */
+#define GPIO_DAT_OUT1 0x17	/* GPIO DATA OUT */
+#define GPIO_DAT_OUT2 0x18	/* GPIO DATA OUT */
+#define GPIO_DAT_OUT3 0x19	/* GPIO DATA OUT */
+#define GPIO_INT_EN1 0x1A	/* GPIO Interrupt Enable */
+#define GPIO_INT_EN2 0x1B	/* GPIO Interrupt Enable */
+#define GPIO_INT_EN3 0x1C	/* GPIO Interrupt Enable */
+#define KP_GPIO1 0x1D		/* Keypad or GPIO Selection */
+#define KP_GPIO2 0x1E		/* Keypad or GPIO Selection */
+#define KP_GPIO3 0x1F		/* Keypad or GPIO Selection */
+#define GPI_EM1 0x20		/* GPI Event Mode 1 */
+#define GPI_EM2 0x21		/* GPI Event Mode 2 */
+#define GPI_EM3 0x22		/* GPI Event Mode 3 */
+#define GPIO_DIR1 0x23		/* GPIO Data Direction */
+#define GPIO_DIR2 0x24		/* GPIO Data Direction */
+#define GPIO_DIR3 0x25		/* GPIO Data Direction */
+#define GPIO_INT_LVL1 0x26	/* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL2 0x27	/* GPIO Edge/Level Detect */
+#define GPIO_INT_LVL3 0x28	/* GPIO Edge/Level Detect */
+#define Debounce_DIS1 0x29	/* Debounce Disable */
+#define Debounce_DIS2 0x2A	/* Debounce Disable */
+#define Debounce_DIS3 0x2B	/* Debounce Disable */
+#define GPIO_PULL1 0x2C		/* GPIO Pull Disable */
+#define GPIO_PULL2 0x2D		/* GPIO Pull Disable */
+#define GPIO_PULL3 0x2E		/* GPIO Pull Disable */
+#define CMP_CFG_STAT 0x30	/* Comparator Configuration and Status Register */
+#define CMP_CONFG_SENS1 0x31	/* Sensor1 Comparator Configuration Register */
+#define CMP_CONFG_SENS2 0x32	/* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */
+#define CMP1_LVL2_TRIP 0x33	/* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */
+#define CMP1_LVL2_HYS 0x34	/* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */
+#define CMP1_LVL3_TRIP 0x35	/* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */
+#define CMP1_LVL3_HYS 0x36	/* Sensor 2 Comparator Configuration Register */
+#define CMP2_LVL2_TRIP 0x37	/* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */
+#define CMP2_LVL2_HYS 0x38	/* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */
+#define CMP2_LVL3_TRIP 0x39	/* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */
+#define CMP2_LVL3_HYS 0x3A	/* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */
+#define CMP1_ADC_DAT_R1 0x3B	/* Comparator 1 ADC data Register1 */
+#define CMP1_ADC_DAT_R2 0x3C	/* Comparator 1 ADC data Register2 */
+#define CMP2_ADC_DAT_R1 0x3D	/* Comparator 2 ADC data Register1 */
+#define CMP2_ADC_DAT_R2 0x3E	/* Comparator 2 ADC data Register2 */
+
+#define ADP5588_DEVICE_ID_MASK	0xF
+
+ /* Configuration Register1 */
+#define ADP5588_AUTO_INC	(1 << 7)
+#define ADP5588_GPIEM_CFG	(1 << 6)
+#define ADP5588_OVR_FLOW_M	(1 << 5)
+#define ADP5588_INT_CFG		(1 << 4)
+#define ADP5588_OVR_FLOW_IEN	(1 << 3)
+#define ADP5588_K_LCK_IM	(1 << 2)
+#define ADP5588_GPI_IEN		(1 << 1)
+#define ADP5588_KE_IEN		(1 << 0)
+
+/* Interrupt Status Register */
+#define ADP5588_CMP2_INT	(1 << 5)
+#define ADP5588_CMP1_INT	(1 << 4)
+#define ADP5588_OVR_FLOW_INT	(1 << 3)
+#define ADP5588_K_LCK_INT	(1 << 2)
+#define ADP5588_GPI_INT		(1 << 1)
+#define ADP5588_KE_INT		(1 << 0)
+
+/* Key Lock and Event Counter Register */
+#define ADP5588_K_LCK_EN	(1 << 6)
+#define ADP5588_LCK21		0x30
+#define ADP5588_KEC		0xF
+
+#define ADP5588_MAXGPIO		18
+#define ADP5588_BANK(offs)	((offs) >> 3)
+#define ADP5588_BIT(offs)	(1u << ((offs) & 0x7))
+
+/* Put one of these structures in i2c_board_info platform_data */
+
+#define ADP5588_KEYMAPSIZE	80
+
+#define GPI_PIN_ROW0 97
+#define GPI_PIN_ROW1 98
+#define GPI_PIN_ROW2 99
+#define GPI_PIN_ROW3 100
+#define GPI_PIN_ROW4 101
+#define GPI_PIN_ROW5 102
+#define GPI_PIN_ROW6 103
+#define GPI_PIN_ROW7 104
+#define GPI_PIN_COL0 105
+#define GPI_PIN_COL1 106
+#define GPI_PIN_COL2 107
+#define GPI_PIN_COL3 108
+#define GPI_PIN_COL4 109
+#define GPI_PIN_COL5 110
+#define GPI_PIN_COL6 111
+#define GPI_PIN_COL7 112
+#define GPI_PIN_COL8 113
+#define GPI_PIN_COL9 114
+
+#define GPI_PIN_ROW_BASE GPI_PIN_ROW0
+#define GPI_PIN_ROW_END GPI_PIN_ROW7
+#define GPI_PIN_COL_BASE GPI_PIN_COL0
+#define GPI_PIN_COL_END GPI_PIN_COL9
+
+#define GPI_PIN_BASE GPI_PIN_ROW_BASE
+#define GPI_PIN_END GPI_PIN_COL_END
+
+#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1)
+
+struct adp5588_gpi_map {
+	unsigned short pin;
+	unsigned short sw_evt;
+};
+
+struct adp5588_kpad_platform_data {
+	int rows;			/* Number of rows */
+	int cols;			/* Number of columns */
+	const unsigned short *keymap;	/* Pointer to keymap */
+	unsigned short keymapsize;	/* Keymap size */
+	unsigned repeat:1;		/* Enable key repeat */
+	unsigned en_keylock:1;		/* Enable Key Lock feature */
+	unsigned short unlock_key1;	/* Unlock Key 1 */
+	unsigned short unlock_key2;	/* Unlock Key 2 */
+	const struct adp5588_gpi_map *gpimap;
+	unsigned short gpimapsize;
+	const struct adp5588_gpio_platform_data *gpio_data;
+};
+
+struct i2c_client; /* forward declaration */
+
+struct adp5588_gpio_platform_data {
+	int gpio_start;		/* GPIO Chip base # */
+	const char *const *names;
+	unsigned irq_base;	/* interrupt base # */
+	unsigned pullup_dis_mask; /* Pull-Up Disable Mask */
+	int	(*setup)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	int	(*teardown)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	void	*context;
+};
+
+#endif
-- 
cgit v1.2.3


From 79ffd5f98c11572c004d52f7ecd270ab680a7f72 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 26 May 2017 16:55:10 -0700
Subject: Input: sparse-keymap - remove sparse_keymap_free()

Now that all users of sparse_keymap_free() are gone we can remove the stub.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/sparse-keymap.c       | 14 --------------
 include/linux/input/sparse-keymap.h |  1 -
 2 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c
index 12a3ad83296d..bb0349fa64bc 100644
--- a/drivers/input/sparse-keymap.c
+++ b/drivers/input/sparse-keymap.c
@@ -223,20 +223,6 @@ int sparse_keymap_setup(struct input_dev *dev,
 }
 EXPORT_SYMBOL(sparse_keymap_setup);
 
-/**
- * sparse_keymap_free - free memory allocated for sparse keymap
- * @dev: Input device using sparse keymap
- *
- * This function used to free memory allocated by sparse keymap
- * in an input device that was set up by sparse_keymap_setup().
- * Since sparse_keymap_setup() now uses a managed allocation for the
- * keymap copy, use of this function is deprecated.
- */
-void sparse_keymap_free(struct input_dev *dev)
-{
-}
-EXPORT_SYMBOL(sparse_keymap_free);
-
 /**
  * sparse_keymap_report_entry - report event corresponding to given key entry
  * @dev: Input device for which event should be reported
diff --git a/include/linux/input/sparse-keymap.h b/include/linux/input/sparse-keymap.h
index 52db62064c6e..c7346e33d958 100644
--- a/include/linux/input/sparse-keymap.h
+++ b/include/linux/input/sparse-keymap.h
@@ -51,7 +51,6 @@ struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev,
 int sparse_keymap_setup(struct input_dev *dev,
 			const struct key_entry *keymap,
 			int (*setup)(struct input_dev *, struct key_entry *));
-void sparse_keymap_free(struct input_dev *dev);
 
 void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke,
 				unsigned int value, bool autorelease);
-- 
cgit v1.2.3


From 4a78cc644eed3cf2dae00c3a959910a86c140fd6 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 26 May 2017 17:10:15 +0200
Subject: mtd: nand: Make sure drivers not supporting SET/GET_FEATURES return
 -ENOTSUPP

A lot of drivers are providing their own ->cmdfunc(), and most of the
time this implementation does not support all possible NAND operations.
But since ->cmdfunc() cannot return an error code, the core has no way
to know that the operation it requested is not supported.

This is a problem we cannot address for all kind of operations with the
current design, but we can prevent these silent failures for the
GET/SET FEATURES operation by overloading the default
->onfi_{set,get}_features() methods with one returning -ENOTSUPP.

Reported-by: Chris Packham <Chris.Packham@alliedtelesis.co.nz>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Chris Packham <Chris.Packham@alliedtelesis.co.nz>
---
 drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c  |  2 ++
 drivers/mtd/nand/cafe_nand.c                  |  2 ++
 drivers/mtd/nand/denali.c                     |  2 ++
 drivers/mtd/nand/docg4.c                      |  2 ++
 drivers/mtd/nand/fsl_elbc_nand.c              |  2 ++
 drivers/mtd/nand/fsl_ifc_nand.c               |  2 ++
 drivers/mtd/nand/hisi504_nand.c               |  2 ++
 drivers/mtd/nand/mpc5121_nfc.c                |  2 ++
 drivers/mtd/nand/nand_base.c                  | 19 +++++++++++++++++++
 drivers/mtd/nand/pxa3xx_nand.c                |  2 ++
 drivers/mtd/nand/qcom_nandc.c                 |  2 ++
 drivers/mtd/nand/sh_flctl.c                   |  2 ++
 drivers/mtd/nand/vf610_nfc.c                  |  2 ++
 drivers/staging/mt29f_spinand/mt29f_spinand.c |  2 ++
 include/linux/mtd/nand.h                      |  5 +++++
 15 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
index f1da4ea88f2c..54bac5b73f0a 100644
--- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
@@ -392,6 +392,8 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	b47n->nand_chip.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
 	b47n->nand_chip.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
 	b47n->nand_chip.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
+	b47n->nand_chip.onfi_set_features = nand_onfi_get_set_features_notsupp;
+	b47n->nand_chip.onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	nand_chip->chip_delay = 50;
 	b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index d40c32d311d8..2fd733eba0a3 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -654,6 +654,8 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	cafe->nand.read_buf = cafe_read_buf;
 	cafe->nand.write_buf = cafe_write_buf;
 	cafe->nand.select_chip = cafe_select_chip;
+	cafe->nand.onfi_set_features = nand_onfi_get_set_features_notsupp;
+	cafe->nand.onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	cafe->nand.chip_delay = 0;
 
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 16634df2e39a..b3c99d98fdee 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1531,6 +1531,8 @@ int denali_init(struct denali_nand_info *denali)
 	chip->cmdfunc = denali_cmdfunc;
 	chip->read_byte = denali_read_byte;
 	chip->waitfunc = denali_waitfunc;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	/*
 	 * scan for NAND devices attached to the controller
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 7af2a3cd949e..a27a84fbfb84 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -1260,6 +1260,8 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->read_buf = docg4_read_buf;
 	nand->write_buf = docg4_write_buf16;
 	nand->erase = docg4_erase_block;
+	nand->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	nand->onfi_get_features = nand_onfi_get_set_features_notsupp;
 	nand->ecc.read_page = docg4_read_page;
 	nand->ecc.write_page = docg4_write_page;
 	nand->ecc.read_page_raw = docg4_read_page_raw;
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 113f76e59937..b9ac16f05057 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -775,6 +775,8 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->select_chip = fsl_elbc_select_chip;
 	chip->cmdfunc = fsl_elbc_cmdfunc;
 	chip->waitfunc = fsl_elbc_wait;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index d1570f512f0b..89e14daeaba6 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -831,6 +831,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->select_chip = fsl_ifc_select_chip;
 	chip->cmdfunc = fsl_ifc_cmdfunc;
 	chip->waitfunc = fsl_ifc_wait;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c
index e40364eeb556..530caa80b1b6 100644
--- a/drivers/mtd/nand/hisi504_nand.c
+++ b/drivers/mtd/nand/hisi504_nand.c
@@ -764,6 +764,8 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	chip->write_buf		= hisi_nfc_write_buf;
 	chip->read_buf		= hisi_nfc_read_buf;
 	chip->chip_delay	= HINFC504_CHIP_DELAY;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 
 	hisi_nfc_host_init(host);
 
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index 6d6eaed2d20c..0e86fb6277c3 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -708,6 +708,8 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	chip->read_buf = mpc5121_nfc_read_buf;
 	chip->write_buf = mpc5121_nfc_write_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 	chip->ecc.mode = NAND_ECC_SOFT;
 	chip->ecc.algo = NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 0b3b1a88091a..ed08e3946727 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3420,6 +3420,25 @@ static int nand_onfi_get_features(struct mtd_info *mtd, struct nand_chip *chip,
 	return 0;
 }
 
+/**
+ * nand_onfi_get_set_features_notsupp - set/get features stub returning
+ *					-ENOTSUPP
+ * @mtd: MTD device structure
+ * @chip: nand chip info structure
+ * @addr: feature address.
+ * @subfeature_param: the subfeature parameters, a four bytes array.
+ *
+ * Should be used by NAND controller drivers that do not support the SET/GET
+ * FEATURES operations.
+ */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+				       struct nand_chip *chip, int addr,
+				       u8 *subfeature_param)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_onfi_get_set_features_notsupp);
+
 /**
  * nand_suspend - [MTD Interface] Suspend the NAND flash
  * @mtd: MTD device structure
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 649ba8200832..74dae4bbdac8 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1812,6 +1812,8 @@ static int alloc_nand_resource(struct platform_device *pdev)
 		chip->write_buf		= pxa3xx_nand_write_buf;
 		chip->options		|= NAND_NO_SUBPAGE_WRITE;
 		chip->cmdfunc		= nand_cmdfunc;
+		chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+		chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 	}
 
 	nand_hw_control_init(chip->controller);
diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index 57d483ac5765..88af7145a51a 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -2008,6 +2008,8 @@ static int qcom_nand_host_init(struct qcom_nand_controller *nandc,
 	chip->read_byte		= qcom_nandc_read_byte;
 	chip->read_buf		= qcom_nandc_read_buf;
 	chip->write_buf		= qcom_nandc_write_buf;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 
 	/*
 	 * the bad block marker is readable only when we read the last codeword
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 442ce619b3b6..891ac7b99305 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -1183,6 +1183,8 @@ static int flctl_probe(struct platform_device *pdev)
 	nand->read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
 	nand->cmdfunc = flctl_cmdfunc;
+	nand->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	nand->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	if (pdata->flcmncr_val & SEL_16BIT)
 		nand->options |= NAND_BUSWIDTH_16;
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 3ea4bb19e12d..744ab10e8962 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -703,6 +703,8 @@ static int vf610_nfc_probe(struct platform_device *pdev)
 	chip->read_buf = vf610_nfc_read_buf;
 	chip->write_buf = vf610_nfc_write_buf;
 	chip->select_chip = vf610_nfc_select_chip;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->options |= NAND_NO_SUBPAGE_WRITE;
 
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index e389009fca42..a4e3ae8f0c85 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -915,6 +915,8 @@ static int spinand_probe(struct spi_device *spi_nand)
 	chip->waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
 	chip->select_chip = spinand_select_chip;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	mtd = nand_to_mtd(chip);
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7a01d2eb7443..28f7dd9177e9 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1259,6 +1259,11 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page);
 
+/* Stub used by drivers that do not support GET/SET FEATURES operations */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+				       struct nand_chip *chip, int addr,
+				       u8 *subfeature_param);
+
 /* Default read_page_raw implementation */
 int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 		       uint8_t *buf, int oob_required, int page);
-- 
cgit v1.2.3


From e45a79da863c199d7c47b1ee6d33cee23c89eac1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 24 May 2017 09:07:47 +0200
Subject: skbuff/mac80211: introduce and use skb_put_zero()

This pattern was introduced a number of times in mac80211 just now,
and since it's present in a number of other places it makes sense
to add a little helper for it.

This just adds the helper and transforms the mac80211 code, a later
patch will transform other places.

Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/skbuff.h    | 9 +++++++++
 net/mac80211/mesh.c       | 9 +++------
 net/mac80211/mesh_plink.c | 3 +--
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bfc7892f6c33..d92056b2da44 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1937,6 +1937,15 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
+static inline unsigned char *skb_put_zero(struct sk_buff *skb, unsigned int len)
+{
+	unsigned char *tmp = skb_put(skb, len);
+
+	memset(tmp, 0, len);
+
+	return tmp;
+}
+
 unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 {
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 3d6b3d674ca3..ad5d1cf39190 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -744,8 +744,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 		int ie_len = 2 + sizeof(struct ieee80211_channel_sw_ie) +
 			     2 + sizeof(struct ieee80211_mesh_chansw_params_ie);
 
-		pos = skb_put(skb, ie_len);
-		memset(pos, 0, ie_len);
+		pos = skb_put_zero(skb, ie_len);
 		*pos++ = WLAN_EID_CHANNEL_SWITCH;
 		*pos++ = 3;
 		*pos++ = 0x0;
@@ -772,8 +771,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 		switch (csa->settings.chandef.width) {
 		case NL80211_CHAN_WIDTH_40:
 			ie_len = 2 + sizeof(struct ieee80211_sec_chan_offs_ie);
-			pos = skb_put(skb, ie_len);
-			memset(pos, 0, ie_len);
+			pos = skb_put_zero(skb, ie_len);
 
 			*pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */
 			*pos++ = 1;				    /* len */
@@ -789,8 +787,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 			/* Channel Switch Wrapper + Wide Bandwidth CSA IE */
 			ie_len = 2 + 2 +
 				 sizeof(struct ieee80211_wide_bw_chansw_ie);
-			pos = skb_put(skb, ie_len);
-			memset(pos, 0, ie_len);
+			pos = skb_put_zero(skb, ie_len);
 
 			*pos++ = WLAN_EID_CHANNEL_SWITCH_WRAPPER; /* EID */
 			*pos++ = 5;				  /* len */
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1131cd504a15..82cfd232a25e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -264,8 +264,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		band = sband->band;
 
 		/* capability info */
-		pos = skb_put(skb, 2);
-		memset(pos, 0, 2);
+		pos = skb_put_zero(skb, 2);
 		if (action == WLAN_SP_MESH_PEERING_CONFIRM) {
 			/* AID */
 			pos = skb_put(skb, 2);
-- 
cgit v1.2.3


From 4b1c88984c8ac894c2c411570757bed7fa5f3226 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 18 May 2017 15:13:57 +0200
Subject: iommu/dma: Fix function declaration

Newly added code in the ipmmu-vmsa driver showed a small mistake
in a header file that can't be included by itself without CONFIG_IOMMU_DMA
enabled:

In file included from drivers/iommu/ipmmu-vmsa.c:13:0:
include/linux/dma-iommu.h:105:94: error: 'struct device' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]

This adds a forward declaration for 'struct device', similar to how
we treat the other struct types in this case.

Fixes: 3ae47292024f ("iommu/ipmmu-vmsa: Add new IOMMU_DOMAIN_DMA ops")
Fixes: 273df9635385 ("iommu/dma: Make PCI window reservation generic")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/dma-iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 4eac2670bfa1..92f20832fd28 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -78,6 +78,7 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
 struct iommu_domain;
 struct msi_msg;
+struct device;
 
 static inline int iommu_dma_init(void)
 {
-- 
cgit v1.2.3


From 9ae287274817c032a4428fde84d1ab26d6b96761 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 27 May 2017 16:19:28 -0600
Subject: net: add extack arg to lwtunnel build state

Pass extack arg down to lwtunnel_build_state and the build_state callbacks.
Add messages for failures in lwtunnel_build_state, and add the extarg to
nla_parse where possible in the build_state callbacks.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h   | 10 ++++++++++
 include/net/lwtunnel.h    |  9 ++++++---
 net/core/lwt_bpf.c        |  5 +++--
 net/core/lwtunnel.c       | 20 +++++++++++++++++---
 net/ipv4/fib_lookup.h     |  3 ++-
 net/ipv4/fib_semantics.c  | 20 +++++++++++---------
 net/ipv4/fib_trie.c       |  2 +-
 net/ipv4/ip_tunnel_core.c | 11 +++++++----
 net/ipv6/ila/ila_lwt.c    |  5 +++--
 net/ipv6/route.c          |  2 +-
 net/ipv6/seg6_iptunnel.c  |  5 +++--
 net/mpls/mpls_iptunnel.c  |  5 +++--
 12 files changed, 67 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index a68aad484c69..8664fd26eb5d 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -102,6 +102,16 @@ struct netlink_ext_ack {
 		(extack)->bad_attr = (attr);		\
 } while (0)
 
+#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do {	\
+	static const char __msg[] = (msg);		\
+	struct netlink_ext_ack *__extack = (extack);	\
+							\
+	if (__extack) {					\
+		__extack->_msg = __msg;			\
+		__extack->bad_attr = (attr);		\
+	}						\
+} while (0)
+
 extern void netlink_kernel_release(struct sock *sk);
 extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index ca6f002774ef..7c26863b8cf4 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -35,7 +35,8 @@ struct lwtunnel_state {
 struct lwtunnel_encap_ops {
 	int (*build_state)(struct nlattr *encap,
 			   unsigned int family, const void *cfg,
-			   struct lwtunnel_state **ts);
+			   struct lwtunnel_state **ts,
+			   struct netlink_ext_ack *extack);
 	void (*destroy_state)(struct lwtunnel_state *lws);
 	int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
 	int (*input)(struct sk_buff *skb);
@@ -114,7 +115,8 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
 int lwtunnel_build_state(u16 encap_type,
 			 struct nlattr *encap,
 			 unsigned int family, const void *cfg,
-			 struct lwtunnel_state **lws);
+			 struct lwtunnel_state **lws,
+			 struct netlink_ext_ack *extack);
 int lwtunnel_fill_encap(struct sk_buff *skb,
 			struct lwtunnel_state *lwtstate);
 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
@@ -192,7 +194,8 @@ static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
 static inline int lwtunnel_build_state(u16 encap_type,
 				       struct nlattr *encap,
 				       unsigned int family, const void *cfg,
-				       struct lwtunnel_state **lws)
+				       struct lwtunnel_state **lws,
+				       struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index b3bc0a31af9f..1307731ddfe4 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -240,7 +240,8 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
 
 static int bpf_build_state(struct nlattr *nla,
 			   unsigned int family, const void *cfg,
-			   struct lwtunnel_state **ts)
+			   struct lwtunnel_state **ts,
+			   struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[LWT_BPF_MAX + 1];
 	struct lwtunnel_state *newts;
@@ -250,7 +251,7 @@ static int bpf_build_state(struct nlattr *nla,
 	if (family != AF_INET && family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL);
+	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index ab840386a74d..d9cb3532f1dd 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -103,25 +103,39 @@ EXPORT_SYMBOL(lwtunnel_encap_del_ops);
 
 int lwtunnel_build_state(u16 encap_type,
 			 struct nlattr *encap, unsigned int family,
-			 const void *cfg, struct lwtunnel_state **lws)
+			 const void *cfg, struct lwtunnel_state **lws,
+			 struct netlink_ext_ack *extack)
 {
 	const struct lwtunnel_encap_ops *ops;
+	bool found = false;
 	int ret = -EINVAL;
 
 	if (encap_type == LWTUNNEL_ENCAP_NONE ||
-	    encap_type > LWTUNNEL_ENCAP_MAX)
+	    encap_type > LWTUNNEL_ENCAP_MAX) {
+		NL_SET_ERR_MSG_ATTR(extack, encap,
+				    "Unknown LWT encapsulation type");
 		return ret;
+	}
 
 	ret = -EOPNOTSUPP;
 	rcu_read_lock();
 	ops = rcu_dereference(lwtun_encaps[encap_type]);
 	if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
-		ret = ops->build_state(encap, family, cfg, lws);
+		found = true;
+		ret = ops->build_state(encap, family, cfg, lws, extack);
 		if (ret)
 			module_put(ops->owner);
 	}
 	rcu_read_unlock();
 
+	/* don't rely on -EOPNOTSUPP to detect match as build_state
+	 * handlers could return it
+	 */
+	if (!found) {
+		NL_SET_ERR_MSG_ATTR(extack, encap,
+				    "LWT encapsulation type not supported");
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL(lwtunnel_build_state);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 2704e08545da..769ab87ebc4b 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
 void fib_release_info(struct fib_info *);
 struct fib_info *fib_create_info(struct fib_config *cfg,
 				 struct netlink_ext_ack *extack);
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+		 struct netlink_ext_ack *extack);
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 		  u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
 		  unsigned int);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index de9484658232..2157dc08c407 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -532,7 +532,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 				ret = lwtunnel_build_state(nla_get_u16(
 							   nla_entype),
 							   nla,  AF_INET, cfg,
-							   &lwtstate);
+							   &lwtstate, extack);
 				if (ret)
 					goto errout;
 				nexthop_nh->nh_lwtstate =
@@ -614,7 +614,8 @@ static inline void fib_add_weight(struct fib_info *fi,
 static int fib_encap_match(u16 encap_type,
 			   struct nlattr *encap,
 			   const struct fib_nh *nh,
-			   const struct fib_config *cfg)
+			   const struct fib_config *cfg,
+			   struct netlink_ext_ack *extack)
 {
 	struct lwtunnel_state *lwtstate;
 	int ret, result = 0;
@@ -622,8 +623,8 @@ static int fib_encap_match(u16 encap_type,
 	if (encap_type == LWTUNNEL_ENCAP_NONE)
 		return 0;
 
-	ret = lwtunnel_build_state(encap_type, encap,
-				   AF_INET, cfg, &lwtstate);
+	ret = lwtunnel_build_state(encap_type, encap, AF_INET,
+				   cfg, &lwtstate, extack);
 	if (!ret) {
 		result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
 		lwtstate_free(lwtstate);
@@ -632,7 +633,8 @@ static int fib_encap_match(u16 encap_type,
 	return result;
 }
 
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+		 struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	struct rtnexthop *rtnh;
@@ -644,9 +646,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 
 	if (cfg->fc_oif || cfg->fc_gw) {
 		if (cfg->fc_encap) {
-			if (fib_encap_match(cfg->fc_encap_type,
-					    cfg->fc_encap, fi->fib_nh, cfg))
-			    return 1;
+			if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
+					    fi->fib_nh, cfg, extack))
+				return 1;
 		}
 		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
 		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
@@ -1148,7 +1150,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 			}
 			err = lwtunnel_build_state(cfg->fc_encap_type,
 						   cfg->fc_encap, AF_INET, cfg,
-						   &lwtstate);
+						   &lwtstate, extack);
 			if (err)
 				goto failure;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index a624d380c81d..d56659e97a6e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1562,7 +1562,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
 		     fi->fib_protocol == cfg->fc_protocol) &&
-		    fib_nh_match(cfg, fi) == 0) {
+		    fib_nh_match(cfg, fi, extack) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index baf196eaf1d8..90e11479c725 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -228,14 +228,16 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
 
 static int ip_tun_build_state(struct nlattr *attr,
 			      unsigned int family, const void *cfg,
-			      struct lwtunnel_state **ts)
+			      struct lwtunnel_state **ts,
+			      struct netlink_ext_ack *extack)
 {
 	struct ip_tunnel_info *tun_info;
 	struct lwtunnel_state *new_state;
 	struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL);
+	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy,
+			       extack);
 	if (err < 0)
 		return err;
 
@@ -325,7 +327,8 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
 
 static int ip6_tun_build_state(struct nlattr *attr,
 			       unsigned int family, const void *cfg,
-			       struct lwtunnel_state **ts)
+			       struct lwtunnel_state **ts,
+			       struct netlink_ext_ack *extack)
 {
 	struct ip_tunnel_info *tun_info;
 	struct lwtunnel_state *new_state;
@@ -333,7 +336,7 @@ static int ip6_tun_build_state(struct nlattr *attr,
 	int err;
 
 	err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
-			       NULL);
+			       extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index f4a413aba423..0c02a09bc351 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -117,7 +117,8 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 
 static int ila_build_state(struct nlattr *nla,
 			   unsigned int family, const void *cfg,
-			   struct lwtunnel_state **ts)
+			   struct lwtunnel_state **ts,
+			   struct netlink_ext_ack *extack)
 {
 	struct ila_lwt *ilwt;
 	struct ila_params *p;
@@ -146,7 +147,7 @@ static int ila_build_state(struct nlattr *nla,
 		return -EINVAL;
 	}
 
-	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL);
+	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 524a76b5206e..9d9b5bbea153 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1939,7 +1939,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 
 		err = lwtunnel_build_state(cfg->fc_encap_type,
 					   cfg->fc_encap, AF_INET6, cfg,
-					   &lwtstate);
+					   &lwtstate, extack);
 		if (err)
 			goto out;
 		rt->dst.lwtstate = lwtstate_get(lwtstate);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 6a495490d43e..264d772d3c7d 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -326,7 +326,8 @@ drop:
 
 static int seg6_build_state(struct nlattr *nla,
 			    unsigned int family, const void *cfg,
-			    struct lwtunnel_state **ts)
+			    struct lwtunnel_state **ts,
+			    struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 	struct seg6_iptunnel_encap *tuninfo;
@@ -336,7 +337,7 @@ static int seg6_build_state(struct nlattr *nla,
 	int err;
 
 	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
-			       seg6_iptunnel_policy, NULL);
+			       seg6_iptunnel_policy, extack);
 
 	if (err < 0)
 		return err;
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 369c7a23c86c..15e1aa708e50 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -159,7 +159,8 @@ drop:
 
 static int mpls_build_state(struct nlattr *nla,
 			    unsigned int family, const void *cfg,
-			    struct lwtunnel_state **ts)
+			    struct lwtunnel_state **ts,
+			    struct netlink_ext_ack *extack)
 {
 	struct mpls_iptunnel_encap *tun_encap_info;
 	struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
@@ -168,7 +169,7 @@ static int mpls_build_state(struct nlattr *nla,
 	int ret;
 
 	ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
-			       mpls_iptunnel_policy, NULL);
+			       mpls_iptunnel_policy, extack);
 	if (ret < 0)
 		return ret;
 
-- 
cgit v1.2.3


From a4f4fa681add289ebfec6d776376ad7a2ffda669 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Tue, 30 May 2017 09:25:48 -0700
Subject: PCI: Cache PRI and PASID bits in pci_dev

Device drivers need to check if an IOMMU enabled ATS, PRI and PASID in
order to know when they can use the SVM API.  Cache PRI and PASID bits in
the pci_dev structure, similarly to what is currently done for ATS.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/ats.c   | 23 +++++++++++++++++++++++
 include/linux/pci.h |  2 ++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index eeb9fb2b47aa..21264976fa13 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -153,6 +153,9 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	u32 max_requests;
 	int pos;
 
+	if (WARN_ON(pdev->pri_enabled))
+		return -EBUSY;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
@@ -170,6 +173,8 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	control |= PCI_PRI_CTRL_ENABLE;
 	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
+	pdev->pri_enabled = 1;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pci_enable_pri);
@@ -185,6 +190,9 @@ void pci_disable_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
+	if (WARN_ON(!pdev->pri_enabled))
+		return;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
@@ -192,6 +200,8 @@ void pci_disable_pri(struct pci_dev *pdev)
 	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
 	control &= ~PCI_PRI_CTRL_ENABLE;
 	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+
+	pdev->pri_enabled = 0;
 }
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
@@ -207,6 +217,9 @@ int pci_reset_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
+	if (WARN_ON(pdev->pri_enabled))
+		return -EBUSY;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
@@ -239,6 +252,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	u16 control, supported;
 	int pos;
 
+	if (WARN_ON(pdev->pasid_enabled))
+		return -EBUSY;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
@@ -259,6 +275,8 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 
 	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
+	pdev->pasid_enabled = 1;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pci_enable_pasid);
@@ -273,11 +291,16 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	u16 control = 0;
 	int pos;
 
+	if (WARN_ON(!pdev->pasid_enabled))
+		return;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return;
 
 	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
+
+	pdev->pasid_enabled = 0;
 }
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..f612c1d85863 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -356,6 +356,8 @@ struct pci_dev {
 	unsigned int	msix_enabled:1;
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	ats_enabled:1;	/* Address Translation Service */
+	unsigned int	pasid_enabled:1;	/* Process Address Space ID */
+	unsigned int	pri_enabled:1;		/* Page Request Interface */
 	unsigned int	is_managed:1;
 	unsigned int    needs_freset:1; /* Dev requires fundamental reset */
 	unsigned int	state_saved:1;
-- 
cgit v1.2.3


From 4ebeb1ec56d4c54a56b6f43c2603d9a4688c83ba Mon Sep 17 00:00:00 2001
From: CQ Tang <cq.tang@intel.com>
Date: Tue, 30 May 2017 09:25:49 -0700
Subject: PCI: Restore PRI and PASID state after Function-Level Reset

After a Function-Level Reset, PCI states need to be restored.  Save PASID
features and PRI reqs cached.

[bhelgaas: search for capability only if PRI/PASID were enabled]
Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jean-Phillipe Brucker <jean-philippe.brucker@arm.com>
Cc: David Woodhouse <dwmw2@infradead.org>
---
 drivers/pci/ats.c       | 64 ++++++++++++++++++++++++++++++++++++-------------
 drivers/pci/pci.c       |  3 +++
 include/linux/pci-ats.h | 10 ++++++++
 include/linux/pci.h     |  6 +++++
 4 files changed, 67 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 21264976fa13..ad8ddbbbf245 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -160,17 +160,16 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
 	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
-	if ((control & PCI_PRI_CTRL_ENABLE) ||
-	    !(status & PCI_PRI_STATUS_STOPPED))
+	if (!(status & PCI_PRI_STATUS_STOPPED))
 		return -EBUSY;
 
 	pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests);
 	reqs = min(max_requests, reqs);
+	pdev->pri_reqs_alloc = reqs;
 	pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
 
-	control |= PCI_PRI_CTRL_ENABLE;
+	control = PCI_PRI_CTRL_ENABLE;
 	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	pdev->pri_enabled = 1;
@@ -205,6 +204,28 @@ void pci_disable_pri(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
+/**
+ * pci_restore_pri_state - Restore PRI
+ * @pdev: PCI device structure
+ */
+void pci_restore_pri_state(struct pci_dev *pdev)
+{
+	u16 control = PCI_PRI_CTRL_ENABLE;
+	u32 reqs = pdev->pri_reqs_alloc;
+	int pos;
+
+	if (!pdev->pri_enabled)
+		return;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+	if (!pos)
+		return;
+
+	pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+}
+EXPORT_SYMBOL_GPL(pci_restore_pri_state);
+
 /**
  * pci_reset_pri - Resets device's PRI state
  * @pdev: PCI device structure
@@ -224,12 +245,7 @@ int pci_reset_pri(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
-	if (control & PCI_PRI_CTRL_ENABLE)
-		return -EBUSY;
-
-	control |= PCI_PRI_CTRL_RESET;
-
+	control = PCI_PRI_CTRL_RESET;
 	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	return 0;
@@ -259,12 +275,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control);
 	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
-
-	if (control & PCI_PASID_CTRL_ENABLE)
-		return -EINVAL;
-
 	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
 	/* User wants to enable anything unsupported? */
@@ -272,6 +283,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 		return -EINVAL;
 
 	control = PCI_PASID_CTRL_ENABLE | features;
+	pdev->pasid_features = features;
 
 	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
@@ -284,7 +296,6 @@ EXPORT_SYMBOL_GPL(pci_enable_pasid);
 /**
  * pci_disable_pasid - Disable the PASID capability
  * @pdev: PCI device structure
- *
  */
 void pci_disable_pasid(struct pci_dev *pdev)
 {
@@ -304,6 +315,27 @@ void pci_disable_pasid(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
+/**
+ * pci_restore_pasid_state - Restore PASID capabilities
+ * @pdev: PCI device structure
+ */
+void pci_restore_pasid_state(struct pci_dev *pdev)
+{
+	u16 control;
+	int pos;
+
+	if (!pdev->pasid_enabled)
+		return;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+	if (!pos)
+		return;
+
+	control = PCI_PASID_CTRL_ENABLE | pdev->pasid_features;
+	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
+}
+EXPORT_SYMBOL_GPL(pci_restore_pasid_state);
+
 /**
  * pci_pasid_features - Check which PASID features are supported
  * @pdev: PCI device structure
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5bba8e6..3b38e98e68df 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -28,6 +28,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/pci_hotplug.h>
 #include <linux/vmalloc.h>
+#include <linux/pci-ats.h>
 #include <asm/setup.h>
 #include <asm/dma.h>
 #include <linux/aer.h>
@@ -1173,6 +1174,8 @@ void pci_restore_state(struct pci_dev *dev)
 
 	/* PCI Express register must be restored first */
 	pci_restore_pcie_state(dev);
+	pci_restore_pasid_state(dev);
+	pci_restore_pri_state(dev);
 	pci_restore_ats_state(dev);
 	pci_restore_vc_state(dev);
 
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 57e0b8250947..782fb8e0755f 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -7,6 +7,7 @@
 
 int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
 void pci_disable_pri(struct pci_dev *pdev);
+void pci_restore_pri_state(struct pci_dev *pdev);
 int pci_reset_pri(struct pci_dev *pdev);
 
 #else /* CONFIG_PCI_PRI */
@@ -20,6 +21,10 @@ static inline void pci_disable_pri(struct pci_dev *pdev)
 {
 }
 
+static inline void pci_restore_pri_state(struct pci_dev *pdev)
+{
+}
+
 static inline int pci_reset_pri(struct pci_dev *pdev)
 {
 	return -ENODEV;
@@ -31,6 +36,7 @@ static inline int pci_reset_pri(struct pci_dev *pdev)
 
 int pci_enable_pasid(struct pci_dev *pdev, int features);
 void pci_disable_pasid(struct pci_dev *pdev);
+void pci_restore_pasid_state(struct pci_dev *pdev);
 int pci_pasid_features(struct pci_dev *pdev);
 int pci_max_pasids(struct pci_dev *pdev);
 
@@ -45,6 +51,10 @@ static inline void pci_disable_pasid(struct pci_dev *pdev)
 {
 }
 
+static inline void pci_restore_pasid_state(struct pci_dev *pdev)
+{
+}
+
 static inline int pci_pasid_features(struct pci_dev *pdev)
 {
 	return -EINVAL;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f612c1d85863..c7cfdff2529c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -400,6 +400,12 @@ struct pci_dev {
 	u16		ats_cap;	/* ATS Capability offset */
 	u8		ats_stu;	/* ATS Smallest Translation Unit */
 	atomic_t	ats_ref_cnt;	/* number of VFs with ATS enabled */
+#endif
+#ifdef CONFIG_PCI_PRI
+	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
+#endif
+#ifdef CONFIG_PCI_PASID
+	u16		pasid_features;
 #endif
 	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
 	size_t romlen; /* Length of ROM if it's not from the BAR */
-- 
cgit v1.2.3


From c7f3c595f6ff7a1cfbf7ac782722bf5173e27775 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 19 May 2017 15:29:10 -0700
Subject: pstore: Populate pstore record->time field

The current time will be initially available in the record->time field
for all pstore_read() and pstore_write() calls. Backends can either
update the field during read(), or use the field during write() instead
of fetching time themselves.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/firmware/efi/efi-pstore.c |  3 ---
 fs/pstore/platform.c              |  6 ++++++
 fs/pstore/ram.c                   | 16 +++++-----------
 include/linux/pstore.h            |  5 ++++-
 4 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index d30dc935e20e..5a0fa939d70f 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -244,9 +244,6 @@ static int efi_pstore_write(struct pstore_record *record)
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	int i, ret = 0;
 
-	record->time.tv_sec = get_seconds();
-	record->time.tv_nsec = 0;
-
 	record->id = generic_id(record->time.tv_sec, record->part,
 				record->count);
 
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 7798041f3fba..96fbff7b87c8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -480,6 +480,12 @@ void pstore_record_init(struct pstore_record *record,
 	memset(record, 0, sizeof(*record));
 
 	record->psi = psinfo;
+
+	/* Report zeroed timestamp if called before timekeeping has resumed. */
+	if (__getnstimeofday(&record->time)) {
+		record->time.tv_sec = 0;
+		record->time.tv_nsec = 0;
+	}
 }
 
 /*
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5cb022c8cd33..7125b398d312 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -27,7 +27,6 @@
 #include <linux/module.h>
 #include <linux/version.h>
 #include <linux/pstore.h>
-#include <linux/time.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
@@ -356,20 +355,15 @@ out:
 }
 
 static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz,
-				     bool compressed)
+				     struct pstore_record *record)
 {
 	char *hdr;
-	struct timespec timestamp;
 	size_t len;
 
-	/* Report zeroed timestamp if called before timekeeping has resumed. */
-	if (__getnstimeofday(&timestamp)) {
-		timestamp.tv_sec = 0;
-		timestamp.tv_nsec = 0;
-	}
 	hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n",
-		(long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000),
-		compressed ? 'C' : 'D');
+		record->time.tv_sec,
+		record->time.tv_nsec / 1000,
+		record->compressed ? 'C' : 'D');
 	WARN_ON_ONCE(!hdr);
 	len = hdr ? strlen(hdr) : 0;
 	persistent_ram_write(prz, hdr, len);
@@ -440,7 +434,7 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
 	prz = cxt->dprzs[cxt->dump_write_cnt];
 
 	/* Build header and append record contents. */
-	hlen = ramoops_write_kmsg_hdr(prz, record->compressed);
+	hlen = ramoops_write_kmsg_hdr(prz, record);
 	size = record->size;
 	if (size + hlen > prz->buffer_size)
 		size = prz->buffer_size - hlen;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index e2233f50f428..61f806a7fe29 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -138,7 +138,10 @@ struct pstore_record {
  *		memory allocation may be broken during an Oops. Regardless,
  *		@buf must be proccesed or copied before returning. The
  *		backend is also expected to write @id with something that
- 8		can help identify this record to a future @erase callback.
+ *		can help identify this record to a future @erase callback.
+ *		The @time field will be prepopulated with the current time,
+ *		when available. The @size field will have the size of data
+ *		in @buf.
  *
  *	Returns 0 on success, and non-zero on error.
  *
-- 
cgit v1.2.3


From 71189fa9b092ef125ee741eccb2f5fa916798afd Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 30 May 2017 13:31:27 -0700
Subject: bpf: free up BPF_JMP | BPF_CALL | BPF_X opcode

free up BPF_JMP | BPF_CALL | BPF_X opcode to be used by actual
indirect call by register and use kernel internal opcode to
mark call instruction into bpf_tail_call() helper.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/net/bpf_jit_comp.c     | 2 +-
 arch/powerpc/net/bpf_jit_comp64.c | 2 +-
 arch/s390/net/bpf_jit_comp.c      | 2 +-
 arch/sparc/net/bpf_jit_comp_64.c  | 2 +-
 arch/x86/net/bpf_jit_comp.c       | 2 +-
 include/linux/filter.h            | 3 +++
 kernel/bpf/core.c                 | 2 +-
 kernel/bpf/verifier.c             | 2 +-
 8 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 71f930501ade..b1d38eeb24f6 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -586,7 +586,7 @@ emit_cond_jmp:
 		break;
 	}
 	/* tail call */
-	case BPF_JMP | BPF_CALL | BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
 		if (emit_bpf_tail_call(ctx))
 			return -EFAULT;
 		break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index aee2bb817ac6..a01366584a4b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -938,7 +938,7 @@ common_load:
 		/*
 		 * Tail call
 		 */
-		case BPF_JMP | BPF_CALL | BPF_X:
+		case BPF_JMP | BPF_TAIL_CALL:
 			ctx->seen |= SEEN_TAILCALL;
 			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
 			break;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 6e97a2e3fd8d..42ad3832586c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -991,7 +991,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		}
 		break;
 	}
-	case BPF_JMP | BPF_CALL | BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
 		/*
 		 * Implicit input:
 		 *  B1: pointer to ctx
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 21de77419f48..4a52d34facf9 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1217,7 +1217,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	}
 
 	/* tail call */
-	case BPF_JMP | BPF_CALL |BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
 		emit_tail_call(ctx);
 		break;
 
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f58939393eef..fec12eaa0dec 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -877,7 +877,7 @@ xadd:			if (is_imm8(insn->off))
 			}
 			break;
 
-		case BPF_JMP | BPF_CALL | BPF_X:
+		case BPF_JMP | BPF_TAIL_CALL:
 			emit_bpf_tail_call(&prog);
 			break;
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 62d948f80730..a20ba40fcb73 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -57,6 +57,9 @@ struct bpf_prog_aux;
 #define BPF_REG_AX		MAX_BPF_REG
 #define MAX_BPF_JIT_REG		(MAX_BPF_REG + 1)
 
+/* unused opcode to mark special call to bpf_tail_call() helper */
+#define BPF_TAIL_CALL	0xf0
+
 /* As per nm, we expose JITed images as text (code) section for
  * kallsyms. That way, tools like perf can find it to match
  * addresses.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index dedf367f59bb..339289402b96 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -824,7 +824,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
 		/* Call instruction */
 		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
-		[BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
+		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
 		/* Jumps */
 		[BPF_JMP | BPF_JA] = &&JMP_JA,
 		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 339c8a1371de..28113d0e8e92 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3469,7 +3469,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 * that doesn't support bpf_tail_call yet
 			 */
 			insn->imm = 0;
-			insn->code |= BPF_X;
+			insn->code = BPF_JMP | BPF_TAIL_CALL;
 			continue;
 		}
 
-- 
cgit v1.2.3


From 8726679a0fa317f8e83d0843b266453f31bff092 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 30 May 2017 13:31:29 -0700
Subject: bpf: teach verifier to track stack depth

teach verifier to track bpf program stack depth

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   |  1 +
 kernel/bpf/verifier.c | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6bb38d76faf4..fcc80ca11045 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -171,6 +171,7 @@ struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
 	u32 max_ctx_offset;
+	u32 stack_depth;
 	struct latch_tree_node ksym_tnode;
 	struct list_head ksym_lnode;
 	const struct bpf_verifier_ops *ops;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 28113d0e8e92..d96f27ff9f6f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -926,6 +926,10 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			verbose("invalid stack off=%d size=%d\n", off, size);
 			return -EACCES;
 		}
+
+		if (env->prog->aux->stack_depth < -off)
+			env->prog->aux->stack_depth = -off;
+
 		if (t == BPF_WRITE) {
 			if (!env->allow_ptr_leaks &&
 			    state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
@@ -1032,6 +1036,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		return -EACCES;
 	}
 
+	if (env->prog->aux->stack_depth < -off)
+		env->prog->aux->stack_depth = -off;
+
 	if (meta && meta->raw_mode) {
 		meta->access_size = access_size;
 		meta->regno = regno;
@@ -3167,7 +3174,8 @@ process_bpf_exit:
 		insn_idx++;
 	}
 
-	verbose("processed %d insns\n", insn_processed);
+	verbose("processed %d insns, stack depth %d\n",
+		insn_processed, env->prog->aux->stack_depth);
 	return 0;
 }
 
-- 
cgit v1.2.3


From fc3973a1fa090d5f5437621a9ae1f2232a04ee5b Mon Sep 17 00:00:00 2001
From: Woojung Huh <Woojung.Huh@microchip.com>
Date: Wed, 31 May 2017 20:19:13 +0000
Subject: phy: micrel: add Microchip KSZ 9477 Switch PHY support

Adding Microchip 9477 Phy included in KSZ9477 Switch.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Woojung Huh <Woojung.Huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 11 +++++++++++
 include/linux/micrel_phy.h |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 4cfd54182da2..46e80bcc7a8a 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -20,6 +20,7 @@
  *			   ksz8081, ksz8091,
  *			   ksz8061,
  *		Switch : ksz8873, ksz886x
+ *			 ksz9477
  */
 
 #include <linux/kernel.h>
@@ -996,6 +997,16 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= ksz8873mll_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
+}, {
+	.phy_id		= PHY_ID_KSZ9477,
+	.phy_id_mask	= MICREL_PHY_ID_MASK,
+	.name		= "Microchip KSZ9477",
+	.features	= PHY_GBIT_FEATURES,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
 } };
 
 module_phy_driver(ksphy_driver);
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index f541da68d1e7..472fa4d4ea62 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -37,6 +37,8 @@
 
 #define PHY_ID_KSZ8795		0x00221550
 
+#define	PHY_ID_KSZ9477		0x00221631
+
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
 #define MICREL_PHY_FXEN		0x00000002
-- 
cgit v1.2.3


From b987e98e50ab90e5291581204ef7a1c649313a70 Mon Sep 17 00:00:00 2001
From: Woojung Huh <Woojung.Huh@microchip.com>
Date: Wed, 31 May 2017 20:19:19 +0000
Subject: dsa: add DSA switch driver for Microchip KSZ9477

The KSZ9477 is a fully integrated layer 2, managed, 7 ports GigE switch
with numerous advanced features. 5 ports incorporate 10/100/1000 Mbps PHYs.
The other 2 ports have interfaces that can be configured as SGMII, RGMII, MII
or RMII. Either of these may connect directly to a host processor or
to an external PHY. The SGMII port may interface to a fiber optic transceiver.

This driver currently supports vlan, fdb, mdb & mirror dsa switch operations.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Woojung Huh <Woojung.Huh@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/Kconfig                     |    2 +
 drivers/net/dsa/Makefile                    |    1 +
 drivers/net/dsa/microchip/Kconfig           |   12 +
 drivers/net/dsa/microchip/Makefile          |    2 +
 drivers/net/dsa/microchip/ksz_9477_reg.h    | 1676 +++++++++++++++++++++++++++
 drivers/net/dsa/microchip/ksz_common.c      | 1279 ++++++++++++++++++++
 drivers/net/dsa/microchip/ksz_priv.h        |  210 ++++
 drivers/net/dsa/microchip/ksz_spi.c         |  216 ++++
 include/linux/platform_data/microchip-ksz.h |   29 +
 9 files changed, 3427 insertions(+)
 create mode 100644 drivers/net/dsa/microchip/Kconfig
 create mode 100644 drivers/net/dsa/microchip/Makefile
 create mode 100644 drivers/net/dsa/microchip/ksz_9477_reg.h
 create mode 100644 drivers/net/dsa/microchip/ksz_common.c
 create mode 100644 drivers/net/dsa/microchip/ksz_priv.h
 create mode 100644 drivers/net/dsa/microchip/ksz_spi.c
 create mode 100644 include/linux/platform_data/microchip-ksz.h

(limited to 'include/linux')

diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 68131a45ac5e..83a9bc892a3b 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -39,6 +39,8 @@ config NET_DSA_MV88E6060
 	  This enables support for the Marvell 88E6060 ethernet switch
 	  chip.
 
+source "drivers/net/dsa/microchip/Kconfig"
+
 source "drivers/net/dsa/mv88e6xxx/Kconfig"
 
 config NET_DSA_QCA8K
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index 9613f36083a6..4a5b5bd297ee 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o
 obj-y				+= b53/
+obj-y				+= microchip/
 obj-y				+= mv88e6xxx/
diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig
new file mode 100644
index 000000000000..a8b8f59099ce
--- /dev/null
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -0,0 +1,12 @@
+menuconfig MICROCHIP_KSZ
+	tristate "Microchip KSZ series switch support"
+	depends on NET_DSA
+	select NET_DSA_TAG_KSZ
+	help
+	  This driver adds support for Microchip KSZ switch chips.
+
+config MICROCHIP_KSZ_SPI_DRIVER
+	tristate "KSZ series SPI connected switch driver"
+	depends on MICROCHIP_KSZ && SPI
+	help
+	  Select to enable support for registering switches configured through SPI.
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
new file mode 100644
index 000000000000..ed335e29fae8
--- /dev/null
+++ b/drivers/net/dsa/microchip/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_MICROCHIP_KSZ)	        += ksz_common.o
+obj-$(CONFIG_MICROCHIP_KSZ_SPI_DRIVER)	+= ksz_spi.o
diff --git a/drivers/net/dsa/microchip/ksz_9477_reg.h b/drivers/net/dsa/microchip/ksz_9477_reg.h
new file mode 100644
index 000000000000..6aa6752035a1
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_9477_reg.h
@@ -0,0 +1,1676 @@
+/*
+ * Microchip KSZ9477 register definitions
+ *
+ * Copyright (C) 2017
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __KSZ9477_REGS_H
+#define __KSZ9477_REGS_H
+
+#define KS_PRIO_M			0x7
+#define KS_PRIO_S			4
+
+/* 0 - Operation */
+#define REG_CHIP_ID0__1			0x0000
+
+#define REG_CHIP_ID1__1			0x0001
+
+#define FAMILY_ID			0x95
+#define FAMILY_ID_94			0x94
+#define FAMILY_ID_95			0x95
+#define FAMILY_ID_85			0x85
+#define FAMILY_ID_98			0x98
+#define FAMILY_ID_88			0x88
+
+#define REG_CHIP_ID2__1			0x0002
+
+#define CHIP_ID_63			0x63
+#define CHIP_ID_66			0x66
+#define CHIP_ID_67			0x67
+#define CHIP_ID_77			0x77
+#define CHIP_ID_93			0x93
+#define CHIP_ID_96			0x96
+#define CHIP_ID_97			0x97
+
+#define REG_CHIP_ID3__1			0x0003
+
+#define SWITCH_REVISION_M		0x0F
+#define SWITCH_REVISION_S		4
+#define SWITCH_RESET			0x01
+
+#define REG_SW_PME_CTRL			0x0006
+
+#define PME_ENABLE			BIT(1)
+#define PME_POLARITY			BIT(0)
+
+#define REG_GLOBAL_OPTIONS		0x000F
+
+#define SW_GIGABIT_ABLE			BIT(6)
+#define SW_REDUNDANCY_ABLE		BIT(5)
+#define SW_AVB_ABLE			BIT(4)
+#define SW_9567_RL_5_2			0xC
+#define SW_9477_SL_5_2			0xD
+
+#define SW_9896_GL_5_1			0xB
+#define SW_9896_RL_5_1			0x8
+#define SW_9896_SL_5_1			0x9
+
+#define SW_9895_GL_4_1			0x7
+#define SW_9895_RL_4_1			0x4
+#define SW_9895_SL_4_1			0x5
+
+#define SW_9896_RL_4_2			0x6
+
+#define SW_9893_RL_2_1			0x0
+#define SW_9893_SL_2_1			0x1
+#define SW_9893_GL_2_1			0x3
+
+#define SW_QW_ABLE			BIT(5)
+#define SW_9893_RN_2_1			0xC
+
+#define REG_SW_INT_STATUS__4		0x0010
+#define REG_SW_INT_MASK__4		0x0014
+
+#define LUE_INT				BIT(31)
+#define TRIG_TS_INT			BIT(30)
+#define APB_TIMEOUT_INT			BIT(29)
+
+#define SWITCH_INT_MASK			(TRIG_TS_INT | APB_TIMEOUT_INT)
+
+#define REG_SW_PORT_INT_STATUS__4	0x0018
+#define REG_SW_PORT_INT_MASK__4		0x001C
+#define REG_SW_PHY_INT_STATUS		0x0020
+#define REG_SW_PHY_INT_ENABLE		0x0024
+
+/* 1 - Global */
+#define REG_SW_GLOBAL_SERIAL_CTRL_0	0x0100
+#define SW_SPARE_REG_2			BIT(7)
+#define SW_SPARE_REG_1			BIT(6)
+#define SW_SPARE_REG_0			BIT(5)
+#define SW_BIG_ENDIAN			BIT(4)
+#define SPI_AUTO_EDGE_DETECTION		BIT(1)
+#define SPI_CLOCK_OUT_RISING_EDGE	BIT(0)
+
+#define REG_SW_GLOBAL_OUTPUT_CTRL__1	0x0103
+#define SW_ENABLE_REFCLKO		BIT(1)
+#define SW_REFCLKO_IS_125MHZ		BIT(0)
+
+#define REG_SW_IBA__4			0x0104
+
+#define SW_IBA_ENABLE			BIT(31)
+#define SW_IBA_DA_MATCH			BIT(30)
+#define SW_IBA_INIT			BIT(29)
+#define SW_IBA_QID_M			0xF
+#define SW_IBA_QID_S			22
+#define SW_IBA_PORT_M			0x2F
+#define SW_IBA_PORT_S			16
+#define SW_IBA_FRAME_TPID_M		0xFFFF
+
+#define REG_SW_APB_TIMEOUT_ADDR__4	0x0108
+
+#define APB_TIMEOUT_ACKNOWLEDGE		BIT(31)
+
+#define REG_SW_IBA_SYNC__1		0x010C
+
+#define REG_SW_IO_STRENGTH__1		0x010D
+#define SW_DRIVE_STRENGTH_M		0x7
+#define SW_DRIVE_STRENGTH_2MA		0
+#define SW_DRIVE_STRENGTH_4MA		1
+#define SW_DRIVE_STRENGTH_8MA		2
+#define SW_DRIVE_STRENGTH_12MA		3
+#define SW_DRIVE_STRENGTH_16MA		4
+#define SW_DRIVE_STRENGTH_20MA		5
+#define SW_DRIVE_STRENGTH_24MA		6
+#define SW_DRIVE_STRENGTH_28MA		7
+#define SW_HI_SPEED_DRIVE_STRENGTH_S	4
+#define SW_LO_SPEED_DRIVE_STRENGTH_S	0
+
+#define REG_SW_IBA_STATUS__4		0x0110
+
+#define SW_IBA_REQ			BIT(31)
+#define SW_IBA_RESP			BIT(30)
+#define SW_IBA_DA_MISMATCH		BIT(14)
+#define SW_IBA_FMT_MISMATCH		BIT(13)
+#define SW_IBA_CODE_ERROR		BIT(12)
+#define SW_IBA_CMD_ERROR		BIT(11)
+#define SW_IBA_CMD_LOC_M		(BIT(6) - 1)
+
+#define REG_SW_IBA_STATES__4		0x0114
+
+#define SW_IBA_BUF_STATE_S		30
+#define SW_IBA_CMD_STATE_S		28
+#define SW_IBA_RESP_STATE_S		26
+#define SW_IBA_STATE_M			0x3
+#define SW_IBA_PACKET_SIZE_M		0x7F
+#define SW_IBA_PACKET_SIZE_S		16
+#define SW_IBA_FMT_ID_M			0xFFFF
+
+#define REG_SW_IBA_RESULT__4		0x0118
+
+#define SW_IBA_SIZE_S			24
+
+#define SW_IBA_RETRY_CNT_M		(BIT(5) - 1)
+
+/* 2 - PHY */
+#define REG_SW_POWER_MANAGEMENT_CTRL	0x0201
+
+#define SW_PLL_POWER_DOWN		BIT(5)
+#define SW_POWER_DOWN_MODE		0x3
+#define SW_ENERGY_DETECTION		1
+#define SW_SOFT_POWER_DOWN		2
+#define SW_POWER_SAVING			3
+
+/* 3 - Operation Control */
+#define REG_SW_OPERATION		0x0300
+
+#define SW_DOUBLE_TAG			BIT(7)
+#define SW_RESET			BIT(1)
+#define SW_START			BIT(0)
+
+#define REG_SW_MAC_ADDR_0		0x0302
+#define REG_SW_MAC_ADDR_1		0x0303
+#define REG_SW_MAC_ADDR_2		0x0304
+#define REG_SW_MAC_ADDR_3		0x0305
+#define REG_SW_MAC_ADDR_4		0x0306
+#define REG_SW_MAC_ADDR_5		0x0307
+
+#define REG_SW_MTU__2			0x0308
+
+#define REG_SW_ISP_TPID__2		0x030A
+
+#define REG_SW_HSR_TPID__2		0x030C
+
+#define REG_AVB_STRATEGY__2		0x030E
+
+#define SW_SHAPING_CREDIT_ACCT		BIT(1)
+#define SW_POLICING_CREDIT_ACCT		BIT(0)
+
+#define REG_SW_LUE_CTRL_0		0x0310
+
+#define SW_VLAN_ENABLE			BIT(7)
+#define SW_DROP_INVALID_VID		BIT(6)
+#define SW_AGE_CNT_M			0x7
+#define SW_AGE_CNT_S			3
+#define SW_RESV_MCAST_ENABLE		BIT(2)
+#define SW_HASH_OPTION_M		0x03
+#define SW_HASH_OPTION_CRC		1
+#define SW_HASH_OPTION_XOR		2
+#define SW_HASH_OPTION_DIRECT		3
+
+#define REG_SW_LUE_CTRL_1		0x0311
+
+#define UNICAST_LEARN_DISABLE		BIT(7)
+#define SW_SRC_ADDR_FILTER		BIT(6)
+#define SW_FLUSH_STP_TABLE		BIT(5)
+#define SW_FLUSH_MSTP_TABLE		BIT(4)
+#define SW_FWD_MCAST_SRC_ADDR		BIT(3)
+#define SW_AGING_ENABLE			BIT(2)
+#define SW_FAST_AGING			BIT(1)
+#define SW_LINK_AUTO_AGING		BIT(0)
+
+#define REG_SW_LUE_CTRL_2		0x0312
+
+#define SW_TRAP_DOUBLE_TAG		BIT(6)
+#define SW_EGRESS_VLAN_FILTER_DYN	BIT(5)
+#define SW_EGRESS_VLAN_FILTER_STA	BIT(4)
+#define SW_FLUSH_OPTION_M		0x3
+#define SW_FLUSH_OPTION_S		2
+#define SW_FLUSH_OPTION_DYN_MAC		1
+#define SW_FLUSH_OPTION_STA_MAC		2
+#define SW_FLUSH_OPTION_BOTH		3
+#define SW_PRIO_M			0x3
+#define SW_PRIO_DA			0
+#define SW_PRIO_SA			1
+#define SW_PRIO_HIGHEST_DA_SA		2
+#define SW_PRIO_LOWEST_DA_SA		3
+
+#define REG_SW_LUE_CTRL_3		0x0313
+
+#define REG_SW_LUE_INT_STATUS		0x0314
+#define REG_SW_LUE_INT_ENABLE		0x0315
+
+#define LEARN_FAIL_INT			BIT(2)
+#define ALMOST_FULL_INT			BIT(1)
+#define WRITE_FAIL_INT			BIT(0)
+
+#define REG_SW_LUE_INDEX_0__2		0x0316
+
+#define ENTRY_INDEX_M			0x0FFF
+
+#define REG_SW_LUE_INDEX_1__2		0x0318
+
+#define FAIL_INDEX_M			0x03FF
+
+#define REG_SW_LUE_INDEX_2__2		0x031A
+
+#define REG_SW_LUE_UNK_UCAST_CTRL__4	0x0320
+
+#define SW_UNK_UCAST_ENABLE		BIT(31)
+
+#define REG_SW_LUE_UNK_MCAST_CTRL__4	0x0324
+
+#define SW_UNK_MCAST_ENABLE		BIT(31)
+
+#define REG_SW_LUE_UNK_VID_CTRL__4	0x0328
+
+#define SW_UNK_VID_ENABLE		BIT(31)
+
+#define REG_SW_MAC_CTRL_0		0x0330
+
+#define SW_NEW_BACKOFF			BIT(7)
+#define SW_CHECK_LENGTH			BIT(3)
+#define SW_PAUSE_UNH_MODE		BIT(1)
+#define SW_AGGR_BACKOFF			BIT(0)
+
+#define REG_SW_MAC_CTRL_1		0x0331
+
+#define MULTICAST_STORM_DISABLE		BIT(6)
+#define SW_BACK_PRESSURE		BIT(5)
+#define FAIR_FLOW_CTRL			BIT(4)
+#define NO_EXC_COLLISION_DROP		BIT(3)
+#define SW_JUMBO_PACKET			BIT(2)
+#define SW_LEGAL_PACKET_DISABLE		BIT(1)
+#define SW_PASS_SHORT_FRAME		BIT(0)
+
+#define REG_SW_MAC_CTRL_2		0x0332
+
+#define SW_REPLACE_VID			BIT(3)
+#define BROADCAST_STORM_RATE_HI		0x07
+
+#define REG_SW_MAC_CTRL_3		0x0333
+
+#define BROADCAST_STORM_RATE_LO		0xFF
+#define BROADCAST_STORM_RATE		0x07FF
+
+#define REG_SW_MAC_CTRL_4		0x0334
+
+#define SW_PASS_PAUSE			BIT(3)
+
+#define REG_SW_MAC_CTRL_5		0x0335
+
+#define SW_OUT_RATE_LIMIT_QUEUE_BASED	BIT(3)
+
+#define REG_SW_MAC_CTRL_6		0x0336
+
+#define SW_MIB_COUNTER_FLUSH		BIT(7)
+#define SW_MIB_COUNTER_FREEZE		BIT(6)
+
+#define REG_SW_MAC_802_1P_MAP_0		0x0338
+#define REG_SW_MAC_802_1P_MAP_1		0x0339
+#define REG_SW_MAC_802_1P_MAP_2		0x033A
+#define REG_SW_MAC_802_1P_MAP_3		0x033B
+
+#define SW_802_1P_MAP_M			KS_PRIO_M
+#define SW_802_1P_MAP_S			KS_PRIO_S
+
+#define REG_SW_MAC_ISP_CTRL		0x033C
+
+#define REG_SW_MAC_TOS_CTRL		0x033E
+
+#define SW_TOS_DSCP_REMARK		BIT(1)
+#define SW_TOS_DSCP_REMAP		BIT(0)
+
+#define REG_SW_MAC_TOS_PRIO_0		0x0340
+#define REG_SW_MAC_TOS_PRIO_1		0x0341
+#define REG_SW_MAC_TOS_PRIO_2		0x0342
+#define REG_SW_MAC_TOS_PRIO_3		0x0343
+#define REG_SW_MAC_TOS_PRIO_4		0x0344
+#define REG_SW_MAC_TOS_PRIO_5		0x0345
+#define REG_SW_MAC_TOS_PRIO_6		0x0346
+#define REG_SW_MAC_TOS_PRIO_7		0x0347
+#define REG_SW_MAC_TOS_PRIO_8		0x0348
+#define REG_SW_MAC_TOS_PRIO_9		0x0349
+#define REG_SW_MAC_TOS_PRIO_10		0x034A
+#define REG_SW_MAC_TOS_PRIO_11		0x034B
+#define REG_SW_MAC_TOS_PRIO_12		0x034C
+#define REG_SW_MAC_TOS_PRIO_13		0x034D
+#define REG_SW_MAC_TOS_PRIO_14		0x034E
+#define REG_SW_MAC_TOS_PRIO_15		0x034F
+#define REG_SW_MAC_TOS_PRIO_16		0x0350
+#define REG_SW_MAC_TOS_PRIO_17		0x0351
+#define REG_SW_MAC_TOS_PRIO_18		0x0352
+#define REG_SW_MAC_TOS_PRIO_19		0x0353
+#define REG_SW_MAC_TOS_PRIO_20		0x0354
+#define REG_SW_MAC_TOS_PRIO_21		0x0355
+#define REG_SW_MAC_TOS_PRIO_22		0x0356
+#define REG_SW_MAC_TOS_PRIO_23		0x0357
+#define REG_SW_MAC_TOS_PRIO_24		0x0358
+#define REG_SW_MAC_TOS_PRIO_25		0x0359
+#define REG_SW_MAC_TOS_PRIO_26		0x035A
+#define REG_SW_MAC_TOS_PRIO_27		0x035B
+#define REG_SW_MAC_TOS_PRIO_28		0x035C
+#define REG_SW_MAC_TOS_PRIO_29		0x035D
+#define REG_SW_MAC_TOS_PRIO_30		0x035E
+#define REG_SW_MAC_TOS_PRIO_31		0x035F
+
+#define REG_SW_MRI_CTRL_0		0x0370
+
+#define SW_IGMP_SNOOP			BIT(6)
+#define SW_IPV6_MLD_OPTION		BIT(3)
+#define SW_IPV6_MLD_SNOOP		BIT(2)
+#define SW_MIRROR_RX_TX			BIT(0)
+
+#define REG_SW_CLASS_D_IP_CTRL__4	0x0374
+
+#define SW_CLASS_D_IP_ENABLE		BIT(31)
+
+#define REG_SW_MRI_CTRL_8		0x0378
+
+#define SW_NO_COLOR_S			6
+#define SW_RED_COLOR_S			4
+#define SW_YELLOW_COLOR_S		2
+#define SW_GREEN_COLOR_S		0
+#define SW_COLOR_M			0x3
+
+#define REG_SW_QM_CTRL__4		0x0390
+
+#define PRIO_SCHEME_SELECT_M		KS_PRIO_M
+#define PRIO_SCHEME_SELECT_S		6
+#define PRIO_MAP_3_HI			0
+#define PRIO_MAP_2_HI			2
+#define PRIO_MAP_0_LO			3
+#define UNICAST_VLAN_BOUNDARY		BIT(1)
+
+#define REG_SW_EEE_QM_CTRL__2		0x03C0
+
+#define REG_SW_EEE_TXQ_WAIT_TIME__2	0x03C2
+
+/* 4 - */
+#define REG_SW_VLAN_ENTRY__4		0x0400
+
+#define VLAN_VALID			BIT(31)
+#define VLAN_FORWARD_OPTION		BIT(27)
+#define VLAN_PRIO_M			KS_PRIO_M
+#define VLAN_PRIO_S			24
+#define VLAN_MSTP_M			0x7
+#define VLAN_MSTP_S			12
+#define VLAN_FID_M			0x7F
+
+#define REG_SW_VLAN_ENTRY_UNTAG__4	0x0404
+#define REG_SW_VLAN_ENTRY_PORTS__4	0x0408
+
+#define REG_SW_VLAN_ENTRY_INDEX__2	0x040C
+
+#define VLAN_INDEX_M			0x0FFF
+
+#define REG_SW_VLAN_CTRL		0x040E
+
+#define VLAN_START			BIT(7)
+#define VLAN_ACTION			0x3
+#define VLAN_WRITE			1
+#define VLAN_READ			2
+#define VLAN_CLEAR			3
+
+#define REG_SW_ALU_INDEX_0		0x0410
+
+#define ALU_FID_INDEX_S			16
+#define ALU_MAC_ADDR_HI			0xFFFF
+
+#define REG_SW_ALU_INDEX_1		0x0414
+
+#define ALU_DIRECT_INDEX_M		(BIT(12) - 1)
+
+#define REG_SW_ALU_CTRL__4		0x0418
+
+#define ALU_VALID_CNT_M			(BIT(14) - 1)
+#define ALU_VALID_CNT_S			16
+#define ALU_START			BIT(7)
+#define ALU_VALID			BIT(6)
+#define ALU_DIRECT			BIT(2)
+#define ALU_ACTION			0x3
+#define ALU_WRITE			1
+#define ALU_READ			2
+#define ALU_SEARCH			3
+
+#define REG_SW_ALU_STAT_CTRL__4		0x041C
+
+#define ALU_STAT_INDEX_M		(BIT(4) - 1)
+#define ALU_STAT_INDEX_S		16
+#define ALU_RESV_MCAST_INDEX_M		(BIT(6) - 1)
+#define ALU_STAT_START			BIT(7)
+#define ALU_RESV_MCAST_ADDR		BIT(1)
+#define ALU_STAT_READ			BIT(0)
+
+#define REG_SW_ALU_VAL_A		0x0420
+
+#define ALU_V_STATIC_VALID		BIT(31)
+#define ALU_V_SRC_FILTER		BIT(30)
+#define ALU_V_DST_FILTER		BIT(29)
+#define ALU_V_PRIO_AGE_CNT_M		(BIT(3) - 1)
+#define ALU_V_PRIO_AGE_CNT_S		26
+#define ALU_V_MSTP_M			0x7
+
+#define REG_SW_ALU_VAL_B		0x0424
+
+#define ALU_V_OVERRIDE			BIT(31)
+#define ALU_V_USE_FID			BIT(30)
+#define ALU_V_PORT_MAP			(BIT(24) - 1)
+
+#define REG_SW_ALU_VAL_C		0x0428
+
+#define ALU_V_FID_M			(BIT(16) - 1)
+#define ALU_V_FID_S			16
+#define ALU_V_MAC_ADDR_HI		0xFFFF
+
+#define REG_SW_ALU_VAL_D		0x042C
+
+#define REG_HSR_ALU_INDEX_0		0x0440
+
+#define REG_HSR_ALU_INDEX_1		0x0444
+
+#define HSR_DST_MAC_INDEX_LO_S		16
+#define HSR_SRC_MAC_INDEX_HI		0xFFFF
+
+#define REG_HSR_ALU_INDEX_2		0x0448
+
+#define HSR_INDEX_MAX			BIT(9)
+#define HSR_DIRECT_INDEX_M		(HSR_INDEX_MAX - 1)
+
+#define REG_HSR_ALU_INDEX_3		0x044C
+
+#define HSR_PATH_INDEX_M		(BIT(4) - 1)
+
+#define REG_HSR_ALU_CTRL__4		0x0450
+
+#define HSR_VALID_CNT_M			(BIT(14) - 1)
+#define HSR_VALID_CNT_S			16
+#define HSR_START			BIT(7)
+#define HSR_VALID			BIT(6)
+#define HSR_SEARCH_END			BIT(5)
+#define HSR_DIRECT			BIT(2)
+#define HSR_ACTION			0x3
+#define HSR_WRITE			1
+#define HSR_READ			2
+#define HSR_SEARCH			3
+
+#define REG_HSR_ALU_VAL_A		0x0454
+
+#define HSR_V_STATIC_VALID		BIT(31)
+#define HSR_V_AGE_CNT_M			(BIT(3) - 1)
+#define HSR_V_AGE_CNT_S			26
+#define HSR_V_PATH_ID_M			(BIT(4) - 1)
+
+#define REG_HSR_ALU_VAL_B		0x0458
+
+#define REG_HSR_ALU_VAL_C		0x045C
+
+#define HSR_V_DST_MAC_ADDR_LO_S		16
+#define HSR_V_SRC_MAC_ADDR_HI		0xFFFF
+
+#define REG_HSR_ALU_VAL_D		0x0460
+
+#define REG_HSR_ALU_VAL_E		0x0464
+
+#define HSR_V_START_SEQ_1_S		16
+#define HSR_V_START_SEQ_2_S		0
+
+#define REG_HSR_ALU_VAL_F		0x0468
+
+#define HSR_V_EXP_SEQ_1_S		16
+#define HSR_V_EXP_SEQ_2_S		0
+
+#define REG_HSR_ALU_VAL_G		0x046C
+
+#define HSR_V_SEQ_CNT_1_S		16
+#define HSR_V_SEQ_CNT_2_S		0
+
+#define HSR_V_SEQ_M			(BIT(16) - 1)
+
+/* 5 - PTP Clock */
+#define REG_PTP_CLK_CTRL		0x0500
+
+#define PTP_STEP_ADJ			BIT(6)
+#define PTP_STEP_DIR			BIT(5)
+#define PTP_READ_TIME			BIT(4)
+#define PTP_LOAD_TIME			BIT(3)
+#define PTP_CLK_ADJ_ENABLE		BIT(2)
+#define PTP_CLK_ENABLE			BIT(1)
+#define PTP_CLK_RESET			BIT(0)
+
+#define REG_PTP_RTC_SUB_NANOSEC__2	0x0502
+
+#define PTP_RTC_SUB_NANOSEC_M		0x0007
+
+#define REG_PTP_RTC_NANOSEC		0x0504
+#define REG_PTP_RTC_NANOSEC_H		0x0504
+#define REG_PTP_RTC_NANOSEC_L		0x0506
+
+#define REG_PTP_RTC_SEC			0x0508
+#define REG_PTP_RTC_SEC_H		0x0508
+#define REG_PTP_RTC_SEC_L		0x050A
+
+#define REG_PTP_SUBNANOSEC_RATE		0x050C
+#define REG_PTP_SUBNANOSEC_RATE_H	0x050C
+
+#define PTP_RATE_DIR			BIT(31)
+#define PTP_TMP_RATE_ENABLE		BIT(30)
+
+#define REG_PTP_SUBNANOSEC_RATE_L	0x050E
+
+#define REG_PTP_RATE_DURATION		0x0510
+#define REG_PTP_RATE_DURATION_H		0x0510
+#define REG_PTP_RATE_DURATION_L		0x0512
+
+#define REG_PTP_MSG_CONF1		0x0514
+
+#define PTP_802_1AS			BIT(7)
+#define PTP_ENABLE			BIT(6)
+#define PTP_ETH_ENABLE			BIT(5)
+#define PTP_IPV4_UDP_ENABLE		BIT(4)
+#define PTP_IPV6_UDP_ENABLE		BIT(3)
+#define PTP_TC_P2P			BIT(2)
+#define PTP_MASTER			BIT(1)
+#define PTP_1STEP			BIT(0)
+
+#define REG_PTP_MSG_CONF2		0x0516
+
+#define PTP_UNICAST_ENABLE		BIT(12)
+#define PTP_ALTERNATE_MASTER		BIT(11)
+#define PTP_ALL_HIGH_PRIO		BIT(10)
+#define PTP_SYNC_CHECK			BIT(9)
+#define PTP_DELAY_CHECK			BIT(8)
+#define PTP_PDELAY_CHECK		BIT(7)
+#define PTP_DROP_SYNC_DELAY_REQ		BIT(5)
+#define PTP_DOMAIN_CHECK		BIT(4)
+#define PTP_UDP_CHECKSUM		BIT(2)
+
+#define REG_PTP_DOMAIN_VERSION		0x0518
+#define PTP_VERSION_M			0xFF00
+#define PTP_DOMAIN_M			0x00FF
+
+#define REG_PTP_UNIT_INDEX__4		0x0520
+
+#define PTP_UNIT_M			0xF
+
+#define PTP_GPIO_INDEX_S		16
+#define PTP_TSI_INDEX_S			8
+#define PTP_TOU_INDEX_S			0
+
+#define REG_PTP_TRIG_STATUS__4		0x0524
+
+#define TRIG_ERROR_S			16
+#define TRIG_DONE_S			0
+
+#define REG_PTP_INT_STATUS__4		0x0528
+
+#define TRIG_INT_S			16
+#define TS_INT_S			0
+
+#define TRIG_UNIT_M			0x7
+#define TS_UNIT_M			0x3
+
+#define REG_PTP_CTRL_STAT__4		0x052C
+
+#define GPIO_IN				BIT(7)
+#define GPIO_OUT			BIT(6)
+#define TS_INT_ENABLE			BIT(5)
+#define TRIG_ACTIVE			BIT(4)
+#define TRIG_ENABLE			BIT(3)
+#define TRIG_RESET			BIT(2)
+#define TS_ENABLE			BIT(1)
+#define TS_RESET			BIT(0)
+
+#define GPIO_CTRL_M			(GPIO_IN | GPIO_OUT)
+
+#define TRIG_CTRL_M			\
+	(TRIG_ACTIVE | TRIG_ENABLE | TRIG_RESET)
+
+#define TS_CTRL_M			\
+	(TS_INT_ENABLE | TS_ENABLE | TS_RESET)
+
+#define REG_TRIG_TARGET_NANOSEC		0x0530
+#define REG_TRIG_TARGET_SEC		0x0534
+
+#define REG_TRIG_CTRL__4		0x0538
+
+#define TRIG_CASCADE_ENABLE		BIT(31)
+#define TRIG_CASCADE_TAIL		BIT(30)
+#define TRIG_CASCADE_UPS_M		0xF
+#define TRIG_CASCADE_UPS_S		26
+#define TRIG_NOW			BIT(25)
+#define TRIG_NOTIFY			BIT(24)
+#define TRIG_EDGE			BIT(23)
+#define TRIG_PATTERN_S			20
+#define TRIG_PATTERN_M			0x7
+#define TRIG_NEG_EDGE			0
+#define TRIG_POS_EDGE			1
+#define TRIG_NEG_PULSE			2
+#define TRIG_POS_PULSE			3
+#define TRIG_NEG_PERIOD			4
+#define TRIG_POS_PERIOD			5
+#define TRIG_REG_OUTPUT			6
+#define TRIG_GPO_S			16
+#define TRIG_GPO_M			0xF
+#define TRIG_CASCADE_ITERATE_CNT_M	0xFFFF
+
+#define REG_TRIG_CYCLE_WIDTH		0x053C
+
+#define REG_TRIG_CYCLE_CNT		0x0540
+
+#define TRIG_CYCLE_CNT_M		0xFFFF
+#define TRIG_CYCLE_CNT_S		16
+#define TRIG_BIT_PATTERN_M		0xFFFF
+
+#define REG_TRIG_ITERATE_TIME		0x0544
+
+#define REG_TRIG_PULSE_WIDTH__4		0x0548
+
+#define TRIG_PULSE_WIDTH_M		0x00FFFFFF
+
+#define REG_TS_CTRL_STAT__4		0x0550
+
+#define TS_EVENT_DETECT_M		0xF
+#define TS_EVENT_DETECT_S		17
+#define TS_EVENT_OVERFLOW		BIT(16)
+#define TS_GPI_M			0xF
+#define TS_GPI_S			8
+#define TS_DETECT_RISE			BIT(7)
+#define TS_DETECT_FALL			BIT(6)
+#define TS_DETECT_S			6
+#define TS_CASCADE_TAIL			BIT(5)
+#define TS_CASCADE_UPS_M		0xF
+#define TS_CASCADE_UPS_S		1
+#define TS_CASCADE_ENABLE		BIT(0)
+
+#define DETECT_RISE			(TS_DETECT_RISE >> TS_DETECT_S)
+#define DETECT_FALL			(TS_DETECT_FALL >> TS_DETECT_S)
+
+#define REG_TS_EVENT_0_NANOSEC		0x0554
+#define REG_TS_EVENT_0_SEC		0x0558
+#define REG_TS_EVENT_0_SUB_NANOSEC	0x055C
+
+#define REG_TS_EVENT_1_NANOSEC		0x0560
+#define REG_TS_EVENT_1_SEC		0x0564
+#define REG_TS_EVENT_1_SUB_NANOSEC	0x0568
+
+#define REG_TS_EVENT_2_NANOSEC		0x056C
+#define REG_TS_EVENT_2_SEC		0x0570
+#define REG_TS_EVENT_2_SUB_NANOSEC	0x0574
+
+#define REG_TS_EVENT_3_NANOSEC		0x0578
+#define REG_TS_EVENT_3_SEC		0x057C
+#define REG_TS_EVENT_3_SUB_NANOSEC	0x0580
+
+#define REG_TS_EVENT_4_NANOSEC		0x0584
+#define REG_TS_EVENT_4_SEC		0x0588
+#define REG_TS_EVENT_4_SUB_NANOSEC	0x058C
+
+#define REG_TS_EVENT_5_NANOSEC		0x0590
+#define REG_TS_EVENT_5_SEC		0x0594
+#define REG_TS_EVENT_5_SUB_NANOSEC	0x0598
+
+#define REG_TS_EVENT_6_NANOSEC		0x059C
+#define REG_TS_EVENT_6_SEC		0x05A0
+#define REG_TS_EVENT_6_SUB_NANOSEC	0x05A4
+
+#define REG_TS_EVENT_7_NANOSEC		0x05A8
+#define REG_TS_EVENT_7_SEC		0x05AC
+#define REG_TS_EVENT_7_SUB_NANOSEC	0x05B0
+
+#define TS_EVENT_EDGE_M			0x1
+#define TS_EVENT_EDGE_S			30
+#define TS_EVENT_NANOSEC_M		(BIT(30) - 1)
+
+#define TS_EVENT_SUB_NANOSEC_M		0x7
+
+#define TS_EVENT_SAMPLE			\
+	(REG_TS_EVENT_1_NANOSEC - REG_TS_EVENT_0_NANOSEC)
+
+#define PORT_CTRL_ADDR(port, addr)	((addr) | (((port) + 1) << 12))
+
+#define REG_GLOBAL_RR_INDEX__1		0x0600
+
+/* DLR */
+#define REG_DLR_SRC_PORT__4		0x0604
+
+#define DLR_SRC_PORT_UNICAST		BIT(31)
+#define DLR_SRC_PORT_M			0x3
+#define DLR_SRC_PORT_BOTH		0
+#define DLR_SRC_PORT_EACH		1
+
+#define REG_DLR_IP_ADDR__4		0x0608
+
+#define REG_DLR_CTRL__1			0x0610
+
+#define DLR_RESET_SEQ_ID		BIT(3)
+#define DLR_BACKUP_AUTO_ON		BIT(2)
+#define DLR_BEACON_TX_ENABLE		BIT(1)
+#define DLR_ASSIST_ENABLE		BIT(0)
+
+#define REG_DLR_STATE__1		0x0611
+
+#define DLR_NODE_STATE_M		0x3
+#define DLR_NODE_STATE_S		1
+#define DLR_NODE_STATE_IDLE		0
+#define DLR_NODE_STATE_FAULT		1
+#define DLR_NODE_STATE_NORMAL		2
+#define DLR_RING_STATE_FAULT		0
+#define DLR_RING_STATE_NORMAL		1
+
+#define REG_DLR_PRECEDENCE__1		0x0612
+
+#define REG_DLR_BEACON_INTERVAL__4	0x0614
+
+#define REG_DLR_BEACON_TIMEOUT__4	0x0618
+
+#define REG_DLR_TIMEOUT_WINDOW__4	0x061C
+
+#define DLR_TIMEOUT_WINDOW_M		(BIT(22) - 1)
+
+#define REG_DLR_VLAN_ID__2		0x0620
+
+#define DLR_VLAN_ID_M			(BIT(12) - 1)
+
+#define REG_DLR_DEST_ADDR_0		0x0622
+#define REG_DLR_DEST_ADDR_1		0x0623
+#define REG_DLR_DEST_ADDR_2		0x0624
+#define REG_DLR_DEST_ADDR_3		0x0625
+#define REG_DLR_DEST_ADDR_4		0x0626
+#define REG_DLR_DEST_ADDR_5		0x0627
+
+#define REG_DLR_PORT_MAP__4		0x0628
+
+#define REG_DLR_CLASS__1		0x062C
+
+#define DLR_FRAME_QID_M			0x3
+
+/* HSR */
+#define REG_HSR_PORT_MAP__4		0x0640
+
+#define REG_HSR_ALU_CTRL_0__1		0x0644
+
+#define HSR_DUPLICATE_DISCARD		BIT(7)
+#define HSR_NODE_UNICAST		BIT(6)
+#define HSR_AGE_CNT_DEFAULT_M		0x7
+#define HSR_AGE_CNT_DEFAULT_S		3
+#define HSR_LEARN_MCAST_DISABLE		BIT(2)
+#define HSR_HASH_OPTION_M		0x3
+#define HSR_HASH_DISABLE		0
+#define HSR_HASH_UPPER_BITS		1
+#define HSR_HASH_LOWER_BITS		2
+#define HSR_HASH_XOR_BOTH_BITS		3
+
+#define REG_HSR_ALU_CTRL_1__1		0x0645
+
+#define HSR_LEARN_UCAST_DISABLE		BIT(7)
+#define HSR_FLUSH_TABLE			BIT(5)
+#define HSR_PROC_MCAST_SRC		BIT(3)
+#define HSR_AGING_ENABLE		BIT(2)
+
+#define REG_HSR_ALU_CTRL_2__2		0x0646
+
+#define REG_HSR_ALU_AGE_PERIOD__4	0x0648
+
+#define REG_HSR_ALU_INT_STATUS__1	0x064C
+#define REG_HSR_ALU_INT_MASK__1		0x064D
+
+#define HSR_WINDOW_OVERFLOW_INT		BIT(3)
+#define HSR_LEARN_FAIL_INT		BIT(2)
+#define HSR_ALMOST_FULL_INT		BIT(1)
+#define HSR_WRITE_FAIL_INT		BIT(0)
+
+#define REG_HSR_ALU_ENTRY_0__2		0x0650
+
+#define HSR_ENTRY_INDEX_M		(BIT(10) - 1)
+#define HSR_FAIL_INDEX_M		(BIT(8) - 1)
+
+#define REG_HSR_ALU_ENTRY_1__2		0x0652
+
+#define HSR_FAIL_LEARN_INDEX_M		(BIT(8) - 1)
+
+#define REG_HSR_ALU_ENTRY_3__2		0x0654
+
+#define HSR_CPU_ACCESS_ENTRY_INDEX_M	(BIT(8) - 1)
+
+/* 0 - Operation */
+#define REG_PORT_DEFAULT_VID		0x0000
+
+#define REG_PORT_CUSTOM_VID		0x0002
+#define REG_PORT_AVB_SR_1_VID		0x0004
+#define REG_PORT_AVB_SR_2_VID		0x0006
+
+#define REG_PORT_AVB_SR_1_TYPE		0x0008
+#define REG_PORT_AVB_SR_2_TYPE		0x000A
+
+#define REG_PORT_PME_STATUS		0x0013
+#define REG_PORT_PME_CTRL		0x0017
+
+#define PME_WOL_MAGICPKT		BIT(2)
+#define PME_WOL_LINKUP			BIT(1)
+#define PME_WOL_ENERGY			BIT(0)
+
+#define REG_PORT_INT_STATUS		0x001B
+#define REG_PORT_INT_MASK		0x001F
+
+#define PORT_SGMII_INT			BIT(3)
+#define PORT_PTP_INT			BIT(2)
+#define PORT_PHY_INT			BIT(1)
+#define PORT_ACL_INT			BIT(0)
+
+#define PORT_INT_MASK			\
+	(PORT_SGMII_INT | PORT_PTP_INT | PORT_PHY_INT | PORT_ACL_INT)
+
+#define REG_PORT_CTRL_0			0x0020
+
+#define PORT_MAC_LOOPBACK		BIT(7)
+#define PORT_FORCE_TX_FLOW_CTRL		BIT(4)
+#define PORT_FORCE_RX_FLOW_CTRL		BIT(3)
+#define PORT_TAIL_TAG_ENABLE		BIT(2)
+#define PORT_QUEUE_SPLIT_ENABLE		0x3
+
+#define REG_PORT_CTRL_1			0x0021
+
+#define PORT_SRP_ENABLE			0x3
+
+#define REG_PORT_STATUS_0		0x0030
+
+#define PORT_INTF_SPEED_M		0x3
+#define PORT_INTF_SPEED_S		3
+#define PORT_INTF_FULL_DUPLEX		BIT(2)
+#define PORT_TX_FLOW_CTRL		BIT(1)
+#define PORT_RX_FLOW_CTRL		BIT(0)
+
+#define REG_PORT_STATUS_1		0x0034
+
+/* 1 - PHY */
+#define REG_PORT_PHY_CTRL		0x0100
+
+#define PORT_PHY_RESET			BIT(15)
+#define PORT_PHY_LOOPBACK		BIT(14)
+#define PORT_SPEED_100MBIT		BIT(13)
+#define PORT_AUTO_NEG_ENABLE		BIT(12)
+#define PORT_POWER_DOWN			BIT(11)
+#define PORT_ISOLATE			BIT(10)
+#define PORT_AUTO_NEG_RESTART		BIT(9)
+#define PORT_FULL_DUPLEX		BIT(8)
+#define PORT_COLLISION_TEST		BIT(7)
+#define PORT_SPEED_1000MBIT		BIT(6)
+
+#define REG_PORT_PHY_STATUS		0x0102
+
+#define PORT_100BT4_CAPABLE		BIT(15)
+#define PORT_100BTX_FD_CAPABLE		BIT(14)
+#define PORT_100BTX_CAPABLE		BIT(13)
+#define PORT_10BT_FD_CAPABLE		BIT(12)
+#define PORT_10BT_CAPABLE		BIT(11)
+#define PORT_EXTENDED_STATUS		BIT(8)
+#define PORT_MII_SUPPRESS_CAPABLE	BIT(6)
+#define PORT_AUTO_NEG_ACKNOWLEDGE	BIT(5)
+#define PORT_REMOTE_FAULT		BIT(4)
+#define PORT_AUTO_NEG_CAPABLE		BIT(3)
+#define PORT_LINK_STATUS		BIT(2)
+#define PORT_JABBER_DETECT		BIT(1)
+#define PORT_EXTENDED_CAPABILITY	BIT(0)
+
+#define REG_PORT_PHY_ID_HI		0x0104
+#define REG_PORT_PHY_ID_LO		0x0106
+
+#define KSZ9477_ID_HI			0x0022
+#define KSZ9477_ID_LO			0x1622
+
+#define REG_PORT_PHY_AUTO_NEGOTIATION	0x0108
+
+#define PORT_AUTO_NEG_NEXT_PAGE		BIT(15)
+#define PORT_AUTO_NEG_REMOTE_FAULT	BIT(13)
+#define PORT_AUTO_NEG_ASYM_PAUSE	BIT(11)
+#define PORT_AUTO_NEG_SYM_PAUSE		BIT(10)
+#define PORT_AUTO_NEG_100BT4		BIT(9)
+#define PORT_AUTO_NEG_100BTX_FD		BIT(8)
+#define PORT_AUTO_NEG_100BTX		BIT(7)
+#define PORT_AUTO_NEG_10BT_FD		BIT(6)
+#define PORT_AUTO_NEG_10BT		BIT(5)
+#define PORT_AUTO_NEG_SELECTOR		0x001F
+#define PORT_AUTO_NEG_802_3		0x0001
+
+#define PORT_AUTO_NEG_PAUSE		\
+	(PORT_AUTO_NEG_ASYM_PAUSE | PORT_AUTO_NEG_SYM_PAUSE)
+
+#define REG_PORT_PHY_REMOTE_CAPABILITY	0x010A
+
+#define PORT_REMOTE_NEXT_PAGE		BIT(15)
+#define PORT_REMOTE_ACKNOWLEDGE		BIT(14)
+#define PORT_REMOTE_REMOTE_FAULT	BIT(13)
+#define PORT_REMOTE_ASYM_PAUSE		BIT(11)
+#define PORT_REMOTE_SYM_PAUSE		BIT(10)
+#define PORT_REMOTE_100BTX_FD		BIT(8)
+#define PORT_REMOTE_100BTX		BIT(7)
+#define PORT_REMOTE_10BT_FD		BIT(6)
+#define PORT_REMOTE_10BT		BIT(5)
+
+#define REG_PORT_PHY_1000_CTRL		0x0112
+
+#define PORT_AUTO_NEG_MANUAL		BIT(12)
+#define PORT_AUTO_NEG_MASTER		BIT(11)
+#define PORT_AUTO_NEG_MASTER_PREFERRED	BIT(10)
+#define PORT_AUTO_NEG_1000BT_FD		BIT(9)
+#define PORT_AUTO_NEG_1000BT		BIT(8)
+
+#define REG_PORT_PHY_1000_STATUS	0x0114
+
+#define PORT_MASTER_FAULT		BIT(15)
+#define PORT_LOCAL_MASTER		BIT(14)
+#define PORT_LOCAL_RX_OK		BIT(13)
+#define PORT_REMOTE_RX_OK		BIT(12)
+#define PORT_REMOTE_1000BT_FD		BIT(11)
+#define PORT_REMOTE_1000BT		BIT(10)
+#define PORT_REMOTE_IDLE_CNT_M		0x0F
+
+#define PORT_PHY_1000_STATIC_STATUS	\
+	(PORT_LOCAL_RX_OK |		\
+	PORT_REMOTE_RX_OK |		\
+	PORT_REMOTE_1000BT_FD |		\
+	PORT_REMOTE_1000BT)
+
+#define REG_PORT_PHY_MMD_SETUP		0x011A
+
+#define PORT_MMD_OP_MODE_M		0x3
+#define PORT_MMD_OP_MODE_S		14
+#define PORT_MMD_OP_INDEX		0
+#define PORT_MMD_OP_DATA_NO_INCR	1
+#define PORT_MMD_OP_DATA_INCR_RW	2
+#define PORT_MMD_OP_DATA_INCR_W		3
+#define PORT_MMD_DEVICE_ID_M		0x1F
+
+#define MMD_SETUP(mode, dev)		\
+	(((u16)(mode) << PORT_MMD_OP_MODE_S) | (dev))
+
+#define REG_PORT_PHY_MMD_INDEX_DATA	0x011C
+
+#define MMD_DEVICE_ID_DSP		1
+
+#define MMD_DSP_SQI_CHAN_A		0xAC
+#define MMD_DSP_SQI_CHAN_B		0xAD
+#define MMD_DSP_SQI_CHAN_C		0xAE
+#define MMD_DSP_SQI_CHAN_D		0xAF
+
+#define DSP_SQI_ERR_DETECTED		BIT(15)
+#define DSP_SQI_AVG_ERR			0x7FFF
+
+#define MMD_DEVICE_ID_COMMON		2
+
+#define MMD_DEVICE_ID_EEE_ADV		7
+
+#define MMD_EEE_ADV			0x3C
+#define EEE_ADV_100MBIT			BIT(1)
+#define EEE_ADV_1GBIT			BIT(2)
+
+#define MMD_EEE_LP_ADV			0x3D
+#define MMD_EEE_MSG_CODE		0x3F
+
+#define MMD_DEVICE_ID_AFED		0x1C
+
+#define REG_PORT_PHY_EXTENDED_STATUS	0x011E
+
+#define PORT_100BTX_FD_ABLE		BIT(15)
+#define PORT_100BTX_ABLE		BIT(14)
+#define PORT_10BT_FD_ABLE		BIT(13)
+#define PORT_10BT_ABLE			BIT(12)
+
+#define REG_PORT_SGMII_ADDR__4		0x0200
+#define PORT_SGMII_AUTO_INCR		BIT(23)
+#define PORT_SGMII_DEVICE_ID_M		0x1F
+#define PORT_SGMII_DEVICE_ID_S		16
+#define PORT_SGMII_ADDR_M		(BIT(21) - 1)
+
+#define REG_PORT_SGMII_DATA__4		0x0204
+#define PORT_SGMII_DATA_M		(BIT(16) - 1)
+
+#define MMD_DEVICE_ID_PMA		0x01
+#define MMD_DEVICE_ID_PCS		0x03
+#define MMD_DEVICE_ID_PHY_XS		0x04
+#define MMD_DEVICE_ID_DTE_XS		0x05
+#define MMD_DEVICE_ID_AN		0x07
+#define MMD_DEVICE_ID_VENDOR_CTRL	0x1E
+#define MMD_DEVICE_ID_VENDOR_MII	0x1F
+
+#define SR_MII				MMD_DEVICE_ID_VENDOR_MII
+
+#define MMD_SR_MII_CTRL			0x0000
+
+#define SR_MII_RESET			BIT(15)
+#define SR_MII_LOOPBACK			BIT(14)
+#define SR_MII_SPEED_100MBIT		BIT(13)
+#define SR_MII_AUTO_NEG_ENABLE		BIT(12)
+#define SR_MII_POWER_DOWN		BIT(11)
+#define SR_MII_AUTO_NEG_RESTART		BIT(9)
+#define SR_MII_FULL_DUPLEX		BIT(8)
+#define SR_MII_SPEED_1000MBIT		BIT(6)
+
+#define MMD_SR_MII_STATUS		0x0001
+#define MMD_SR_MII_ID_1			0x0002
+#define MMD_SR_MII_ID_2			0x0003
+#define MMD_SR_MII_AUTO_NEGOTIATION	0x0004
+
+#define SR_MII_AUTO_NEG_NEXT_PAGE	BIT(15)
+#define SR_MII_AUTO_NEG_REMOTE_FAULT_M	0x3
+#define SR_MII_AUTO_NEG_REMOTE_FAULT_S	12
+#define SR_MII_AUTO_NEG_NO_ERROR	0
+#define SR_MII_AUTO_NEG_OFFLINE		1
+#define SR_MII_AUTO_NEG_LINK_FAILURE	2
+#define SR_MII_AUTO_NEG_ERROR		3
+#define SR_MII_AUTO_NEG_PAUSE_M		0x3
+#define SR_MII_AUTO_NEG_PAUSE_S		7
+#define SR_MII_AUTO_NEG_NO_PAUSE	0
+#define SR_MII_AUTO_NEG_ASYM_PAUSE_TX	1
+#define SR_MII_AUTO_NEG_SYM_PAUSE	2
+#define SR_MII_AUTO_NEG_ASYM_PAUSE_RX	3
+#define SR_MII_AUTO_NEG_HALF_DUPLEX	BIT(6)
+#define SR_MII_AUTO_NEG_FULL_DUPLEX	BIT(5)
+
+#define MMD_SR_MII_REMOTE_CAPABILITY	0x0005
+#define MMD_SR_MII_AUTO_NEG_EXP		0x0006
+#define MMD_SR_MII_AUTO_NEG_EXT		0x000F
+
+#define MMD_SR_MII_DIGITAL_CTRL_1	0x8000
+
+#define MMD_SR_MII_AUTO_NEG_CTRL	0x8001
+
+#define SR_MII_8_BIT			BIT(8)
+#define SR_MII_SGMII_LINK_UP		BIT(4)
+#define SR_MII_TX_CFG_PHY_MASTER	BIT(3)
+#define SR_MII_PCS_MODE_M		0x3
+#define SR_MII_PCS_MODE_S		1
+#define SR_MII_PCS_SGMII		2
+#define SR_MII_AUTO_NEG_COMPLETE_INTR	BIT(0)
+
+#define MMD_SR_MII_AUTO_NEG_STATUS	0x8002
+
+#define SR_MII_STAT_LINK_UP		BIT(4)
+#define SR_MII_STAT_M			0x3
+#define SR_MII_STAT_S			2
+#define SR_MII_STAT_10_MBPS		0
+#define SR_MII_STAT_100_MBPS		1
+#define SR_MII_STAT_1000_MBPS		2
+#define SR_MII_STAT_FULL_DUPLEX		BIT(1)
+
+#define MMD_SR_MII_PHY_CTRL		0x80A0
+
+#define SR_MII_PHY_LANE_SEL_M		0xF
+#define SR_MII_PHY_LANE_SEL_S		8
+#define SR_MII_PHY_WRITE		BIT(1)
+#define SR_MII_PHY_START_BUSY		BIT(0)
+
+#define MMD_SR_MII_PHY_ADDR		0x80A1
+
+#define SR_MII_PHY_ADDR_M		(BIT(16) - 1)
+
+#define MMD_SR_MII_PHY_DATA		0x80A2
+
+#define SR_MII_PHY_DATA_M		(BIT(16) - 1)
+
+#define SR_MII_PHY_JTAG_CHIP_ID_HI	0x000C
+#define SR_MII_PHY_JTAG_CHIP_ID_LO	0x000D
+
+#define REG_PORT_PHY_REMOTE_LB_LED	0x0122
+
+#define PORT_REMOTE_LOOPBACK		BIT(8)
+#define PORT_LED_SELECT			(3 << 6)
+#define PORT_LED_CTRL			(3 << 4)
+#define PORT_LED_CTRL_TEST		BIT(3)
+#define PORT_10BT_PREAMBLE		BIT(2)
+#define PORT_LINK_MD_10BT_ENABLE	BIT(1)
+#define PORT_LINK_MD_PASS		BIT(0)
+
+#define REG_PORT_PHY_LINK_MD		0x0124
+
+#define PORT_START_CABLE_DIAG		BIT(15)
+#define PORT_TX_DISABLE			BIT(14)
+#define PORT_CABLE_DIAG_PAIR_M		0x3
+#define PORT_CABLE_DIAG_PAIR_S		12
+#define PORT_CABLE_DIAG_SELECT_M	0x3
+#define PORT_CABLE_DIAG_SELECT_S	10
+#define PORT_CABLE_DIAG_RESULT_M	0x3
+#define PORT_CABLE_DIAG_RESULT_S	8
+#define PORT_CABLE_STAT_NORMAL		0
+#define PORT_CABLE_STAT_OPEN		1
+#define PORT_CABLE_STAT_SHORT		2
+#define PORT_CABLE_STAT_FAILED		3
+#define PORT_CABLE_FAULT_COUNTER	0x00FF
+
+#define REG_PORT_PHY_PMA_STATUS		0x0126
+
+#define PORT_1000_LINK_GOOD		BIT(1)
+#define PORT_100_LINK_GOOD		BIT(0)
+
+#define REG_PORT_PHY_DIGITAL_STATUS	0x0128
+
+#define PORT_LINK_DETECT		BIT(14)
+#define PORT_SIGNAL_DETECT		BIT(13)
+#define PORT_PHY_STAT_MDI		BIT(12)
+#define PORT_PHY_STAT_MASTER		BIT(11)
+
+#define REG_PORT_PHY_RXER_COUNTER	0x012A
+
+#define REG_PORT_PHY_INT_ENABLE		0x0136
+#define REG_PORT_PHY_INT_STATUS		0x0137
+
+#define JABBER_INT			BIT(7)
+#define RX_ERR_INT			BIT(6)
+#define PAGE_RX_INT			BIT(5)
+#define PARALLEL_DETECT_FAULT_INT	BIT(4)
+#define LINK_PARTNER_ACK_INT		BIT(3)
+#define LINK_DOWN_INT			BIT(2)
+#define REMOTE_FAULT_INT		BIT(1)
+#define LINK_UP_INT			BIT(0)
+
+#define REG_PORT_PHY_DIGITAL_DEBUG_1	0x0138
+
+#define PORT_REG_CLK_SPEED_25_MHZ	BIT(14)
+#define PORT_PHY_FORCE_MDI		BIT(7)
+#define PORT_PHY_AUTO_MDIX_DISABLE	BIT(6)
+
+/* Same as PORT_PHY_LOOPBACK */
+#define PORT_PHY_PCS_LOOPBACK		BIT(0)
+
+#define REG_PORT_PHY_DIGITAL_DEBUG_2	0x013A
+
+#define REG_PORT_PHY_DIGITAL_DEBUG_3	0x013C
+
+#define PORT_100BT_FIXED_LATENCY	BIT(15)
+
+#define REG_PORT_PHY_PHY_CTRL		0x013E
+
+#define PORT_INT_PIN_HIGH		BIT(14)
+#define PORT_ENABLE_JABBER		BIT(9)
+#define PORT_STAT_SPEED_1000MBIT	BIT(6)
+#define PORT_STAT_SPEED_100MBIT		BIT(5)
+#define PORT_STAT_SPEED_10MBIT		BIT(4)
+#define PORT_STAT_FULL_DUPLEX		BIT(3)
+
+/* Same as PORT_PHY_STAT_MASTER */
+#define PORT_STAT_MASTER		BIT(2)
+#define PORT_RESET			BIT(1)
+#define PORT_LINK_STATUS_FAIL		BIT(0)
+
+/* 3 - xMII */
+#define REG_PORT_XMII_CTRL_0		0x0300
+
+#define PORT_SGMII_SEL			BIT(7)
+#define PORT_MII_FULL_DUPLEX		BIT(6)
+#define PORT_MII_100MBIT		BIT(4)
+#define PORT_GRXC_ENABLE		BIT(0)
+
+#define REG_PORT_XMII_CTRL_1		0x0301
+
+#define PORT_RMII_CLK_SEL		BIT(7)
+/* S1 */
+#define PORT_MII_1000MBIT_S1		BIT(6)
+/* S2 */
+#define PORT_MII_NOT_1GBIT		BIT(6)
+#define PORT_MII_SEL_EDGE		BIT(5)
+#define PORT_RGMII_ID_IG_ENABLE		BIT(4)
+#define PORT_RGMII_ID_EG_ENABLE		BIT(3)
+#define PORT_MII_MAC_MODE		BIT(2)
+#define PORT_MII_SEL_M			0x3
+/* S1 */
+#define PORT_MII_SEL_S1			0x0
+#define PORT_RMII_SEL_S1		0x1
+#define PORT_GMII_SEL_S1		0x2
+#define PORT_RGMII_SEL_S1		0x3
+/* S2 */
+#define PORT_RGMII_SEL			0x0
+#define PORT_RMII_SEL			0x1
+#define PORT_GMII_SEL			0x2
+#define PORT_MII_SEL			0x3
+
+/* 4 - MAC */
+#define REG_PORT_MAC_CTRL_0		0x0400
+
+#define PORT_BROADCAST_STORM		BIT(1)
+#define PORT_JUMBO_FRAME		BIT(0)
+
+#define REG_PORT_MAC_CTRL_1		0x0401
+
+#define PORT_BACK_PRESSURE		BIT(3)
+#define PORT_PASS_ALL			BIT(0)
+
+#define REG_PORT_MAC_CTRL_2		0x0402
+
+#define PORT_100BT_EEE_DISABLE		BIT(7)
+#define PORT_1000BT_EEE_DISABLE		BIT(6)
+
+#define REG_PORT_MAC_IN_RATE_LIMIT	0x0403
+
+#define PORT_IN_PORT_BASED_S		6
+#define PORT_RATE_PACKET_BASED_S	5
+#define PORT_IN_FLOW_CTRL_S		4
+#define PORT_COUNT_IFG_S		1
+#define PORT_COUNT_PREAMBLE_S		0
+#define PORT_IN_PORT_BASED		BIT(6)
+#define PORT_IN_PACKET_BASED		BIT(5)
+#define PORT_IN_FLOW_CTRL		BIT(4)
+#define PORT_IN_LIMIT_MODE_M		0x3
+#define PORT_IN_LIMIT_MODE_S		2
+#define PORT_IN_ALL			0
+#define PORT_IN_UNICAST			1
+#define PORT_IN_MULTICAST		2
+#define PORT_IN_BROADCAST		3
+#define PORT_COUNT_IFG			BIT(1)
+#define PORT_COUNT_PREAMBLE		BIT(0)
+
+#define REG_PORT_IN_RATE_0		0x0410
+#define REG_PORT_IN_RATE_1		0x0411
+#define REG_PORT_IN_RATE_2		0x0412
+#define REG_PORT_IN_RATE_3		0x0413
+#define REG_PORT_IN_RATE_4		0x0414
+#define REG_PORT_IN_RATE_5		0x0415
+#define REG_PORT_IN_RATE_6		0x0416
+#define REG_PORT_IN_RATE_7		0x0417
+
+#define REG_PORT_OUT_RATE_0		0x0420
+#define REG_PORT_OUT_RATE_1		0x0421
+#define REG_PORT_OUT_RATE_2		0x0422
+#define REG_PORT_OUT_RATE_3		0x0423
+
+#define PORT_RATE_LIMIT_M		(BIT(7) - 1)
+
+/* 5 - MIB Counters */
+#define REG_PORT_MIB_CTRL_STAT__4	0x0500
+
+#define MIB_COUNTER_OVERFLOW		BIT(31)
+#define MIB_COUNTER_VALID		BIT(30)
+#define MIB_COUNTER_READ		BIT(25)
+#define MIB_COUNTER_FLUSH_FREEZE	BIT(24)
+#define MIB_COUNTER_INDEX_M		(BIT(8) - 1)
+#define MIB_COUNTER_INDEX_S		16
+#define MIB_COUNTER_DATA_HI_M		0xF
+
+#define REG_PORT_MIB_DATA		0x0504
+
+/* 6 - ACL */
+#define REG_PORT_ACL_0			0x0600
+
+#define ACL_FIRST_RULE_M		0xF
+
+#define REG_PORT_ACL_1			0x0601
+
+#define ACL_MODE_M			0x3
+#define ACL_MODE_S			4
+#define ACL_MODE_DISABLE		0
+#define ACL_MODE_LAYER_2		1
+#define ACL_MODE_LAYER_3		2
+#define ACL_MODE_LAYER_4		3
+#define ACL_ENABLE_M			0x3
+#define ACL_ENABLE_S			2
+#define ACL_ENABLE_2_COUNT		0
+#define ACL_ENABLE_2_TYPE		1
+#define ACL_ENABLE_2_MAC		2
+#define ACL_ENABLE_2_BOTH		3
+#define ACL_ENABLE_3_IP			1
+#define ACL_ENABLE_3_SRC_DST_COMP	2
+#define ACL_ENABLE_4_PROTOCOL		0
+#define ACL_ENABLE_4_TCP_PORT_COMP	1
+#define ACL_ENABLE_4_UDP_PORT_COMP	2
+#define ACL_ENABLE_4_TCP_SEQN_COMP	3
+#define ACL_SRC				BIT(1)
+#define ACL_EQUAL			BIT(0)
+
+#define REG_PORT_ACL_2			0x0602
+#define REG_PORT_ACL_3			0x0603
+
+#define ACL_MAX_PORT			0xFFFF
+
+#define REG_PORT_ACL_4			0x0604
+#define REG_PORT_ACL_5			0x0605
+
+#define ACL_MIN_PORT			0xFFFF
+#define ACL_IP_ADDR			0xFFFFFFFF
+#define ACL_TCP_SEQNUM			0xFFFFFFFF
+
+#define REG_PORT_ACL_6			0x0606
+
+#define ACL_RESERVED			0xF8
+#define ACL_PORT_MODE_M			0x3
+#define ACL_PORT_MODE_S			1
+#define ACL_PORT_MODE_DISABLE		0
+#define ACL_PORT_MODE_EITHER		1
+#define ACL_PORT_MODE_IN_RANGE		2
+#define ACL_PORT_MODE_OUT_OF_RANGE	3
+
+#define REG_PORT_ACL_7			0x0607
+
+#define ACL_TCP_FLAG_ENABLE		BIT(0)
+
+#define REG_PORT_ACL_8			0x0608
+
+#define ACL_TCP_FLAG_M			0xFF
+
+#define REG_PORT_ACL_9			0x0609
+
+#define ACL_TCP_FLAG			0xFF
+#define ACL_ETH_TYPE			0xFFFF
+#define ACL_IP_M			0xFFFFFFFF
+
+#define REG_PORT_ACL_A			0x060A
+
+#define ACL_PRIO_MODE_M			0x3
+#define ACL_PRIO_MODE_S			6
+#define ACL_PRIO_MODE_DISABLE		0
+#define ACL_PRIO_MODE_HIGHER		1
+#define ACL_PRIO_MODE_LOWER		2
+#define ACL_PRIO_MODE_REPLACE		3
+#define ACL_PRIO_M			KS_PRIO_M
+#define ACL_PRIO_S			3
+#define ACL_VLAN_PRIO_REPLACE		BIT(2)
+#define ACL_VLAN_PRIO_M			KS_PRIO_M
+#define ACL_VLAN_PRIO_HI_M		0x3
+
+#define REG_PORT_ACL_B			0x060B
+
+#define ACL_VLAN_PRIO_LO_M		0x8
+#define ACL_VLAN_PRIO_S			7
+#define ACL_MAP_MODE_M			0x3
+#define ACL_MAP_MODE_S			5
+#define ACL_MAP_MODE_DISABLE		0
+#define ACL_MAP_MODE_OR			1
+#define ACL_MAP_MODE_AND		2
+#define ACL_MAP_MODE_REPLACE		3
+
+#define ACL_CNT_M			(BIT(11) - 1)
+#define ACL_CNT_S			5
+
+#define REG_PORT_ACL_C			0x060C
+
+#define REG_PORT_ACL_D			0x060D
+#define ACL_MSEC_UNIT			BIT(6)
+#define ACL_INTR_MODE			BIT(5)
+#define ACL_PORT_MAP			0x7F
+
+#define REG_PORT_ACL_E			0x060E
+#define REG_PORT_ACL_F			0x060F
+
+#define REG_PORT_ACL_BYTE_EN_MSB	0x0610
+#define REG_PORT_ACL_BYTE_EN_LSB	0x0611
+
+#define ACL_ACTION_START		0xA
+#define ACL_ACTION_LEN			4
+#define ACL_INTR_CNT_START		0xD
+#define ACL_RULESET_START		0xE
+#define ACL_RULESET_LEN			2
+#define ACL_TABLE_LEN			16
+
+#define ACL_ACTION_ENABLE		0x003C
+#define ACL_MATCH_ENABLE		0x7FC3
+#define ACL_RULESET_ENABLE		0x8003
+#define ACL_BYTE_ENABLE			0xFFFF
+
+#define REG_PORT_ACL_CTRL_0		0x0612
+
+#define PORT_ACL_WRITE_DONE		BIT(6)
+#define PORT_ACL_READ_DONE		BIT(5)
+#define PORT_ACL_WRITE			BIT(4)
+#define PORT_ACL_INDEX_M		0xF
+
+#define REG_PORT_ACL_CTRL_1		0x0613
+
+/* 8 - Classification and Policing */
+#define REG_PORT_MRI_MIRROR_CTRL	0x0800
+
+#define PORT_MIRROR_RX			BIT(6)
+#define PORT_MIRROR_TX			BIT(5)
+#define PORT_MIRROR_SNIFFER		BIT(1)
+
+#define REG_PORT_MRI_PRIO_CTRL		0x0801
+
+#define PORT_HIGHEST_PRIO		BIT(7)
+#define PORT_OR_PRIO			BIT(6)
+#define PORT_MAC_PRIO_ENABLE		BIT(4)
+#define PORT_VLAN_PRIO_ENABLE		BIT(3)
+#define PORT_802_1P_PRIO_ENABLE		BIT(2)
+#define PORT_DIFFSERV_PRIO_ENABLE	BIT(1)
+#define PORT_ACL_PRIO_ENABLE		BIT(0)
+
+#define REG_PORT_MRI_MAC_CTRL		0x0802
+
+#define PORT_USER_PRIO_CEILING		BIT(7)
+#define PORT_DROP_NON_VLAN		BIT(4)
+#define PORT_DROP_TAG			BIT(3)
+#define PORT_BASED_PRIO_M		KS_PRIO_M
+#define PORT_BASED_PRIO_S		0
+
+#define REG_PORT_MRI_AUTHEN_CTRL	0x0803
+
+#define PORT_ACL_ENABLE			BIT(2)
+#define PORT_AUTHEN_MODE		0x3
+#define PORT_AUTHEN_PASS		0
+#define PORT_AUTHEN_BLOCK		1
+#define PORT_AUTHEN_TRAP		2
+
+#define REG_PORT_MRI_INDEX__4		0x0804
+
+#define MRI_INDEX_P_M			0x7
+#define MRI_INDEX_P_S			16
+#define MRI_INDEX_Q_M			0x3
+#define MRI_INDEX_Q_S			0
+
+#define REG_PORT_MRI_TC_MAP__4		0x0808
+
+#define PORT_TC_MAP_M			0xf
+#define PORT_TC_MAP_S			4
+
+#define REG_PORT_MRI_POLICE_CTRL__4	0x080C
+
+#define POLICE_DROP_ALL			BIT(10)
+#define POLICE_PACKET_TYPE_M		0x3
+#define POLICE_PACKET_TYPE_S		8
+#define POLICE_PACKET_DROPPED		0
+#define POLICE_PACKET_GREEN		1
+#define POLICE_PACKET_YELLOW		2
+#define POLICE_PACKET_RED		3
+#define PORT_BASED_POLICING		BIT(7)
+#define NON_DSCP_COLOR_M		0x3
+#define NON_DSCP_COLOR_S		5
+#define COLOR_MARK_ENABLE		BIT(4)
+#define COLOR_REMAP_ENABLE		BIT(3)
+#define POLICE_DROP_SRP			BIT(2)
+#define POLICE_COLOR_NOT_AWARE		BIT(1)
+#define POLICE_ENABLE			BIT(0)
+
+#define REG_PORT_POLICE_COLOR_0__4	0x0810
+#define REG_PORT_POLICE_COLOR_1__4	0x0814
+#define REG_PORT_POLICE_COLOR_2__4	0x0818
+#define REG_PORT_POLICE_COLOR_3__4	0x081C
+
+#define POLICE_COLOR_MAP_S		2
+#define POLICE_COLOR_MAP_M		(BIT(POLICE_COLOR_MAP_S) - 1)
+
+#define REG_PORT_POLICE_RATE__4		0x0820
+
+#define POLICE_CIR_S			16
+#define POLICE_PIR_S			0
+
+#define REG_PORT_POLICE_BURST_SIZE__4	0x0824
+
+#define POLICE_BURST_SIZE_M		0x3FFF
+#define POLICE_CBS_S			16
+#define POLICE_PBS_S			0
+
+#define REG_PORT_WRED_PM_CTRL_0__4	0x0830
+
+#define WRED_PM_CTRL_M			(BIT(11) - 1)
+
+#define WRED_PM_MAX_THRESHOLD_S		16
+#define WRED_PM_MIN_THRESHOLD_S		0
+
+#define REG_PORT_WRED_PM_CTRL_1__4	0x0834
+
+#define WRED_PM_MULTIPLIER_S		16
+#define WRED_PM_AVG_QUEUE_SIZE_S	0
+
+#define REG_PORT_WRED_QUEUE_CTRL_0__4	0x0840
+#define REG_PORT_WRED_QUEUE_CTRL_1__4	0x0844
+
+#define REG_PORT_WRED_QUEUE_PMON__4	0x0848
+
+#define WRED_RANDOM_DROP_ENABLE		BIT(31)
+#define WRED_PMON_FLUSH			BIT(30)
+#define WRED_DROP_GYR_DISABLE		BIT(29)
+#define WRED_DROP_YR_DISABLE		BIT(28)
+#define WRED_DROP_R_DISABLE		BIT(27)
+#define WRED_DROP_ALL			BIT(26)
+#define WRED_PMON_M			(BIT(24) - 1)
+
+/* 9 - Shaping */
+
+#define REG_PORT_MTI_QUEUE_INDEX__4	0x0900
+
+#define REG_PORT_MTI_QUEUE_CTRL_0__4	0x0904
+
+#define MTI_PVID_REPLACE		BIT(0)
+
+#define REG_PORT_MTI_QUEUE_CTRL_0	0x0914
+
+#define MTI_SCHEDULE_MODE_M		0x3
+#define MTI_SCHEDULE_MODE_S		6
+#define MTI_SCHEDULE_STRICT_PRIO	0
+#define MTI_SCHEDULE_WRR		2
+#define MTI_SHAPING_M			0x3
+#define MTI_SHAPING_S			4
+#define MTI_SHAPING_OFF			0
+#define MTI_SHAPING_SRP			1
+#define MTI_SHAPING_TIME_AWARE		2
+
+#define REG_PORT_MTI_QUEUE_CTRL_1	0x0915
+
+#define MTI_TX_RATIO_M			(BIT(7) - 1)
+
+#define REG_PORT_MTI_QUEUE_CTRL_2__2	0x0916
+#define REG_PORT_MTI_HI_WATER_MARK	0x0916
+#define REG_PORT_MTI_QUEUE_CTRL_3__2	0x0918
+#define REG_PORT_MTI_LO_WATER_MARK	0x0918
+#define REG_PORT_MTI_QUEUE_CTRL_4__2	0x091A
+#define REG_PORT_MTI_CREDIT_INCREMENT	0x091A
+
+/* A - QM */
+
+#define REG_PORT_QM_CTRL__4		0x0A00
+
+#define PORT_QM_DROP_PRIO_M		0x3
+
+#define REG_PORT_VLAN_MEMBERSHIP__4	0x0A04
+
+#define REG_PORT_QM_QUEUE_INDEX__4	0x0A08
+
+#define PORT_QM_QUEUE_INDEX_S		24
+#define PORT_QM_BURST_SIZE_S		16
+#define PORT_QM_MIN_RESV_SPACE_M	(BIT(11) - 1)
+
+#define REG_PORT_QM_WATER_MARK__4	0x0A0C
+
+#define PORT_QM_HI_WATER_MARK_S		16
+#define PORT_QM_LO_WATER_MARK_S		0
+#define PORT_QM_WATER_MARK_M		(BIT(11) - 1)
+
+#define REG_PORT_QM_TX_CNT_0__4		0x0A10
+
+#define PORT_QM_TX_CNT_USED_S		0
+#define PORT_QM_TX_CNT_M		(BIT(11) - 1)
+
+#define REG_PORT_QM_TX_CNT_1__4		0x0A14
+
+#define PORT_QM_TX_CNT_CALCULATED_S	16
+#define PORT_QM_TX_CNT_AVAIL_S		0
+
+/* B - LUE */
+#define REG_PORT_LUE_CTRL		0x0B00
+
+#define PORT_VLAN_LOOKUP_VID_0		BIT(7)
+#define PORT_INGRESS_FILTER		BIT(6)
+#define PORT_DISCARD_NON_VID		BIT(5)
+#define PORT_MAC_BASED_802_1X		BIT(4)
+#define PORT_SRC_ADDR_FILTER		BIT(3)
+
+#define REG_PORT_LUE_MSTP_INDEX		0x0B01
+
+#define REG_PORT_LUE_MSTP_STATE		0x0B04
+
+#define PORT_TX_ENABLE			BIT(2)
+#define PORT_RX_ENABLE			BIT(1)
+#define PORT_LEARN_DISABLE		BIT(0)
+
+/* C - PTP */
+
+#define REG_PTP_PORT_RX_DELAY__2	0x0C00
+#define REG_PTP_PORT_TX_DELAY__2	0x0C02
+#define REG_PTP_PORT_ASYM_DELAY__2	0x0C04
+
+#define REG_PTP_PORT_XDELAY_TS		0x0C08
+#define REG_PTP_PORT_XDELAY_TS_H	0x0C08
+#define REG_PTP_PORT_XDELAY_TS_L	0x0C0A
+
+#define REG_PTP_PORT_SYNC_TS		0x0C0C
+#define REG_PTP_PORT_SYNC_TS_H		0x0C0C
+#define REG_PTP_PORT_SYNC_TS_L		0x0C0E
+
+#define REG_PTP_PORT_PDRESP_TS		0x0C10
+#define REG_PTP_PORT_PDRESP_TS_H	0x0C10
+#define REG_PTP_PORT_PDRESP_TS_L	0x0C12
+
+#define REG_PTP_PORT_TX_INT_STATUS__2	0x0C14
+#define REG_PTP_PORT_TX_INT_ENABLE__2	0x0C16
+
+#define PTP_PORT_SYNC_INT		BIT(15)
+#define PTP_PORT_XDELAY_REQ_INT		BIT(14)
+#define PTP_PORT_PDELAY_RESP_INT	BIT(13)
+
+#define REG_PTP_PORT_LINK_DELAY__4	0x0C18
+
+#define PRIO_QUEUES			4
+#define RX_PRIO_QUEUES			8
+
+#define KS_PRIO_IN_REG			2
+
+#define TOTAL_PORT_NUM			7
+
+#define KSZ9477_COUNTER_NUM		0x20
+#define TOTAL_KSZ9477_COUNTER_NUM	(KSZ9477_COUNTER_NUM + 2 + 2)
+
+#define SWITCH_COUNTER_NUM		KSZ9477_COUNTER_NUM
+#define TOTAL_SWITCH_COUNTER_NUM	TOTAL_KSZ9477_COUNTER_NUM
+
+#define P_BCAST_STORM_CTRL		REG_PORT_MAC_CTRL_0
+#define P_PRIO_CTRL			REG_PORT_MRI_PRIO_CTRL
+#define P_MIRROR_CTRL			REG_PORT_MRI_MIRROR_CTRL
+#define P_STP_CTRL			REG_PORT_LUE_MSTP_STATE
+#define P_PHY_CTRL			REG_PORT_PHY_CTRL
+#define P_NEG_RESTART_CTRL		REG_PORT_PHY_CTRL
+#define P_LINK_STATUS			REG_PORT_PHY_STATUS
+#define P_SPEED_STATUS			REG_PORT_PHY_PHY_CTRL
+#define P_RATE_LIMIT_CTRL		REG_PORT_MAC_IN_RATE_LIMIT
+
+#define S_LINK_AGING_CTRL		REG_SW_LUE_CTRL_1
+#define S_MIRROR_CTRL			REG_SW_MRI_CTRL_0
+#define S_REPLACE_VID_CTRL		REG_SW_MAC_CTRL_2
+#define S_802_1P_PRIO_CTRL		REG_SW_MAC_802_1P_MAP_0
+#define S_TOS_PRIO_CTRL			REG_SW_MAC_TOS_PRIO_0
+#define S_FLUSH_TABLE_CTRL		REG_SW_LUE_CTRL_1
+
+#define SW_FLUSH_DYN_MAC_TABLE		SW_FLUSH_MSTP_TABLE
+
+#define MAX_TIMESTAMP_UNIT		2
+#define MAX_TRIG_UNIT			3
+#define MAX_TIMESTAMP_EVENT_UNIT	8
+#define MAX_GPIO			4
+
+#define PTP_TRIG_UNIT_M			(BIT(MAX_TRIG_UNIT) - 1)
+#define PTP_TS_UNIT_M			(BIT(MAX_TIMESTAMP_UNIT) - 1)
+
+/* Driver set switch broadcast storm protection at 10% rate. */
+#define BROADCAST_STORM_PROT_RATE	10
+
+/* 148,800 frames * 67 ms / 100 */
+#define BROADCAST_STORM_VALUE		9969
+
+#endif /* KSZ9477_REGS_H */
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
new file mode 100644
index 000000000000..b313ecdf2919
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -0,0 +1,1279 @@
+/*
+ * Microchip switch driver main logic
+ *
+ * Copyright (C) 2017
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_data/microchip-ksz.h>
+#include <linux/phy.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <net/dsa.h>
+#include <net/switchdev.h>
+
+#include "ksz_priv.h"
+
+static const struct {
+	int index;
+	char string[ETH_GSTRING_LEN];
+} mib_names[TOTAL_SWITCH_COUNTER_NUM] = {
+	{ 0x00, "rx_hi" },
+	{ 0x01, "rx_undersize" },
+	{ 0x02, "rx_fragments" },
+	{ 0x03, "rx_oversize" },
+	{ 0x04, "rx_jabbers" },
+	{ 0x05, "rx_symbol_err" },
+	{ 0x06, "rx_crc_err" },
+	{ 0x07, "rx_align_err" },
+	{ 0x08, "rx_mac_ctrl" },
+	{ 0x09, "rx_pause" },
+	{ 0x0A, "rx_bcast" },
+	{ 0x0B, "rx_mcast" },
+	{ 0x0C, "rx_ucast" },
+	{ 0x0D, "rx_64_or_less" },
+	{ 0x0E, "rx_65_127" },
+	{ 0x0F, "rx_128_255" },
+	{ 0x10, "rx_256_511" },
+	{ 0x11, "rx_512_1023" },
+	{ 0x12, "rx_1024_1522" },
+	{ 0x13, "rx_1523_2000" },
+	{ 0x14, "rx_2001" },
+	{ 0x15, "tx_hi" },
+	{ 0x16, "tx_late_col" },
+	{ 0x17, "tx_pause" },
+	{ 0x18, "tx_bcast" },
+	{ 0x19, "tx_mcast" },
+	{ 0x1A, "tx_ucast" },
+	{ 0x1B, "tx_deferred" },
+	{ 0x1C, "tx_total_col" },
+	{ 0x1D, "tx_exc_col" },
+	{ 0x1E, "tx_single_col" },
+	{ 0x1F, "tx_mult_col" },
+	{ 0x80, "rx_total" },
+	{ 0x81, "tx_total" },
+	{ 0x82, "rx_discards" },
+	{ 0x83, "tx_discards" },
+};
+
+static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
+{
+	u8 data;
+
+	ksz_read8(dev, addr, &data);
+	if (set)
+		data |= bits;
+	else
+		data &= ~bits;
+	ksz_write8(dev, addr, data);
+}
+
+static void ksz_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
+{
+	u32 data;
+
+	ksz_read32(dev, addr, &data);
+	if (set)
+		data |= bits;
+	else
+		data &= ~bits;
+	ksz_write32(dev, addr, data);
+}
+
+static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
+			 bool set)
+{
+	u32 addr;
+	u8 data;
+
+	addr = PORT_CTRL_ADDR(port, offset);
+	ksz_read8(dev, addr, &data);
+
+	if (set)
+		data |= bits;
+	else
+		data &= ~bits;
+
+	ksz_write8(dev, addr, data);
+}
+
+static void ksz_port_cfg32(struct ksz_device *dev, int port, int offset,
+			   u32 bits, bool set)
+{
+	u32 addr;
+	u32 data;
+
+	addr = PORT_CTRL_ADDR(port, offset);
+	ksz_read32(dev, addr, &data);
+
+	if (set)
+		data |= bits;
+	else
+		data &= ~bits;
+
+	ksz_write32(dev, addr, data);
+}
+
+static int wait_vlan_ctrl_ready(struct ksz_device *dev, u32 waiton, int timeout)
+{
+	u8 data;
+
+	do {
+		ksz_read8(dev, REG_SW_VLAN_CTRL, &data);
+		if (!(data & waiton))
+			break;
+		usleep_range(1, 10);
+	} while (timeout-- > 0);
+
+	if (timeout <= 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int get_vlan_table(struct dsa_switch *ds, u16 vid, u32 *vlan_table)
+{
+	struct ksz_device *dev = ds->priv;
+	int ret;
+
+	mutex_lock(&dev->vlan_mutex);
+
+	ksz_write16(dev, REG_SW_VLAN_ENTRY_INDEX__2, vid & VLAN_INDEX_M);
+	ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_READ | VLAN_START);
+
+	/* wait to be cleared */
+	ret = wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
+	if (ret < 0) {
+		dev_dbg(dev->dev, "Failed to read vlan table\n");
+		goto exit;
+	}
+
+	ksz_read32(dev, REG_SW_VLAN_ENTRY__4, &vlan_table[0]);
+	ksz_read32(dev, REG_SW_VLAN_ENTRY_UNTAG__4, &vlan_table[1]);
+	ksz_read32(dev, REG_SW_VLAN_ENTRY_PORTS__4, &vlan_table[2]);
+
+	ksz_write8(dev, REG_SW_VLAN_CTRL, 0);
+
+exit:
+	mutex_unlock(&dev->vlan_mutex);
+
+	return ret;
+}
+
+static int set_vlan_table(struct dsa_switch *ds, u16 vid, u32 *vlan_table)
+{
+	struct ksz_device *dev = ds->priv;
+	int ret;
+
+	mutex_lock(&dev->vlan_mutex);
+
+	ksz_write32(dev, REG_SW_VLAN_ENTRY__4, vlan_table[0]);
+	ksz_write32(dev, REG_SW_VLAN_ENTRY_UNTAG__4, vlan_table[1]);
+	ksz_write32(dev, REG_SW_VLAN_ENTRY_PORTS__4, vlan_table[2]);
+
+	ksz_write16(dev, REG_SW_VLAN_ENTRY_INDEX__2, vid & VLAN_INDEX_M);
+	ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_START | VLAN_WRITE);
+
+	/* wait to be cleared */
+	ret = wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
+	if (ret < 0) {
+		dev_dbg(dev->dev, "Failed to write vlan table\n");
+		goto exit;
+	}
+
+	ksz_write8(dev, REG_SW_VLAN_CTRL, 0);
+
+	/* update vlan cache table */
+	dev->vlan_cache[vid].table[0] = vlan_table[0];
+	dev->vlan_cache[vid].table[1] = vlan_table[1];
+	dev->vlan_cache[vid].table[2] = vlan_table[2];
+
+exit:
+	mutex_unlock(&dev->vlan_mutex);
+
+	return ret;
+}
+
+static void read_table(struct dsa_switch *ds, u32 *table)
+{
+	struct ksz_device *dev = ds->priv;
+
+	ksz_read32(dev, REG_SW_ALU_VAL_A, &table[0]);
+	ksz_read32(dev, REG_SW_ALU_VAL_B, &table[1]);
+	ksz_read32(dev, REG_SW_ALU_VAL_C, &table[2]);
+	ksz_read32(dev, REG_SW_ALU_VAL_D, &table[3]);
+}
+
+static void write_table(struct dsa_switch *ds, u32 *table)
+{
+	struct ksz_device *dev = ds->priv;
+
+	ksz_write32(dev, REG_SW_ALU_VAL_A, table[0]);
+	ksz_write32(dev, REG_SW_ALU_VAL_B, table[1]);
+	ksz_write32(dev, REG_SW_ALU_VAL_C, table[2]);
+	ksz_write32(dev, REG_SW_ALU_VAL_D, table[3]);
+}
+
+static int wait_alu_ready(struct ksz_device *dev, u32 waiton, int timeout)
+{
+	u32 data;
+
+	do {
+		ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
+		if (!(data & waiton))
+			break;
+		usleep_range(1, 10);
+	} while (timeout-- > 0);
+
+	if (timeout <= 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int wait_alu_sta_ready(struct ksz_device *dev, u32 waiton, int timeout)
+{
+	u32 data;
+
+	do {
+		ksz_read32(dev, REG_SW_ALU_STAT_CTRL__4, &data);
+		if (!(data & waiton))
+			break;
+		usleep_range(1, 10);
+	} while (timeout-- > 0);
+
+	if (timeout <= 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int ksz_reset_switch(struct dsa_switch *ds)
+{
+	struct ksz_device *dev = ds->priv;
+	u8 data8;
+	u16 data16;
+	u32 data32;
+
+	/* reset switch */
+	ksz_cfg(dev, REG_SW_OPERATION, SW_RESET, true);
+
+	/* turn off SPI DO Edge select */
+	ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
+	data8 &= ~SPI_AUTO_EDGE_DETECTION;
+	ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
+
+	/* default configuration */
+	ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
+	data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING |
+	      SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE;
+	ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+
+	/* disable interrupts */
+	ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK);
+	ksz_write32(dev, REG_SW_PORT_INT_MASK__4, 0x7F);
+	ksz_read32(dev, REG_SW_PORT_INT_STATUS__4, &data32);
+
+	/* set broadcast storm protection 10% rate */
+	ksz_read16(dev, REG_SW_MAC_CTRL_2, &data16);
+	data16 &= ~BROADCAST_STORM_RATE;
+	data16 |= (BROADCAST_STORM_VALUE * BROADCAST_STORM_PROT_RATE) / 100;
+	ksz_write16(dev, REG_SW_MAC_CTRL_2, data16);
+
+	return 0;
+}
+
+static void port_setup(struct ksz_device *dev, int port, bool cpu_port)
+{
+	u8 data8;
+	u16 data16;
+
+	/* enable tag tail for host port */
+	if (cpu_port)
+		ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_TAIL_TAG_ENABLE,
+			     true);
+
+	ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, false);
+
+	/* set back pressure */
+	ksz_port_cfg(dev, port, REG_PORT_MAC_CTRL_1, PORT_BACK_PRESSURE, true);
+
+	/* set flow control */
+	ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
+		     PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL, true);
+
+	/* enable broadcast storm limit */
+	ksz_port_cfg(dev, port, P_BCAST_STORM_CTRL, PORT_BROADCAST_STORM, true);
+
+	/* disable DiffServ priority */
+	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_DIFFSERV_PRIO_ENABLE, false);
+
+	/* replace priority */
+	ksz_port_cfg(dev, port, REG_PORT_MRI_MAC_CTRL, PORT_USER_PRIO_CEILING,
+		     false);
+	ksz_port_cfg32(dev, port, REG_PORT_MTI_QUEUE_CTRL_0__4,
+		       MTI_PVID_REPLACE, false);
+
+	/* enable 802.1p priority */
+	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_PRIO_ENABLE, true);
+
+	/* configure MAC to 1G & RGMII mode */
+	ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8);
+	data8 |= PORT_RGMII_ID_EG_ENABLE;
+	data8 &= ~PORT_MII_NOT_1GBIT;
+	data8 &= ~PORT_MII_SEL_M;
+	data8 |= PORT_RGMII_SEL;
+	ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8);
+
+	/* clear pending interrupts */
+	ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
+}
+
+static void ksz_config_cpu_port(struct dsa_switch *ds)
+{
+	struct ksz_device *dev = ds->priv;
+	int i;
+
+	ds->num_ports = dev->port_cnt;
+
+	for (i = 0; i < ds->num_ports; i++) {
+		if (dsa_is_cpu_port(ds, i) && (dev->cpu_ports & (1 << i))) {
+			dev->cpu_port = i;
+
+			/* enable cpu port */
+			port_setup(dev, i, true);
+		}
+	}
+}
+
+static int ksz_setup(struct dsa_switch *ds)
+{
+	struct ksz_device *dev = ds->priv;
+	int ret = 0;
+
+	dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
+				       dev->num_vlans, GFP_KERNEL);
+	if (!dev->vlan_cache)
+		return -ENOMEM;
+
+	ret = ksz_reset_switch(ds);
+	if (ret) {
+		dev_err(ds->dev, "failed to reset switch\n");
+		return ret;
+	}
+
+	/* accept packet up to 2000bytes */
+	ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_LEGAL_PACKET_DISABLE, true);
+
+	ksz_config_cpu_port(ds);
+
+	ksz_cfg(dev, REG_SW_MAC_CTRL_1, MULTICAST_STORM_DISABLE, true);
+
+	/* queue based egress rate limit */
+	ksz_cfg(dev, REG_SW_MAC_CTRL_5, SW_OUT_RATE_LIMIT_QUEUE_BASED, true);
+
+	/* start switch */
+	ksz_cfg(dev, REG_SW_OPERATION, SW_START, true);
+
+	return 0;
+}
+
+static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds)
+{
+	return DSA_TAG_PROTO_KSZ;
+}
+
+static int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
+{
+	struct ksz_device *dev = ds->priv;
+	u16 val = 0;
+
+	ksz_pread16(dev, addr, 0x100 + (reg << 1), &val);
+
+	return val;
+}
+
+static int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
+{
+	struct ksz_device *dev = ds->priv;
+
+	ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val);
+
+	return 0;
+}
+
+static int ksz_enable_port(struct dsa_switch *ds, int port,
+			   struct phy_device *phy)
+{
+	struct ksz_device *dev = ds->priv;
+
+	/* setup slave port */
+	port_setup(dev, port, false);
+
+	return 0;
+}
+
+static void ksz_disable_port(struct dsa_switch *ds, int port,
+			     struct phy_device *phy)
+{
+	struct ksz_device *dev = ds->priv;
+
+	/* there is no port disable */
+	ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
+}
+
+static int ksz_sset_count(struct dsa_switch *ds)
+{
+	return TOTAL_SWITCH_COUNTER_NUM;
+}
+
+static void ksz_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
+{
+	int i;
+
+	for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
+		memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string,
+		       ETH_GSTRING_LEN);
+	}
+}
+
+static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port,
+				  uint64_t *buf)
+{
+	struct ksz_device *dev = ds->priv;
+	int i;
+	u32 data;
+	int timeout;
+
+	mutex_lock(&dev->stats_mutex);
+
+	for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
+		data = MIB_COUNTER_READ;
+		data |= ((mib_names[i].index & 0xFF) << MIB_COUNTER_INDEX_S);
+		ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
+
+		timeout = 1000;
+		do {
+			ksz_pread32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
+				    &data);
+			usleep_range(1, 10);
+			if (!(data & MIB_COUNTER_READ))
+				break;
+		} while (timeout-- > 0);
+
+		/* failed to read MIB. get out of loop */
+		if (!timeout) {
+			dev_dbg(dev->dev, "Failed to get MIB\n");
+			break;
+		}
+
+		/* count resets upon read */
+		ksz_pread32(dev, port, REG_PORT_MIB_DATA, &data);
+
+		dev->mib_value[i] += (uint64_t)data;
+		buf[i] = dev->mib_value[i];
+	}
+
+	mutex_unlock(&dev->stats_mutex);
+}
+
+static void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+	struct ksz_device *dev = ds->priv;
+	u8 data;
+
+	ksz_pread8(dev, port, P_STP_CTRL, &data);
+	data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
+
+	switch (state) {
+	case BR_STATE_DISABLED:
+		data |= PORT_LEARN_DISABLE;
+		break;
+	case BR_STATE_LISTENING:
+		data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
+		break;
+	case BR_STATE_LEARNING:
+		data |= PORT_RX_ENABLE;
+		break;
+	case BR_STATE_FORWARDING:
+		data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
+		break;
+	case BR_STATE_BLOCKING:
+		data |= PORT_LEARN_DISABLE;
+		break;
+	default:
+		dev_err(ds->dev, "invalid STP state: %d\n", state);
+		return;
+	}
+
+	ksz_pwrite8(dev, port, P_STP_CTRL, data);
+}
+
+static void ksz_port_fast_age(struct dsa_switch *ds, int port)
+{
+	struct ksz_device *dev = ds->priv;
+	u8 data8;
+
+	ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
+	data8 |= SW_FAST_AGING;
+	ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+
+	data8 &= ~SW_FAST_AGING;
+	ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+}
+
+static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag)
+{
+	struct ksz_device *dev = ds->priv;
+
+	if (flag) {
+		ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
+			     PORT_VLAN_LOOKUP_VID_0, true);
+		ksz_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, true);
+		ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_VLAN_ENABLE, true);
+	} else {
+		ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_VLAN_ENABLE, false);
+		ksz_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, false);
+		ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
+			     PORT_VLAN_LOOKUP_VID_0, false);
+	}
+
+	return 0;
+}
+
+static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
+				 const struct switchdev_obj_port_vlan *vlan,
+				 struct switchdev_trans *trans)
+{
+	/* nothing needed */
+
+	return 0;
+}
+
+static void ksz_port_vlan_add(struct dsa_switch *ds, int port,
+			      const struct switchdev_obj_port_vlan *vlan,
+			      struct switchdev_trans *trans)
+{
+	struct ksz_device *dev = ds->priv;
+	u32 vlan_table[3];
+	u16 vid;
+	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+		if (get_vlan_table(ds, vid, vlan_table)) {
+			dev_dbg(dev->dev, "Failed to get vlan table\n");
+			return;
+		}
+
+		vlan_table[0] = VLAN_VALID | (vid & VLAN_FID_M);
+		if (untagged)
+			vlan_table[1] |= BIT(port);
+		else
+			vlan_table[1] &= ~BIT(port);
+		vlan_table[1] &= ~(BIT(dev->cpu_port));
+
+		vlan_table[2] |= BIT(port) | BIT(dev->cpu_port);
+
+		if (set_vlan_table(ds, vid, vlan_table)) {
+			dev_dbg(dev->dev, "Failed to set vlan table\n");
+			return;
+		}
+
+		/* change PVID */
+		if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
+			ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, vid);
+	}
+}
+
+static int ksz_port_vlan_del(struct dsa_switch *ds, int port,
+			     const struct switchdev_obj_port_vlan *vlan)
+{
+	struct ksz_device *dev = ds->priv;
+	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+	u32 vlan_table[3];
+	u16 vid;
+	u16 pvid;
+
+	ksz_pread16(dev, port, REG_PORT_DEFAULT_VID, &pvid);
+	pvid = pvid & 0xFFF;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+		if (get_vlan_table(ds, vid, vlan_table)) {
+			dev_dbg(dev->dev, "Failed to get vlan table\n");
+			return -ETIMEDOUT;
+		}
+
+		vlan_table[2] &= ~BIT(port);
+
+		if (pvid == vid)
+			pvid = 1;
+
+		if (untagged)
+			vlan_table[1] &= ~BIT(port);
+
+		if (set_vlan_table(ds, vid, vlan_table)) {
+			dev_dbg(dev->dev, "Failed to set vlan table\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, pvid);
+
+	return 0;
+}
+
+static int ksz_port_vlan_dump(struct dsa_switch *ds, int port,
+			      struct switchdev_obj_port_vlan *vlan,
+			      switchdev_obj_dump_cb_t *cb)
+{
+	struct ksz_device *dev = ds->priv;
+	u16 vid;
+	u16 data;
+	struct vlan_table *vlan_cache;
+	int err = 0;
+
+	mutex_lock(&dev->vlan_mutex);
+
+	/* use dev->vlan_cache due to lack of searching valid vlan entry */
+	for (vid = vlan->vid_begin; vid < dev->num_vlans; vid++) {
+		vlan_cache = &dev->vlan_cache[vid];
+
+		if (!(vlan_cache->table[0] & VLAN_VALID))
+			continue;
+
+		vlan->vid_begin = vid;
+		vlan->vid_end = vid;
+		vlan->flags = 0;
+		if (vlan_cache->table[2] & BIT(port)) {
+			if (vlan_cache->table[1] & BIT(port))
+				vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+			ksz_pread16(dev, port, REG_PORT_DEFAULT_VID, &data);
+			if (vid == (data & 0xFFFFF))
+				vlan->flags |= BRIDGE_VLAN_INFO_PVID;
+
+			err = cb(&vlan->obj);
+			if (err)
+				break;
+		}
+	}
+
+	mutex_unlock(&dev->vlan_mutex);
+
+	return err;
+}
+
+static int ksz_port_fdb_prepare(struct dsa_switch *ds, int port,
+				const struct switchdev_obj_port_fdb *fdb,
+				struct switchdev_trans *trans)
+{
+	/* nothing needed */
+
+	return 0;
+}
+
+struct alu_struct {
+	/* entry 1 */
+	u8	is_static:1;
+	u8	is_src_filter:1;
+	u8	is_dst_filter:1;
+	u8	prio_age:3;
+	u32	_reserv_0_1:23;
+	u8	mstp:3;
+	/* entry 2 */
+	u8	is_override:1;
+	u8	is_use_fid:1;
+	u32	_reserv_1_1:23;
+	u8	port_forward:7;
+	/* entry 3 & 4*/
+	u32	_reserv_2_1:9;
+	u8	fid:7;
+	u8	mac[ETH_ALEN];
+};
+
+static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
+			     const struct switchdev_obj_port_fdb *fdb,
+			     struct switchdev_trans *trans)
+{
+	struct ksz_device *dev = ds->priv;
+	u32 alu_table[4];
+	u32 data;
+
+	mutex_lock(&dev->alu_mutex);
+
+	/* find any entry with mac & vid */
+	data = fdb->vid << ALU_FID_INDEX_S;
+	data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+	ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
+
+	data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
+	data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+	ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
+
+	/* start read operation */
+	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
+
+	/* wait to be finished */
+	if (wait_alu_ready(dev, ALU_START, 1000) < 0) {
+		dev_dbg(dev->dev, "Failed to read ALU\n");
+		goto exit;
+	}
+
+	/* read ALU entry */
+	read_table(ds, alu_table);
+
+	/* update ALU entry */
+	alu_table[0] = ALU_V_STATIC_VALID;
+	alu_table[1] |= BIT(port);
+	if (fdb->vid)
+		alu_table[1] |= ALU_V_USE_FID;
+	alu_table[2] = (fdb->vid << ALU_V_FID_S);
+	alu_table[2] |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+	alu_table[3] = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
+	alu_table[3] |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+
+	write_table(ds, alu_table);
+
+	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
+
+	/* wait to be finished */
+	if (wait_alu_ready(dev, ALU_START, 1000) < 0)
+		dev_dbg(dev->dev, "Failed to read ALU\n");
+
+exit:
+	mutex_unlock(&dev->alu_mutex);
+}
+
+static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
+			    const struct switchdev_obj_port_fdb *fdb)
+{
+	struct ksz_device *dev = ds->priv;
+	u32 alu_table[4];
+	u32 data;
+	int ret = 0;
+
+	mutex_lock(&dev->alu_mutex);
+
+	/* read any entry with mac & vid */
+	data = fdb->vid << ALU_FID_INDEX_S;
+	data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+	ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
+
+	data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
+	data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+	ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
+
+	/* start read operation */
+	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
+
+	/* wait to be finished */
+	ret = wait_alu_ready(dev, ALU_START, 1000);
+	if (ret < 0) {
+		dev_dbg(dev->dev, "Failed to read ALU\n");
+		goto exit;
+	}
+
+	ksz_read32(dev, REG_SW_ALU_VAL_A, &alu_table[0]);
+	if (alu_table[0] & ALU_V_STATIC_VALID) {
+		ksz_read32(dev, REG_SW_ALU_VAL_B, &alu_table[1]);
+		ksz_read32(dev, REG_SW_ALU_VAL_C, &alu_table[2]);
+		ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]);
+
+		/* clear forwarding port */
+		alu_table[2] &= ~BIT(port);
+
+		/* if there is no port to forward, clear table */
+		if ((alu_table[2] & ALU_V_PORT_MAP) == 0) {
+			alu_table[0] = 0;
+			alu_table[1] = 0;
+			alu_table[2] = 0;
+			alu_table[3] = 0;
+		}
+	} else {
+		alu_table[0] = 0;
+		alu_table[1] = 0;
+		alu_table[2] = 0;
+		alu_table[3] = 0;
+	}
+
+	write_table(ds, alu_table);
+
+	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
+
+	/* wait to be finished */
+	ret = wait_alu_ready(dev, ALU_START, 1000);
+	if (ret < 0)
+		dev_dbg(dev->dev, "Failed to write ALU\n");
+
+exit:
+	mutex_unlock(&dev->alu_mutex);
+
+	return ret;
+}
+
+static void convert_alu(struct alu_struct *alu, u32 *alu_table)
+{
+	alu->is_static = !!(alu_table[0] & ALU_V_STATIC_VALID);
+	alu->is_src_filter = !!(alu_table[0] & ALU_V_SRC_FILTER);
+	alu->is_dst_filter = !!(alu_table[0] & ALU_V_DST_FILTER);
+	alu->prio_age = (alu_table[0] >> ALU_V_PRIO_AGE_CNT_S) &
+			ALU_V_PRIO_AGE_CNT_M;
+	alu->mstp = alu_table[0] & ALU_V_MSTP_M;
+
+	alu->is_override = !!(alu_table[1] & ALU_V_OVERRIDE);
+	alu->is_use_fid = !!(alu_table[1] & ALU_V_USE_FID);
+	alu->port_forward = alu_table[1] & ALU_V_PORT_MAP;
+
+	alu->fid = (alu_table[2] >> ALU_V_FID_S) & ALU_V_FID_M;
+
+	alu->mac[0] = (alu_table[2] >> 8) & 0xFF;
+	alu->mac[1] = alu_table[2] & 0xFF;
+	alu->mac[2] = (alu_table[3] >> 24) & 0xFF;
+	alu->mac[3] = (alu_table[3] >> 16) & 0xFF;
+	alu->mac[4] = (alu_table[3] >> 8) & 0xFF;
+	alu->mac[5] = alu_table[3] & 0xFF;
+}
+
+static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
+			     struct switchdev_obj_port_fdb *fdb,
+			     switchdev_obj_dump_cb_t *cb)
+{
+	struct ksz_device *dev = ds->priv;
+	int ret = 0;
+	u32 data;
+	u32 alu_table[4];
+	struct alu_struct alu;
+	int timeout;
+
+	mutex_lock(&dev->alu_mutex);
+
+	/* start ALU search */
+	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_START | ALU_SEARCH);
+
+	do {
+		timeout = 1000;
+		do {
+			ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
+			if ((data & ALU_VALID) || !(data & ALU_START))
+				break;
+			usleep_range(1, 10);
+		} while (timeout-- > 0);
+
+		if (!timeout) {
+			dev_dbg(dev->dev, "Failed to search ALU\n");
+			ret = -ETIMEDOUT;
+			goto exit;
+		}
+
+		/* read ALU table */
+		read_table(ds, alu_table);
+
+		convert_alu(&alu, alu_table);
+
+		if (alu.port_forward & BIT(port)) {
+			fdb->vid = alu.fid;
+			if (alu.is_static)
+				fdb->ndm_state = NUD_NOARP;
+			else
+				fdb->ndm_state = NUD_REACHABLE;
+			ether_addr_copy(fdb->addr, alu.mac);
+
+			ret = cb(&fdb->obj);
+			if (ret)
+				goto exit;
+		}
+	} while (data & ALU_START);
+
+exit:
+
+	/* stop ALU search */
+	ksz_write32(dev, REG_SW_ALU_CTRL__4, 0);
+
+	mutex_unlock(&dev->alu_mutex);
+
+	return ret;
+}
+
+static int ksz_port_mdb_prepare(struct dsa_switch *ds, int port,
+				const struct switchdev_obj_port_mdb *mdb,
+				struct switchdev_trans *trans)
+{
+	/* nothing to do */
+	return 0;
+}
+
+static void ksz_port_mdb_add(struct dsa_switch *ds, int port,
+			     const struct switchdev_obj_port_mdb *mdb,
+			     struct switchdev_trans *trans)
+{
+	struct ksz_device *dev = ds->priv;
+	u32 static_table[4];
+	u32 data;
+	int index;
+	u32 mac_hi, mac_lo;
+
+	mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]);
+	mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16));
+	mac_lo |= ((mdb->addr[4] << 8) | mdb->addr[5]);
+
+	mutex_lock(&dev->alu_mutex);
+
+	for (index = 0; index < dev->num_statics; index++) {
+		/* find empty slot first */
+		data = (index << ALU_STAT_INDEX_S) |
+			ALU_STAT_READ | ALU_STAT_START;
+		ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+		/* wait to be finished */
+		if (wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0) {
+			dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+			goto exit;
+		}
+
+		/* read ALU static table */
+		read_table(ds, static_table);
+
+		if (static_table[0] & ALU_V_STATIC_VALID) {
+			/* check this has same vid & mac address */
+			if (((static_table[2] >> ALU_V_FID_S) == (mdb->vid)) &&
+			    ((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
+			    (static_table[3] == mac_lo)) {
+				/* found matching one */
+				break;
+			}
+		} else {
+			/* found empty one */
+			break;
+		}
+	}
+
+	/* no available entry */
+	if (index == dev->num_statics)
+		goto exit;
+
+	/* add entry */
+	static_table[0] = ALU_V_STATIC_VALID;
+	static_table[1] |= BIT(port);
+	if (mdb->vid)
+		static_table[1] |= ALU_V_USE_FID;
+	static_table[2] = (mdb->vid << ALU_V_FID_S);
+	static_table[2] |= mac_hi;
+	static_table[3] = mac_lo;
+
+	write_table(ds, static_table);
+
+	data = (index << ALU_STAT_INDEX_S) | ALU_STAT_START;
+	ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+	/* wait to be finished */
+	if (wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0)
+		dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+
+exit:
+	mutex_unlock(&dev->alu_mutex);
+}
+
+static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
+			    const struct switchdev_obj_port_mdb *mdb)
+{
+	struct ksz_device *dev = ds->priv;
+	u32 static_table[4];
+	u32 data;
+	int index;
+	int ret = 0;
+	u32 mac_hi, mac_lo;
+
+	mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]);
+	mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16));
+	mac_lo |= ((mdb->addr[4] << 8) | mdb->addr[5]);
+
+	mutex_lock(&dev->alu_mutex);
+
+	for (index = 0; index < dev->num_statics; index++) {
+		/* find empty slot first */
+		data = (index << ALU_STAT_INDEX_S) |
+			ALU_STAT_READ | ALU_STAT_START;
+		ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+		/* wait to be finished */
+		ret = wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
+		if (ret < 0) {
+			dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+			goto exit;
+		}
+
+		/* read ALU static table */
+		read_table(ds, static_table);
+
+		if (static_table[0] & ALU_V_STATIC_VALID) {
+			/* check this has same vid & mac address */
+
+			if (((static_table[2] >> ALU_V_FID_S) == (mdb->vid)) &&
+			    ((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
+			    (static_table[3] == mac_lo)) {
+				/* found matching one */
+				break;
+			}
+		}
+	}
+
+	/* no available entry */
+	if (index == dev->num_statics) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/* clear port */
+	static_table[1] &= ~BIT(port);
+
+	if ((static_table[1] & ALU_V_PORT_MAP) == 0) {
+		/* delete entry */
+		static_table[0] = 0;
+		static_table[1] = 0;
+		static_table[2] = 0;
+		static_table[3] = 0;
+	}
+
+	write_table(ds, static_table);
+
+	data = (index << ALU_STAT_INDEX_S) | ALU_STAT_START;
+	ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+	/* wait to be finished */
+	ret = wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
+	if (ret < 0)
+		dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+
+exit:
+	mutex_unlock(&dev->alu_mutex);
+
+	return ret;
+}
+
+static int ksz_port_mdb_dump(struct dsa_switch *ds, int port,
+			     struct switchdev_obj_port_mdb *mdb,
+			     switchdev_obj_dump_cb_t *cb)
+{
+	/* this is not called by switch layer */
+	return 0;
+}
+
+static int ksz_port_mirror_add(struct dsa_switch *ds, int port,
+			       struct dsa_mall_mirror_tc_entry *mirror,
+			       bool ingress)
+{
+	struct ksz_device *dev = ds->priv;
+
+	if (ingress)
+		ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
+	else
+		ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true);
+
+	ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false);
+
+	/* configure mirror port */
+	ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
+		     PORT_MIRROR_SNIFFER, true);
+
+	ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false);
+
+	return 0;
+}
+
+static void ksz_port_mirror_del(struct dsa_switch *ds, int port,
+				struct dsa_mall_mirror_tc_entry *mirror)
+{
+	struct ksz_device *dev = ds->priv;
+	u8 data;
+
+	if (mirror->ingress)
+		ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false);
+	else
+		ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false);
+
+	ksz_pread8(dev, port, P_MIRROR_CTRL, &data);
+
+	if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX)))
+		ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
+			     PORT_MIRROR_SNIFFER, false);
+}
+
+static const struct dsa_switch_ops ksz_switch_ops = {
+	.get_tag_protocol	= ksz_get_tag_protocol,
+	.setup			= ksz_setup,
+	.phy_read		= ksz_phy_read16,
+	.phy_write		= ksz_phy_write16,
+	.port_enable		= ksz_enable_port,
+	.port_disable		= ksz_disable_port,
+	.get_strings		= ksz_get_strings,
+	.get_ethtool_stats	= ksz_get_ethtool_stats,
+	.get_sset_count		= ksz_sset_count,
+	.port_stp_state_set	= ksz_port_stp_state_set,
+	.port_fast_age		= ksz_port_fast_age,
+	.port_vlan_filtering	= ksz_port_vlan_filtering,
+	.port_vlan_prepare	= ksz_port_vlan_prepare,
+	.port_vlan_add		= ksz_port_vlan_add,
+	.port_vlan_del		= ksz_port_vlan_del,
+	.port_vlan_dump		= ksz_port_vlan_dump,
+	.port_fdb_prepare	= ksz_port_fdb_prepare,
+	.port_fdb_dump		= ksz_port_fdb_dump,
+	.port_fdb_add		= ksz_port_fdb_add,
+	.port_fdb_del		= ksz_port_fdb_del,
+	.port_mdb_prepare       = ksz_port_mdb_prepare,
+	.port_mdb_add           = ksz_port_mdb_add,
+	.port_mdb_del           = ksz_port_mdb_del,
+	.port_mdb_dump          = ksz_port_mdb_dump,
+	.port_mirror_add	= ksz_port_mirror_add,
+	.port_mirror_del	= ksz_port_mirror_del,
+};
+
+struct ksz_chip_data {
+	u32 chip_id;
+	const char *dev_name;
+	int num_vlans;
+	int num_alus;
+	int num_statics;
+	int cpu_ports;
+	int port_cnt;
+};
+
+static const struct ksz_chip_data ksz_switch_chips[] = {
+	{
+		.chip_id = 0x00947700,
+		.dev_name = "KSZ9477",
+		.num_vlans = 4096,
+		.num_alus = 4096,
+		.num_statics = 16,
+		.cpu_ports = 0x7F,	/* can be configured as cpu port */
+		.port_cnt = 7,		/* total physical port count */
+	},
+};
+
+static int ksz_switch_init(struct ksz_device *dev)
+{
+	int i;
+
+	mutex_init(&dev->reg_mutex);
+	mutex_init(&dev->stats_mutex);
+	mutex_init(&dev->alu_mutex);
+	mutex_init(&dev->vlan_mutex);
+
+	dev->ds->ops = &ksz_switch_ops;
+
+	for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) {
+		const struct ksz_chip_data *chip = &ksz_switch_chips[i];
+
+		if (dev->chip_id == chip->chip_id) {
+			dev->name = chip->dev_name;
+			dev->num_vlans = chip->num_vlans;
+			dev->num_alus = chip->num_alus;
+			dev->num_statics = chip->num_statics;
+			dev->port_cnt = chip->port_cnt;
+			dev->cpu_ports = chip->cpu_ports;
+
+			break;
+		}
+	}
+
+	/* no switch found */
+	if (!dev->port_cnt)
+		return -ENODEV;
+
+	return 0;
+}
+
+struct ksz_device *ksz_switch_alloc(struct device *base,
+				    const struct ksz_io_ops *ops,
+				    void *priv)
+{
+	struct dsa_switch *ds;
+	struct ksz_device *swdev;
+
+	ds = dsa_switch_alloc(base, DSA_MAX_PORTS);
+	if (!ds)
+		return NULL;
+
+	swdev = devm_kzalloc(base, sizeof(*swdev), GFP_KERNEL);
+	if (!swdev)
+		return NULL;
+
+	ds->priv = swdev;
+	swdev->dev = base;
+
+	swdev->ds = ds;
+	swdev->priv = priv;
+	swdev->ops = ops;
+
+	return swdev;
+}
+EXPORT_SYMBOL(ksz_switch_alloc);
+
+int ksz_switch_detect(struct ksz_device *dev)
+{
+	u8 data8;
+	u32 id32;
+	int ret;
+
+	/* turn off SPI DO Edge select */
+	ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
+	if (ret)
+		return ret;
+
+	data8 &= ~SPI_AUTO_EDGE_DETECTION;
+	ret = ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
+	if (ret)
+		return ret;
+
+	/* read chip id */
+	ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32);
+	if (ret)
+		return ret;
+
+	dev->chip_id = id32;
+
+	return 0;
+}
+EXPORT_SYMBOL(ksz_switch_detect);
+
+int ksz_switch_register(struct ksz_device *dev)
+{
+	int ret;
+
+	if (dev->pdata)
+		dev->chip_id = dev->pdata->chip_id;
+
+	if (ksz_switch_detect(dev))
+		return -EINVAL;
+
+	ret = ksz_switch_init(dev);
+	if (ret)
+		return ret;
+
+	return dsa_register_switch(dev->ds);
+}
+EXPORT_SYMBOL(ksz_switch_register);
+
+void ksz_switch_remove(struct ksz_device *dev)
+{
+	dsa_unregister_switch(dev->ds);
+}
+EXPORT_SYMBOL(ksz_switch_remove);
+
+MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
+MODULE_DESCRIPTION("Microchip KSZ Series Switch DSA Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h
new file mode 100644
index 000000000000..2a98dbd51456
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -0,0 +1,210 @@
+/*
+ * Microchip KSZ series switch common definitions
+ *
+ * Copyright (C) 2017
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __KSZ_PRIV_H
+#define __KSZ_PRIV_H
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/phy.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+
+#include "ksz_9477_reg.h"
+
+struct ksz_io_ops;
+
+struct vlan_table {
+	u32 table[3];
+};
+
+struct ksz_device {
+	struct dsa_switch *ds;
+	struct ksz_platform_data *pdata;
+	const char *name;
+
+	struct mutex reg_mutex;		/* register access */
+	struct mutex stats_mutex;	/* status access */
+	struct mutex alu_mutex;		/* ALU access */
+	struct mutex vlan_mutex;	/* vlan access */
+	const struct ksz_io_ops *ops;
+
+	struct device *dev;
+
+	void *priv;
+
+	/* chip specific data */
+	u32 chip_id;
+	int num_vlans;
+	int num_alus;
+	int num_statics;
+	int cpu_port;			/* port connected to CPU */
+	int cpu_ports;			/* port bitmap can be cpu port */
+	int port_cnt;
+
+	struct vlan_table *vlan_cache;
+
+	u64 mib_value[TOTAL_SWITCH_COUNTER_NUM];
+};
+
+struct ksz_io_ops {
+	int (*read8)(struct ksz_device *dev, u32 reg, u8 *value);
+	int (*read16)(struct ksz_device *dev, u32 reg, u16 *value);
+	int (*read24)(struct ksz_device *dev, u32 reg, u32 *value);
+	int (*read32)(struct ksz_device *dev, u32 reg, u32 *value);
+	int (*write8)(struct ksz_device *dev, u32 reg, u8 value);
+	int (*write16)(struct ksz_device *dev, u32 reg, u16 value);
+	int (*write24)(struct ksz_device *dev, u32 reg, u32 value);
+	int (*write32)(struct ksz_device *dev, u32 reg, u32 value);
+	int (*phy_read16)(struct ksz_device *dev, int addr, int reg,
+			  u16 *value);
+	int (*phy_write16)(struct ksz_device *dev, int addr, int reg,
+			   u16 value);
+};
+
+struct ksz_device *ksz_switch_alloc(struct device *base,
+				    const struct ksz_io_ops *ops, void *priv);
+int ksz_switch_detect(struct ksz_device *dev);
+int ksz_switch_register(struct ksz_device *dev);
+void ksz_switch_remove(struct ksz_device *dev);
+
+static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->read8(dev, reg, val);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->read16(dev, reg, val);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_read24(struct ksz_device *dev, u32 reg, u32 *val)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->read24(dev, reg, val);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->read32(dev, reg, val);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_write8(struct ksz_device *dev, u32 reg, u8 value)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->write8(dev, reg, value);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_write16(struct ksz_device *dev, u32 reg, u16 value)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->write16(dev, reg, value);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_write24(struct ksz_device *dev, u32 reg, u32 value)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->write24(dev, reg, value);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value)
+{
+	int ret;
+
+	mutex_lock(&dev->reg_mutex);
+	ret = dev->ops->write32(dev, reg, value);
+	mutex_unlock(&dev->reg_mutex);
+
+	return ret;
+}
+
+static inline void ksz_pread8(struct ksz_device *dev, int port, int offset,
+			      u8 *data)
+{
+	ksz_read8(dev, PORT_CTRL_ADDR(port, offset), data);
+}
+
+static inline void ksz_pread16(struct ksz_device *dev, int port, int offset,
+			       u16 *data)
+{
+	ksz_read16(dev, PORT_CTRL_ADDR(port, offset), data);
+}
+
+static inline void ksz_pread32(struct ksz_device *dev, int port, int offset,
+			       u32 *data)
+{
+	ksz_read32(dev, PORT_CTRL_ADDR(port, offset), data);
+}
+
+static inline void ksz_pwrite8(struct ksz_device *dev, int port, int offset,
+			       u8 data)
+{
+	ksz_write8(dev, PORT_CTRL_ADDR(port, offset), data);
+}
+
+static inline void ksz_pwrite16(struct ksz_device *dev, int port, int offset,
+				u16 data)
+{
+	ksz_write16(dev, PORT_CTRL_ADDR(port, offset), data);
+}
+
+static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
+				u32 data)
+{
+	ksz_write32(dev, PORT_CTRL_ADDR(port, offset), data);
+}
+
+#endif
diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz_spi.c
new file mode 100644
index 000000000000..c51946983bed
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_spi.c
@@ -0,0 +1,216 @@
+/*
+ * Microchip KSZ series register access through SPI
+ *
+ * Copyright (C) 2017
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <asm/unaligned.h>
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+
+#include "ksz_priv.h"
+
+/* SPI frame opcodes */
+#define KS_SPIOP_RD			3
+#define KS_SPIOP_WR			2
+
+#define SPI_ADDR_SHIFT			24
+#define SPI_ADDR_MASK			(BIT(SPI_ADDR_SHIFT) - 1)
+#define SPI_TURNAROUND_SHIFT		5
+
+static int ksz_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
+			    unsigned int len)
+{
+	u32 txbuf;
+	int ret;
+
+	txbuf = reg & SPI_ADDR_MASK;
+	txbuf |= KS_SPIOP_RD << SPI_ADDR_SHIFT;
+	txbuf <<= SPI_TURNAROUND_SHIFT;
+	txbuf = cpu_to_be32(txbuf);
+
+	ret = spi_write_then_read(spi, &txbuf, 4, val, len);
+	return ret;
+}
+
+static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
+			unsigned int len)
+{
+	struct spi_device *spi = dev->priv;
+
+	return ksz_spi_read_reg(spi, reg, data, len);
+}
+
+static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
+{
+	return ksz_spi_read(dev, reg, val, 1);
+}
+
+static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
+{
+	int ret = ksz_spi_read(dev, reg, (u8 *)val, 2);
+
+	if (!ret)
+		*val = be16_to_cpu(*val);
+
+	return ret;
+}
+
+static int ksz_spi_read24(struct ksz_device *dev, u32 reg, u32 *val)
+{
+	int ret;
+
+	*val = 0;
+	ret = ksz_spi_read(dev, reg, (u8 *)val, 3);
+	if (!ret) {
+		*val = be32_to_cpu(*val);
+		/* convert to 24bit */
+		*val >>= 8;
+	}
+
+	return ret;
+}
+
+static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
+{
+	int ret = ksz_spi_read(dev, reg, (u8 *)val, 4);
+
+	if (!ret)
+		*val = be32_to_cpu(*val);
+
+	return ret;
+}
+
+static int ksz_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val,
+			     unsigned int len)
+{
+	u32 txbuf;
+	u8 data[12];
+	int i;
+
+	txbuf = reg & SPI_ADDR_MASK;
+	txbuf |= (KS_SPIOP_WR << SPI_ADDR_SHIFT);
+	txbuf <<= SPI_TURNAROUND_SHIFT;
+	txbuf = cpu_to_be32(txbuf);
+
+	data[0] = txbuf & 0xFF;
+	data[1] = (txbuf & 0xFF00) >> 8;
+	data[2] = (txbuf & 0xFF0000) >> 16;
+	data[3] = (txbuf & 0xFF000000) >> 24;
+	for (i = 0; i < len; i++)
+		data[i + 4] = val[i];
+
+	return spi_write(spi, &data, 4 + len);
+}
+
+static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
+{
+	struct spi_device *spi = dev->priv;
+
+	return ksz_spi_write_reg(spi, reg, &value, 1);
+}
+
+static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
+{
+	struct spi_device *spi = dev->priv;
+
+	value = cpu_to_be16(value);
+	return ksz_spi_write_reg(spi, reg, (u8 *)&value, 2);
+}
+
+static int ksz_spi_write24(struct ksz_device *dev, u32 reg, u32 value)
+{
+	struct spi_device *spi = dev->priv;
+
+	/* make it to big endian 24bit from MSB */
+	value <<= 8;
+	value = cpu_to_be32(value);
+	return ksz_spi_write_reg(spi, reg, (u8 *)&value, 3);
+}
+
+static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
+{
+	struct spi_device *spi = dev->priv;
+
+	value = cpu_to_be32(value);
+	return ksz_spi_write_reg(spi, reg, (u8 *)&value, 4);
+}
+
+static const struct ksz_io_ops ksz_spi_ops = {
+	.read8 = ksz_spi_read8,
+	.read16 = ksz_spi_read16,
+	.read24 = ksz_spi_read24,
+	.read32 = ksz_spi_read32,
+	.write8 = ksz_spi_write8,
+	.write16 = ksz_spi_write16,
+	.write24 = ksz_spi_write24,
+	.write32 = ksz_spi_write32,
+};
+
+static int ksz_spi_probe(struct spi_device *spi)
+{
+	struct ksz_device *dev;
+	int ret;
+
+	dev = ksz_switch_alloc(&spi->dev, &ksz_spi_ops, spi);
+	if (!dev)
+		return -ENOMEM;
+
+	if (spi->dev.platform_data)
+		dev->pdata = spi->dev.platform_data;
+
+	ret = ksz_switch_register(dev);
+	if (ret)
+		return ret;
+
+	spi_set_drvdata(spi, dev);
+
+	return 0;
+}
+
+static int ksz_spi_remove(struct spi_device *spi)
+{
+	struct ksz_device *dev = spi_get_drvdata(spi);
+
+	if (dev)
+		ksz_switch_remove(dev);
+
+	return 0;
+}
+
+static const struct of_device_id ksz_dt_ids[] = {
+	{ .compatible = "microchip,ksz9477" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ksz_dt_ids);
+
+static struct spi_driver ksz_spi_driver = {
+	.driver = {
+		.name	= "ksz9477-switch",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(ksz_dt_ids),
+	},
+	.probe	= ksz_spi_probe,
+	.remove	= ksz_spi_remove,
+};
+
+module_spi_driver(ksz_spi_driver);
+
+MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
+MODULE_DESCRIPTION("Microchip KSZ Series Switch SPI access Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h
new file mode 100644
index 000000000000..84789ca634aa
--- /dev/null
+++ b/include/linux/platform_data/microchip-ksz.h
@@ -0,0 +1,29 @@
+/*
+ * Microchip KSZ series switch platform data
+ *
+ * Copyright (C) 2017
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __MICROCHIP_KSZ_H
+#define __MICROCHIP_KSZ_H
+
+#include <linux/kernel.h>
+
+struct ksz_platform_data {
+	u32 chip_id;
+	u16 enabled_ports;
+};
+
+#endif
-- 
cgit v1.2.3


From 284e76387c38260e834c99b010a68d75fc46b394 Mon Sep 17 00:00:00 2001
From: Rick Altherr <raltherr@google.com>
Date: Mon, 22 May 2017 14:12:24 -0700
Subject: hw_random: timeriomem_rng: Allow setting RNG quality from platform
 data

When a hw_random device's quality is non-zero, it will automatically be
used to fill the kernel's entropy pool.  Since timeriomem_rng is used by
many different devices, the quality needs to be provided by platform
data or device tree.

Signed-off-by: Rick Altherr <raltherr@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/timeriomem-rng.c | 7 +++++++
 include/linux/timeriomem-rng.h          | 3 +++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index a0faa5f05deb..03ff5483d865 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -151,8 +151,15 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "missing period\n");
 			return -EINVAL;
 		}
+
+		if (!of_property_read_u32(pdev->dev.of_node,
+						"quality", &i))
+			priv->rng_ops.quality = i;
+		else
+			priv->rng_ops.quality = 0;
 	} else {
 		period = pdata->period;
+		priv->rng_ops.quality = pdata->quality;
 	}
 
 	priv->period = ns_to_ktime(period * NSEC_PER_USEC);
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index 46eb27ddbfab..3e00122bcf88 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -13,4 +13,7 @@ struct timeriomem_rng_data {
 
 	/* measures in usecs */
 	unsigned int		period;
+
+	/* bits of entropy per 1024 bits read */
+	unsigned int		quality;
 };
-- 
cgit v1.2.3


From 9748e1d87573c94191442d6bd0307f523e5cd8b8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:45 +0200
Subject: mtd: nand: add support for Micron on-die ECC

Now that the core NAND subsystem has support for on-die ECC, this commit
brings the necessary code to support on-die ECC on Micron NANDs.

In micron_nand_init(), we detect if the Micron NAND chip supports on-die
ECC mode, by checking a number of conditions:

 - It must be an ONFI NAND
 - It must be a SLC NAND

 - Enabling *and* disabling on-die ECC must work

 - The on-die ECC must be correcting 4 bits per 512 bytes of data. Some
   Micron NAND chips have an on-die ECC able to correct 8 bits per 512
   bytes of data, but they work slightly differently and therefore we
   don't support them in this patch.

Then, if the on-die ECC cannot be disabled (some Micron NAND have on-die
ECC forcefully enabled), we bail out, as we don't support such
NANDs. Indeed, the implementation of raw_read()/raw_write() make the
assumption that on-die ECC can be disabled. Support for Micron NANDs
with on-die ECC forcefully enabled can easily be added, but in the
absence of such HW for testing, we preferred to simply bail out.

If the on-die ECC is supported, and requested in the Device Tree, then
it is indeed enabled, by using custom implementations of the
->read_page(), ->read_page_raw(), ->write_page() and ->write_page_raw()
operation to properly handle the on-die ECC.

In the non-raw functions, we need to enable the internal ECC engine
before issuing the NAND_CMD_READ0 or NAND_CMD_SEQIN commands, which is
why we set the NAND_ECC_CUSTOM_PAGE_ACCESS option at initialization
time (it asks the NAND core to let the NAND driver issue those
commands).

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_micron.c | 216 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h       |   2 +
 2 files changed, 218 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c
index 877011069251..9993f8ead1e2 100644
--- a/drivers/mtd/nand/nand_micron.c
+++ b/drivers/mtd/nand/nand_micron.c
@@ -17,6 +17,12 @@
 
 #include <linux/mtd/nand.h>
 
+/*
+ * Special Micron status bit that indicates when the block has been
+ * corrected by on-die ECC and should be rewritten
+ */
+#define NAND_STATUS_WRITE_RECOMMENDED	BIT(3)
+
 struct nand_onfi_vendor_micron {
 	u8 two_plane_read;
 	u8 read_cache;
@@ -66,9 +72,191 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
 	return 0;
 }
 
+static int micron_nand_on_die_ooblayout_ecc(struct mtd_info *mtd, int section,
+					    struct mtd_oob_region *oobregion)
+{
+	if (section >= 4)
+		return -ERANGE;
+
+	oobregion->offset = (section * 16) + 8;
+	oobregion->length = 8;
+
+	return 0;
+}
+
+static int micron_nand_on_die_ooblayout_free(struct mtd_info *mtd, int section,
+					     struct mtd_oob_region *oobregion)
+{
+	if (section >= 4)
+		return -ERANGE;
+
+	oobregion->offset = (section * 16) + 2;
+	oobregion->length = 6;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops micron_nand_on_die_ooblayout_ops = {
+	.ecc = micron_nand_on_die_ooblayout_ecc,
+	.free = micron_nand_on_die_ooblayout_free,
+};
+
+static int micron_nand_on_die_ecc_setup(struct nand_chip *chip, bool enable)
+{
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+
+	if (enable)
+		feature[0] |= ONFI_FEATURE_ON_DIE_ECC_EN;
+
+	return chip->onfi_set_features(nand_to_mtd(chip), chip,
+				       ONFI_FEATURE_ON_DIE_ECC, feature);
+}
+
+static int
+micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
+				 uint8_t *buf, int oob_required,
+				 int page)
+{
+	int status;
+	int max_bitflips = 0;
+
+	micron_nand_on_die_ecc_setup(chip, true);
+
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+	status = chip->read_byte(mtd);
+	if (status & NAND_STATUS_FAIL)
+		mtd->ecc_stats.failed++;
+	/*
+	 * The internal ECC doesn't tell us the number of bitflips
+	 * that have been corrected, but tells us if it recommends to
+	 * rewrite the block. If it's the case, then we pretend we had
+	 * a number of bitflips equal to the ECC strength, which will
+	 * hint the NAND core to rewrite the block.
+	 */
+	else if (status & NAND_STATUS_WRITE_RECOMMENDED)
+		max_bitflips = chip->ecc.strength;
+
+	chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1);
+
+	nand_read_page_raw(mtd, chip, buf, oob_required, page);
+
+	micron_nand_on_die_ecc_setup(chip, false);
+
+	return max_bitflips;
+}
+
+static int
+micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
+				  const uint8_t *buf, int oob_required,
+				  int page)
+{
+	micron_nand_on_die_ecc_setup(chip, true);
+
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	micron_nand_on_die_ecc_setup(chip, false);
+
+	return 0;
+}
+
+static int
+micron_nand_read_page_raw_on_die_ecc(struct mtd_info *mtd,
+				     struct nand_chip *chip,
+				     uint8_t *buf, int oob_required,
+				     int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	nand_read_page_raw(mtd, chip, buf, oob_required, page);
+
+	return 0;
+}
+
+static int
+micron_nand_write_page_raw_on_die_ecc(struct mtd_info *mtd,
+				      struct nand_chip *chip,
+				      const uint8_t *buf, int oob_required,
+				      int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	return 0;
+}
+
+enum {
+	/* The NAND flash doesn't support on-die ECC */
+	MICRON_ON_DIE_UNSUPPORTED,
+
+	/*
+	 * The NAND flash supports on-die ECC and it can be
+	 * enabled/disabled by a set features command.
+	 */
+	MICRON_ON_DIE_SUPPORTED,
+
+	/*
+	 * The NAND flash supports on-die ECC, and it cannot be
+	 * disabled.
+	 */
+	MICRON_ON_DIE_MANDATORY,
+};
+
+/*
+ * Try to detect if the NAND support on-die ECC. To do this, we enable
+ * the feature, and read back if it has been enabled as expected. We
+ * also check if it can be disabled, because some Micron NANDs do not
+ * allow disabling the on-die ECC and we don't support such NANDs for
+ * now.
+ *
+ * This function also has the side effect of disabling on-die ECC if
+ * it had been left enabled by the firmware/bootloader.
+ */
+static int micron_supports_on_die_ecc(struct nand_chip *chip)
+{
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+	int ret;
+
+	if (chip->onfi_version == 0)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	if (chip->bits_per_cell != 1)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	ret = micron_nand_on_die_ecc_setup(chip, true);
+	if (ret)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	chip->onfi_get_features(nand_to_mtd(chip), chip,
+				ONFI_FEATURE_ON_DIE_ECC, feature);
+	if ((feature[0] & ONFI_FEATURE_ON_DIE_ECC_EN) == 0)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	ret = micron_nand_on_die_ecc_setup(chip, false);
+	if (ret)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	chip->onfi_get_features(nand_to_mtd(chip), chip,
+				ONFI_FEATURE_ON_DIE_ECC, feature);
+	if (feature[0] & ONFI_FEATURE_ON_DIE_ECC_EN)
+		return MICRON_ON_DIE_MANDATORY;
+
+	/*
+	 * Some Micron NANDs have an on-die ECC of 4/512, some other
+	 * 8/512. We only support the former.
+	 */
+	if (chip->onfi_params.ecc_bits != 4)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	return MICRON_ON_DIE_SUPPORTED;
+}
+
 static int micron_nand_init(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ondie;
 	int ret;
 
 	ret = micron_nand_onfi_init(chip);
@@ -78,6 +266,34 @@ static int micron_nand_init(struct nand_chip *chip)
 	if (mtd->writesize == 2048)
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 
+	ondie = micron_supports_on_die_ecc(chip);
+
+	if (ondie == MICRON_ON_DIE_MANDATORY) {
+		pr_err("On-die ECC forcefully enabled, not supported\n");
+		return -EINVAL;
+	}
+
+	if (chip->ecc.mode == NAND_ECC_ON_DIE) {
+		if (ondie == MICRON_ON_DIE_UNSUPPORTED) {
+			pr_err("On-die ECC selected but not supported\n");
+			return -EINVAL;
+		}
+
+		chip->ecc.options = NAND_ECC_CUSTOM_PAGE_ACCESS;
+		chip->ecc.bytes = 8;
+		chip->ecc.size = 512;
+		chip->ecc.strength = 4;
+		chip->ecc.algo = NAND_ECC_BCH;
+		chip->ecc.read_page = micron_nand_read_page_on_die_ecc;
+		chip->ecc.write_page = micron_nand_write_page_on_die_ecc;
+		chip->ecc.read_page_raw =
+			micron_nand_read_page_raw_on_die_ecc;
+		chip->ecc.write_page_raw =
+			micron_nand_write_page_raw_on_die_ecc;
+
+		mtd_set_ooblayout(mtd, &micron_nand_on_die_ooblayout_ops);
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 28f7dd9177e9..893d0ce08030 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -258,6 +258,8 @@ struct nand_chip;
 
 /* Vendor-specific feature address (Micron) */
 #define ONFI_FEATURE_ADDR_READ_RETRY	0x89
+#define ONFI_FEATURE_ON_DIE_ECC		0x90
+#define   ONFI_FEATURE_ON_DIE_ECC_EN	BIT(3)
 
 /* ONFI subfeature parameters length */
 #define ONFI_SUBFEATURE_PARAM_LEN	4
-- 
cgit v1.2.3


From 104e442a67cfba4d0cc982384761befb917fb6a1 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:35:58 +0100
Subject: mtd: nand: Pass the CS line to ->setup_data_interface()

Some NAND controllers can assign different NAND timings to different
CS lines. Pass the CS line information to ->setup_data_interface() so
that the NAND controller driver knows which CS line is concerned by
the setup_data_interface() request.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c  |  7 +++----
 drivers/mtd/nand/mxc_nand.c   | 12 +++++-------
 drivers/mtd/nand/nand_base.c  | 22 +++++++++++++---------
 drivers/mtd/nand/s3c2410.c    |  5 ++---
 drivers/mtd/nand/sunxi_nand.c |  7 +++----
 drivers/mtd/nand/tango_nand.c |  7 +++----
 include/linux/mtd/nand.h      | 12 ++++++++----
 7 files changed, 37 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index de57554b8c4f..9d8b051d3187 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -388,9 +388,8 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host,
 	return 0;
 }
 
-static int fsmc_setup_data_interface(struct mtd_info *mtd,
-				     const struct nand_data_interface *conf,
-				     bool check_only)
+static int fsmc_setup_data_interface(struct mtd_info *mtd, int csline,
+				     const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct fsmc_nand_data *host = nand_get_controller_data(nand);
@@ -406,7 +405,7 @@ static int fsmc_setup_data_interface(struct mtd_info *mtd,
 	if (ret)
 		return ret;
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	fsmc_nand_setup(host, &tims);
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 61ca020c5272..a764d5ca7536 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -152,9 +152,8 @@ struct mxc_nand_devtype_data {
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	int (*correct_data)(struct mtd_info *mtd, u_char *dat,
 			u_char *read_ecc, u_char *calc_ecc);
-	int (*setup_data_interface)(struct mtd_info *mtd,
-				    const struct nand_data_interface *conf,
-				    bool check_only);
+	int (*setup_data_interface)(struct mtd_info *mtd, int csline,
+				    const struct nand_data_interface *conf);
 
 	/*
 	 * On i.MX21 the CONFIG2:INT bit cannot be read if interrupts are masked
@@ -1015,9 +1014,8 @@ static void preset_v1(struct mtd_info *mtd)
 	writew(0x4, NFC_V1_V2_WRPROT);
 }
 
-static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
@@ -1075,7 +1073,7 @@ static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd,
 		return -EINVAL;
 	}
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	ret = clk_set_rate(host->clk, rate);
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 79d98c9fa8a7..3317acf0ce04 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1042,12 +1042,13 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 /**
  * nand_reset_data_interface - Reset data interface and timings
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Reset the Data interface and timings to ONFI mode 0.
  *
  * Returns 0 for success or negative error code otherwise.
  */
-static int nand_reset_data_interface(struct nand_chip *chip)
+static int nand_reset_data_interface(struct nand_chip *chip, int chipnr)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_data_interface *conf;
@@ -1071,7 +1072,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
 	 */
 
 	conf = nand_get_default_data_interface();
-	ret = chip->setup_data_interface(mtd, conf, false);
+	ret = chip->setup_data_interface(mtd, chipnr, conf);
 	if (ret)
 		pr_err("Failed to configure data interface to SDR timing mode 0\n");
 
@@ -1081,6 +1082,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
 /**
  * nand_setup_data_interface - Setup the best data interface and timings
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Find and configure the best data interface and NAND timings supported by
  * the chip and the driver.
@@ -1090,7 +1092,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
  *
  * Returns 0 for success or negative error code otherwise.
  */
-static int nand_setup_data_interface(struct nand_chip *chip)
+static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
@@ -1114,7 +1116,7 @@ static int nand_setup_data_interface(struct nand_chip *chip)
 			goto err;
 	}
 
-	ret = chip->setup_data_interface(mtd, chip->data_interface, false);
+	ret = chip->setup_data_interface(mtd, chipnr, chip->data_interface);
 err:
 	return ret;
 }
@@ -1165,8 +1167,10 @@ static int nand_init_data_interface(struct nand_chip *chip)
 		if (ret)
 			continue;
 
-		ret = chip->setup_data_interface(mtd, chip->data_interface,
-						 true);
+		/* Pass -1 to only */
+		ret = chip->setup_data_interface(mtd,
+						 NAND_DATA_IFACE_CHECK_ONLY,
+						 chip->data_interface);
 		if (!ret) {
 			chip->onfi_timing_mode_default = mode;
 			break;
@@ -1193,7 +1197,7 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	ret = nand_reset_data_interface(chip);
+	ret = nand_reset_data_interface(chip, chipnr);
 	if (ret)
 		return ret;
 
@@ -1206,7 +1210,7 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	chip->select_chip(mtd, -1);
 
 	chip->select_chip(mtd, chipnr);
-	ret = nand_setup_data_interface(chip);
+	ret = nand_setup_data_interface(chip, chipnr);
 	chip->select_chip(mtd, -1);
 	if (ret)
 		return ret;
@@ -4396,7 +4400,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	 * For the other dies, nand_reset() will automatically switch to the
 	 * best mode for us.
 	 */
-	ret = nand_setup_data_interface(chip);
+	ret = nand_setup_data_interface(chip, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index f0b030d44f71..9e0c849607b9 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -812,9 +812,8 @@ static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
 	return -ENODEV;
 }
 
-static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	struct s3c2410_platform_nand *pdata = info->platform;
diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 118a26fff368..9c2dbe352c43 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -1592,9 +1592,8 @@ static int _sunxi_nand_lookup_timing(const s32 *lut, int lut_size, u32 duration,
 #define sunxi_nand_lookup_timing(l, p, c) \
 			_sunxi_nand_lookup_timing(l, ARRAY_SIZE(l), p, c)
 
-static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *chip = to_sunxi_nand(nand);
@@ -1707,7 +1706,7 @@ static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd,
 		return tRHW;
 	}
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	/*
diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 05b6e1065203..85e0d97593e8 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -476,9 +476,8 @@ static u32 to_ticks(int kHz, int ps)
 	return DIV_ROUND_UP_ULL((u64)kHz * ps, NSEC_PER_SEC);
 }
 
-static int tango_set_timings(struct mtd_info *mtd,
-			     const struct nand_data_interface *conf,
-			     bool check_only)
+static int tango_set_timings(struct mtd_info *mtd, int csline,
+			     const struct nand_data_interface *conf)
 {
 	const struct nand_sdr_timings *sdr = nand_get_sdr_timings(conf);
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -490,7 +489,7 @@ static int tango_set_timings(struct mtd_info *mtd,
 	if (IS_ERR(sdr))
 		return PTR_ERR(sdr);
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	Trdy = to_ticks(kHz, sdr->tCEA_max - sdr->tREA_max);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 893d0ce08030..9de3686e738c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -107,6 +107,8 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 #define NAND_STATUS_READY	0x40
 #define NAND_STATUS_WP		0x80
 
+#define NAND_DATA_IFACE_CHECK_ONLY	-1
+
 /*
  * Constants for ECC_MODES
  */
@@ -818,7 +820,10 @@ struct nand_manufacturer_ops {
  * @read_retries:	[INTERN] the number of read retry modes supported
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
- * @setup_data_interface: [OPTIONAL] setup the data interface and timing
+ * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If
+ *			  chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this
+ *			  means the configuration should not be applied but
+ *			  only checked.
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
  *			lookup.
@@ -862,9 +867,8 @@ struct nand_chip {
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
-	int (*setup_data_interface)(struct mtd_info *mtd,
-				    const struct nand_data_interface *conf,
-				    bool check_only);
+	int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
+				    const struct nand_data_interface *conf);
 
 
 	int chip_delay;
-- 
cgit v1.2.3


From 7d135bcced20be2b50128432c5426a7278ec4f6d Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 6 May 2017 18:03:33 +0200
Subject: mtd: nand: Drop the ->errstat() hook

The ->errstat() hook is no longer implemented NAND controller drivers.
Get rid of it before someone starts abusing it.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 16 ----------------
 include/linux/mtd/nand.h     |  5 -----
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 561f70e05c88..df613125795b 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2753,14 +2753,6 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (nand_standard_page_accessors(&chip->ecc))
 		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 	status = chip->waitfunc(mtd, chip);
-	/*
-	 * See if operation failed and additional status checks are
-	 * available.
-	 */
-	if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-		status = chip->errstat(mtd, chip, FL_WRITING, status,
-				       page);
-
 	if (status & NAND_STATUS_FAIL)
 		return -EIO;
 
@@ -3220,14 +3212,6 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 
 		status = chip->erase(mtd, page & chip->pagemask);
 
-		/*
-		 * See if operation failed and additional status checks are
-		 * available
-		 */
-		if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-			status = chip->errstat(mtd, chip, FL_ERASING,
-					       status, page);
-
 		/* See if block erase succeeded */
 		if (status & NAND_STATUS_FAIL) {
 			pr_debug("%s: failed erase, page 0x%08x\n",
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9de3686e738c..8b3607bde1b5 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -834,9 +834,6 @@ struct nand_manufacturer_ops {
  *			structure which is shared among multiple independent
  *			devices.
  * @priv:		[OPTIONAL] pointer to private chip data
- * @errstat:		[OPTIONAL] hardware specific function to perform
- *			additional error status checks (determine if errors are
- *			correctable).
  * @manufacturer:	[INTERN] Contains manufacturer information
  */
 
@@ -860,8 +857,6 @@ struct nand_chip {
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
 	int (*erase)(struct mtd_info *mtd, int page);
 	int (*scan_bbt)(struct mtd_info *mtd);
-	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
-			int status, int page);
 	int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From 858edde001e14f070d0fff347fb56c6c79e15312 Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Thu, 11 May 2017 12:17:41 +0530
Subject: phy: Move ULPI phy header out of drivers to include path

Although ULPI phy is currently being used by tusb1210,
there can be other consumers too in future. So move this
to the includes path for phy.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-omap@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/phy-tusb1210.c   |  3 +--
 drivers/phy/ulpi_phy.h       | 31 -------------------------------
 include/linux/phy/ulpi_phy.h | 31 +++++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 33 deletions(-)
 delete mode 100644 drivers/phy/ulpi_phy.h
 create mode 100644 include/linux/phy/ulpi_phy.h

(limited to 'include/linux')

diff --git a/drivers/phy/phy-tusb1210.c b/drivers/phy/phy-tusb1210.c
index 4f6d5e71507d..bb3fb031c478 100644
--- a/drivers/phy/phy-tusb1210.c
+++ b/drivers/phy/phy-tusb1210.c
@@ -12,8 +12,7 @@
 #include <linux/module.h>
 #include <linux/ulpi/driver.h>
 #include <linux/gpio/consumer.h>
-
-#include "ulpi_phy.h"
+#include <linux/phy/ulpi_phy.h>
 
 #define TUSB1210_VENDOR_SPECIFIC2		0x80
 #define TUSB1210_VENDOR_SPECIFIC2_IHSTX_SHIFT	0
diff --git a/drivers/phy/ulpi_phy.h b/drivers/phy/ulpi_phy.h
deleted file mode 100644
index f2ebe490a4bc..000000000000
--- a/drivers/phy/ulpi_phy.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#include <linux/phy/phy.h>
-
-/**
- * Helper that registers PHY for a ULPI device and adds a lookup for binding it
- * and it's controller, which is always the parent.
- */
-static inline struct phy
-*ulpi_phy_create(struct ulpi *ulpi, const struct phy_ops *ops)
-{
-	struct phy *phy;
-	int ret;
-
-	phy = phy_create(&ulpi->dev, NULL, ops);
-	if (IS_ERR(phy))
-		return phy;
-
-	ret = phy_create_lookup(phy, "usb2-phy", dev_name(ulpi->dev.parent));
-	if (ret) {
-		phy_destroy(phy);
-		return ERR_PTR(ret);
-	}
-
-	return phy;
-}
-
-/* Remove a PHY that was created with ulpi_phy_create() and it's lookup. */
-static inline void ulpi_phy_destroy(struct ulpi *ulpi, struct phy *phy)
-{
-	phy_remove_lookup(phy, "usb2-phy", dev_name(ulpi->dev.parent));
-	phy_destroy(phy);
-}
diff --git a/include/linux/phy/ulpi_phy.h b/include/linux/phy/ulpi_phy.h
new file mode 100644
index 000000000000..f2ebe490a4bc
--- /dev/null
+++ b/include/linux/phy/ulpi_phy.h
@@ -0,0 +1,31 @@
+#include <linux/phy/phy.h>
+
+/**
+ * Helper that registers PHY for a ULPI device and adds a lookup for binding it
+ * and it's controller, which is always the parent.
+ */
+static inline struct phy
+*ulpi_phy_create(struct ulpi *ulpi, const struct phy_ops *ops)
+{
+	struct phy *phy;
+	int ret;
+
+	phy = phy_create(&ulpi->dev, NULL, ops);
+	if (IS_ERR(phy))
+		return phy;
+
+	ret = phy_create_lookup(phy, "usb2-phy", dev_name(ulpi->dev.parent));
+	if (ret) {
+		phy_destroy(phy);
+		return ERR_PTR(ret);
+	}
+
+	return phy;
+}
+
+/* Remove a PHY that was created with ulpi_phy_create() and it's lookup. */
+static inline void ulpi_phy_destroy(struct ulpi *ulpi, struct phy *phy)
+{
+	phy_remove_lookup(phy, "usb2-phy", dev_name(ulpi->dev.parent));
+	phy_destroy(phy);
+}
-- 
cgit v1.2.3


From debd3a3b27c76c65a7d032b6f01710e6a6d555ab Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 24 May 2017 17:53:54 +0300
Subject: of: Make of_fwnode_handle() safer

On the expense of a little bit more complexity in the of_fwnode_handle()
macro, make the macro result in NULL in case its argument is NULL while
still referencing it only once.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 50fcdb54087f..f05c87f0c495 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -159,7 +159,13 @@ static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
 		container_of(fwnode, struct device_node, fwnode) : NULL;
 }
 
-#define of_fwnode_handle(node) (&(node)->fwnode)
+#define of_fwnode_handle(node)						\
+	({								\
+		typeof(node) __of_fwnode_handle_node = (node);		\
+									\
+		__of_fwnode_handle_node ?				\
+			&__of_fwnode_handle_node->fwnode : NULL;	\
+	})
 
 static inline bool of_have_populated_dt(void)
 {
-- 
cgit v1.2.3


From d20dc1493db438fbbfb7733adc82f472dd8a0789 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 24 May 2017 17:53:55 +0300
Subject: of: Support const and non-const use for to_of_node()

Turn to_of_node() into a macro in order to support both const and
non-const use. Additionally make the fwnode argument to is_of_node() const
as well.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f05c87f0c495..29b7b738b509 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -148,16 +148,20 @@ extern raw_spinlock_t devtree_lock;
 #ifdef CONFIG_OF
 void of_core_init(void);
 
-static inline bool is_of_node(struct fwnode_handle *fwnode)
+static inline bool is_of_node(const struct fwnode_handle *fwnode)
 {
 	return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF;
 }
 
-static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
-{
-	return is_of_node(fwnode) ?
-		container_of(fwnode, struct device_node, fwnode) : NULL;
-}
+#define to_of_node(__fwnode)						\
+	({								\
+		typeof(__fwnode) __to_of_node_fwnode = (__fwnode);	\
+									\
+		is_of_node(__to_of_node_fwnode) ?			\
+			container_of(__to_of_node_fwnode,		\
+				     struct device_node, fwnode) :	\
+			NULL;						\
+	})
 
 #define of_fwnode_handle(node)						\
 	({								\
@@ -539,12 +543,12 @@ static inline void of_core_init(void)
 {
 }
 
-static inline bool is_of_node(struct fwnode_handle *fwnode)
+static inline bool is_of_node(const struct fwnode_handle *fwnode)
 {
 	return false;
 }
 
-static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 726fdbe9fa7ebccda1579716f68f8bae6fa9c87a Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Thu, 1 Jun 2017 15:29:06 +0300
Subject: qed: Encapsulate interrupt counters in struct

We already have an API struct that contains interrupt-related
numbers. Use it to encapsulate all information relating to the
status of SBs as (used|free).

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c   |  4 ++--
 drivers/net/ethernet/qlogic/qed/qed_int.c   | 20 +++++++++-----------
 drivers/net/ethernet/qlogic/qed/qed_int.h   | 10 +++++-----
 drivers/net/ethernet/qlogic/qed/qed_main.c  |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.c |  9 ++++-----
 include/linux/qed/qed_if.h                  | 12 +++++++++---
 6 files changed, 30 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 3b6114d4461a..1fff0473ddbb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2061,7 +2061,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 		qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
 		feat_num[QED_VF_L2_QUE] = min_t(u32,
 						RESC_NUM(p_hwfn, QED_L2_QUEUE),
-						sb_cnt_info.sb_iov_cnt);
+						sb_cnt_info.iov_cnt);
 		feat_num[QED_PF_L2_QUE] = min_t(u32,
 						RESC_NUM(p_hwfn, QED_SB) -
 						non_l2_sbs,
@@ -2255,7 +2255,7 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn,
 	case QED_SB:
 		memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
 		qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
-		*p_resc_num = sb_cnt_info.sb_cnt;
+		*p_resc_num = sb_cnt_info.cnt;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 3307978f6eeb..c9164e27a1b2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1769,7 +1769,7 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn,
 			      bool b_set, bool b_slowpath)
 {
 	u32 igu_base_sb = p_hwfn->hw_info.p_igu_info->igu_base_sb;
-	u32 igu_sb_cnt = p_hwfn->hw_info.p_igu_info->igu_sb_cnt;
+	u32 igu_sb_cnt = p_hwfn->hw_info.p_igu_info->usage.cnt;
 	u32 igu_sb_id = 0, val = 0;
 
 	val = qed_rd(p_hwfn, p_ptt, IGU_REG_BLOCK_CONFIGURATION);
@@ -1827,7 +1827,6 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
        /* Initialize base sb / sb cnt for PFs and VFs */
 	p_igu_info->igu_base_sb = 0xffff;
-	p_igu_info->igu_sb_cnt = 0;
 	p_igu_info->igu_base_sb_iov = 0xffff;
 
 	/* Distinguish between existent and non-existent default SB */
@@ -1856,7 +1855,7 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 			if (p_igu_info->igu_dsb_id != QED_SB_INVALID_IDX) {
 				if (p_igu_info->igu_base_sb == 0xffff)
 					p_igu_info->igu_base_sb = igu_sb_id;
-				p_igu_info->igu_sb_cnt++;
+				p_igu_info->usage.cnt++;
 			}
 		} else if (!(p_block->is_pf) &&
 			   (p_block->function_id >= min_vf) &&
@@ -1867,7 +1866,7 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 			if (p_igu_info->igu_base_sb_iov == 0xffff)
 				p_igu_info->igu_base_sb_iov = igu_sb_id;
-			p_igu_info->free_blks++;
+			p_igu_info->usage.iov_cnt++;
 		}
 
 		/* Mark the First entry belonging to the PF or its VFs
@@ -1900,12 +1899,13 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	}
 
 	/* All non default SB are considered free at this point */
-	p_igu_info->igu_sb_cnt_iov = p_igu_info->free_blks;
+	p_igu_info->usage.free_cnt = p_igu_info->usage.cnt;
+	p_igu_info->usage.free_cnt_iov = p_igu_info->usage.iov_cnt;
 
 	DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
 		   "igu_dsb_id=0x%x, num Free SBs - PF: %04x VF: %04x\n",
 		   p_igu_info->igu_dsb_id,
-		   p_igu_info->igu_sb_cnt, p_igu_info->igu_sb_cnt_iov);
+		   p_igu_info->usage.cnt, p_igu_info->usage.iov_cnt);
 
 	return 0;
 }
@@ -2003,9 +2003,7 @@ void qed_int_get_num_sbs(struct qed_hwfn	*p_hwfn,
 	if (!info || !p_sb_cnt_info)
 		return;
 
-	p_sb_cnt_info->sb_cnt		= info->igu_sb_cnt;
-	p_sb_cnt_info->sb_iov_cnt	= info->igu_sb_cnt_iov;
-	p_sb_cnt_info->sb_free_blk	= info->free_blks;
+	memcpy(p_sb_cnt_info, &info->usage, sizeof(*p_sb_cnt_info));
 }
 
 u16 qed_int_queue_id_from_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
@@ -2014,10 +2012,10 @@ u16 qed_int_queue_id_from_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
 
 	/* Determine origin of SB id */
 	if ((sb_id >= p_info->igu_base_sb) &&
-	    (sb_id < p_info->igu_base_sb + p_info->igu_sb_cnt)) {
+	    (sb_id < p_info->igu_base_sb + p_info->usage.cnt)) {
 		return sb_id - p_info->igu_base_sb;
 	} else if ((sb_id >= p_info->igu_base_sb_iov) &&
-		   (sb_id < p_info->igu_base_sb_iov + p_info->igu_sb_cnt_iov)) {
+		   (sb_id < p_info->igu_base_sb_iov + p_info->usage.iov_cnt)) {
 		/* We want the first VF queue to be adjacent to the
 		 * last PF queue. Since L2 queues can be partial to
 		 * SBs, we'll use the feature instead.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 60aaf9f9bb78..5a0e8f02c969 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -216,11 +216,11 @@ struct qed_igu_block {
 struct qed_igu_info {
 	struct qed_igu_block entry[MAX_TOT_SB_PER_PATH];
 	u16 igu_dsb_id;
-	u16			igu_base_sb;
-	u16			igu_base_sb_iov;
-	u16			igu_sb_cnt;
-	u16			igu_sb_cnt_iov;
-	u16			free_blks;
+
+	u16 igu_base_sb;
+	u16 igu_base_sb_iov;
+	struct qed_sb_cnt_info usage;
+
 };
 
 /* TODO Names of function may change... */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c5bb80b9afc1..ac3bdcd9f0b6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -762,7 +762,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
 	for_each_hwfn(cdev, i) {
 		memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
 		qed_int_get_num_sbs(&cdev->hwfns[i], &sb_cnt_info);
-		cdev->int_params.in.num_vectors += sb_cnt_info.sb_cnt;
+		cdev->int_params.in.num_vectors += sb_cnt_info.cnt;
 		cdev->int_params.in.num_vectors++; /* slowpath */
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 0827a4187dc7..62b207a80a03 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -874,9 +874,9 @@ static u8 qed_iov_alloc_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 
 	igu_blocks = p_hwfn->hw_info.p_igu_info->entry;
 
-	if (num_rx_queues > p_hwfn->hw_info.p_igu_info->free_blks)
-		num_rx_queues = p_hwfn->hw_info.p_igu_info->free_blks;
-	p_hwfn->hw_info.p_igu_info->free_blks -= num_rx_queues;
+	if (num_rx_queues > p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov)
+		num_rx_queues = p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov;
+	p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov -= num_rx_queues;
 
 	SET_FIELD(val, IGU_MAPPING_LINE_FUNCTION_NUMBER, vf->abs_vf_id);
 	SET_FIELD(val, IGU_MAPPING_LINE_VALID, 1);
@@ -932,8 +932,7 @@ static void qed_iov_free_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 		qed_wr(p_hwfn, p_ptt, addr, val);
 
 		p_info->entry[igu_id].status |= QED_IGU_STATUS_FREE;
-
-		p_hwfn->hw_info.p_igu_info->free_blks++;
+		p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov++;
 	}
 
 	vf->num_sbs = 0;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 73c46d6d5727..607e1c5e185a 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -886,9 +886,15 @@ struct qed_eth_stats {
 #define TX_PI(tc)       (RX_PI + 1 + tc)
 
 struct qed_sb_cnt_info {
-	int	sb_cnt;
-	int	sb_iov_cnt;
-	int	sb_free_blk;
+	/* Original, current, and free SBs for PF */
+	int orig;
+	int cnt;
+	int free_cnt;
+
+	/* Original, current and free SBS for child VFs */
+	int iov_orig;
+	int iov_cnt;
+	int free_cnt_iov;
 };
 
 static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info)
-- 
cgit v1.2.3


From ec33d71de7309c50531c2ae0eb178244899e6e46 Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Wed, 31 May 2017 09:18:33 +0200
Subject: net-next: stmmac: add optional setup function

Instead of adding more ifthen logic for adding a new mac_device_info
setup function, it is easier to add a function pointer to the function
needed.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++-
 include/linux/stmmac.h                            | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f158273eab9b..c80c9c3b67db 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3933,7 +3933,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	struct mac_device_info *mac;
 
 	/* Identify the MAC HW device */
-	if (priv->plat->has_gmac) {
+	if (priv->plat->setup) {
+		mac = priv->plat->setup(priv);
+	} else if (priv->plat->has_gmac) {
 		priv->dev->priv_flags |= IFF_UNICAST_FLT;
 		mac = dwmac1000_setup(priv->ioaddr,
 				      priv->plat->multicast_filter_bins,
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 3921cb9dfadb..8bb550bca96d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -177,6 +177,7 @@ struct plat_stmmacenet_data {
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
+	struct mac_device_info *(*setup)(void *priv);
 	void *bsp_priv;
 	struct clk *stmmac_clk;
 	struct clk *pclk;
-- 
cgit v1.2.3


From 9f93ac8d4085f718d3c7c5fedcb98dbdd2287648 Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Wed, 31 May 2017 09:18:36 +0200
Subject: net-next: stmmac: Add dwmac-sun8i

The dwmac-sun8i is a heavy hacked version of stmmac hardware by
allwinner.
In fact the only common part is the descriptor management and the first
register function.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig        |  11 +
 drivers/net/ethernet/stmicro/stmmac/Makefile       |   1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c  | 990 +++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  15 +
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |   9 +-
 include/linux/stmmac.h                             |   1 +
 6 files changed, 1025 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index cfbe3634dfa1..85c0e41f8021 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -145,6 +145,17 @@ config DWMAC_SUNXI
 	  This selects Allwinner SoC glue layer support for the
 	  stmmac device driver. This driver is used for A20/A31
 	  GMAC ethernet controller.
+
+config DWMAC_SUN8I
+	tristate "Allwinner sun8i GMAC support"
+	default ARCH_SUNXI
+	depends on OF && (ARCH_SUNXI || COMPILE_TEST)
+	---help---
+	  Support for Allwinner H3 A83T A64 EMAC ethernet controllers.
+
+	  This selects Allwinner SoC glue layer support for the
+	  stmmac device driver. This driver is used for H3/A83T/A64
+	  EMAC ethernet controller.
 endif
 
 config STMMAC_PCI
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 700c60336674..fd4937a7fcab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)		+= dwmac-sti.o
 obj-$(CONFIG_DWMAC_STM32)	+= dwmac-stm32.o
 obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
+obj-$(CONFIG_DWMAC_SUN8I)	+= dwmac-sun8i.o
 obj-$(CONFIG_DWMAC_DWC_QOS_ETH)	+= dwmac-dwc-qos-eth.o
 obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
 stmmac-platform-objs:= stmmac_platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
new file mode 100644
index 000000000000..1a6bfe6c958f
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -0,0 +1,990 @@
+/*
+ * dwmac-sun8i.c - Allwinner sun8i DWMAC specific glue layer
+ *
+ * Copyright (C) 2017 Corentin Labbe <clabbe.montjoie@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regmap.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+/* General notes on dwmac-sun8i:
+ * Locking: no locking is necessary in this file because all necessary locking
+ *		is done in the "stmmac files"
+ */
+
+/* struct emac_variant - Descrive dwmac-sun8i hardware variant
+ * @default_syscon_value:	The default value of the EMAC register in syscon
+ *				This value is used for disabling properly EMAC
+ *				and used as a good starting value in case of the
+ *				boot process(uboot) leave some stuff.
+ * @internal_phy:		Does the MAC embed an internal PHY
+ * @support_mii:		Does the MAC handle MII
+ * @support_rmii:		Does the MAC handle RMII
+ * @support_rgmii:		Does the MAC handle RGMII
+ */
+struct emac_variant {
+	u32 default_syscon_value;
+	int internal_phy;
+	bool support_mii;
+	bool support_rmii;
+	bool support_rgmii;
+};
+
+/* struct sunxi_priv_data - hold all sunxi private data
+ * @tx_clk:	reference to MAC TX clock
+ * @ephy_clk:	reference to the optional EPHY clock for the internal PHY
+ * @regulator:	reference to the optional regulator
+ * @rst_ephy:	reference to the optional EPHY reset for the internal PHY
+ * @variant:	reference to the current board variant
+ * @regmap:	regmap for using the syscon
+ * @use_internal_phy: Does the current PHY choice imply using the internal PHY
+ */
+struct sunxi_priv_data {
+	struct clk *tx_clk;
+	struct clk *ephy_clk;
+	struct regulator *regulator;
+	struct reset_control *rst_ephy;
+	const struct emac_variant *variant;
+	struct regmap *regmap;
+	bool use_internal_phy;
+};
+
+static const struct emac_variant emac_variant_h3 = {
+	.default_syscon_value = 0x58000,
+	.internal_phy = PHY_INTERFACE_MODE_MII,
+	.support_mii = true,
+	.support_rmii = true,
+	.support_rgmii = true
+};
+
+static const struct emac_variant emac_variant_a83t = {
+	.default_syscon_value = 0,
+	.internal_phy = 0,
+	.support_mii = true,
+	.support_rgmii = true
+};
+
+static const struct emac_variant emac_variant_a64 = {
+	.default_syscon_value = 0,
+	.internal_phy = 0,
+	.support_mii = true,
+	.support_rmii = true,
+	.support_rgmii = true
+};
+
+#define EMAC_BASIC_CTL0 0x00
+#define EMAC_BASIC_CTL1 0x04
+#define EMAC_INT_STA    0x08
+#define EMAC_INT_EN     0x0C
+#define EMAC_TX_CTL0    0x10
+#define EMAC_TX_CTL1    0x14
+#define EMAC_TX_FLOW_CTL        0x1C
+#define EMAC_TX_DESC_LIST 0x20
+#define EMAC_RX_CTL0    0x24
+#define EMAC_RX_CTL1    0x28
+#define EMAC_RX_DESC_LIST 0x34
+#define EMAC_RX_FRM_FLT 0x38
+#define EMAC_MDIO_CMD   0x48
+#define EMAC_MDIO_DATA  0x4C
+#define EMAC_MACADDR_HI(reg) (0x50 + (reg) * 8)
+#define EMAC_MACADDR_LO(reg) (0x54 + (reg) * 8)
+#define EMAC_TX_DMA_STA 0xB0
+#define EMAC_TX_CUR_DESC        0xB4
+#define EMAC_TX_CUR_BUF 0xB8
+#define EMAC_RX_DMA_STA 0xC0
+#define EMAC_RX_CUR_DESC        0xC4
+#define EMAC_RX_CUR_BUF 0xC8
+
+/* Use in EMAC_BASIC_CTL0 */
+#define EMAC_DUPLEX_FULL	BIT(0)
+#define EMAC_LOOPBACK		BIT(1)
+#define EMAC_SPEED_1000 0
+#define EMAC_SPEED_100 (0x03 << 2)
+#define EMAC_SPEED_10 (0x02 << 2)
+
+/* Use in EMAC_BASIC_CTL1 */
+#define EMAC_BURSTLEN_SHIFT		24
+
+/* Used in EMAC_RX_FRM_FLT */
+#define EMAC_FRM_FLT_RXALL              BIT(0)
+#define EMAC_FRM_FLT_CTL                BIT(13)
+#define EMAC_FRM_FLT_MULTICAST          BIT(16)
+
+/* Used in RX_CTL1*/
+#define EMAC_RX_MD              BIT(1)
+#define EMAC_RX_TH_MASK		GENMASK(4, 5)
+#define EMAC_RX_TH_32		0
+#define EMAC_RX_TH_64		(0x1 << 4)
+#define EMAC_RX_TH_96		(0x2 << 4)
+#define EMAC_RX_TH_128		(0x3 << 4)
+#define EMAC_RX_DMA_EN  BIT(30)
+#define EMAC_RX_DMA_START       BIT(31)
+
+/* Used in TX_CTL1*/
+#define EMAC_TX_MD              BIT(1)
+#define EMAC_TX_NEXT_FRM        BIT(2)
+#define EMAC_TX_TH_MASK		GENMASK(8, 10)
+#define EMAC_TX_TH_64		0
+#define EMAC_TX_TH_128		(0x1 << 8)
+#define EMAC_TX_TH_192		(0x2 << 8)
+#define EMAC_TX_TH_256		(0x3 << 8)
+#define EMAC_TX_DMA_EN  BIT(30)
+#define EMAC_TX_DMA_START       BIT(31)
+
+/* Used in RX_CTL0 */
+#define EMAC_RX_RECEIVER_EN             BIT(31)
+#define EMAC_RX_DO_CRC BIT(27)
+#define EMAC_RX_FLOW_CTL_EN             BIT(16)
+
+/* Used in TX_CTL0 */
+#define EMAC_TX_TRANSMITTER_EN  BIT(31)
+
+/* Used in EMAC_TX_FLOW_CTL */
+#define EMAC_TX_FLOW_CTL_EN             BIT(0)
+
+/* Used in EMAC_INT_STA */
+#define EMAC_TX_INT             BIT(0)
+#define EMAC_TX_DMA_STOP_INT    BIT(1)
+#define EMAC_TX_BUF_UA_INT      BIT(2)
+#define EMAC_TX_TIMEOUT_INT     BIT(3)
+#define EMAC_TX_UNDERFLOW_INT   BIT(4)
+#define EMAC_TX_EARLY_INT       BIT(5)
+#define EMAC_RX_INT             BIT(8)
+#define EMAC_RX_BUF_UA_INT      BIT(9)
+#define EMAC_RX_DMA_STOP_INT    BIT(10)
+#define EMAC_RX_TIMEOUT_INT     BIT(11)
+#define EMAC_RX_OVERFLOW_INT    BIT(12)
+#define EMAC_RX_EARLY_INT       BIT(13)
+#define EMAC_RGMII_STA_INT      BIT(16)
+
+#define MAC_ADDR_TYPE_DST BIT(31)
+
+/* H3 specific bits for EPHY */
+#define H3_EPHY_ADDR_SHIFT	20
+#define H3_EPHY_LED_POL		BIT(17) /* 1: active low, 0: active high */
+#define H3_EPHY_SHUTDOWN	BIT(16) /* 1: shutdown, 0: power up */
+#define H3_EPHY_SELECT		BIT(15) /* 1: internal PHY, 0: external PHY */
+
+/* H3/A64 specific bits */
+#define SYSCON_RMII_EN		BIT(13) /* 1: enable RMII (overrides EPIT) */
+
+/* Generic system control EMAC_CLK bits */
+#define SYSCON_ETXDC_MASK		GENMASK(2, 0)
+#define SYSCON_ETXDC_SHIFT		10
+#define SYSCON_ERXDC_MASK		GENMASK(4, 0)
+#define SYSCON_ERXDC_SHIFT		5
+/* EMAC PHY Interface Type */
+#define SYSCON_EPIT			BIT(2) /* 1: RGMII, 0: MII */
+#define SYSCON_ETCS_MASK		GENMASK(1, 0)
+#define SYSCON_ETCS_MII		0x0
+#define SYSCON_ETCS_EXT_GMII	0x1
+#define SYSCON_ETCS_INT_GMII	0x2
+#define SYSCON_EMAC_REG		0x30
+
+/* sun8i_dwmac_dma_reset() - reset the EMAC
+ * Called from stmmac via stmmac_dma_ops->reset
+ */
+static int sun8i_dwmac_dma_reset(void __iomem *ioaddr)
+{
+	writel(0, ioaddr + EMAC_RX_CTL1);
+	writel(0, ioaddr + EMAC_TX_CTL1);
+	writel(0, ioaddr + EMAC_RX_FRM_FLT);
+	writel(0, ioaddr + EMAC_RX_DESC_LIST);
+	writel(0, ioaddr + EMAC_TX_DESC_LIST);
+	writel(0, ioaddr + EMAC_INT_EN);
+	writel(0x1FFFFFF, ioaddr + EMAC_INT_STA);
+	return 0;
+}
+
+/* sun8i_dwmac_dma_init() - initialize the EMAC
+ * Called from stmmac via stmmac_dma_ops->init
+ */
+static void sun8i_dwmac_dma_init(void __iomem *ioaddr,
+				 struct stmmac_dma_cfg *dma_cfg,
+				 u32 dma_tx, u32 dma_rx, int atds)
+{
+	/* Write TX and RX descriptors address */
+	writel(dma_rx, ioaddr + EMAC_RX_DESC_LIST);
+	writel(dma_tx, ioaddr + EMAC_TX_DESC_LIST);
+
+	writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN);
+	writel(0x1FFFFFF, ioaddr + EMAC_INT_STA);
+}
+
+/* sun8i_dwmac_dump_regs() - Dump EMAC address space
+ * Called from stmmac_dma_ops->dump_regs
+ * Used for ethtool
+ */
+static void sun8i_dwmac_dump_regs(void __iomem *ioaddr, u32 *reg_space)
+{
+	int i;
+
+	for (i = 0; i < 0xC8; i += 4) {
+		if (i == 0x32 || i == 0x3C)
+			continue;
+		reg_space[i / 4] = readl(ioaddr + i);
+	}
+}
+
+/* sun8i_dwmac_dump_mac_regs() - Dump EMAC address space
+ * Called from stmmac_ops->dump_regs
+ * Used for ethtool
+ */
+static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw,
+				      u32 *reg_space)
+{
+	int i;
+	void __iomem *ioaddr = hw->pcsr;
+
+	for (i = 0; i < 0xC8; i += 4) {
+		if (i == 0x32 || i == 0x3C)
+			continue;
+		reg_space[i / 4] = readl(ioaddr + i);
+	}
+}
+
+static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan)
+{
+	writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN);
+}
+
+static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan)
+{
+	writel(0, ioaddr + EMAC_INT_EN);
+}
+
+static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
+{
+	u32 v;
+
+	v = readl(ioaddr + EMAC_TX_CTL1);
+	v |= EMAC_TX_DMA_START;
+	v |= EMAC_TX_DMA_EN;
+	writel(v, ioaddr + EMAC_TX_CTL1);
+}
+
+static void sun8i_dwmac_enable_dma_transmission(void __iomem *ioaddr)
+{
+	u32 v;
+
+	v = readl(ioaddr + EMAC_TX_CTL1);
+	v |= EMAC_TX_DMA_START;
+	v |= EMAC_TX_DMA_EN;
+	writel(v, ioaddr + EMAC_TX_CTL1);
+}
+
+static void sun8i_dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
+{
+	u32 v;
+
+	v = readl(ioaddr + EMAC_TX_CTL1);
+	v &= ~EMAC_TX_DMA_EN;
+	writel(v, ioaddr + EMAC_TX_CTL1);
+}
+
+static void sun8i_dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
+{
+	u32 v;
+
+	v = readl(ioaddr + EMAC_RX_CTL1);
+	v |= EMAC_RX_DMA_START;
+	v |= EMAC_RX_DMA_EN;
+	writel(v, ioaddr + EMAC_RX_CTL1);
+}
+
+static void sun8i_dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
+{
+	u32 v;
+
+	v = readl(ioaddr + EMAC_RX_CTL1);
+	v &= ~EMAC_RX_DMA_EN;
+	writel(v, ioaddr + EMAC_RX_CTL1);
+}
+
+static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr,
+				     struct stmmac_extra_stats *x, u32 chan)
+{
+	u32 v;
+	int ret = 0;
+
+	v = readl(ioaddr + EMAC_INT_STA);
+
+	if (v & EMAC_TX_INT) {
+		ret |= handle_tx;
+		x->tx_normal_irq_n++;
+	}
+
+	if (v & EMAC_TX_DMA_STOP_INT)
+		x->tx_process_stopped_irq++;
+
+	if (v & EMAC_TX_BUF_UA_INT)
+		x->tx_process_stopped_irq++;
+
+	if (v & EMAC_TX_TIMEOUT_INT)
+		ret |= tx_hard_error;
+
+	if (v & EMAC_TX_UNDERFLOW_INT) {
+		ret |= tx_hard_error;
+		x->tx_undeflow_irq++;
+	}
+
+	if (v & EMAC_TX_EARLY_INT)
+		x->tx_early_irq++;
+
+	if (v & EMAC_RX_INT) {
+		ret |= handle_rx;
+		x->rx_normal_irq_n++;
+	}
+
+	if (v & EMAC_RX_BUF_UA_INT)
+		x->rx_buf_unav_irq++;
+
+	if (v & EMAC_RX_DMA_STOP_INT)
+		x->rx_process_stopped_irq++;
+
+	if (v & EMAC_RX_TIMEOUT_INT)
+		ret |= tx_hard_error;
+
+	if (v & EMAC_RX_OVERFLOW_INT) {
+		ret |= tx_hard_error;
+		x->rx_overflow_irq++;
+	}
+
+	if (v & EMAC_RX_EARLY_INT)
+		x->rx_early_irq++;
+
+	if (v & EMAC_RGMII_STA_INT)
+		x->irq_rgmii_n++;
+
+	writel(v, ioaddr + EMAC_INT_STA);
+
+	return ret;
+}
+
+static void sun8i_dwmac_dma_operation_mode(void __iomem *ioaddr, int txmode,
+					   int rxmode, int rxfifosz)
+{
+	u32 v;
+
+	v = readl(ioaddr + EMAC_TX_CTL1);
+	if (txmode == SF_DMA_MODE) {
+		v |= EMAC_TX_MD;
+		/* Undocumented bit (called TX_NEXT_FRM in BSP), the original
+		 * comment is
+		 * "Operating on second frame increase the performance
+		 * especially when transmit store-and-forward is used."
+		 */
+		v |= EMAC_TX_NEXT_FRM;
+	} else {
+		v &= ~EMAC_TX_MD;
+		v &= ~EMAC_TX_TH_MASK;
+		if (txmode < 64)
+			v |= EMAC_TX_TH_64;
+		else if (txmode < 128)
+			v |= EMAC_TX_TH_128;
+		else if (txmode < 192)
+			v |= EMAC_TX_TH_192;
+		else if (txmode < 256)
+			v |= EMAC_TX_TH_256;
+	}
+	writel(v, ioaddr + EMAC_TX_CTL1);
+
+	v = readl(ioaddr + EMAC_RX_CTL1);
+	if (rxmode == SF_DMA_MODE) {
+		v |= EMAC_RX_MD;
+	} else {
+		v &= ~EMAC_RX_MD;
+		v &= ~EMAC_RX_TH_MASK;
+		if (rxmode < 32)
+			v |= EMAC_RX_TH_32;
+		else if (rxmode < 64)
+			v |= EMAC_RX_TH_64;
+		else if (rxmode < 96)
+			v |= EMAC_RX_TH_96;
+		else if (rxmode < 128)
+			v |= EMAC_RX_TH_128;
+	}
+	writel(v, ioaddr + EMAC_RX_CTL1);
+}
+
+static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = {
+	.reset = sun8i_dwmac_dma_reset,
+	.init = sun8i_dwmac_dma_init,
+	.dump_regs = sun8i_dwmac_dump_regs,
+	.dma_mode = sun8i_dwmac_dma_operation_mode,
+	.enable_dma_transmission = sun8i_dwmac_enable_dma_transmission,
+	.enable_dma_irq = sun8i_dwmac_enable_dma_irq,
+	.disable_dma_irq = sun8i_dwmac_disable_dma_irq,
+	.start_tx = sun8i_dwmac_dma_start_tx,
+	.stop_tx = sun8i_dwmac_dma_stop_tx,
+	.start_rx = sun8i_dwmac_dma_start_rx,
+	.stop_rx = sun8i_dwmac_dma_stop_rx,
+	.dma_interrupt = sun8i_dwmac_dma_interrupt,
+};
+
+static int sun8i_dwmac_init(struct platform_device *pdev, void *priv)
+{
+	struct sunxi_priv_data *gmac = priv;
+	int ret;
+
+	if (gmac->regulator) {
+		ret = regulator_enable(gmac->regulator);
+		if (ret) {
+			dev_err(&pdev->dev, "Fail to enable regulator\n");
+			return ret;
+		}
+	}
+
+	ret = clk_prepare_enable(gmac->tx_clk);
+	if (ret) {
+		if (gmac->regulator)
+			regulator_disable(gmac->regulator);
+		dev_err(&pdev->dev, "Could not enable AHB clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void sun8i_dwmac_core_init(struct mac_device_info *hw, int mtu)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 v;
+
+	v = (8 << EMAC_BURSTLEN_SHIFT); /* burst len */
+	writel(v, ioaddr + EMAC_BASIC_CTL1);
+}
+
+static void sun8i_dwmac_set_mac(void __iomem *ioaddr, bool enable)
+{
+	u32 t, r;
+
+	t = readl(ioaddr + EMAC_TX_CTL0);
+	r = readl(ioaddr + EMAC_RX_CTL0);
+	if (enable) {
+		t |= EMAC_TX_TRANSMITTER_EN;
+		r |= EMAC_RX_RECEIVER_EN;
+	} else {
+		t &= ~EMAC_TX_TRANSMITTER_EN;
+		r &= ~EMAC_RX_RECEIVER_EN;
+	}
+	writel(t, ioaddr + EMAC_TX_CTL0);
+	writel(r, ioaddr + EMAC_RX_CTL0);
+}
+
+/* Set MAC address at slot reg_n
+ * All slot > 0 need to be enabled with MAC_ADDR_TYPE_DST
+ * If addr is NULL, clear the slot
+ */
+static void sun8i_dwmac_set_umac_addr(struct mac_device_info *hw,
+				      unsigned char *addr,
+				      unsigned int reg_n)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 v;
+
+	if (!addr) {
+		writel(0, ioaddr + EMAC_MACADDR_HI(reg_n));
+		return;
+	}
+
+	stmmac_set_mac_addr(ioaddr, addr, EMAC_MACADDR_HI(reg_n),
+			    EMAC_MACADDR_LO(reg_n));
+	if (reg_n > 0) {
+		v = readl(ioaddr + EMAC_MACADDR_HI(reg_n));
+		v |= MAC_ADDR_TYPE_DST;
+		writel(v, ioaddr + EMAC_MACADDR_HI(reg_n));
+	}
+}
+
+static void sun8i_dwmac_get_umac_addr(struct mac_device_info *hw,
+				      unsigned char *addr,
+				      unsigned int reg_n)
+{
+	void __iomem *ioaddr = hw->pcsr;
+
+	stmmac_get_mac_addr(ioaddr, addr, EMAC_MACADDR_HI(reg_n),
+			    EMAC_MACADDR_LO(reg_n));
+}
+
+/* caution this function must return non 0 to work */
+static int sun8i_dwmac_rx_ipc_enable(struct mac_device_info *hw)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 v;
+
+	v = readl(ioaddr + EMAC_RX_CTL0);
+	v |= EMAC_RX_DO_CRC;
+	writel(v, ioaddr + EMAC_RX_CTL0);
+
+	return 1;
+}
+
+static void sun8i_dwmac_set_filter(struct mac_device_info *hw,
+				   struct net_device *dev)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 v;
+	int i = 1;
+	struct netdev_hw_addr *ha;
+	int macaddrs = netdev_uc_count(dev) + netdev_mc_count(dev) + 1;
+
+	v = EMAC_FRM_FLT_CTL;
+
+	if (dev->flags & IFF_PROMISC) {
+		v = EMAC_FRM_FLT_RXALL;
+	} else if (dev->flags & IFF_ALLMULTI) {
+		v |= EMAC_FRM_FLT_MULTICAST;
+	} else if (macaddrs <= hw->unicast_filter_entries) {
+		if (!netdev_mc_empty(dev)) {
+			netdev_for_each_mc_addr(ha, dev) {
+				sun8i_dwmac_set_umac_addr(hw, ha->addr, i);
+				i++;
+			}
+		}
+		if (!netdev_uc_empty(dev)) {
+			netdev_for_each_uc_addr(ha, dev) {
+				sun8i_dwmac_set_umac_addr(hw, ha->addr, i);
+				i++;
+			}
+		}
+	} else {
+		netdev_info(dev, "Too many address, switching to promiscuous\n");
+		v = EMAC_FRM_FLT_RXALL;
+	}
+
+	/* Disable unused address filter slots */
+	while (i < hw->unicast_filter_entries)
+		sun8i_dwmac_set_umac_addr(hw, NULL, i++);
+
+	writel(v, ioaddr + EMAC_RX_FRM_FLT);
+}
+
+static void sun8i_dwmac_flow_ctrl(struct mac_device_info *hw,
+				  unsigned int duplex, unsigned int fc,
+				  unsigned int pause_time, u32 tx_cnt)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 v;
+
+	v = readl(ioaddr + EMAC_RX_CTL0);
+	if (fc == FLOW_AUTO)
+		v |= EMAC_RX_FLOW_CTL_EN;
+	else
+		v &= ~EMAC_RX_FLOW_CTL_EN;
+	writel(v, ioaddr + EMAC_RX_CTL0);
+
+	v = readl(ioaddr + EMAC_TX_FLOW_CTL);
+	if (fc == FLOW_AUTO)
+		v |= EMAC_TX_FLOW_CTL_EN;
+	else
+		v &= ~EMAC_TX_FLOW_CTL_EN;
+	writel(v, ioaddr + EMAC_TX_FLOW_CTL);
+}
+
+static int sun8i_dwmac_reset(struct stmmac_priv *priv)
+{
+	u32 v;
+	int err;
+
+	v = readl(priv->ioaddr + EMAC_BASIC_CTL1);
+	writel(v | 0x01, priv->ioaddr + EMAC_BASIC_CTL1);
+
+	/* The timeout was previoulsy set to 10ms, but some board (OrangePI0)
+	 * need more if no cable plugged. 100ms seems OK
+	 */
+	err = readl_poll_timeout(priv->ioaddr + EMAC_BASIC_CTL1, v,
+				 !(v & 0x01), 100, 100000);
+
+	if (err) {
+		dev_err(priv->device, "EMAC reset timeout\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
+{
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+	struct device_node *node = priv->device->of_node;
+	int ret;
+	u32 reg, val;
+
+	regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
+	reg = gmac->variant->default_syscon_value;
+	if (reg != val)
+		dev_warn(priv->device,
+			 "Current syscon value is not the default %x (expect %x)\n",
+			 val, reg);
+
+	if (gmac->variant->internal_phy) {
+		if (!gmac->use_internal_phy) {
+			/* switch to external PHY interface */
+			reg &= ~H3_EPHY_SELECT;
+		} else {
+			reg |= H3_EPHY_SELECT;
+			reg &= ~H3_EPHY_SHUTDOWN;
+			dev_dbg(priv->device, "Select internal_phy %x\n", reg);
+
+			if (of_property_read_bool(priv->plat->phy_node,
+						  "allwinner,leds-active-low"))
+				reg |= H3_EPHY_LED_POL;
+			else
+				reg &= ~H3_EPHY_LED_POL;
+
+			ret = of_mdio_parse_addr(priv->device,
+						 priv->plat->phy_node);
+			if (ret < 0) {
+				dev_err(priv->device, "Could not parse MDIO addr\n");
+				return ret;
+			}
+			/* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
+			 * address. No need to mask it again.
+			 */
+			reg |= ret << H3_EPHY_ADDR_SHIFT;
+		}
+	}
+
+	if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) {
+		if (val % 100) {
+			dev_err(priv->device, "tx-delay must be a multiple of 100\n");
+			return -EINVAL;
+		}
+		val /= 100;
+		dev_dbg(priv->device, "set tx-delay to %x\n", val);
+		if (val <= SYSCON_ETXDC_MASK) {
+			reg &= ~(SYSCON_ETXDC_MASK << SYSCON_ETXDC_SHIFT);
+			reg |= (val << SYSCON_ETXDC_SHIFT);
+		} else {
+			dev_err(priv->device, "Invalid TX clock delay: %d\n",
+				val);
+			return -EINVAL;
+		}
+	}
+
+	if (!of_property_read_u32(node, "allwinner,rx-delay-ps", &val)) {
+		if (val % 100) {
+			dev_err(priv->device, "rx-delay must be a multiple of 100\n");
+			return -EINVAL;
+		}
+		val /= 100;
+		dev_dbg(priv->device, "set rx-delay to %x\n", val);
+		if (val <= SYSCON_ERXDC_MASK) {
+			reg &= ~(SYSCON_ERXDC_MASK << SYSCON_ERXDC_SHIFT);
+			reg |= (val << SYSCON_ERXDC_SHIFT);
+		} else {
+			dev_err(priv->device, "Invalid RX clock delay: %d\n",
+				val);
+			return -EINVAL;
+		}
+	}
+
+	/* Clear interface mode bits */
+	reg &= ~(SYSCON_ETCS_MASK | SYSCON_EPIT);
+	if (gmac->variant->support_rmii)
+		reg &= ~SYSCON_RMII_EN;
+
+	switch (priv->plat->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		/* default */
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+		reg |= SYSCON_EPIT | SYSCON_ETCS_INT_GMII;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		reg |= SYSCON_RMII_EN | SYSCON_ETCS_EXT_GMII;
+		break;
+	default:
+		dev_err(priv->device, "Unsupported interface mode: %s",
+			phy_modes(priv->plat->interface));
+		return -EINVAL;
+	}
+
+	regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg);
+
+	return 0;
+}
+
+static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac)
+{
+	u32 reg = gmac->variant->default_syscon_value;
+
+	regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg);
+}
+
+static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv)
+{
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+	int ret;
+
+	if (!gmac->use_internal_phy)
+		return 0;
+
+	ret = clk_prepare_enable(gmac->ephy_clk);
+	if (ret) {
+		dev_err(priv->device, "Cannot enable ephy\n");
+		return ret;
+	}
+
+	ret = reset_control_deassert(gmac->rst_ephy);
+	if (ret) {
+		dev_err(priv->device, "Cannot deassert ephy\n");
+		clk_disable_unprepare(gmac->ephy_clk);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac)
+{
+	if (!gmac->use_internal_phy)
+		return 0;
+
+	clk_disable_unprepare(gmac->ephy_clk);
+	reset_control_assert(gmac->rst_ephy);
+	return 0;
+}
+
+/* sun8i_power_phy() - Activate the PHY:
+ * In case of error, no need to call sun8i_unpower_phy(),
+ * it will be called anyway by sun8i_dwmac_exit()
+ */
+static int sun8i_power_phy(struct stmmac_priv *priv)
+{
+	int ret;
+
+	ret = sun8i_dwmac_power_internal_phy(priv);
+	if (ret)
+		return ret;
+
+	ret = sun8i_dwmac_set_syscon(priv);
+	if (ret)
+		return ret;
+
+	/* After changing syscon value, the MAC need reset or it will use
+	 * the last value (and so the last PHY set.
+	 */
+	ret = sun8i_dwmac_reset(priv);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+static void sun8i_unpower_phy(struct sunxi_priv_data *gmac)
+{
+	sun8i_dwmac_unset_syscon(gmac);
+	sun8i_dwmac_unpower_internal_phy(gmac);
+}
+
+static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
+{
+	struct sunxi_priv_data *gmac = priv;
+
+	sun8i_unpower_phy(gmac);
+
+	clk_disable_unprepare(gmac->tx_clk);
+
+	if (gmac->regulator)
+		regulator_disable(gmac->regulator);
+}
+
+static const struct stmmac_ops sun8i_dwmac_ops = {
+	.core_init = sun8i_dwmac_core_init,
+	.set_mac = sun8i_dwmac_set_mac,
+	.dump_regs = sun8i_dwmac_dump_mac_regs,
+	.rx_ipc = sun8i_dwmac_rx_ipc_enable,
+	.set_filter = sun8i_dwmac_set_filter,
+	.flow_ctrl = sun8i_dwmac_flow_ctrl,
+	.set_umac_addr = sun8i_dwmac_set_umac_addr,
+	.get_umac_addr = sun8i_dwmac_get_umac_addr,
+};
+
+static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
+{
+	struct mac_device_info *mac;
+	struct stmmac_priv *priv = ppriv;
+	int ret;
+
+	mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL);
+	if (!mac)
+		return NULL;
+
+	ret = sun8i_power_phy(priv);
+	if (ret)
+		return NULL;
+
+	mac->pcsr = priv->ioaddr;
+	mac->mac = &sun8i_dwmac_ops;
+	mac->dma = &sun8i_dwmac_dma_ops;
+
+	/* The loopback bit seems to be re-set when link change
+	 * Simply mask it each time
+	 * Speed 10/100/1000 are set in BIT(2)/BIT(3)
+	 */
+	mac->link.speed_mask = GENMASK(3, 2) | EMAC_LOOPBACK;
+	mac->link.speed10 = EMAC_SPEED_10;
+	mac->link.speed100 = EMAC_SPEED_100;
+	mac->link.speed1000 = EMAC_SPEED_1000;
+	mac->link.duplex = EMAC_DUPLEX_FULL;
+	mac->mii.addr = EMAC_MDIO_CMD;
+	mac->mii.data = EMAC_MDIO_DATA;
+	mac->mii.reg_shift = 4;
+	mac->mii.reg_mask = GENMASK(8, 4);
+	mac->mii.addr_shift = 12;
+	mac->mii.addr_mask = GENMASK(16, 12);
+	mac->mii.clk_csr_shift = 20;
+	mac->mii.clk_csr_mask = GENMASK(22, 20);
+	mac->unicast_filter_entries = 8;
+
+	/* Synopsys Id is not available */
+	priv->synopsys_id = 0;
+
+	return mac;
+}
+
+static int sun8i_dwmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct sunxi_priv_data *gmac;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
+
+	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
+	if (!gmac)
+		return -ENOMEM;
+
+	gmac->variant = of_device_get_match_data(&pdev->dev);
+	if (!gmac->variant) {
+		dev_err(&pdev->dev, "Missing dwmac-sun8i variant\n");
+		return -EINVAL;
+	}
+
+	gmac->tx_clk = devm_clk_get(dev, "stmmaceth");
+	if (IS_ERR(gmac->tx_clk)) {
+		dev_err(dev, "Could not get TX clock\n");
+		return PTR_ERR(gmac->tx_clk);
+	}
+
+	/* Optional regulator for PHY */
+	gmac->regulator = devm_regulator_get_optional(dev, "phy");
+	if (IS_ERR(gmac->regulator)) {
+		if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No regulator found\n");
+		gmac->regulator = NULL;
+	}
+
+	gmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+						       "syscon");
+	if (IS_ERR(gmac->regmap)) {
+		ret = PTR_ERR(gmac->regmap);
+		dev_err(&pdev->dev, "Unable to map syscon: %d\n", ret);
+		return ret;
+	}
+
+	plat_dat->interface = of_get_phy_mode(dev->of_node);
+	if (plat_dat->interface == gmac->variant->internal_phy) {
+		dev_info(&pdev->dev, "Will use internal PHY\n");
+		gmac->use_internal_phy = true;
+		gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
+		if (IS_ERR(gmac->ephy_clk)) {
+			ret = PTR_ERR(gmac->ephy_clk);
+			dev_err(&pdev->dev, "Cannot get EPHY clock: %d\n", ret);
+			return -EINVAL;
+		}
+
+		gmac->rst_ephy = of_reset_control_get(plat_dat->phy_node, NULL);
+		if (IS_ERR(gmac->rst_ephy)) {
+			ret = PTR_ERR(gmac->rst_ephy);
+			if (ret == -EPROBE_DEFER)
+				return ret;
+			dev_err(&pdev->dev, "No EPHY reset control found %d\n",
+				ret);
+			return -EINVAL;
+		}
+	} else {
+		dev_info(&pdev->dev, "Will use external PHY\n");
+		gmac->use_internal_phy = false;
+	}
+
+	/* platform data specifying hardware features and callbacks.
+	 * hardware features were copied from Allwinner drivers.
+	 */
+	plat_dat->rx_coe = STMMAC_RX_COE_TYPE2;
+	plat_dat->tx_coe = 1;
+	plat_dat->has_sun8i = true;
+	plat_dat->bsp_priv = gmac;
+	plat_dat->init = sun8i_dwmac_init;
+	plat_dat->exit = sun8i_dwmac_exit;
+	plat_dat->setup = sun8i_dwmac_setup;
+
+	ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
+	if (ret)
+		return ret;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+
+	return ret;
+}
+
+static const struct of_device_id sun8i_dwmac_match[] = {
+	{ .compatible = "allwinner,sun8i-h3-emac",
+		.data = &emac_variant_h3 },
+	{ .compatible = "allwinner,sun8i-a83t-emac",
+		.data = &emac_variant_a83t },
+	{ .compatible = "allwinner,sun50i-a64-emac",
+		.data = &emac_variant_a64 },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sun8i_dwmac_match);
+
+static struct platform_driver sun8i_dwmac_driver = {
+	.probe  = sun8i_dwmac_probe,
+	.remove = stmmac_pltfr_remove,
+	.driver = {
+		.name           = "dwmac-sun8i",
+		.pm		= &stmmac_pltfr_pm_ops,
+		.of_match_table = sun8i_dwmac_match,
+	},
+};
+module_platform_driver(sun8i_dwmac_driver);
+
+MODULE_AUTHOR("Corentin Labbe <clabbe.montjoie@gmail.com>");
+MODULE_DESCRIPTION("Allwinner sun8i DWMAC specific glue layer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c80c9c3b67db..68a188e74c54 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -235,6 +235,17 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv)
 		else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M))
 			priv->clk_csr = STMMAC_CSR_250_300M;
 	}
+
+	if (priv->plat->has_sun8i) {
+		if (clk_rate > 160000000)
+			priv->clk_csr = 0x03;
+		else if (clk_rate > 80000000)
+			priv->clk_csr = 0x02;
+		else if (clk_rate > 40000000)
+			priv->clk_csr = 0x01;
+		else
+			priv->clk_csr = 0;
+	}
 }
 
 static void print_pkt(unsigned char *buf, int len)
@@ -3955,6 +3966,10 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
 	priv->hw = mac;
 
+	/* dwmac-sun8i only work in chain mode */
+	if (priv->plat->has_sun8i)
+		chain_mode = 1;
+
 	/* To use the chained or ring mode */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 		priv->hw->mode = &dwmac4_ring_mode_ops;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 7fc3a1ef395a..3840529344ed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -309,6 +309,12 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 			 struct device_node *np, struct device *dev)
 {
 	bool mdio = true;
+	static const struct of_device_id need_mdio_ids[] = {
+		{ .compatible = "snps,dwc-qos-ethernet-4.10" },
+		{ .compatible = "allwinner,sun8i-a83t-emac" },
+		{ .compatible = "allwinner,sun8i-h3-emac" },
+		{ .compatible = "allwinner,sun50i-a64-emac" },
+	};
 
 	/* If phy-handle property is passed from DT, use it as the PHY */
 	plat->phy_node = of_parse_phandle(np, "phy-handle", 0);
@@ -325,8 +331,7 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 		mdio = false;
 	}
 
-	/* exception for dwmac-dwc-qos-eth glue logic */
-	if (of_device_is_compatible(np, "snps,dwc-qos-ethernet-4.10")) {
+	if (of_match_node(need_mdio_ids, np)) {
 		plat->mdio_node = of_get_child_by_name(np, "mdio");
 	} else {
 		/**
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 8bb550bca96d..108739ff9223 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -186,6 +186,7 @@ struct plat_stmmacenet_data {
 	struct reset_control *stmmac_rst;
 	struct stmmac_axi *axi;
 	int has_gmac4;
+	bool has_sun8i;
 	bool tso_en;
 	int mac_port_sel_speed;
 	bool en_tx_lpi_clockgating;
-- 
cgit v1.2.3


From 9efc160f4bbd69b17b48edec53067537d04e62b7 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Wed, 31 May 2017 14:43:46 -0700
Subject: block: Introduce queue flag QUEUE_FLAG_SCSI_PASSTHROUGH

From the context where a SCSI command is submitted it is not always
possible to figure out whether or not the queue the command is
submitted to has struct scsi_request as the first member of its
private data. Hence introduce the flag QUEUE_FLAG_SCSI_PASSTHROUGH.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Don Brace <don.brace@microsemi.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bsg-lib.c                   | 1 +
 drivers/block/cciss.c             | 1 +
 drivers/ide/ide-probe.c           | 1 +
 drivers/scsi/scsi_lib.c           | 2 ++
 drivers/scsi/scsi_transport_sas.c | 1 +
 include/linux/blkdev.h            | 3 +++
 6 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 0a23dbba2d30..9b91daefcd9b 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 	q->bsg_job_size = dd_job_size;
 	q->bsg_job_fn = job_fn;
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd375503f7b0..3761066fe89d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1956,6 +1956,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 	disk->queue->cmd_size = sizeof(struct scsi_request);
 	disk->queue->request_fn = do_cciss_request;
 	disk->queue->queue_lock = &h->lock;
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
 	if (blk_init_allocated_queue(disk->queue) < 0)
 		goto cleanup_queue;
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 023562565d11..b3f85250dea9 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -773,6 +773,7 @@ static int ide_init_queue(ide_drive_t *drive)
 	q->request_fn = do_ide_request;
 	q->init_rq_fn = ide_init_rq;
 	q->cmd_size = sizeof(struct ide_request);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);
 		return 1;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 99e16ac479e3..884aaa84c2dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2057,6 +2057,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
 	struct device *dev = shost->dma_dev;
 
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
 	/*
 	 * this limit is imposed by hardware restrictions
 	 */
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1bb908..d16414bfe2ef 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -264,6 +264,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 		q->queuedata = shost;
 
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	return 0;
 
 out_cleanup_queue:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ab92c4ea138b..019f18c65098 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -618,6 +618,7 @@ struct request_queue {
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
 #define QUEUE_FLAG_POLL_STATS  28	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  29	/* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 30	/* queue supports SCSI commands */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -708,6 +709,8 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_secure_erase(q) \
 	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_scsi_passthrough(q)	\
+	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
-- 
cgit v1.2.3


From 681bdf80cff6844f81216b6b05516d82f69c23fd Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:09 -0700
Subject: i40e/i40evf: create and use new unified header file

This moves a header for i40evf to include/linux/avf/virtchnl.h.
The directory name AVF is an acronym for the Intel(R) Adaptive
Virtual Function.

This first step creates the new file, which is a rename of
drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h to
include/linux/avf/virtchnl.h, and should show up in git
as a rename when using git log --follow.

To keep things building after the move, the changes to the i40evf
driver are made to point to the new include file location.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 MAINTAINERS                                        |   1 +
 drivers/net/ethernet/intel/i40evf/i40e_common.c    |   2 +-
 drivers/net/ethernet/intel/i40evf/i40e_prototype.h |   2 +-
 drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h  | 446 ---------------------
 drivers/net/ethernet/intel/i40evf/i40evf.h         |   2 +-
 include/linux/avf/virtchnl.h                       | 446 +++++++++++++++++++++
 6 files changed, 450 insertions(+), 449 deletions(-)
 delete mode 100644 drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
 create mode 100644 include/linux/avf/virtchnl.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 0fcb5e751ca7..6b7625ff9875 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6738,6 +6738,7 @@ F:	Documentation/networking/i40e.txt
 F:	Documentation/networking/i40evf.txt
 F:	drivers/net/ethernet/intel/
 F:	drivers/net/ethernet/intel/*/
+F:	include/linux/avf/virtchnl.h
 
 INTEL RDMA RNIC DRIVER
 M:     Faisal Latif <faisal.latif@intel.com>
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 6729624fda5b..1db028ac96f4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -27,7 +27,7 @@
 #include "i40e_type.h"
 #include "i40e_adminq.h"
 #include "i40e_prototype.h"
-#include "i40e_virtchnl.h"
+#include <linux/avf/virtchnl.h>
 
 /**
  * i40e_set_mac_type - Sets MAC type
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 741223d5d809..227905b23690 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -29,7 +29,7 @@
 
 #include "i40e_type.h"
 #include "i40e_alloc.h"
-#include "i40e_virtchnl.h"
+#include <linux/avf/virtchnl.h>
 
 /* Prototypes for shared code functions that are not in
  * the standard function pointer structures.  These are
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
deleted file mode 100644
index 7d6da3ac24f4..000000000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#ifndef _I40E_VIRTCHNL_H_
-#define _I40E_VIRTCHNL_H_
-
-/* Description:
- * This header file describes the VF-PF communication protocol used
- * by the various i40e drivers.
- *
- * Admin queue buffer usage:
- * desc->opcode is always i40e_aqc_opc_send_msg_to_pf
- * flags, retval, datalen, and data addr are all used normally.
- * Firmware copies the cookie fields when sending messages between the PF and
- * VF, but uses all other fields internally. Due to this limitation, we
- * must send all messages as "indirect", i.e. using an external buffer.
- *
- * All the vsi indexes are relative to the VF. Each VF can have maximum of
- * three VSIs. All the queue indexes are relative to the VSI.  Each VF can
- * have a maximum of sixteen queues for all of its VSIs.
- *
- * The PF is required to return a status code in v_retval for all messages
- * except RESET_VF, which does not require any response. The return value is of
- * i40e_status_code type, defined in the i40e_type.h.
- *
- * In general, VF driver initialization should roughly follow the order of these
- * opcodes. The VF driver must first validate the API version of the PF driver,
- * then request a reset, then get resources, then configure queues and
- * interrupts. After these operations are complete, the VF driver may start
- * its queues, optionally add MAC and VLAN filters, and process traffic.
- */
-
-/* Opcodes for VF-PF communication. These are placed in the v_opcode field
- * of the virtchnl_msg structure.
- */
-enum i40e_virtchnl_ops {
-/* The PF sends status change events to VFs using
- * the I40E_VIRTCHNL_OP_EVENT opcode.
- * VFs send requests to the PF using the other ops.
- */
-	I40E_VIRTCHNL_OP_UNKNOWN = 0,
-	I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
-	I40E_VIRTCHNL_OP_RESET_VF = 2,
-	I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3,
-	I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4,
-	I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5,
-	I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6,
-	I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7,
-	I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8,
-	I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9,
-	I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10,
-	I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11,
-	I40E_VIRTCHNL_OP_ADD_VLAN = 12,
-	I40E_VIRTCHNL_OP_DEL_VLAN = 13,
-	I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
-	I40E_VIRTCHNL_OP_GET_STATS = 15,
-	I40E_VIRTCHNL_OP_RSVD = 16,
-	I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
-	I40E_VIRTCHNL_OP_IWARP = 20,
-	I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
-	I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
-	I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
-	I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
-	I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
-	I40E_VIRTCHNL_OP_SET_RSS_HENA = 26,
-
-};
-
-/* Virtual channel message descriptor. This overlays the admin queue
- * descriptor. All other data is passed in external buffers.
- */
-
-struct i40e_virtchnl_msg {
-	u8 pad[8];			 /* AQ flags/opcode/len/retval fields */
-	enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
-	i40e_status v_retval;  /* ditto for desc->retval */
-	u32 vfid;			 /* used by PF when sending to VF */
-};
-
-/* Message descriptions and data structures.*/
-
-/* I40E_VIRTCHNL_OP_VERSION
- * VF posts its version number to the PF. PF responds with its version number
- * in the same format, along with a return code.
- * Reply from PF has its major/minor versions also in param0 and param1.
- * If there is a major version mismatch, then the VF cannot operate.
- * If there is a minor version mismatch, then the VF can operate but should
- * add a warning to the system log.
- *
- * This enum element MUST always be specified as == 1, regardless of other
- * changes in the API. The PF must always respond to this message without
- * error regardless of version mismatch.
- */
-#define I40E_VIRTCHNL_VERSION_MAJOR		1
-#define I40E_VIRTCHNL_VERSION_MINOR		1
-#define I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS	0
-
-struct i40e_virtchnl_version_info {
-	u32 major;
-	u32 minor;
-};
-
-/* I40E_VIRTCHNL_OP_RESET_VF
- * VF sends this request to PF with no parameters
- * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register
- * until reset completion is indicated. The admin queue must be reinitialized
- * after this operation.
- *
- * When reset is complete, PF must ensure that all queues in all VSIs associated
- * with the VF are stopped, all queue configurations in the HMC are set to 0,
- * and all MAC and VLAN filters (except the default MAC address) on all VSIs
- * are cleared.
- */
-
-/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES
- * Version 1.0 VF sends this request to PF with no parameters
- * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities
- * PF responds with an indirect message containing
- * i40e_virtchnl_vf_resource and one or more
- * i40e_virtchnl_vsi_resource structures.
- */
-
-struct i40e_virtchnl_vsi_resource {
-	u16 vsi_id;
-	u16 num_queue_pairs;
-	enum i40e_vsi_type vsi_type;
-	u16 qset_handle;
-	u8 default_mac_addr[ETH_ALEN];
-};
-/* VF offload flags */
-#define I40E_VIRTCHNL_VF_OFFLOAD_L2		0x00000001
-#define I40E_VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG	0x00000010
-#define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
-#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
-#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
-#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
-#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00200000
-
-#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
-				    I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
-				    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
-
-struct i40e_virtchnl_vf_resource {
-	u16 num_vsis;
-	u16 num_queue_pairs;
-	u16 max_vectors;
-	u16 max_mtu;
-
-	u32 vf_offload_flags;
-	u32 rss_key_size;
-	u32 rss_lut_size;
-
-	struct i40e_virtchnl_vsi_resource vsi_res[1];
-};
-
-/* I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE
- * VF sends this message to set up parameters for one TX queue.
- * External data buffer contains one instance of i40e_virtchnl_txq_info.
- * PF configures requested queue and returns a status code.
- */
-
-/* Tx queue config info */
-struct i40e_virtchnl_txq_info {
-	u16 vsi_id;
-	u16 queue_id;
-	u16 ring_len;		/* number of descriptors, multiple of 8 */
-	u16 headwb_enabled;
-	u64 dma_ring_addr;
-	u64 dma_headwb_addr;
-};
-
-/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE
- * VF sends this message to set up parameters for one RX queue.
- * External data buffer contains one instance of i40e_virtchnl_rxq_info.
- * PF configures requested queue and returns a status code.
- */
-
-/* Rx queue config info */
-struct i40e_virtchnl_rxq_info {
-	u16 vsi_id;
-	u16 queue_id;
-	u32 ring_len;		/* number of descriptors, multiple of 32 */
-	u16 hdr_size;
-	u16 splithdr_enabled;
-	u32 databuffer_size;
-	u32 max_pkt_size;
-	u64 dma_ring_addr;
-	enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos;
-};
-
-/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES
- * VF sends this message to set parameters for all active TX and RX queues
- * associated with the specified VSI.
- * PF configures queues and returns status.
- * If the number of queues specified is greater than the number of queues
- * associated with the VSI, an error is returned and no queues are configured.
- */
-struct i40e_virtchnl_queue_pair_info {
-	/* NOTE: vsi_id and queue_id should be identical for both queues. */
-	struct i40e_virtchnl_txq_info txq;
-	struct i40e_virtchnl_rxq_info rxq;
-};
-
-struct i40e_virtchnl_vsi_queue_config_info {
-	u16 vsi_id;
-	u16 num_queue_pairs;
-	struct i40e_virtchnl_queue_pair_info qpair[1];
-};
-
-/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP
- * VF uses this message to map vectors to queues.
- * The rxq_map and txq_map fields are bitmaps used to indicate which queues
- * are to be associated with the specified vector.
- * The "other" causes are always mapped to vector 0.
- * PF configures interrupt mapping and returns status.
- */
-struct i40e_virtchnl_vector_map {
-	u16 vsi_id;
-	u16 vector_id;
-	u16 rxq_map;
-	u16 txq_map;
-	u16 rxitr_idx;
-	u16 txitr_idx;
-};
-
-struct i40e_virtchnl_irq_map_info {
-	u16 num_vectors;
-	struct i40e_virtchnl_vector_map vecmap[1];
-};
-
-/* I40E_VIRTCHNL_OP_ENABLE_QUEUES
- * I40E_VIRTCHNL_OP_DISABLE_QUEUES
- * VF sends these message to enable or disable TX/RX queue pairs.
- * The queues fields are bitmaps indicating which queues to act upon.
- * (Currently, we only support 16 queues per VF, but we make the field
- * u32 to allow for expansion.)
- * PF performs requested action and returns status.
- */
-struct i40e_virtchnl_queue_select {
-	u16 vsi_id;
-	u16 pad;
-	u32 rx_queues;
-	u32 tx_queues;
-};
-
-/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS
- * VF sends this message in order to add one or more unicast or multicast
- * address filters for the specified VSI.
- * PF adds the filters and returns status.
- */
-
-/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS
- * VF sends this message in order to remove one or more unicast or multicast
- * filters for the specified VSI.
- * PF removes the filters and returns status.
- */
-
-struct i40e_virtchnl_ether_addr {
-	u8 addr[ETH_ALEN];
-	u8 pad[2];
-};
-
-struct i40e_virtchnl_ether_addr_list {
-	u16 vsi_id;
-	u16 num_elements;
-	struct i40e_virtchnl_ether_addr list[1];
-};
-
-/* I40E_VIRTCHNL_OP_ADD_VLAN
- * VF sends this message to add one or more VLAN tag filters for receives.
- * PF adds the filters and returns status.
- * If a port VLAN is configured by the PF, this operation will return an
- * error to the VF.
- */
-
-/* I40E_VIRTCHNL_OP_DEL_VLAN
- * VF sends this message to remove one or more VLAN tag filters for receives.
- * PF removes the filters and returns status.
- * If a port VLAN is configured by the PF, this operation will return an
- * error to the VF.
- */
-
-struct i40e_virtchnl_vlan_filter_list {
-	u16 vsi_id;
-	u16 num_elements;
-	u16 vlan_id[1];
-};
-
-/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
- * VF sends VSI id and flags.
- * PF returns status code in retval.
- * Note: we assume that broadcast accept mode is always enabled.
- */
-struct i40e_virtchnl_promisc_info {
-	u16 vsi_id;
-	u16 flags;
-};
-
-#define I40E_FLAG_VF_UNICAST_PROMISC	0x00000001
-#define I40E_FLAG_VF_MULTICAST_PROMISC	0x00000002
-
-/* I40E_VIRTCHNL_OP_GET_STATS
- * VF sends this message to request stats for the selected VSI. VF uses
- * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id
- * field is ignored by the PF.
- *
- * PF replies with struct i40e_eth_stats in an external buffer.
- */
-
-/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY
- * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT
- * VF sends these messages to configure RSS. Only supported if both PF
- * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
- * configuration negotiation. If this is the case, then the RSS fields in
- * the VF resource struct are valid.
- * Both the key and LUT are initialized to 0 by the PF, meaning that
- * RSS is effectively disabled until set up by the VF.
- */
-struct i40e_virtchnl_rss_key {
-	u16 vsi_id;
-	u16 key_len;
-	u8 key[1];         /* RSS hash key, packed bytes */
-};
-
-struct i40e_virtchnl_rss_lut {
-	u16 vsi_id;
-	u16 lut_entries;
-	u8 lut[1];        /* RSS lookup table*/
-};
-
-/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS
- * I40E_VIRTCHNL_OP_SET_RSS_HENA
- * VF sends these messages to get and set the hash filter enable bits for RSS.
- * By default, the PF sets these to all possible traffic types that the
- * hardware supports. The VF can query this value if it wants to change the
- * traffic types that are hashed by the hardware.
- * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h
- */
-struct i40e_virtchnl_rss_hena {
-	u64 hena;
-};
-
-/* I40E_VIRTCHNL_OP_EVENT
- * PF sends this message to inform the VF driver of events that may affect it.
- * No direct response is expected from the VF, though it may generate other
- * messages in response to this one.
- */
-enum i40e_virtchnl_event_codes {
-	I40E_VIRTCHNL_EVENT_UNKNOWN = 0,
-	I40E_VIRTCHNL_EVENT_LINK_CHANGE,
-	I40E_VIRTCHNL_EVENT_RESET_IMPENDING,
-	I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
-};
-#define I40E_PF_EVENT_SEVERITY_INFO		0
-#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM	255
-
-struct i40e_virtchnl_pf_event {
-	enum i40e_virtchnl_event_codes event;
-	union {
-		struct {
-			enum i40e_aq_link_speed link_speed;
-			bool link_status;
-		} link_event;
-	} event_data;
-
-	int severity;
-};
-
-/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
- * VF uses this message to request PF to map IWARP vectors to IWARP queues.
- * The request for this originates from the VF IWARP driver through
- * a client interface between VF LAN and VF IWARP driver.
- * A vector could have an AEQ and CEQ attached to it although
- * there is a single AEQ per VF IWARP instance in which case
- * most vectors will have an INVALID_IDX for aeq and valid idx for ceq.
- * There will never be a case where there will be multiple CEQs attached
- * to a single vector.
- * PF configures interrupt mapping and returns status.
- */
-
-/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
- * In order for us to keep the interface simple, SW will define a
- * unique type value for AEQ.
- */
-#define I40E_QUEUE_TYPE_PE_AEQ  0x80
-#define I40E_QUEUE_INVALID_IDX  0xFFFF
-
-struct i40e_virtchnl_iwarp_qv_info {
-	u32 v_idx; /* msix_vector */
-	u16 ceq_idx;
-	u16 aeq_idx;
-	u8 itr_idx;
-};
-
-struct i40e_virtchnl_iwarp_qvlist_info {
-	u32 num_vectors;
-	struct i40e_virtchnl_iwarp_qv_info qv_info[1];
-};
-
-/* VF reset states - these are written into the RSTAT register:
- * I40E_VFGEN_RSTAT1 on the PF
- * I40E_VFGEN_RSTAT on the VF
- * When the PF initiates a reset, it writes 0
- * When the reset is complete, it writes 1
- * When the PF detects that the VF has recovered, it writes 2
- * VF checks this register periodically to determine if a reset has occurred,
- * then polls it to know when the reset is complete.
- * If either the PF or VF reads the register while the hardware
- * is in a reset state, it will return DEADBEEF, which, when masked
- * will result in 3.
- */
-enum i40e_vfr_states {
-	I40E_VFR_INPROGRESS = 0,
-	I40E_VFR_COMPLETED,
-	I40E_VFR_VFACTIVE,
-	I40E_VFR_UNKNOWN,
-};
-
-#endif /* _I40E_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index b8ada6d8d890..75d314b1a9bb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -43,7 +43,7 @@
 #include <net/udp.h>
 
 #include "i40e_type.h"
-#include "i40e_virtchnl.h"
+#include <linux/avf/virtchnl.h>
 #include "i40e_txrx.h"
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
new file mode 100644
index 000000000000..7d6da3ac24f4
--- /dev/null
+++ b/include/linux/avf/virtchnl.h
@@ -0,0 +1,446 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
+ * Copyright(c) 2013 - 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_VIRTCHNL_H_
+#define _I40E_VIRTCHNL_H_
+
+/* Description:
+ * This header file describes the VF-PF communication protocol used
+ * by the various i40e drivers.
+ *
+ * Admin queue buffer usage:
+ * desc->opcode is always i40e_aqc_opc_send_msg_to_pf
+ * flags, retval, datalen, and data addr are all used normally.
+ * Firmware copies the cookie fields when sending messages between the PF and
+ * VF, but uses all other fields internally. Due to this limitation, we
+ * must send all messages as "indirect", i.e. using an external buffer.
+ *
+ * All the vsi indexes are relative to the VF. Each VF can have maximum of
+ * three VSIs. All the queue indexes are relative to the VSI.  Each VF can
+ * have a maximum of sixteen queues for all of its VSIs.
+ *
+ * The PF is required to return a status code in v_retval for all messages
+ * except RESET_VF, which does not require any response. The return value is of
+ * i40e_status_code type, defined in the i40e_type.h.
+ *
+ * In general, VF driver initialization should roughly follow the order of these
+ * opcodes. The VF driver must first validate the API version of the PF driver,
+ * then request a reset, then get resources, then configure queues and
+ * interrupts. After these operations are complete, the VF driver may start
+ * its queues, optionally add MAC and VLAN filters, and process traffic.
+ */
+
+/* Opcodes for VF-PF communication. These are placed in the v_opcode field
+ * of the virtchnl_msg structure.
+ */
+enum i40e_virtchnl_ops {
+/* The PF sends status change events to VFs using
+ * the I40E_VIRTCHNL_OP_EVENT opcode.
+ * VFs send requests to the PF using the other ops.
+ */
+	I40E_VIRTCHNL_OP_UNKNOWN = 0,
+	I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
+	I40E_VIRTCHNL_OP_RESET_VF = 2,
+	I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3,
+	I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4,
+	I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5,
+	I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6,
+	I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7,
+	I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8,
+	I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9,
+	I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10,
+	I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11,
+	I40E_VIRTCHNL_OP_ADD_VLAN = 12,
+	I40E_VIRTCHNL_OP_DEL_VLAN = 13,
+	I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
+	I40E_VIRTCHNL_OP_GET_STATS = 15,
+	I40E_VIRTCHNL_OP_RSVD = 16,
+	I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
+	I40E_VIRTCHNL_OP_IWARP = 20,
+	I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
+	I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
+	I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
+	I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
+	I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
+	I40E_VIRTCHNL_OP_SET_RSS_HENA = 26,
+
+};
+
+/* Virtual channel message descriptor. This overlays the admin queue
+ * descriptor. All other data is passed in external buffers.
+ */
+
+struct i40e_virtchnl_msg {
+	u8 pad[8];			 /* AQ flags/opcode/len/retval fields */
+	enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
+	i40e_status v_retval;  /* ditto for desc->retval */
+	u32 vfid;			 /* used by PF when sending to VF */
+};
+
+/* Message descriptions and data structures.*/
+
+/* I40E_VIRTCHNL_OP_VERSION
+ * VF posts its version number to the PF. PF responds with its version number
+ * in the same format, along with a return code.
+ * Reply from PF has its major/minor versions also in param0 and param1.
+ * If there is a major version mismatch, then the VF cannot operate.
+ * If there is a minor version mismatch, then the VF can operate but should
+ * add a warning to the system log.
+ *
+ * This enum element MUST always be specified as == 1, regardless of other
+ * changes in the API. The PF must always respond to this message without
+ * error regardless of version mismatch.
+ */
+#define I40E_VIRTCHNL_VERSION_MAJOR		1
+#define I40E_VIRTCHNL_VERSION_MINOR		1
+#define I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS	0
+
+struct i40e_virtchnl_version_info {
+	u32 major;
+	u32 minor;
+};
+
+/* I40E_VIRTCHNL_OP_RESET_VF
+ * VF sends this request to PF with no parameters
+ * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register
+ * until reset completion is indicated. The admin queue must be reinitialized
+ * after this operation.
+ *
+ * When reset is complete, PF must ensure that all queues in all VSIs associated
+ * with the VF are stopped, all queue configurations in the HMC are set to 0,
+ * and all MAC and VLAN filters (except the default MAC address) on all VSIs
+ * are cleared.
+ */
+
+/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES
+ * Version 1.0 VF sends this request to PF with no parameters
+ * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities
+ * PF responds with an indirect message containing
+ * i40e_virtchnl_vf_resource and one or more
+ * i40e_virtchnl_vsi_resource structures.
+ */
+
+struct i40e_virtchnl_vsi_resource {
+	u16 vsi_id;
+	u16 num_queue_pairs;
+	enum i40e_vsi_type vsi_type;
+	u16 qset_handle;
+	u8 default_mac_addr[ETH_ALEN];
+};
+/* VF offload flags */
+#define I40E_VIRTCHNL_VF_OFFLOAD_L2		0x00000001
+#define I40E_VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG	0x00000010
+#define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
+#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00200000
+
+#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
+				    I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
+				    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
+
+struct i40e_virtchnl_vf_resource {
+	u16 num_vsis;
+	u16 num_queue_pairs;
+	u16 max_vectors;
+	u16 max_mtu;
+
+	u32 vf_offload_flags;
+	u32 rss_key_size;
+	u32 rss_lut_size;
+
+	struct i40e_virtchnl_vsi_resource vsi_res[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE
+ * VF sends this message to set up parameters for one TX queue.
+ * External data buffer contains one instance of i40e_virtchnl_txq_info.
+ * PF configures requested queue and returns a status code.
+ */
+
+/* Tx queue config info */
+struct i40e_virtchnl_txq_info {
+	u16 vsi_id;
+	u16 queue_id;
+	u16 ring_len;		/* number of descriptors, multiple of 8 */
+	u16 headwb_enabled;
+	u64 dma_ring_addr;
+	u64 dma_headwb_addr;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE
+ * VF sends this message to set up parameters for one RX queue.
+ * External data buffer contains one instance of i40e_virtchnl_rxq_info.
+ * PF configures requested queue and returns a status code.
+ */
+
+/* Rx queue config info */
+struct i40e_virtchnl_rxq_info {
+	u16 vsi_id;
+	u16 queue_id;
+	u32 ring_len;		/* number of descriptors, multiple of 32 */
+	u16 hdr_size;
+	u16 splithdr_enabled;
+	u32 databuffer_size;
+	u32 max_pkt_size;
+	u64 dma_ring_addr;
+	enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES
+ * VF sends this message to set parameters for all active TX and RX queues
+ * associated with the specified VSI.
+ * PF configures queues and returns status.
+ * If the number of queues specified is greater than the number of queues
+ * associated with the VSI, an error is returned and no queues are configured.
+ */
+struct i40e_virtchnl_queue_pair_info {
+	/* NOTE: vsi_id and queue_id should be identical for both queues. */
+	struct i40e_virtchnl_txq_info txq;
+	struct i40e_virtchnl_rxq_info rxq;
+};
+
+struct i40e_virtchnl_vsi_queue_config_info {
+	u16 vsi_id;
+	u16 num_queue_pairs;
+	struct i40e_virtchnl_queue_pair_info qpair[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP
+ * VF uses this message to map vectors to queues.
+ * The rxq_map and txq_map fields are bitmaps used to indicate which queues
+ * are to be associated with the specified vector.
+ * The "other" causes are always mapped to vector 0.
+ * PF configures interrupt mapping and returns status.
+ */
+struct i40e_virtchnl_vector_map {
+	u16 vsi_id;
+	u16 vector_id;
+	u16 rxq_map;
+	u16 txq_map;
+	u16 rxitr_idx;
+	u16 txitr_idx;
+};
+
+struct i40e_virtchnl_irq_map_info {
+	u16 num_vectors;
+	struct i40e_virtchnl_vector_map vecmap[1];
+};
+
+/* I40E_VIRTCHNL_OP_ENABLE_QUEUES
+ * I40E_VIRTCHNL_OP_DISABLE_QUEUES
+ * VF sends these message to enable or disable TX/RX queue pairs.
+ * The queues fields are bitmaps indicating which queues to act upon.
+ * (Currently, we only support 16 queues per VF, but we make the field
+ * u32 to allow for expansion.)
+ * PF performs requested action and returns status.
+ */
+struct i40e_virtchnl_queue_select {
+	u16 vsi_id;
+	u16 pad;
+	u32 rx_queues;
+	u32 tx_queues;
+};
+
+/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS
+ * VF sends this message in order to add one or more unicast or multicast
+ * address filters for the specified VSI.
+ * PF adds the filters and returns status.
+ */
+
+/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS
+ * VF sends this message in order to remove one or more unicast or multicast
+ * filters for the specified VSI.
+ * PF removes the filters and returns status.
+ */
+
+struct i40e_virtchnl_ether_addr {
+	u8 addr[ETH_ALEN];
+	u8 pad[2];
+};
+
+struct i40e_virtchnl_ether_addr_list {
+	u16 vsi_id;
+	u16 num_elements;
+	struct i40e_virtchnl_ether_addr list[1];
+};
+
+/* I40E_VIRTCHNL_OP_ADD_VLAN
+ * VF sends this message to add one or more VLAN tag filters for receives.
+ * PF adds the filters and returns status.
+ * If a port VLAN is configured by the PF, this operation will return an
+ * error to the VF.
+ */
+
+/* I40E_VIRTCHNL_OP_DEL_VLAN
+ * VF sends this message to remove one or more VLAN tag filters for receives.
+ * PF removes the filters and returns status.
+ * If a port VLAN is configured by the PF, this operation will return an
+ * error to the VF.
+ */
+
+struct i40e_virtchnl_vlan_filter_list {
+	u16 vsi_id;
+	u16 num_elements;
+	u16 vlan_id[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
+ * VF sends VSI id and flags.
+ * PF returns status code in retval.
+ * Note: we assume that broadcast accept mode is always enabled.
+ */
+struct i40e_virtchnl_promisc_info {
+	u16 vsi_id;
+	u16 flags;
+};
+
+#define I40E_FLAG_VF_UNICAST_PROMISC	0x00000001
+#define I40E_FLAG_VF_MULTICAST_PROMISC	0x00000002
+
+/* I40E_VIRTCHNL_OP_GET_STATS
+ * VF sends this message to request stats for the selected VSI. VF uses
+ * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id
+ * field is ignored by the PF.
+ *
+ * PF replies with struct i40e_eth_stats in an external buffer.
+ */
+
+/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY
+ * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT
+ * VF sends these messages to configure RSS. Only supported if both PF
+ * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
+ * configuration negotiation. If this is the case, then the RSS fields in
+ * the VF resource struct are valid.
+ * Both the key and LUT are initialized to 0 by the PF, meaning that
+ * RSS is effectively disabled until set up by the VF.
+ */
+struct i40e_virtchnl_rss_key {
+	u16 vsi_id;
+	u16 key_len;
+	u8 key[1];         /* RSS hash key, packed bytes */
+};
+
+struct i40e_virtchnl_rss_lut {
+	u16 vsi_id;
+	u16 lut_entries;
+	u8 lut[1];        /* RSS lookup table*/
+};
+
+/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS
+ * I40E_VIRTCHNL_OP_SET_RSS_HENA
+ * VF sends these messages to get and set the hash filter enable bits for RSS.
+ * By default, the PF sets these to all possible traffic types that the
+ * hardware supports. The VF can query this value if it wants to change the
+ * traffic types that are hashed by the hardware.
+ * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h
+ */
+struct i40e_virtchnl_rss_hena {
+	u64 hena;
+};
+
+/* I40E_VIRTCHNL_OP_EVENT
+ * PF sends this message to inform the VF driver of events that may affect it.
+ * No direct response is expected from the VF, though it may generate other
+ * messages in response to this one.
+ */
+enum i40e_virtchnl_event_codes {
+	I40E_VIRTCHNL_EVENT_UNKNOWN = 0,
+	I40E_VIRTCHNL_EVENT_LINK_CHANGE,
+	I40E_VIRTCHNL_EVENT_RESET_IMPENDING,
+	I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
+};
+#define I40E_PF_EVENT_SEVERITY_INFO		0
+#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM	255
+
+struct i40e_virtchnl_pf_event {
+	enum i40e_virtchnl_event_codes event;
+	union {
+		struct {
+			enum i40e_aq_link_speed link_speed;
+			bool link_status;
+		} link_event;
+	} event_data;
+
+	int severity;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
+ * VF uses this message to request PF to map IWARP vectors to IWARP queues.
+ * The request for this originates from the VF IWARP driver through
+ * a client interface between VF LAN and VF IWARP driver.
+ * A vector could have an AEQ and CEQ attached to it although
+ * there is a single AEQ per VF IWARP instance in which case
+ * most vectors will have an INVALID_IDX for aeq and valid idx for ceq.
+ * There will never be a case where there will be multiple CEQs attached
+ * to a single vector.
+ * PF configures interrupt mapping and returns status.
+ */
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX  0xFFFF
+
+struct i40e_virtchnl_iwarp_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct i40e_virtchnl_iwarp_qvlist_info {
+	u32 num_vectors;
+	struct i40e_virtchnl_iwarp_qv_info qv_info[1];
+};
+
+/* VF reset states - these are written into the RSTAT register:
+ * I40E_VFGEN_RSTAT1 on the PF
+ * I40E_VFGEN_RSTAT on the VF
+ * When the PF initiates a reset, it writes 0
+ * When the reset is complete, it writes 1
+ * When the PF detects that the VF has recovered, it writes 2
+ * VF checks this register periodically to determine if a reset has occurred,
+ * then polls it to know when the reset is complete.
+ * If either the PF or VF reads the register while the hardware
+ * is in a reset state, it will return DEADBEEF, which, when masked
+ * will result in 3.
+ */
+enum i40e_vfr_states {
+	I40E_VFR_INPROGRESS = 0,
+	I40E_VFR_COMPLETED,
+	I40E_VFR_VFACTIVE,
+	I40E_VFR_UNKNOWN,
+};
+
+#endif /* _I40E_VIRTCHNL_H_ */
-- 
cgit v1.2.3


From 1410a90ae449061b7e1ae19d275148f36948801b Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Sun, 28 May 2017 10:53:10 +0300
Subject: net/mlx5: Define interface bits for fencing UMR wqe

HW can implement UMR wqe re-transmission in various ways.
Thus, add HCA cap to distinguish the needed fence for UMR to make
sure that the wqe wouldn't fail on mkey checks.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Acked-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/mlx5_ifc.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 32de0724b400..edafedb7b509 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -766,6 +766,12 @@ enum {
 	MLX5_CAP_PORT_TYPE_ETH = 0x1,
 };
 
+enum {
+	MLX5_CAP_UMR_FENCE_STRONG	= 0x0,
+	MLX5_CAP_UMR_FENCE_SMALL	= 0x1,
+	MLX5_CAP_UMR_FENCE_NONE		= 0x2,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x80];
 
@@ -875,7 +881,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_202[0x1];
 	u8         ipoib_enhanced_offloads[0x1];
 	u8         ipoib_basic_offloads[0x1];
-	u8         reserved_at_205[0xa];
+	u8         reserved_at_205[0x5];
+	u8         umr_fence[0x2];
+	u8         reserved_at_20c[0x3];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
-- 
cgit v1.2.3


From 310a2ad92e3fd9139e3641464f1de113fa89825b Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:11 -0700
Subject: virtchnl: rename i40e to generic virtchnl

This morphs all the i40e and i40evf references to/in virtchnl.h
to be generic, using only automated methods. Updates all the
callers to use the new names.  A followup patch provides separate
clean ups for messy line conversions from these "automatic"
changes, to make them more reviewable.

Was executed with the following sed script:
sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_client.c
sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_prototype.h
sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40e_common.c
sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40e_prototype.h
sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf.h
sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf_client.c
sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf_main.c
sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
sed -i -f transform_script include/linux/avf/virtchnl.h

transform_script:
----8<----
s/I40E_VIRTCHNL_SUPPORTED_QTYPES/SAVE_ME_SUPPORTED_QTYPES/g
s/I40E_VIRTCHNL_VF_CAP/SAVE_ME_VF_CAP/g

s/I40E_VIRTCHNL_/VIRTCHNL_/g
s/i40e_virtchnl_/virtchnl_/g
s/i40e_vfr_/virtchnl_vfr_/g
s/I40E_VFR_/VIRTCHNL_VFR_/g

s/VIRTCHNL_OP_ADD_ETHER_ADDRESS/VIRTCHNL_OP_ADD_ETH_ADDR/g
s/VIRTCHNL_OP_DEL_ETHER_ADDRESS/VIRTCHNL_OP_DEL_ETH_ADDR/g
s/VIRTCHNL_OP_FCOE/VIRTCHNL_OP_RSVD/g

s/SAVE_ME_SUPPORTED_QTYPES/I40E_VIRTCHNL_SUPPORTED_QTYPES/g
s/SAVE_ME_VF_CAP/I40E_VIRTCHNL_VF_CAP/g
----8<----

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_client.c      |   2 +-
 drivers/net/ethernet/intel/i40e/i40e_prototype.h   |   4 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 370 ++++++++++-----------
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |   6 +-
 drivers/net/ethernet/intel/i40evf/i40e_common.c    |  10 +-
 drivers/net/ethernet/intel/i40evf/i40e_prototype.h |   4 +-
 drivers/net/ethernet/intel/i40evf/i40evf.h         |  22 +-
 drivers/net/ethernet/intel/i40evf/i40evf_client.c  |  18 +-
 drivers/net/ethernet/intel/i40evf/i40evf_main.c    |  48 +--
 .../net/ethernet/intel/i40evf/i40evf_virtchnl.c    | 280 ++++++++--------
 include/linux/avf/virtchnl.h                       | 233 ++++++-------
 11 files changed, 499 insertions(+), 498 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 088b4a43bd2a..36f694ccdc09 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -565,7 +565,7 @@ static int i40e_client_virtchnl_send(struct i40e_info *ldev,
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status err;
 
-	err = i40e_aq_send_msg_to_vf(hw, vf_id, I40E_VIRTCHNL_OP_IWARP,
+	err = i40e_aq_send_msg_to_vf(hw, vf_id, VIRTCHNL_OP_IWARP,
 				     0, msg, len, NULL);
 	if (err)
 		dev_err(&pf->pdev->dev, "Unable to send iWarp message to VF, error %d, aq status %d\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index d9c555050e64..df613ea40313 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -333,10 +333,10 @@ static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
 
 /* i40e_common for VF drivers*/
 void i40e_vf_parse_hw_config(struct i40e_hw *hw,
-			     struct i40e_virtchnl_vf_resource *msg);
+			     struct virtchnl_vf_resource *msg);
 i40e_status i40e_vf_reset(struct i40e_hw *hw);
 i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
-				enum i40e_virtchnl_ops v_opcode,
+				enum virtchnl_ops v_opcode,
 				i40e_status v_retval,
 				u8 *msg, u16 msglen,
 				struct i40e_asq_cmd_details *cmd_details);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 95c23fbaa211..9f361e810990 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -39,7 +39,7 @@
  * send a message to all VFs on a given PF
  **/
 static void i40e_vc_vf_broadcast(struct i40e_pf *pf,
-				 enum i40e_virtchnl_ops v_opcode,
+				 enum virtchnl_ops v_opcode,
 				 i40e_status v_retval, u8 *msg,
 				 u16 msglen)
 {
@@ -70,13 +70,13 @@ static void i40e_vc_vf_broadcast(struct i40e_pf *pf,
  **/
 static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf)
 {
-	struct i40e_virtchnl_pf_event pfe;
+	struct virtchnl_pf_event pfe;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_link_status *ls = &pf->hw.phy.link_info;
 	int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id;
 
-	pfe.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
 	pfe.severity = I40E_PF_EVENT_SEVERITY_INFO;
 	if (vf->link_forced) {
 		pfe.event_data.link_event.link_status = vf->link_up;
@@ -87,7 +87,7 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf)
 			ls->link_info & I40E_AQ_LINK_UP;
 		pfe.event_data.link_event.link_speed = ls->link_speed;
 	}
-	i40e_aq_send_msg_to_vf(hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT,
+	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
 }
 
@@ -113,12 +113,12 @@ void i40e_vc_notify_link_state(struct i40e_pf *pf)
  **/
 void i40e_vc_notify_reset(struct i40e_pf *pf)
 {
-	struct i40e_virtchnl_pf_event pfe;
+	struct virtchnl_pf_event pfe;
 
-	pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
 	pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM;
-	i40e_vc_vf_broadcast(pf, I40E_VIRTCHNL_OP_EVENT, 0,
-			     (u8 *)&pfe, sizeof(struct i40e_virtchnl_pf_event));
+	i40e_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, 0,
+			     (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
 }
 
 /**
@@ -129,7 +129,7 @@ void i40e_vc_notify_reset(struct i40e_pf *pf)
  **/
 void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
 {
-	struct i40e_virtchnl_pf_event pfe;
+	struct virtchnl_pf_event pfe;
 	int abs_vf_id;
 
 	/* validate the request */
@@ -143,11 +143,11 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
 
 	abs_vf_id = vf->vf_id + (int)vf->pf->hw.func_caps.vf_base_id;
 
-	pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
 	pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM;
-	i40e_aq_send_msg_to_vf(&vf->pf->hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT,
+	i40e_aq_send_msg_to_vf(&vf->pf->hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe,
-			       sizeof(struct i40e_virtchnl_pf_event), NULL);
+			       sizeof(struct virtchnl_pf_event), NULL);
 }
 /***********************misc routines*****************************/
 
@@ -250,7 +250,7 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
  * configure irq link list from the map
  **/
 static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
-				      struct i40e_virtchnl_vector_map *vecmap)
+				      struct virtchnl_vector_map *vecmap)
 {
 	unsigned long linklistmap = 0, tempmap;
 	struct i40e_pf *pf = vf->pf;
@@ -338,7 +338,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 	/* if the vf is running in polling mode and using interrupt zero,
 	 * need to disable auto-mask on enabling zero interrupt for VFs.
 	 */
-	if ((vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) &&
+	if ((vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) &&
 	    (vector_id == 0)) {
 		reg = rd32(hw, I40E_GLINT_CTL);
 		if (!(reg & I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK)) {
@@ -359,7 +359,7 @@ irq_list_done:
 static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
 {
 	struct i40e_pf *pf = vf->pf;
-	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info;
+	struct virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info;
 	u32 msix_vf;
 	u32 i;
 
@@ -368,7 +368,7 @@ static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
 
 	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
 	for (i = 0; i < qvlist_info->num_vectors; i++) {
-		struct i40e_virtchnl_iwarp_qv_info *qv_info;
+		struct virtchnl_iwarp_qv_info *qv_info;
 		u32 next_q_index, next_q_type;
 		struct i40e_hw *hw = &pf->hw;
 		u32 v_idx, reg_idx, reg;
@@ -409,17 +409,17 @@ static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
  * Return 0 on success or < 0 on error
  **/
 static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
-				    struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info)
+				    struct virtchnl_iwarp_qvlist_info *qvlist_info)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
-	struct i40e_virtchnl_iwarp_qv_info *qv_info;
+	struct virtchnl_iwarp_qv_info *qv_info;
 	u32 v_idx, i, reg_idx, reg;
 	u32 next_q_idx, next_q_type;
 	u32 msix_vf, size;
 
-	size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
-	       (sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+	size = sizeof(struct virtchnl_iwarp_qvlist_info) +
+	       (sizeof(struct virtchnl_iwarp_qv_info) *
 						(qvlist_info->num_vectors - 1));
 	vf->qvlist_info = kzalloc(size, GFP_KERNEL);
 	vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
@@ -492,7 +492,7 @@ err:
  **/
 static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
 				    u16 vsi_queue_id,
-				    struct i40e_virtchnl_txq_info *info)
+				    struct virtchnl_txq_info *info)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
@@ -569,7 +569,7 @@ error_context:
  **/
 static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
 				    u16 vsi_queue_id,
-				    struct i40e_virtchnl_rxq_info *info)
+				    struct virtchnl_rxq_info *info)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
@@ -1017,7 +1017,7 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
 	 * after VF has been fully initialized, because the VF driver may
 	 * request resources immediately after setting this flag.
 	 */
-	wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
+	wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
 }
 
 /**
@@ -1461,7 +1461,7 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
  * send resp msg to VF
  **/
 static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
-				   enum i40e_virtchnl_ops opcode,
+				   enum virtchnl_ops opcode,
 				   i40e_status retval)
 {
 	return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0);
@@ -1475,18 +1475,18 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
  **/
 static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
 {
-	struct i40e_virtchnl_version_info info = {
-		I40E_VIRTCHNL_VERSION_MAJOR, I40E_VIRTCHNL_VERSION_MINOR
+	struct virtchnl_version_info info = {
+		VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
 	};
 
-	vf->vf_ver = *(struct i40e_virtchnl_version_info *)msg;
+	vf->vf_ver = *(struct virtchnl_version_info *)msg;
 	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
 	if (VF_IS_V10(vf))
-		info.minor = I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
-	return i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION,
+		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
 				      I40E_SUCCESS, (u8 *)&info,
 				      sizeof(struct
-					     i40e_virtchnl_version_info));
+					     virtchnl_version_info));
 }
 
 /**
@@ -1499,7 +1499,7 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
  **/
 static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 {
-	struct i40e_virtchnl_vf_resource *vfres = NULL;
+	struct virtchnl_vf_resource *vfres = NULL;
 	struct i40e_pf *pf = vf->pf;
 	i40e_status aq_ret = 0;
 	struct i40e_vsi *vsi;
@@ -1512,8 +1512,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 		goto err;
 	}
 
-	len = (sizeof(struct i40e_virtchnl_vf_resource) +
-	       sizeof(struct i40e_virtchnl_vsi_resource) * num_vsis);
+	len = (sizeof(struct virtchnl_vf_resource) +
+	       sizeof(struct virtchnl_vsi_resource) * num_vsis);
 
 	vfres = kzalloc(len, GFP_KERNEL);
 	if (!vfres) {
@@ -1524,47 +1524,47 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	if (VF_IS_V11(vf))
 		vf->driver_caps = *(u32 *)msg;
 	else
-		vf->driver_caps = I40E_VIRTCHNL_VF_OFFLOAD_L2 |
-				  I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG |
-				  I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
+				  VIRTCHNL_VF_OFFLOAD_RSS_REG |
+				  VIRTCHNL_VF_OFFLOAD_VLAN;
 
-	vfres->vf_offload_flags = I40E_VIRTCHNL_VF_OFFLOAD_L2;
+	vfres->vf_offload_flags = VIRTCHNL_VF_OFFLOAD_L2;
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!vsi->info.pvid)
-		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
 
 	if (i40e_vf_client_capable(pf, vf->vf_id) &&
-	    (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) {
-		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP;
+	    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) {
+		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_IWARP;
 		set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states);
 	}
 
-	if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) {
-		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF;
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
 	} else {
 		if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) &&
-		    (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ))
+		    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ))
 			vfres->vf_offload_flags |=
-					I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+					VIRTCHNL_VF_OFFLOAD_RSS_AQ;
 		else
 			vfres->vf_offload_flags |=
-					I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG;
+					VIRTCHNL_VF_OFFLOAD_RSS_REG;
 	}
 
 	if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
-		if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
 			vfres->vf_offload_flags |=
-				I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+				VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
 	}
 
-	if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_ENCAP)
-		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_ENCAP;
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
+		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
 
 	if ((pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
-	    (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
-		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+	    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
+		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
 
-	if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) {
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) {
 		if (pf->flags & I40E_FLAG_MFP_ENABLED) {
 			dev_err(&pf->pdev->dev,
 				"VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n",
@@ -1572,13 +1572,13 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 			ret = I40E_ERR_PARAM;
 			goto err;
 		}
-		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
 	}
 
 	if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) {
-		if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
 			vfres->vf_offload_flags |=
-					I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+					VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
 	}
 
 	vfres->num_vsis = num_vsis;
@@ -1601,7 +1601,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 
 err:
 	/* send the response back to the VF */
-	ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
+	ret = i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES,
 				     aq_ret, (u8 *)vfres, len);
 
 	kfree(vfres);
@@ -1655,8 +1655,8 @@ static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
 static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 					       u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_promisc_info *info =
-	    (struct i40e_virtchnl_promisc_info *)msg;
+	struct virtchnl_promisc_info *info =
+	    (struct virtchnl_promisc_info *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_mac_filter *f;
@@ -1788,7 +1788,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf,
-				       I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+				       VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
 				       aq_ret);
 }
 
@@ -1803,9 +1803,9 @@ error_param:
  **/
 static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_vsi_queue_config_info *qci =
-	    (struct i40e_virtchnl_vsi_queue_config_info *)msg;
-	struct i40e_virtchnl_queue_pair_info *qpi;
+	struct virtchnl_vsi_queue_config_info *qci =
+	    (struct virtchnl_vsi_queue_config_info *)msg;
+	struct virtchnl_queue_pair_info *qpi;
 	struct i40e_pf *pf = vf->pf;
 	u16 vsi_id, vsi_queue_id;
 	i40e_status aq_ret = 0;
@@ -1845,7 +1845,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
 				       aq_ret);
 }
 
@@ -1860,9 +1860,9 @@ error_param:
  **/
 static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_irq_map_info *irqmap_info =
-	    (struct i40e_virtchnl_irq_map_info *)msg;
-	struct i40e_virtchnl_vector_map *map;
+	struct virtchnl_irq_map_info *irqmap_info =
+	    (struct virtchnl_irq_map_info *)msg;
+	struct virtchnl_vector_map *map;
 	u16 vsi_id, vsi_queue_id, vector_id;
 	i40e_status aq_ret = 0;
 	unsigned long tempmap;
@@ -1908,7 +1908,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	}
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP,
 				       aq_ret);
 }
 
@@ -1922,8 +1922,8 @@ error_param:
  **/
 static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_queue_select *vqs =
-	    (struct i40e_virtchnl_queue_select *)msg;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
 	u16 vsi_id = vqs->vsi_id;
 	i40e_status aq_ret = 0;
@@ -1947,7 +1947,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 		aq_ret = I40E_ERR_TIMEOUT;
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
 				       aq_ret);
 }
 
@@ -1962,8 +1962,8 @@ error_param:
  **/
 static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_queue_select *vqs =
-	    (struct i40e_virtchnl_queue_select *)msg;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
 	i40e_status aq_ret = 0;
 
@@ -1986,7 +1986,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES,
 				       aq_ret);
 }
 
@@ -2000,8 +2000,8 @@ error_param:
  **/
 static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_queue_select *vqs =
-	    (struct i40e_virtchnl_queue_select *)msg;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_eth_stats stats;
 	i40e_status aq_ret = 0;
@@ -2029,7 +2029,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response back to the VF */
-	return i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, aq_ret,
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, aq_ret,
 				      (u8 *)&stats, sizeof(stats));
 }
 
@@ -2088,8 +2088,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
  **/
 static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_ether_addr_list *al =
-	    (struct i40e_virtchnl_ether_addr_list *)msg;
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
 	u16 vsi_id = al->vsi_id;
@@ -2143,7 +2143,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
 				       ret);
 }
 
@@ -2157,8 +2157,8 @@ error_param:
  **/
 static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_ether_addr_list *al =
-	    (struct i40e_virtchnl_ether_addr_list *)msg;
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
 	u16 vsi_id = al->vsi_id;
@@ -2203,7 +2203,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR,
 				       ret);
 }
 
@@ -2217,8 +2217,8 @@ error_param:
  **/
 static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_vlan_filter_list *vfl =
-	    (struct i40e_virtchnl_vlan_filter_list *)msg;
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
 	u16 vsi_id = vfl->vsi_id;
@@ -2277,7 +2277,7 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ADD_VLAN, aq_ret);
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, aq_ret);
 }
 
 /**
@@ -2290,8 +2290,8 @@ error_param:
  **/
 static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_vlan_filter_list *vfl =
-	    (struct i40e_virtchnl_vlan_filter_list *)msg;
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
 	u16 vsi_id = vfl->vsi_id;
@@ -2335,7 +2335,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_VLAN, aq_ret);
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, aq_ret);
 }
 
 /**
@@ -2363,7 +2363,7 @@ static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_IWARP,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_IWARP,
 				       aq_ret);
 }
 
@@ -2379,8 +2379,8 @@ error_param:
 static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
 				   bool config)
 {
-	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info =
-				(struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+	struct virtchnl_iwarp_qvlist_info *qvlist_info =
+				(struct virtchnl_iwarp_qvlist_info *)msg;
 	i40e_status aq_ret = 0;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
@@ -2399,8 +2399,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf,
-			       config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP :
-			       I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
+			       config ? VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP :
+			       VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
 			       aq_ret);
 }
 
@@ -2414,8 +2414,8 @@ error_param:
  **/
 static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_rss_key *vrk =
-		(struct i40e_virtchnl_rss_key *)msg;
+	struct virtchnl_rss_key *vrk =
+		(struct virtchnl_rss_key *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
 	u16 vsi_id = vrk->vsi_id;
@@ -2432,7 +2432,7 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	aq_ret = i40e_config_rss(vsi, vrk->key, NULL, 0);
 err:
 	/* send the response to the VF */
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY,
 				       aq_ret);
 }
 
@@ -2446,8 +2446,8 @@ err:
  **/
 static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_rss_lut *vrl =
-		(struct i40e_virtchnl_rss_lut *)msg;
+	struct virtchnl_rss_lut *vrl =
+		(struct virtchnl_rss_lut *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
 	u16 vsi_id = vrl->vsi_id;
@@ -2464,7 +2464,7 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	aq_ret = i40e_config_rss(vsi, NULL, vrl->lut, I40E_VF_HLUT_ARRAY_SIZE);
 	/* send the response to the VF */
 err:
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT,
 				       aq_ret);
 }
 
@@ -2478,7 +2478,7 @@ err:
  **/
 static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_rss_hena *vrh = NULL;
+	struct virtchnl_rss_hena *vrh = NULL;
 	struct i40e_pf *pf = vf->pf;
 	i40e_status aq_ret = 0;
 	int len = 0;
@@ -2487,7 +2487,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
 		aq_ret = I40E_ERR_PARAM;
 		goto err;
 	}
-	len = sizeof(struct i40e_virtchnl_rss_hena);
+	len = sizeof(struct virtchnl_rss_hena);
 
 	vrh = kzalloc(len, GFP_KERNEL);
 	if (!vrh) {
@@ -2498,7 +2498,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	vrh->hena = i40e_pf_get_default_rss_hena(pf);
 err:
 	/* send the response back to the VF */
-	aq_ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS,
+	aq_ret = i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HENA_CAPS,
 					aq_ret, (u8 *)vrh, len);
 	kfree(vrh);
 	return aq_ret;
@@ -2514,8 +2514,8 @@ err:
  **/
 static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
 {
-	struct i40e_virtchnl_rss_hena *vrh =
-		(struct i40e_virtchnl_rss_hena *)msg;
+	struct virtchnl_rss_hena *vrh =
+		(struct virtchnl_rss_hena *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status aq_ret = 0;
@@ -2530,7 +2530,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	/* send the response to the VF */
 err:
-	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_SET_RSS_HENA,
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA,
 				       aq_ret);
 }
 
@@ -2555,78 +2555,78 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 
 	/* Validate message length. */
 	switch (v_opcode) {
-	case I40E_VIRTCHNL_OP_VERSION:
-		valid_len = sizeof(struct i40e_virtchnl_version_info);
+	case VIRTCHNL_OP_VERSION:
+		valid_len = sizeof(struct virtchnl_version_info);
 		break;
-	case I40E_VIRTCHNL_OP_RESET_VF:
+	case VIRTCHNL_OP_RESET_VF:
 		break;
-	case I40E_VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
 		if (VF_IS_V11(vf))
 			valid_len = sizeof(u32);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE:
-		valid_len = sizeof(struct i40e_virtchnl_txq_info);
+	case VIRTCHNL_OP_CONFIG_TX_QUEUE:
+		valid_len = sizeof(struct virtchnl_txq_info);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE:
-		valid_len = sizeof(struct i40e_virtchnl_rxq_info);
+	case VIRTCHNL_OP_CONFIG_RX_QUEUE:
+		valid_len = sizeof(struct virtchnl_rxq_info);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES:
-		valid_len = sizeof(struct i40e_virtchnl_vsi_queue_config_info);
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		valid_len = sizeof(struct virtchnl_vsi_queue_config_info);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_vsi_queue_config_info *vqc =
-			    (struct i40e_virtchnl_vsi_queue_config_info *)msg;
+			struct virtchnl_vsi_queue_config_info *vqc =
+			    (struct virtchnl_vsi_queue_config_info *)msg;
 			valid_len += (vqc->num_queue_pairs *
 				      sizeof(struct
-					     i40e_virtchnl_queue_pair_info));
+					     virtchnl_queue_pair_info));
 			if (vqc->num_queue_pairs == 0)
 				err_msg_format = true;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP:
-		valid_len = sizeof(struct i40e_virtchnl_irq_map_info);
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		valid_len = sizeof(struct virtchnl_irq_map_info);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_irq_map_info *vimi =
-			    (struct i40e_virtchnl_irq_map_info *)msg;
+			struct virtchnl_irq_map_info *vimi =
+			    (struct virtchnl_irq_map_info *)msg;
 			valid_len += (vimi->num_vectors *
-				      sizeof(struct i40e_virtchnl_vector_map));
+				      sizeof(struct virtchnl_vector_map));
 			if (vimi->num_vectors == 0)
 				err_msg_format = true;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
-	case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
-		valid_len = sizeof(struct i40e_virtchnl_queue_select);
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		valid_len = sizeof(struct virtchnl_queue_select);
 		break;
-	case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
-	case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
-		valid_len = sizeof(struct i40e_virtchnl_ether_addr_list);
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		valid_len = sizeof(struct virtchnl_ether_addr_list);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_ether_addr_list *veal =
-			    (struct i40e_virtchnl_ether_addr_list *)msg;
+			struct virtchnl_ether_addr_list *veal =
+			    (struct virtchnl_ether_addr_list *)msg;
 			valid_len += veal->num_elements *
-			    sizeof(struct i40e_virtchnl_ether_addr);
+			    sizeof(struct virtchnl_ether_addr);
 			if (veal->num_elements == 0)
 				err_msg_format = true;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_ADD_VLAN:
-	case I40E_VIRTCHNL_OP_DEL_VLAN:
-		valid_len = sizeof(struct i40e_virtchnl_vlan_filter_list);
+	case VIRTCHNL_OP_ADD_VLAN:
+	case VIRTCHNL_OP_DEL_VLAN:
+		valid_len = sizeof(struct virtchnl_vlan_filter_list);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_vlan_filter_list *vfl =
-			    (struct i40e_virtchnl_vlan_filter_list *)msg;
+			struct virtchnl_vlan_filter_list *vfl =
+			    (struct virtchnl_vlan_filter_list *)msg;
 			valid_len += vfl->num_elements * sizeof(u16);
 			if (vfl->num_elements == 0)
 				err_msg_format = true;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
-		valid_len = sizeof(struct i40e_virtchnl_promisc_info);
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		valid_len = sizeof(struct virtchnl_promisc_info);
 		break;
-	case I40E_VIRTCHNL_OP_GET_STATS:
-		valid_len = sizeof(struct i40e_virtchnl_queue_select);
+	case VIRTCHNL_OP_GET_STATS:
+		valid_len = sizeof(struct virtchnl_queue_select);
 		break;
-	case I40E_VIRTCHNL_OP_IWARP:
+	case VIRTCHNL_OP_IWARP:
 		/* These messages are opaque to us and will be validated in
 		 * the RDMA client code. We just need to check for nonzero
 		 * length. The firmware will enforce max length restrictions.
@@ -2636,27 +2636,27 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 		else
 			err_msg_format = true;
 		break;
-	case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+	case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
 		valid_len = 0;
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
-		valid_len = sizeof(struct i40e_virtchnl_iwarp_qvlist_info);
+	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+		valid_len = sizeof(struct virtchnl_iwarp_qvlist_info);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_iwarp_qvlist_info *qv =
-				(struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+			struct virtchnl_iwarp_qvlist_info *qv =
+				(struct virtchnl_iwarp_qvlist_info *)msg;
 			if (qv->num_vectors == 0) {
 				err_msg_format = true;
 				break;
 			}
 			valid_len += ((qv->num_vectors - 1) *
-				sizeof(struct i40e_virtchnl_iwarp_qv_info));
+				sizeof(struct virtchnl_iwarp_qv_info));
 		}
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_RSS_KEY:
-		valid_len = sizeof(struct i40e_virtchnl_rss_key);
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		valid_len = sizeof(struct virtchnl_rss_key);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_rss_key *vrk =
-				(struct i40e_virtchnl_rss_key *)msg;
+			struct virtchnl_rss_key *vrk =
+				(struct virtchnl_rss_key *)msg;
 			if (vrk->key_len != I40E_HKEY_ARRAY_SIZE) {
 				err_msg_format = true;
 				break;
@@ -2664,11 +2664,11 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 			valid_len += vrk->key_len - 1;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_RSS_LUT:
-		valid_len = sizeof(struct i40e_virtchnl_rss_lut);
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		valid_len = sizeof(struct virtchnl_rss_lut);
 		if (msglen >= valid_len) {
-			struct i40e_virtchnl_rss_lut *vrl =
-				(struct i40e_virtchnl_rss_lut *)msg;
+			struct virtchnl_rss_lut *vrl =
+				(struct virtchnl_rss_lut *)msg;
 			if (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) {
 				err_msg_format = true;
 				break;
@@ -2676,14 +2676,14 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 			valid_len += vrl->lut_entries - 1;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS:
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
 		break;
-	case I40E_VIRTCHNL_OP_SET_RSS_HENA:
-		valid_len = sizeof(struct i40e_virtchnl_rss_hena);
+	case VIRTCHNL_OP_SET_RSS_HENA:
+		valid_len = sizeof(struct virtchnl_rss_hena);
 		break;
 	/* These are always errors coming from the VF. */
-	case I40E_VIRTCHNL_OP_EVENT:
-	case I40E_VIRTCHNL_OP_UNKNOWN:
+	case VIRTCHNL_OP_EVENT:
+	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		return -EPERM;
 	}
@@ -2729,70 +2729,70 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 	}
 
 	switch (v_opcode) {
-	case I40E_VIRTCHNL_OP_VERSION:
+	case VIRTCHNL_OP_VERSION:
 		ret = i40e_vc_get_version_msg(vf, msg);
 		break;
-	case I40E_VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
 		ret = i40e_vc_get_vf_resources_msg(vf, msg);
 		break;
-	case I40E_VIRTCHNL_OP_RESET_VF:
+	case VIRTCHNL_OP_RESET_VF:
 		i40e_vc_reset_vf_msg(vf);
 		ret = 0;
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
 		ret = i40e_vc_config_promiscuous_mode_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
 		ret = i40e_vc_config_queues_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP:
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
 		ret = i40e_vc_config_irq_map_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
+	case VIRTCHNL_OP_ENABLE_QUEUES:
 		ret = i40e_vc_enable_queues_msg(vf, msg, msglen);
 		i40e_vc_notify_vf_link_state(vf);
 		break;
-	case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
+	case VIRTCHNL_OP_DISABLE_QUEUES:
 		ret = i40e_vc_disable_queues_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
 		ret = i40e_vc_add_mac_addr_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
 		ret = i40e_vc_del_mac_addr_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_ADD_VLAN:
+	case VIRTCHNL_OP_ADD_VLAN:
 		ret = i40e_vc_add_vlan_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_DEL_VLAN:
+	case VIRTCHNL_OP_DEL_VLAN:
 		ret = i40e_vc_remove_vlan_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_GET_STATS:
+	case VIRTCHNL_OP_GET_STATS:
 		ret = i40e_vc_get_stats_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_IWARP:
+	case VIRTCHNL_OP_IWARP:
 		ret = i40e_vc_iwarp_msg(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
 		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true);
 		break;
-	case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+	case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
 		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_RSS_KEY:
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
 		ret = i40e_vc_config_rss_key(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_CONFIG_RSS_LUT:
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
 		ret = i40e_vc_config_rss_lut(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS:
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
 		ret = i40e_vc_get_rss_hena(vf, msg, msglen);
 		break;
-	case I40E_VIRTCHNL_OP_SET_RSS_HENA:
+	case VIRTCHNL_OP_SET_RSS_HENA:
 		ret = i40e_vc_set_rss_hena(vf, msg, msglen);
 		break;
 
-	case I40E_VIRTCHNL_OP_UNKNOWN:
+	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
 			v_opcode, local_vf_id);
@@ -3218,7 +3218,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
-	struct i40e_virtchnl_pf_event pfe;
+	struct virtchnl_pf_event pfe;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_vf *vf;
 	int abs_vf_id;
@@ -3234,7 +3234,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 	vf = &pf->vf[vf_id];
 	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
 
-	pfe.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
 	pfe.severity = I40E_PF_EVENT_SEVERITY_INFO;
 
 	switch (link) {
@@ -3262,7 +3262,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 		goto error_out;
 	}
 	/* Notify the VF of its new link state */
-	i40e_aq_send_msg_to_vf(hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT,
+	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
 
 error_out:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 20d7c8160e9e..b57ffffce141 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -81,13 +81,13 @@ struct i40e_vf {
 	s16 vf_id;
 	/* all VF vsis connect to the same parent */
 	enum i40e_switch_element_types parent_type;
-	struct i40e_virtchnl_version_info vf_ver;
+	struct virtchnl_version_info vf_ver;
 	u32 driver_caps; /* reported by VF driver */
 
 	/* VF Port Extender (PE) stag if used */
 	u16 stag;
 
-	struct i40e_virtchnl_ether_addr default_lan_addr;
+	struct virtchnl_ether_addr default_lan_addr;
 	u16 port_vlan_id;
 	bool pf_set_mac;	/* The VMM admin set the VF MAC address */
 	bool trusted;
@@ -115,7 +115,7 @@ struct i40e_vf {
 	u16 num_vlan;
 
 	/* RDMA Client */
-	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info;
+	struct virtchnl_iwarp_qvlist_info *qvlist_info;
 };
 
 void i40e_free_vfs(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 1db028ac96f4..9a7d995080b6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1054,7 +1054,7 @@ do_retry:
  * completion before returning.
  **/
 i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
-				enum i40e_virtchnl_ops v_opcode,
+				enum virtchnl_ops v_opcode,
 				i40e_status v_retval,
 				u8 *msg, u16 msglen,
 				struct i40e_asq_cmd_details *cmd_details)
@@ -1092,9 +1092,9 @@ i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
  * with appropriate information.
  **/
 void i40e_vf_parse_hw_config(struct i40e_hw *hw,
-			     struct i40e_virtchnl_vf_resource *msg)
+			     struct virtchnl_vf_resource *msg)
 {
-	struct i40e_virtchnl_vsi_resource *vsi_res;
+	struct virtchnl_vsi_resource *vsi_res;
 	int i;
 
 	vsi_res = &msg->vsi_res[0];
@@ -1104,7 +1104,7 @@ void i40e_vf_parse_hw_config(struct i40e_hw *hw,
 	hw->dev_caps.num_tx_qp = msg->num_queue_pairs;
 	hw->dev_caps.num_msix_vectors_vf = msg->max_vectors;
 	hw->dev_caps.dcb = msg->vf_offload_flags &
-			   I40E_VIRTCHNL_VF_OFFLOAD_L2;
+			   VIRTCHNL_VF_OFFLOAD_L2;
 	hw->dev_caps.fcoe = 0;
 	for (i = 0; i < msg->num_vsis; i++) {
 		if (vsi_res->vsi_type == I40E_VSI_SRIOV) {
@@ -1127,7 +1127,7 @@ void i40e_vf_parse_hw_config(struct i40e_hw *hw,
  **/
 i40e_status i40e_vf_reset(struct i40e_hw *hw)
 {
-	return i40e_aq_send_msg_to_pf(hw, I40E_VIRTCHNL_OP_RESET_VF,
+	return i40e_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF,
 				      0, NULL, 0, NULL);
 }
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 227905b23690..c9836bba487d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -87,10 +87,10 @@ static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
 
 /* i40e_common for VF drivers*/
 void i40e_vf_parse_hw_config(struct i40e_hw *hw,
-			     struct i40e_virtchnl_vf_resource *msg);
+			     struct virtchnl_vf_resource *msg);
 i40e_status i40e_vf_reset(struct i40e_hw *hw);
 i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
-				enum i40e_virtchnl_ops v_opcode,
+				enum virtchnl_ops v_opcode,
 				i40e_status v_retval,
 				u8 *msg, u16 msglen,
 				struct i40e_asq_cmd_details *cmd_details);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 75d314b1a9bb..9d8c21b36332 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -264,25 +264,25 @@ struct i40evf_adapter {
 	bool netdev_registered;
 	bool link_up;
 	enum i40e_aq_link_speed link_speed;
-	enum i40e_virtchnl_ops current_op;
+	enum virtchnl_ops current_op;
 #define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
 			    (_a)->vf_res->vf_offload_flags & \
-				I40E_VIRTCHNL_VF_OFFLOAD_IWARP : \
+				VIRTCHNL_VF_OFFLOAD_IWARP : \
 			    0)
 #define CLIENT_ENABLED(_a) ((_a)->cinst)
 /* RSS by the PF should be preferred over RSS via other methods. */
 #define RSS_PF(_a) ((_a)->vf_res->vf_offload_flags & \
-		    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
+		    VIRTCHNL_VF_OFFLOAD_RSS_PF)
 #define RSS_AQ(_a) ((_a)->vf_res->vf_offload_flags & \
-		    I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+		    VIRTCHNL_VF_OFFLOAD_RSS_AQ)
 #define RSS_REG(_a) (!((_a)->vf_res->vf_offload_flags & \
-		       (I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
-			I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)))
+		       (VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
+			VIRTCHNL_VF_OFFLOAD_RSS_PF)))
 #define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_offload_flags & \
-			  I40E_VIRTCHNL_VF_OFFLOAD_VLAN)
-	struct i40e_virtchnl_vf_resource *vf_res; /* incl. all VSIs */
-	struct i40e_virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
-	struct i40e_virtchnl_version_info pf_version;
+			  VIRTCHNL_VF_OFFLOAD_VLAN)
+	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
+	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
+	struct virtchnl_version_info pf_version;
 #define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
 		       ((_a)->pf_version.minor == 1))
 	u16 msg_enable;
@@ -348,7 +348,7 @@ void i40evf_set_hena(struct i40evf_adapter *adapter);
 void i40evf_set_rss_key(struct i40evf_adapter *adapter);
 void i40evf_set_rss_lut(struct i40evf_adapter *adapter);
 void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
-				enum i40e_virtchnl_ops v_opcode,
+				enum virtchnl_ops v_opcode,
 				i40e_status v_retval, u8 *msg, u16 msglen);
 int i40evf_config_rss(struct i40evf_adapter *adapter);
 int i40evf_lan_add_device(struct i40evf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.c b/drivers/net/ethernet/intel/i40evf/i40evf_client.c
index ee737680a0e9..93cf5fd17d91 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_client.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_client.c
@@ -120,7 +120,7 @@ static int i40evf_client_release_qvlist(struct i40e_info *ldev)
 		return -EAGAIN;
 
 	err = i40e_aq_send_msg_to_pf(&adapter->hw,
-			I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
+			VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
 			I40E_SUCCESS, NULL, 0, NULL);
 
 	if (err)
@@ -410,7 +410,7 @@ static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
 	if (adapter->aq_required)
 		return -EAGAIN;
 
-	err = i40e_aq_send_msg_to_pf(&adapter->hw, I40E_VIRTCHNL_OP_IWARP,
+	err = i40e_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP,
 				     I40E_SUCCESS, msg, len, NULL);
 	if (err)
 		dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n",
@@ -431,7 +431,7 @@ static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
 				      struct i40e_client *client,
 				      struct i40e_qvlist_info *qvlist_info)
 {
-	struct i40e_virtchnl_iwarp_qvlist_info *v_qvlist_info;
+	struct virtchnl_iwarp_qvlist_info *v_qvlist_info;
 	struct i40evf_adapter *adapter = ldev->vf;
 	struct i40e_qv_info *qv_info;
 	i40e_status err;
@@ -453,14 +453,14 @@ static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
 			return -EINVAL;
 	}
 
-	v_qvlist_info = (struct i40e_virtchnl_iwarp_qvlist_info *)qvlist_info;
-	msg_size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
-			(sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+	v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info;
+	msg_size = sizeof(struct virtchnl_iwarp_qvlist_info) +
+			(sizeof(struct virtchnl_iwarp_qv_info) *
 			(v_qvlist_info->num_vectors - 1));
 
-	adapter->client_pending |= BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP);
+	adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP);
 	err = i40e_aq_send_msg_to_pf(&adapter->hw,
-			I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+			VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
 			I40E_SUCCESS, (u8 *)v_qvlist_info, msg_size, NULL);
 
 	if (err) {
@@ -474,7 +474,7 @@ static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
 	for (i = 0; i < 5; i++) {
 		msleep(100);
 		if (!(adapter->client_pending &
-		      BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) {
+		      BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) {
 			err = 0;
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index ea110a730e16..5d7b613e0d62 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1131,7 +1131,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
 	if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
 	    adapter->state != __I40EVF_RESETTING) {
 		/* cancel any current operation */
-		adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 		/* Schedule operations to close down the HW. Don't wait
 		 * here for this to complete. The watchdog is still running
 		 * and it will take care of this.
@@ -1311,7 +1311,7 @@ static int i40evf_config_rss_aq(struct i40evf_adapter *adapter)
 	struct i40e_hw *hw = &adapter->hw;
 	int ret = 0;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n",
 			adapter->current_op);
@@ -1410,7 +1410,7 @@ static int i40evf_init_rss(struct i40evf_adapter *adapter)
 	if (!RSS_PF(adapter)) {
 		/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
 		if (adapter->vf_res->vf_offload_flags &
-		    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+		    VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
 			adapter->hena = I40E_DEFAULT_RSS_HENA_EXPANDED;
 		else
 			adapter->hena = I40E_DEFAULT_RSS_HENA;
@@ -1588,8 +1588,8 @@ static void i40evf_watchdog_task(struct work_struct *work)
 	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
 		reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
 			  I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if ((reg_val == I40E_VFR_VFACTIVE) ||
-		    (reg_val == I40E_VFR_COMPLETED)) {
+		if ((reg_val == VIRTCHNL_VFR_VFACTIVE) ||
+		    (reg_val == VIRTCHNL_VFR_COMPLETED)) {
 			/* A chance for redemption! */
 			dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
 			adapter->state = __I40EVF_STARTUP;
@@ -1605,7 +1605,7 @@ static void i40evf_watchdog_task(struct work_struct *work)
 			return;
 		}
 		adapter->aq_required = 0;
-		adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 		goto watchdog_done;
 	}
 
@@ -1621,7 +1621,7 @@ static void i40evf_watchdog_task(struct work_struct *work)
 		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
 		schedule_work(&adapter->reset_task);
 		adapter->aq_required = 0;
-		adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 		goto watchdog_done;
 	}
 
@@ -1854,7 +1854,7 @@ static void i40evf_reset_task(struct work_struct *work)
 
 		reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
 			  I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if (reg_val == I40E_VFR_VFACTIVE)
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE)
 			break;
 	}
 
@@ -1888,7 +1888,7 @@ continue_reset:
 
 	/* kill and reinit the admin queue */
 	i40evf_shutdown_adminq(hw);
-	adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 	err = i40evf_init_adminq(hw);
 	if (err)
 		dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n",
@@ -1949,7 +1949,7 @@ static void i40evf_adminq_task(struct work_struct *work)
 		container_of(work, struct i40evf_adapter, adminq_task);
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_arq_event_info event;
-	struct i40e_virtchnl_msg *v_msg;
+	struct virtchnl_msg *v_msg;
 	i40e_status ret;
 	u32 val, oldval;
 	u16 pending;
@@ -1962,7 +1962,7 @@ static void i40evf_adminq_task(struct work_struct *work)
 	if (!event.msg_buf)
 		goto out;
 
-	v_msg = (struct i40e_virtchnl_msg *)&event.desc;
+	v_msg = (struct virtchnl_msg *)&event.desc;
 	do {
 		ret = i40evf_clean_arq_element(hw, &event, &pending);
 		if (ret || !v_msg->v_opcode)
@@ -2347,7 +2347,7 @@ static netdev_features_t i40evf_fix_features(struct net_device *netdev,
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 
 	features &= ~I40EVF_VLAN_FEATURES;
-	if (adapter->vf_res->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN)
+	if (adapter->vf_res->vf_offload_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
 		features |= I40EVF_VLAN_FEATURES;
 	return features;
 }
@@ -2384,8 +2384,8 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw)
 	for (i = 0; i < 100; i++) {
 		rstat = rd32(hw, I40E_VFGEN_RSTAT) &
 			    I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if ((rstat == I40E_VFR_VFACTIVE) ||
-		    (rstat == I40E_VFR_COMPLETED))
+		if ((rstat == VIRTCHNL_VFR_VFACTIVE) ||
+		    (rstat == VIRTCHNL_VFR_COMPLETED))
 			return 0;
 		usleep_range(10, 20);
 	}
@@ -2401,7 +2401,7 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw)
  **/
 int i40evf_process_config(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_vf_resource *vfres = adapter->vf_res;
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
 	struct net_device *netdev = adapter->netdev;
 	struct i40e_vsi *vsi = &adapter->vsi;
 	int i;
@@ -2434,7 +2434,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 	/* advertise to stack only if offloads for encapsulated packets is
 	 * supported
 	 */
-	if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_ENCAP) {
+	if (vfres->vf_offload_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
 		hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL	|
 				   NETIF_F_GSO_GRE		|
 				   NETIF_F_GSO_GRE_CSUM		|
@@ -2445,7 +2445,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 				   0;
 
 		if (!(vfres->vf_offload_flags &
-		      I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
+		      VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
 			netdev->gso_partial_features |=
 				NETIF_F_GSO_UDP_TUNNEL_CSUM;
 
@@ -2472,7 +2472,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 	adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
 	vsi->netdev = adapter->netdev;
 	vsi->qs_handle = adapter->vsi_res->qset_handle;
-	if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+	if (vfres->vf_offload_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
 		adapter->rss_key_size = vfres->rss_key_size;
 		adapter->rss_lut_size = vfres->rss_lut_size;
 	} else {
@@ -2558,8 +2558,8 @@ static void i40evf_init_task(struct work_struct *work)
 				dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n",
 					adapter->pf_version.major,
 					adapter->pf_version.minor,
-					I40E_VIRTCHNL_VERSION_MAJOR,
-					I40E_VIRTCHNL_VERSION_MINOR);
+					VIRTCHNL_VERSION_MAJOR,
+					VIRTCHNL_VERSION_MINOR);
 			goto err;
 		}
 		err = i40evf_send_vf_config_msg(adapter);
@@ -2573,9 +2573,9 @@ static void i40evf_init_task(struct work_struct *work)
 	case __I40EVF_INIT_GET_RESOURCES:
 		/* aq msg sent, awaiting reply */
 		if (!adapter->vf_res) {
-			bufsz = sizeof(struct i40e_virtchnl_vf_resource) +
+			bufsz = sizeof(struct virtchnl_vf_resource) +
 				(I40E_MAX_VF_VSI *
-				 sizeof(struct i40e_virtchnl_vsi_resource));
+				 sizeof(struct virtchnl_vsi_resource));
 			adapter->vf_res = kzalloc(bufsz, GFP_KERNEL);
 			if (!adapter->vf_res)
 				goto err;
@@ -2606,7 +2606,7 @@ static void i40evf_init_task(struct work_struct *work)
 
 	if (i40evf_process_config(adapter))
 		goto err_alloc;
-	adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 
 	adapter->flags |= I40EVF_FLAG_RX_CSUM_ENABLED;
 
@@ -2644,7 +2644,7 @@ static void i40evf_init_task(struct work_struct *work)
 		goto err_sw_init;
 	i40evf_map_rings_to_vectors(adapter);
 	if (adapter->vf_res->vf_offload_flags &
-	    I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+	    VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
 		adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE;
 
 	err = i40evf_request_misc_irq(adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 91b21f26f8d4..90a17b0347b9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -42,7 +42,7 @@
  * Send message to PF and print status if failure.
  **/
 static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
-			      enum i40e_virtchnl_ops op, u8 *msg, u16 len)
+			      enum virtchnl_ops op, u8 *msg, u16 len)
 {
 	struct i40e_hw *hw = &adapter->hw;
 	i40e_status err;
@@ -68,12 +68,12 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
  **/
 int i40evf_send_api_ver(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_version_info vvi;
+	struct virtchnl_version_info vvi;
 
-	vvi.major = I40E_VIRTCHNL_VERSION_MAJOR;
-	vvi.minor = I40E_VIRTCHNL_VERSION_MINOR;
+	vvi.major = VIRTCHNL_VERSION_MAJOR;
+	vvi.minor = VIRTCHNL_VERSION_MINOR;
 
-	return i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_VERSION, (u8 *)&vvi,
+	return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_VERSION, (u8 *)&vvi,
 				  sizeof(vvi));
 }
 
@@ -88,10 +88,10 @@ int i40evf_send_api_ver(struct i40evf_adapter *adapter)
  **/
 int i40evf_verify_api_ver(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_version_info *pf_vvi;
+	struct virtchnl_version_info *pf_vvi;
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_arq_event_info event;
-	enum i40e_virtchnl_ops op;
+	enum virtchnl_ops op;
 	i40e_status err;
 
 	event.buf_len = I40EVF_MAX_AQ_BUF_SIZE;
@@ -109,8 +109,8 @@ int i40evf_verify_api_ver(struct i40evf_adapter *adapter)
 		if (err)
 			goto out_alloc;
 		op =
-		    (enum i40e_virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
-		if (op == I40E_VIRTCHNL_OP_VERSION)
+		    (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+		if (op == VIRTCHNL_OP_VERSION)
 			break;
 	}
 
@@ -119,19 +119,19 @@ int i40evf_verify_api_ver(struct i40evf_adapter *adapter)
 	if (err)
 		goto out_alloc;
 
-	if (op != I40E_VIRTCHNL_OP_VERSION) {
+	if (op != VIRTCHNL_OP_VERSION) {
 		dev_info(&adapter->pdev->dev, "Invalid reply type %d from PF\n",
 			op);
 		err = -EIO;
 		goto out_alloc;
 	}
 
-	pf_vvi = (struct i40e_virtchnl_version_info *)event.msg_buf;
+	pf_vvi = (struct virtchnl_version_info *)event.msg_buf;
 	adapter->pf_version = *pf_vvi;
 
-	if ((pf_vvi->major > I40E_VIRTCHNL_VERSION_MAJOR) ||
-	    ((pf_vvi->major == I40E_VIRTCHNL_VERSION_MAJOR) &&
-	     (pf_vvi->minor > I40E_VIRTCHNL_VERSION_MINOR)))
+	if ((pf_vvi->major > VIRTCHNL_VERSION_MAJOR) ||
+	    ((pf_vvi->major == VIRTCHNL_VERSION_MAJOR) &&
+	     (pf_vvi->minor > VIRTCHNL_VERSION_MINOR)))
 		err = -EIO;
 
 out_alloc:
@@ -152,25 +152,25 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
 {
 	u32 caps;
 
-	caps = I40E_VIRTCHNL_VF_OFFLOAD_L2 |
-	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF |
-	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ |
-	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG |
-	       I40E_VIRTCHNL_VF_OFFLOAD_VLAN |
-	       I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
-	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
-	       I40E_VIRTCHNL_VF_OFFLOAD_ENCAP |
-	       I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
-
-	adapter->current_op = I40E_VIRTCHNL_OP_GET_VF_RESOURCES;
+	caps = VIRTCHNL_VF_OFFLOAD_L2 |
+	       VIRTCHNL_VF_OFFLOAD_RSS_PF |
+	       VIRTCHNL_VF_OFFLOAD_RSS_AQ |
+	       VIRTCHNL_VF_OFFLOAD_RSS_REG |
+	       VIRTCHNL_VF_OFFLOAD_VLAN |
+	       VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
+	       VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
+	       VIRTCHNL_VF_OFFLOAD_ENCAP |
+	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+
+	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
 	if (PF_IS_V11(adapter))
 		return i40evf_send_pf_msg(adapter,
-					  I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
+					  VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (u8 *)&caps, sizeof(caps));
 	else
 		return i40evf_send_pf_msg(adapter,
-					  I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
+					  VIRTCHNL_OP_GET_VF_RESOURCES,
 					  NULL, 0);
 }
 
@@ -188,12 +188,12 @@ int i40evf_get_vf_config(struct i40evf_adapter *adapter)
 {
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_arq_event_info event;
-	enum i40e_virtchnl_ops op;
+	enum virtchnl_ops op;
 	i40e_status err;
 	u16 len;
 
-	len =  sizeof(struct i40e_virtchnl_vf_resource) +
-		I40E_MAX_VF_VSI * sizeof(struct i40e_virtchnl_vsi_resource);
+	len =  sizeof(struct virtchnl_vf_resource) +
+		I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource);
 	event.buf_len = len;
 	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
 	if (!event.msg_buf) {
@@ -209,8 +209,8 @@ int i40evf_get_vf_config(struct i40evf_adapter *adapter)
 		if (err)
 			goto out_alloc;
 		op =
-		    (enum i40e_virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
-		if (op == I40E_VIRTCHNL_OP_GET_VF_RESOURCES)
+		    (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+		if (op == VIRTCHNL_OP_GET_VF_RESOURCES)
 			break;
 	}
 
@@ -232,20 +232,20 @@ out:
  **/
 void i40evf_configure_queues(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_vsi_queue_config_info *vqci;
-	struct i40e_virtchnl_queue_pair_info *vqpi;
+	struct virtchnl_vsi_queue_config_info *vqci;
+	struct virtchnl_queue_pair_info *vqpi;
 	int pairs = adapter->num_active_queues;
 	int i, len, max_frame = I40E_MAX_RXBUFFER;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES;
-	len = sizeof(struct i40e_virtchnl_vsi_queue_config_info) +
-		       (sizeof(struct i40e_virtchnl_queue_pair_info) * pairs);
+	adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES;
+	len = sizeof(struct virtchnl_vsi_queue_config_info) +
+		       (sizeof(struct virtchnl_queue_pair_info) * pairs);
 	vqci = kzalloc(len, GFP_KERNEL);
 	if (!vqci)
 		return;
@@ -278,7 +278,7 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
 	}
 
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_QUEUES;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
 			   (u8 *)vqci, len);
 	kfree(vqci);
 }
@@ -291,20 +291,20 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
  **/
 void i40evf_enable_queues(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_queue_select vqs;
+	struct virtchnl_queue_select vqs;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot enable queues, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_ENABLE_QUEUES;
+	adapter->current_op = VIRTCHNL_OP_ENABLE_QUEUES;
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	vqs.tx_queues = BIT(adapter->num_active_queues) - 1;
 	vqs.rx_queues = vqs.tx_queues;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_QUEUES;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ENABLE_QUEUES,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES,
 			   (u8 *)&vqs, sizeof(vqs));
 }
 
@@ -316,20 +316,20 @@ void i40evf_enable_queues(struct i40evf_adapter *adapter)
  **/
 void i40evf_disable_queues(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_queue_select vqs;
+	struct virtchnl_queue_select vqs;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot disable queues, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_DISABLE_QUEUES;
+	adapter->current_op = VIRTCHNL_OP_DISABLE_QUEUES;
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	vqs.tx_queues = BIT(adapter->num_active_queues) - 1;
 	vqs.rx_queues = vqs.tx_queues;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_QUEUES;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DISABLE_QUEUES,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES,
 			   (u8 *)&vqs, sizeof(vqs));
 }
 
@@ -342,23 +342,23 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
  **/
 void i40evf_map_queues(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_irq_map_info *vimi;
+	struct virtchnl_irq_map_info *vimi;
 	int v_idx, q_vectors, len;
 	struct i40e_q_vector *q_vector;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot map queues to vectors, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP;
+	adapter->current_op = VIRTCHNL_OP_CONFIG_IRQ_MAP;
 
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
-	len = sizeof(struct i40e_virtchnl_irq_map_info) +
+	len = sizeof(struct virtchnl_irq_map_info) +
 	      (adapter->num_msix_vectors *
-		sizeof(struct i40e_virtchnl_vector_map));
+		sizeof(struct virtchnl_vector_map));
 	vimi = kzalloc(len, GFP_KERNEL);
 	if (!vimi)
 		return;
@@ -379,7 +379,7 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
 	vimi->vecmap[v_idx].rxq_map = 0;
 
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
 			   (u8 *)vimi, len);
 	kfree(vimi);
 }
@@ -394,12 +394,12 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
  **/
 void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_ether_addr_list *veal;
+	struct virtchnl_ether_addr_list *veal;
 	int len, i = 0, count = 0;
 	struct i40evf_mac_filter *f;
 	bool more = false;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot add filters, command %d pending\n",
 			adapter->current_op);
@@ -413,17 +413,17 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER;
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS;
+	adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR;
 
-	len = sizeof(struct i40e_virtchnl_ether_addr_list) +
-	      (count * sizeof(struct i40e_virtchnl_ether_addr));
+	len = sizeof(struct virtchnl_ether_addr_list) +
+	      (count * sizeof(struct virtchnl_ether_addr));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n");
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
-			 sizeof(struct i40e_virtchnl_ether_addr_list)) /
-			sizeof(struct i40e_virtchnl_ether_addr);
-		len = sizeof(struct i40e_virtchnl_ether_addr_list) +
-		      (count * sizeof(struct i40e_virtchnl_ether_addr));
+			 sizeof(struct virtchnl_ether_addr_list)) /
+			sizeof(struct virtchnl_ether_addr);
+		len = sizeof(struct virtchnl_ether_addr_list) +
+		      (count * sizeof(struct virtchnl_ether_addr));
 		more = true;
 	}
 
@@ -444,7 +444,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 	}
 	if (!more)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_ETH_ADDR,
 			   (u8 *)veal, len);
 	kfree(veal);
 }
@@ -459,12 +459,12 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
  **/
 void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_ether_addr_list *veal;
+	struct virtchnl_ether_addr_list *veal;
 	struct i40evf_mac_filter *f, *ftmp;
 	int len, i = 0, count = 0;
 	bool more = false;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot remove filters, command %d pending\n",
 			adapter->current_op);
@@ -478,17 +478,17 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER;
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS;
+	adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR;
 
-	len = sizeof(struct i40e_virtchnl_ether_addr_list) +
-	      (count * sizeof(struct i40e_virtchnl_ether_addr));
+	len = sizeof(struct virtchnl_ether_addr_list) +
+	      (count * sizeof(struct virtchnl_ether_addr));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n");
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
-			 sizeof(struct i40e_virtchnl_ether_addr_list)) /
-			sizeof(struct i40e_virtchnl_ether_addr);
-		len = sizeof(struct i40e_virtchnl_ether_addr_list) +
-		      (count * sizeof(struct i40e_virtchnl_ether_addr));
+			 sizeof(struct virtchnl_ether_addr_list)) /
+			sizeof(struct virtchnl_ether_addr);
+		len = sizeof(struct virtchnl_ether_addr_list) +
+		      (count * sizeof(struct virtchnl_ether_addr));
 		more = true;
 	}
 	veal = kzalloc(len, GFP_KERNEL);
@@ -509,7 +509,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 	}
 	if (!more)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_ETH_ADDR,
 			   (u8 *)veal, len);
 	kfree(veal);
 }
@@ -524,12 +524,12 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
  **/
 void i40evf_add_vlans(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_vlan_filter_list *vvfl;
+	struct virtchnl_vlan_filter_list *vvfl;
 	int len, i = 0, count = 0;
 	struct i40evf_vlan_filter *f;
 	bool more = false;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot add VLANs, command %d pending\n",
 			adapter->current_op);
@@ -544,16 +544,16 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_ADD_VLAN;
+	adapter->current_op = VIRTCHNL_OP_ADD_VLAN;
 
-	len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
+	len = sizeof(struct virtchnl_vlan_filter_list) +
 	      (count * sizeof(u16));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n");
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
-			 sizeof(struct i40e_virtchnl_vlan_filter_list)) /
+			 sizeof(struct virtchnl_vlan_filter_list)) /
 			sizeof(u16);
-		len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
+		len = sizeof(struct virtchnl_vlan_filter_list) +
 		      (count * sizeof(u16));
 		more = true;
 	}
@@ -574,7 +574,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 	}
 	if (!more)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
 	kfree(vvfl);
 }
 
@@ -588,12 +588,12 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
  **/
 void i40evf_del_vlans(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_vlan_filter_list *vvfl;
+	struct virtchnl_vlan_filter_list *vvfl;
 	struct i40evf_vlan_filter *f, *ftmp;
 	int len, i = 0, count = 0;
 	bool more = false;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot remove VLANs, command %d pending\n",
 			adapter->current_op);
@@ -608,16 +608,16 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_DEL_VLAN;
+	adapter->current_op = VIRTCHNL_OP_DEL_VLAN;
 
-	len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
+	len = sizeof(struct virtchnl_vlan_filter_list) +
 	      (count * sizeof(u16));
 	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many delete VLAN changes in one request\n");
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
-			 sizeof(struct i40e_virtchnl_vlan_filter_list)) /
+			 sizeof(struct virtchnl_vlan_filter_list)) /
 			sizeof(u16);
-		len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
+		len = sizeof(struct virtchnl_vlan_filter_list) +
 		      (count * sizeof(u16));
 		more = true;
 	}
@@ -639,7 +639,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 	}
 	if (!more)
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len);
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len);
 	kfree(vvfl);
 }
 
@@ -652,10 +652,10 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
  **/
 void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags)
 {
-	struct i40e_virtchnl_promisc_info vpi;
+	struct virtchnl_promisc_info vpi;
 	int promisc_all;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot set promiscuous mode, command %d pending\n",
 			adapter->current_op);
@@ -682,10 +682,10 @@ void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags)
 		dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
 	}
 
-	adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
+	adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
 	vpi.vsi_id = adapter->vsi_res->vsi_id;
 	vpi.flags = flags;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
 			   (u8 *)&vpi, sizeof(vpi));
 }
 
@@ -697,19 +697,19 @@ void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags)
  **/
 void i40evf_request_stats(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_queue_select vqs;
+	struct virtchnl_queue_select vqs;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* no error message, this isn't crucial */
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_GET_STATS;
+	adapter->current_op = VIRTCHNL_OP_GET_STATS;
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	/* queue maps are ignored for this message - only the vsi is used */
-	if (i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_GET_STATS,
+	if (i40evf_send_pf_msg(adapter, VIRTCHNL_OP_GET_STATS,
 			       (u8 *)&vqs, sizeof(vqs)))
 		/* if the request failed, don't lock out others */
-		adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 }
 
 /**
@@ -720,15 +720,15 @@ void i40evf_request_stats(struct i40evf_adapter *adapter)
  **/
 void i40evf_get_hena(struct i40evf_adapter *adapter)
 {
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot get RSS hash capabilities, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	adapter->current_op = I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS;
+	adapter->current_op = VIRTCHNL_OP_GET_RSS_HENA_CAPS;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_HENA;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_GET_RSS_HENA_CAPS,
 			   NULL, 0);
 }
 
@@ -740,18 +740,18 @@ void i40evf_get_hena(struct i40evf_adapter *adapter)
  **/
 void i40evf_set_hena(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_rss_hena vrh;
+	struct virtchnl_rss_hena vrh;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot set RSS hash enable, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
 	vrh.hena = adapter->hena;
-	adapter->current_op = I40E_VIRTCHNL_OP_SET_RSS_HENA;
+	adapter->current_op = VIRTCHNL_OP_SET_RSS_HENA;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_HENA;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_SET_RSS_HENA,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_SET_RSS_HENA,
 			   (u8 *)&vrh, sizeof(vrh));
 }
 
@@ -763,16 +763,16 @@ void i40evf_set_hena(struct i40evf_adapter *adapter)
  **/
 void i40evf_set_rss_key(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_rss_key *vrk;
+	struct virtchnl_rss_key *vrk;
 	int len;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot set RSS key, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	len = sizeof(struct i40e_virtchnl_rss_key) +
+	len = sizeof(struct virtchnl_rss_key) +
 	      (adapter->rss_key_size * sizeof(u8)) - 1;
 	vrk = kzalloc(len, GFP_KERNEL);
 	if (!vrk)
@@ -781,9 +781,9 @@ void i40evf_set_rss_key(struct i40evf_adapter *adapter)
 	vrk->key_len = adapter->rss_key_size;
 	memcpy(vrk->key, adapter->rss_key, adapter->rss_key_size);
 
-	adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_RSS_KEY;
+	adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_KEY;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_RSS_KEY;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_KEY,
 			   (u8 *)vrk, len);
 	kfree(vrk);
 }
@@ -796,16 +796,16 @@ void i40evf_set_rss_key(struct i40evf_adapter *adapter)
  **/
 void i40evf_set_rss_lut(struct i40evf_adapter *adapter)
 {
-	struct i40e_virtchnl_rss_lut *vrl;
+	struct virtchnl_rss_lut *vrl;
 	int len;
 
-	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot set RSS LUT, command %d pending\n",
 			adapter->current_op);
 		return;
 	}
-	len = sizeof(struct i40e_virtchnl_rss_lut) +
+	len = sizeof(struct virtchnl_rss_lut) +
 	      (adapter->rss_lut_size * sizeof(u8)) - 1;
 	vrl = kzalloc(len, GFP_KERNEL);
 	if (!vrl)
@@ -813,9 +813,9 @@ void i40evf_set_rss_lut(struct i40evf_adapter *adapter)
 	vrl->vsi_id = adapter->vsi.id;
 	vrl->lut_entries = adapter->rss_lut_size;
 	memcpy(vrl->lut, adapter->rss_lut, adapter->rss_lut_size);
-	adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_RSS_LUT;
+	adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_LUT;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_RSS_LUT;
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT,
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_LUT,
 			   (u8 *)vrl, len);
 	kfree(vrl);
 }
@@ -871,8 +871,8 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
 void i40evf_request_reset(struct i40evf_adapter *adapter)
 {
 	/* Don't check CURRENT_OP - this is always higher priority */
-	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_RESET_VF, NULL, 0);
-	adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0);
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 }
 
 /**
@@ -888,17 +888,17 @@ void i40evf_request_reset(struct i40evf_adapter *adapter)
  * This function handles the reply messages.
  **/
 void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
-				enum i40e_virtchnl_ops v_opcode,
+				enum virtchnl_ops v_opcode,
 				i40e_status v_retval,
 				u8 *msg, u16 msglen)
 {
 	struct net_device *netdev = adapter->netdev;
 
-	if (v_opcode == I40E_VIRTCHNL_OP_EVENT) {
-		struct i40e_virtchnl_pf_event *vpe =
-			(struct i40e_virtchnl_pf_event *)msg;
+	if (v_opcode == VIRTCHNL_OP_EVENT) {
+		struct virtchnl_pf_event *vpe =
+			(struct virtchnl_pf_event *)msg;
 		switch (vpe->event) {
-		case I40E_VIRTCHNL_EVENT_LINK_CHANGE:
+		case VIRTCHNL_EVENT_LINK_CHANGE:
 			adapter->link_speed =
 				vpe->event_data.link_event.link_speed;
 			if (adapter->link_up !=
@@ -915,7 +915,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 				i40evf_print_link_message(adapter);
 			}
 			break;
-		case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
+		case VIRTCHNL_EVENT_RESET_IMPENDING:
 			dev_info(&adapter->pdev->dev, "PF reset warning received\n");
 			if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
 				adapter->flags |= I40EVF_FLAG_RESET_PENDING;
@@ -932,19 +932,19 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 	}
 	if (v_retval) {
 		switch (v_opcode) {
-		case I40E_VIRTCHNL_OP_ADD_VLAN:
+		case VIRTCHNL_OP_ADD_VLAN:
 			dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
 				i40evf_stat_str(&adapter->hw, v_retval));
 			break;
-		case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
+		case VIRTCHNL_OP_ADD_ETH_ADDR:
 			dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
 				i40evf_stat_str(&adapter->hw, v_retval));
 			break;
-		case I40E_VIRTCHNL_OP_DEL_VLAN:
+		case VIRTCHNL_OP_DEL_VLAN:
 			dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
 				i40evf_stat_str(&adapter->hw, v_retval));
 			break;
-		case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
+		case VIRTCHNL_OP_DEL_ETH_ADDR:
 			dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
 				i40evf_stat_str(&adapter->hw, v_retval));
 			break;
@@ -956,7 +956,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 		}
 	}
 	switch (v_opcode) {
-	case I40E_VIRTCHNL_OP_GET_STATS: {
+	case VIRTCHNL_OP_GET_STATS: {
 		struct i40e_eth_stats *stats =
 			(struct i40e_eth_stats *)msg;
 		netdev->stats.rx_packets = stats->rx_unicast +
@@ -973,10 +973,10 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 		adapter->current_stats = *stats;
 		}
 		break;
-	case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: {
-		u16 len = sizeof(struct i40e_virtchnl_vf_resource) +
+	case VIRTCHNL_OP_GET_VF_RESOURCES: {
+		u16 len = sizeof(struct virtchnl_vf_resource) +
 			  I40E_MAX_VF_VSI *
-			  sizeof(struct i40e_virtchnl_vsi_resource);
+			  sizeof(struct virtchnl_vsi_resource);
 		memcpy(adapter->vf_res, msg, min(msglen, len));
 		i40e_vf_parse_hw_config(&adapter->hw, adapter->vf_res);
 		/* restore current mac address */
@@ -984,18 +984,18 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 		i40evf_process_config(adapter);
 		}
 		break;
-	case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
+	case VIRTCHNL_OP_ENABLE_QUEUES:
 		/* enable transmits */
 		i40evf_irq_enable(adapter, true);
 		break;
-	case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
+	case VIRTCHNL_OP_DISABLE_QUEUES:
 		i40evf_free_all_tx_resources(adapter);
 		i40evf_free_all_rx_resources(adapter);
 		if (adapter->state == __I40EVF_DOWN_PENDING)
 			adapter->state = __I40EVF_DOWN;
 		break;
-	case I40E_VIRTCHNL_OP_VERSION:
-	case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP:
+	case VIRTCHNL_OP_VERSION:
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
 		/* Don't display an error if we get these out of sequence.
 		 * If the firmware needed to get kicked, we'll get these and
 		 * it's no problem.
@@ -1003,7 +1003,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 		if (v_opcode != adapter->current_op)
 			return;
 		break;
-	case I40E_VIRTCHNL_OP_IWARP:
+	case VIRTCHNL_OP_IWARP:
 		/* Gobble zero-length replies from the PF. They indicate that
 		 * a previous message was received OK, and the client doesn't
 		 * care about that.
@@ -1013,13 +1013,13 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 						     msg, msglen);
 		break;
 
-	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
 		adapter->client_pending &=
-				~(BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP));
+				~(BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP));
 		break;
-	case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: {
-		struct i40e_virtchnl_rss_hena *vrh =
-			(struct i40e_virtchnl_rss_hena *)msg;
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS: {
+		struct virtchnl_rss_hena *vrh =
+			(struct virtchnl_rss_hena *)msg;
 		if (msglen == sizeof(*vrh))
 			adapter->hena = vrh->hena;
 		else
@@ -1033,5 +1033,5 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 				 adapter->current_op, v_opcode);
 		break;
 	} /* switch v_opcode */
-	adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 }
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 7d6da3ac24f4..a8b616121960 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -24,8 +24,8 @@
  *
  ******************************************************************************/
 
-#ifndef _I40E_VIRTCHNL_H_
-#define _I40E_VIRTCHNL_H_
+#ifndef _VIRTCHNL_H_
+#define _VIRTCHNL_H_
 
 /* Description:
  * This header file describes the VF-PF communication protocol used
@@ -56,36 +56,36 @@
 /* Opcodes for VF-PF communication. These are placed in the v_opcode field
  * of the virtchnl_msg structure.
  */
-enum i40e_virtchnl_ops {
+enum virtchnl_ops {
 /* The PF sends status change events to VFs using
- * the I40E_VIRTCHNL_OP_EVENT opcode.
+ * the VIRTCHNL_OP_EVENT opcode.
  * VFs send requests to the PF using the other ops.
  */
-	I40E_VIRTCHNL_OP_UNKNOWN = 0,
-	I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
-	I40E_VIRTCHNL_OP_RESET_VF = 2,
-	I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3,
-	I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4,
-	I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5,
-	I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6,
-	I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7,
-	I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8,
-	I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9,
-	I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10,
-	I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11,
-	I40E_VIRTCHNL_OP_ADD_VLAN = 12,
-	I40E_VIRTCHNL_OP_DEL_VLAN = 13,
-	I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
-	I40E_VIRTCHNL_OP_GET_STATS = 15,
-	I40E_VIRTCHNL_OP_RSVD = 16,
-	I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
-	I40E_VIRTCHNL_OP_IWARP = 20,
-	I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
-	I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
-	I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
-	I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
-	I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
-	I40E_VIRTCHNL_OP_SET_RSS_HENA = 26,
+	VIRTCHNL_OP_UNKNOWN = 0,
+	VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
+	VIRTCHNL_OP_RESET_VF = 2,
+	VIRTCHNL_OP_GET_VF_RESOURCES = 3,
+	VIRTCHNL_OP_CONFIG_TX_QUEUE = 4,
+	VIRTCHNL_OP_CONFIG_RX_QUEUE = 5,
+	VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6,
+	VIRTCHNL_OP_CONFIG_IRQ_MAP = 7,
+	VIRTCHNL_OP_ENABLE_QUEUES = 8,
+	VIRTCHNL_OP_DISABLE_QUEUES = 9,
+	VIRTCHNL_OP_ADD_ETH_ADDR = 10,
+	VIRTCHNL_OP_DEL_ETH_ADDR = 11,
+	VIRTCHNL_OP_ADD_VLAN = 12,
+	VIRTCHNL_OP_DEL_VLAN = 13,
+	VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
+	VIRTCHNL_OP_GET_STATS = 15,
+	VIRTCHNL_OP_RSVD = 16,
+	VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
+	VIRTCHNL_OP_IWARP = 20,
+	VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
+	VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
+	VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
+	VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
+	VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
+	VIRTCHNL_OP_SET_RSS_HENA = 26,
 
 };
 
@@ -93,16 +93,16 @@ enum i40e_virtchnl_ops {
  * descriptor. All other data is passed in external buffers.
  */
 
-struct i40e_virtchnl_msg {
+struct virtchnl_msg {
 	u8 pad[8];			 /* AQ flags/opcode/len/retval fields */
-	enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
+	enum virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
 	i40e_status v_retval;  /* ditto for desc->retval */
 	u32 vfid;			 /* used by PF when sending to VF */
 };
 
 /* Message descriptions and data structures.*/
 
-/* I40E_VIRTCHNL_OP_VERSION
+/* VIRTCHNL_OP_VERSION
  * VF posts its version number to the PF. PF responds with its version number
  * in the same format, along with a return code.
  * Reply from PF has its major/minor versions also in param0 and param1.
@@ -114,16 +114,16 @@ struct i40e_virtchnl_msg {
  * changes in the API. The PF must always respond to this message without
  * error regardless of version mismatch.
  */
-#define I40E_VIRTCHNL_VERSION_MAJOR		1
-#define I40E_VIRTCHNL_VERSION_MINOR		1
-#define I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS	0
+#define VIRTCHNL_VERSION_MAJOR		1
+#define VIRTCHNL_VERSION_MINOR		1
+#define VIRTCHNL_VERSION_MINOR_NO_VF_CAPS	0
 
-struct i40e_virtchnl_version_info {
+struct virtchnl_version_info {
 	u32 major;
 	u32 minor;
 };
 
-/* I40E_VIRTCHNL_OP_RESET_VF
+/* VIRTCHNL_OP_RESET_VF
  * VF sends this request to PF with no parameters
  * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register
  * until reset completion is indicated. The admin queue must be reinitialized
@@ -135,15 +135,15 @@ struct i40e_virtchnl_version_info {
  * are cleared.
  */
 
-/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES
+/* VIRTCHNL_OP_GET_VF_RESOURCES
  * Version 1.0 VF sends this request to PF with no parameters
  * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities
  * PF responds with an indirect message containing
- * i40e_virtchnl_vf_resource and one or more
- * i40e_virtchnl_vsi_resource structures.
+ * virtchnl_vf_resource and one or more
+ * virtchnl_vsi_resource structures.
  */
 
-struct i40e_virtchnl_vsi_resource {
+struct virtchnl_vsi_resource {
 	u16 vsi_id;
 	u16 num_queue_pairs;
 	enum i40e_vsi_type vsi_type;
@@ -151,23 +151,24 @@ struct i40e_virtchnl_vsi_resource {
 	u8 default_mac_addr[ETH_ALEN];
 };
 /* VF offload flags */
-#define I40E_VIRTCHNL_VF_OFFLOAD_L2		0x00000001
-#define I40E_VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG	0x00000010
-#define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
-#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
-#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
-#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
-#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
-#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00200000
-
-#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
-				    I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
-				    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
-
-struct i40e_virtchnl_vf_resource {
+#define VIRTCHNL_VF_OFFLOAD_L2		0x00000001
+#define VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
+#define VIRTCHNL_VF_OFFLOAD_FCOE		0x00000004
+#define VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
+#define VIRTCHNL_VF_OFFLOAD_RSS_REG	0x00000010
+#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
+#define VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
+#define VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
+#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
+#define VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
+#define VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
+#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00200000
+
+#define I40E_VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
+				    VIRTCHNL_VF_OFFLOAD_VLAN | \
+				    VIRTCHNL_VF_OFFLOAD_RSS_PF)
+
+struct virtchnl_vf_resource {
 	u16 num_vsis;
 	u16 num_queue_pairs;
 	u16 max_vectors;
@@ -177,17 +178,17 @@ struct i40e_virtchnl_vf_resource {
 	u32 rss_key_size;
 	u32 rss_lut_size;
 
-	struct i40e_virtchnl_vsi_resource vsi_res[1];
+	struct virtchnl_vsi_resource vsi_res[1];
 };
 
-/* I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE
+/* VIRTCHNL_OP_CONFIG_TX_QUEUE
  * VF sends this message to set up parameters for one TX queue.
- * External data buffer contains one instance of i40e_virtchnl_txq_info.
+ * External data buffer contains one instance of virtchnl_txq_info.
  * PF configures requested queue and returns a status code.
  */
 
 /* Tx queue config info */
-struct i40e_virtchnl_txq_info {
+struct virtchnl_txq_info {
 	u16 vsi_id;
 	u16 queue_id;
 	u16 ring_len;		/* number of descriptors, multiple of 8 */
@@ -196,14 +197,14 @@ struct i40e_virtchnl_txq_info {
 	u64 dma_headwb_addr;
 };
 
-/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE
+/* VIRTCHNL_OP_CONFIG_RX_QUEUE
  * VF sends this message to set up parameters for one RX queue.
- * External data buffer contains one instance of i40e_virtchnl_rxq_info.
+ * External data buffer contains one instance of virtchnl_rxq_info.
  * PF configures requested queue and returns a status code.
  */
 
 /* Rx queue config info */
-struct i40e_virtchnl_rxq_info {
+struct virtchnl_rxq_info {
 	u16 vsi_id;
 	u16 queue_id;
 	u32 ring_len;		/* number of descriptors, multiple of 32 */
@@ -215,33 +216,33 @@ struct i40e_virtchnl_rxq_info {
 	enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos;
 };
 
-/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES
+/* VIRTCHNL_OP_CONFIG_VSI_QUEUES
  * VF sends this message to set parameters for all active TX and RX queues
  * associated with the specified VSI.
  * PF configures queues and returns status.
  * If the number of queues specified is greater than the number of queues
  * associated with the VSI, an error is returned and no queues are configured.
  */
-struct i40e_virtchnl_queue_pair_info {
+struct virtchnl_queue_pair_info {
 	/* NOTE: vsi_id and queue_id should be identical for both queues. */
-	struct i40e_virtchnl_txq_info txq;
-	struct i40e_virtchnl_rxq_info rxq;
+	struct virtchnl_txq_info txq;
+	struct virtchnl_rxq_info rxq;
 };
 
-struct i40e_virtchnl_vsi_queue_config_info {
+struct virtchnl_vsi_queue_config_info {
 	u16 vsi_id;
 	u16 num_queue_pairs;
-	struct i40e_virtchnl_queue_pair_info qpair[1];
+	struct virtchnl_queue_pair_info qpair[1];
 };
 
-/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP
+/* VIRTCHNL_OP_CONFIG_IRQ_MAP
  * VF uses this message to map vectors to queues.
  * The rxq_map and txq_map fields are bitmaps used to indicate which queues
  * are to be associated with the specified vector.
  * The "other" causes are always mapped to vector 0.
  * PF configures interrupt mapping and returns status.
  */
-struct i40e_virtchnl_vector_map {
+struct virtchnl_vector_map {
 	u16 vsi_id;
 	u16 vector_id;
 	u16 rxq_map;
@@ -250,75 +251,75 @@ struct i40e_virtchnl_vector_map {
 	u16 txitr_idx;
 };
 
-struct i40e_virtchnl_irq_map_info {
+struct virtchnl_irq_map_info {
 	u16 num_vectors;
-	struct i40e_virtchnl_vector_map vecmap[1];
+	struct virtchnl_vector_map vecmap[1];
 };
 
-/* I40E_VIRTCHNL_OP_ENABLE_QUEUES
- * I40E_VIRTCHNL_OP_DISABLE_QUEUES
+/* VIRTCHNL_OP_ENABLE_QUEUES
+ * VIRTCHNL_OP_DISABLE_QUEUES
  * VF sends these message to enable or disable TX/RX queue pairs.
  * The queues fields are bitmaps indicating which queues to act upon.
  * (Currently, we only support 16 queues per VF, but we make the field
  * u32 to allow for expansion.)
  * PF performs requested action and returns status.
  */
-struct i40e_virtchnl_queue_select {
+struct virtchnl_queue_select {
 	u16 vsi_id;
 	u16 pad;
 	u32 rx_queues;
 	u32 tx_queues;
 };
 
-/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS
+/* VIRTCHNL_OP_ADD_ETH_ADDR
  * VF sends this message in order to add one or more unicast or multicast
  * address filters for the specified VSI.
  * PF adds the filters and returns status.
  */
 
-/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS
+/* VIRTCHNL_OP_DEL_ETH_ADDR
  * VF sends this message in order to remove one or more unicast or multicast
  * filters for the specified VSI.
  * PF removes the filters and returns status.
  */
 
-struct i40e_virtchnl_ether_addr {
+struct virtchnl_ether_addr {
 	u8 addr[ETH_ALEN];
 	u8 pad[2];
 };
 
-struct i40e_virtchnl_ether_addr_list {
+struct virtchnl_ether_addr_list {
 	u16 vsi_id;
 	u16 num_elements;
-	struct i40e_virtchnl_ether_addr list[1];
+	struct virtchnl_ether_addr list[1];
 };
 
-/* I40E_VIRTCHNL_OP_ADD_VLAN
+/* VIRTCHNL_OP_ADD_VLAN
  * VF sends this message to add one or more VLAN tag filters for receives.
  * PF adds the filters and returns status.
  * If a port VLAN is configured by the PF, this operation will return an
  * error to the VF.
  */
 
-/* I40E_VIRTCHNL_OP_DEL_VLAN
+/* VIRTCHNL_OP_DEL_VLAN
  * VF sends this message to remove one or more VLAN tag filters for receives.
  * PF removes the filters and returns status.
  * If a port VLAN is configured by the PF, this operation will return an
  * error to the VF.
  */
 
-struct i40e_virtchnl_vlan_filter_list {
+struct virtchnl_vlan_filter_list {
 	u16 vsi_id;
 	u16 num_elements;
 	u16 vlan_id[1];
 };
 
-/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
+/* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
  * VF sends VSI id and flags.
  * PF returns status code in retval.
  * Note: we assume that broadcast accept mode is always enabled.
  */
-struct i40e_virtchnl_promisc_info {
+struct virtchnl_promisc_info {
 	u16 vsi_id;
 	u16 flags;
 };
@@ -326,63 +327,63 @@ struct i40e_virtchnl_promisc_info {
 #define I40E_FLAG_VF_UNICAST_PROMISC	0x00000001
 #define I40E_FLAG_VF_MULTICAST_PROMISC	0x00000002
 
-/* I40E_VIRTCHNL_OP_GET_STATS
+/* VIRTCHNL_OP_GET_STATS
  * VF sends this message to request stats for the selected VSI. VF uses
- * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id
+ * the virtchnl_queue_select struct to specify the VSI. The queue_id
  * field is ignored by the PF.
  *
  * PF replies with struct i40e_eth_stats in an external buffer.
  */
 
-/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY
- * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT
+/* VIRTCHNL_OP_CONFIG_RSS_KEY
+ * VIRTCHNL_OP_CONFIG_RSS_LUT
  * VF sends these messages to configure RSS. Only supported if both PF
- * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
+ * and VF drivers set the VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
  * configuration negotiation. If this is the case, then the RSS fields in
  * the VF resource struct are valid.
  * Both the key and LUT are initialized to 0 by the PF, meaning that
  * RSS is effectively disabled until set up by the VF.
  */
-struct i40e_virtchnl_rss_key {
+struct virtchnl_rss_key {
 	u16 vsi_id;
 	u16 key_len;
 	u8 key[1];         /* RSS hash key, packed bytes */
 };
 
-struct i40e_virtchnl_rss_lut {
+struct virtchnl_rss_lut {
 	u16 vsi_id;
 	u16 lut_entries;
 	u8 lut[1];        /* RSS lookup table*/
 };
 
-/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS
- * I40E_VIRTCHNL_OP_SET_RSS_HENA
+/* VIRTCHNL_OP_GET_RSS_HENA_CAPS
+ * VIRTCHNL_OP_SET_RSS_HENA
  * VF sends these messages to get and set the hash filter enable bits for RSS.
  * By default, the PF sets these to all possible traffic types that the
  * hardware supports. The VF can query this value if it wants to change the
  * traffic types that are hashed by the hardware.
  * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h
  */
-struct i40e_virtchnl_rss_hena {
+struct virtchnl_rss_hena {
 	u64 hena;
 };
 
-/* I40E_VIRTCHNL_OP_EVENT
+/* VIRTCHNL_OP_EVENT
  * PF sends this message to inform the VF driver of events that may affect it.
  * No direct response is expected from the VF, though it may generate other
  * messages in response to this one.
  */
-enum i40e_virtchnl_event_codes {
-	I40E_VIRTCHNL_EVENT_UNKNOWN = 0,
-	I40E_VIRTCHNL_EVENT_LINK_CHANGE,
-	I40E_VIRTCHNL_EVENT_RESET_IMPENDING,
-	I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
+enum virtchnl_event_codes {
+	VIRTCHNL_EVENT_UNKNOWN = 0,
+	VIRTCHNL_EVENT_LINK_CHANGE,
+	VIRTCHNL_EVENT_RESET_IMPENDING,
+	VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
 };
 #define I40E_PF_EVENT_SEVERITY_INFO		0
 #define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM	255
 
-struct i40e_virtchnl_pf_event {
-	enum i40e_virtchnl_event_codes event;
+struct virtchnl_pf_event {
+	enum virtchnl_event_codes event;
 	union {
 		struct {
 			enum i40e_aq_link_speed link_speed;
@@ -393,7 +394,7 @@ struct i40e_virtchnl_pf_event {
 	int severity;
 };
 
-/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
+/* VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
  * VF uses this message to request PF to map IWARP vectors to IWARP queues.
  * The request for this originates from the VF IWARP driver through
  * a client interface between VF LAN and VF IWARP driver.
@@ -412,16 +413,16 @@ struct i40e_virtchnl_pf_event {
 #define I40E_QUEUE_TYPE_PE_AEQ  0x80
 #define I40E_QUEUE_INVALID_IDX  0xFFFF
 
-struct i40e_virtchnl_iwarp_qv_info {
+struct virtchnl_iwarp_qv_info {
 	u32 v_idx; /* msix_vector */
 	u16 ceq_idx;
 	u16 aeq_idx;
 	u8 itr_idx;
 };
 
-struct i40e_virtchnl_iwarp_qvlist_info {
+struct virtchnl_iwarp_qvlist_info {
 	u32 num_vectors;
-	struct i40e_virtchnl_iwarp_qv_info qv_info[1];
+	struct virtchnl_iwarp_qv_info qv_info[1];
 };
 
 /* VF reset states - these are written into the RSTAT register:
@@ -436,11 +437,11 @@ struct i40e_virtchnl_iwarp_qvlist_info {
  * is in a reset state, it will return DEADBEEF, which, when masked
  * will result in 3.
  */
-enum i40e_vfr_states {
-	I40E_VFR_INPROGRESS = 0,
-	I40E_VFR_COMPLETED,
-	I40E_VFR_VFACTIVE,
-	I40E_VFR_UNKNOWN,
+enum virtchnl_vfr_states {
+	VIRTCHNL_VFR_INPROGRESS = 0,
+	VIRTCHNL_VFR_COMPLETED,
+	VIRTCHNL_VFR_VFACTIVE,
+	VIRTCHNL_VFR_UNKNOWN,
 };
 
-#endif /* _I40E_VIRTCHNL_H_ */
+#endif /* _VIRTCHNL_H_ */
-- 
cgit v1.2.3


From eedcfef85b15ae02c488625556702594a618c616 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:13 -0700
Subject: virtchnl: convert to new macros

As part of the conversion, change the arguments
to VF_IS_V1[01] macros and move them to virtchnl.h

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 6 +++---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 3 ---
 include/linux/avf/virtchnl.h                       | 3 +++
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index d7fcc4ffa393..923026a255c0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1481,7 +1481,7 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
 
 	vf->vf_ver = *(struct virtchnl_version_info *)msg;
 	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
-	if (VF_IS_V10(vf))
+	if (VF_IS_V10(&vf->vf_ver))
 		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
 	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
 				      I40E_SUCCESS, (u8 *)&info,
@@ -1521,7 +1521,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 		len = 0;
 		goto err;
 	}
-	if (VF_IS_V11(vf))
+	if (VF_IS_V11(&vf->vf_ver))
 		vf->driver_caps = *(u32 *)msg;
 	else
 		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
@@ -2557,7 +2557,7 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 	case VIRTCHNL_OP_RESET_VF:
 		break;
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
-		if (VF_IS_V11(vf))
+		if (VF_IS_V11(&vf->vf_ver))
 			valid_len = sizeof(u32);
 		break;
 	case VIRTCHNL_OP_CONFIG_TX_QUEUE:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index b57ffffce141..1f4b0c504368 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -40,9 +40,6 @@
 #define I40E_VLAN_MASK			0xFFF
 #define I40E_PRIORITY_MASK		0x7000
 
-#define VF_IS_V10(_v) (((_v)->vf_ver.major == 1) && ((_v)->vf_ver.minor == 0))
-#define VF_IS_V11(_v) (((_v)->vf_ver.major == 1) && ((_v)->vf_ver.minor == 1))
-
 /* Various queue ctrls */
 enum i40e_queue_ctrl {
 	I40E_QUEUE_CTRL_UNKNOWN = 0,
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index a8b616121960..8ffa670c2ffd 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -123,6 +123,9 @@ struct virtchnl_version_info {
 	u32 minor;
 };
 
+#define VF_IS_V10(_v) (((_v)->major == 1) && ((_v)->minor == 0))
+#define VF_IS_V11(_ver) (((_ver)->major == 1) && ((_ver)->minor == 1))
+
 /* VIRTCHNL_OP_RESET_VF
  * VF sends this request to PF with no parameters
  * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register
-- 
cgit v1.2.3


From 764430ce6f8c38d7ed3b6d2cfe9450b9d3c78809 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:14 -0700
Subject: i40e/virtchnl: refactor code for validate checks

This change updates the arguments passed to the validate function
and fixes the caller, as well as uses the new return values added to
virtchnl.h

One other minor tweak, remove a duplicate set to zero of valid_len.

This is in preparation for moving the function to virtchnl.h.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 36 ++++++++++++----------
 include/linux/avf/virtchnl.h                       | 17 ++++++++++
 2 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 923026a255c0..61f948c587ad 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2536,15 +2536,16 @@ err:
 
 /**
  * i40e_vc_validate_vf_msg
- * @vf: pointer to the VF info
+ * @ver: Virtchnl version info
+ * @v_opcode: Opcode for the message
  * @msg: pointer to the msg buffer
  * @msglen: msg length
- * @msghndl: msg handle
  *
- * validate msg
+ * validate msg format against struct for each opcode
  **/
-static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
-				   u32 v_retval, u8 *msg, u16 msglen)
+static int
+i40e_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
+			u8 *msg, u16 msglen)
 {
 	bool err_msg_format = false;
 	int valid_len = 0;
@@ -2557,7 +2558,7 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 	case VIRTCHNL_OP_RESET_VF:
 		break;
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
-		if (VF_IS_V11(&vf->vf_ver))
+		if (VF_IS_V11(ver))
 			valid_len = sizeof(u32);
 		break;
 	case VIRTCHNL_OP_CONFIG_TX_QUEUE:
@@ -2633,7 +2634,6 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 			err_msg_format = true;
 		break;
 	case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
-		valid_len = 0;
 		break;
 	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
 		valid_len = sizeof(struct virtchnl_iwarp_qvlist_info);
@@ -2673,15 +2673,13 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
-		return -EPERM;
+		return VIRTCHNL_ERR_PARAM;
 	}
 	/* few more checks */
-	if ((valid_len != msglen) || (err_msg_format)) {
-		i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_PARAM);
-		return -EINVAL;
-	} else {
-		return 0;
-	}
+	if ((valid_len != msglen) || (err_msg_format))
+		return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH;
+
+	return 0;
 }
 
 /**
@@ -2713,7 +2711,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 		return I40E_ERR_PARAM;
 
 	/* perform basic checks on the msg */
-	ret = i40e_vc_validate_vf_msg(vf, v_opcode, v_retval, msg, msglen);
+	ret = i40e_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
 
 	/* perform additional checks specific to this driver */
 	if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) {
@@ -2729,9 +2727,15 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 	}
 
 	if (ret) {
+		i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_PARAM);
 		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
 			local_vf_id, v_opcode, msglen);
-		return ret;
+		switch (ret) {
+		case VIRTCHNL_ERR_PARAM:
+			return -EPERM;
+		default:
+			return -EINVAL;
+		}
 	}
 
 	switch (v_opcode) {
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 8ffa670c2ffd..f1cc1f02036e 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -53,6 +53,23 @@
  * its queues, optionally add MAC and VLAN filters, and process traffic.
  */
 
+/* START GENERIC DEFINES
+ * Need to ensure the following enums and defines hold the same meaning and
+ * value in current and future projects
+ */
+
+/* Error Codes */
+enum virtchnl_status_code {
+	VIRTCHNL_STATUS_SUCCESS				= 0,
+	VIRTCHNL_ERR_PARAM				= -5,
+	VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH		= -38,
+	VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR		= -39,
+	VIRTCHNL_STATUS_ERR_INVALID_VF_ID		= -40,
+	VIRTCHNL_STATUS_NOT_SUPPORTED			= -64,
+};
+
+/* END GENERIC DEFINES */
+
 /* Opcodes for VF-PF communication. These are placed in the v_opcode field
  * of the virtchnl_msg structure.
  */
-- 
cgit v1.2.3


From f0adc6e831baaef16577ea2af5eb3e91fd4efef4 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:15 -0700
Subject: i40evf/virtchnl: whitespace cleanups

This patch fixes up a bunch of whitespace issues introduced
by the previous automated change of name from i40e to virtchnl.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c  | 12 ++++--------
 drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c |  3 +--
 include/linux/avf/virtchnl.h                        |  6 +++---
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 61f948c587ad..422cccf0de86 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1485,8 +1485,7 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
 		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
 	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
 				      I40E_SUCCESS, (u8 *)&info,
-				      sizeof(struct
-					     virtchnl_version_info));
+				      sizeof(struct virtchnl_version_info));
 }
 
 /**
@@ -1544,11 +1543,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	} else {
 		if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) &&
 		    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ))
-			vfres->vf_offload_flags |=
-					VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+			vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
 		else
-			vfres->vf_offload_flags |=
-					VIRTCHNL_VF_OFFLOAD_RSS_REG;
+			vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
 	}
 
 	if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
@@ -2530,8 +2527,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	/* send the response to the VF */
 err:
-	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA,
-				       aq_ret);
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, aq_ret);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 90a17b0347b9..d9f040900373 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1018,8 +1018,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 				~(BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP));
 		break;
 	case VIRTCHNL_OP_GET_RSS_HENA_CAPS: {
-		struct virtchnl_rss_hena *vrh =
-			(struct virtchnl_rss_hena *)msg;
+		struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg;
 		if (msglen == sizeof(*vrh))
 			adapter->hena = vrh->hena;
 		else
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index f1cc1f02036e..73970bd439fe 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -175,10 +175,10 @@ struct virtchnl_vsi_resource {
 #define VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
 #define VIRTCHNL_VF_OFFLOAD_FCOE		0x00000004
 #define VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
-#define VIRTCHNL_VF_OFFLOAD_RSS_REG	0x00000010
-#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
+#define VIRTCHNL_VF_OFFLOAD_RSS_REG		0x00000010
+#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR		0x00000020
 #define VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
-#define VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
+#define VIRTCHNL_VF_OFFLOAD_RX_POLLING		0x00020000
 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
 #define VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
 #define VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
-- 
cgit v1.2.3


From ff3f4cc267f6f39c2fc525c8918c929809defbfa Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:16 -0700
Subject: virtchnl: finish conversion to virtchnl interface

This patch implements the complete version of the virtchnl.h file
with final renames, and fixes the related code in i40e and i40evf.

It also expands comments, and adds details on the usage of
certain fields.

In addition, due to the changes a couple of casts are needed
to prevent errors found by sparse after renaming some fields.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  18 +--
 drivers/net/ethernet/intel/i40evf/i40e_common.c    |   2 +-
 drivers/net/ethernet/intel/i40evf/i40evf.h         |   2 +-
 drivers/net/ethernet/intel/i40evf/i40evf_main.c    |  11 +-
 .../net/ethernet/intel/i40evf/i40evf_virtchnl.c    |   6 +-
 include/linux/avf/virtchnl.h                       | 128 +++++++++++++--------
 6 files changed, 102 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 422cccf0de86..352d9d2ef3d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -77,7 +77,7 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf)
 	int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id;
 
 	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
-	pfe.severity = I40E_PF_EVENT_SEVERITY_INFO;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
 	if (vf->link_forced) {
 		pfe.event_data.link_event.link_status = vf->link_up;
 		pfe.event_data.link_event.link_speed =
@@ -85,7 +85,8 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf)
 	} else {
 		pfe.event_data.link_event.link_status =
 			ls->link_info & I40E_AQ_LINK_UP;
-		pfe.event_data.link_event.link_speed = ls->link_speed;
+		pfe.event_data.link_event.link_speed =
+			(enum virtchnl_link_speed)ls->link_speed;
 	}
 	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
@@ -116,7 +117,7 @@ void i40e_vc_notify_reset(struct i40e_pf *pf)
 	struct virtchnl_pf_event pfe;
 
 	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
-	pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
 	i40e_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, 0,
 			     (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
 }
@@ -144,7 +145,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
 	abs_vf_id = vf->vf_id + (int)vf->pf->hw.func_caps.vf_base_id;
 
 	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
-	pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
 	i40e_aq_send_msg_to_vf(&vf->pf->hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe,
 			       sizeof(struct virtchnl_pf_event), NULL);
@@ -1586,7 +1587,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 
 	if (vf->lan_vsi_idx) {
 		vfres->vsi_res[0].vsi_id = vf->lan_vsi_id;
-		vfres->vsi_res[0].vsi_type = I40E_VSI_SRIOV;
+		vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
 		vfres->vsi_res[0].num_queue_pairs = vsi->alloc_queue_pairs;
 		/* VFs only use TC 0 */
 		vfres->vsi_res[0].qset_handle
@@ -1680,7 +1681,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 		goto error_param;
 	}
 	/* Multicast promiscuous handling*/
-	if (info->flags & I40E_FLAG_VF_MULTICAST_PROMISC)
+	if (info->flags & FLAG_VF_MULTICAST_PROMISC)
 		allmulti = true;
 
 	if (vf->port_vlan_id) {
@@ -1731,7 +1732,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 			clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states);
 	}
 
-	if (info->flags & I40E_FLAG_VF_UNICAST_PROMISC)
+	if (info->flags & FLAG_VF_UNICAST_PROMISC)
 		alluni = true;
 	if (vf->port_vlan_id) {
 		aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, vsi->seid,
@@ -3241,7 +3242,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
 
 	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
-	pfe.severity = I40E_PF_EVENT_SEVERITY_INFO;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
 
 	switch (link) {
 	case IFLA_VF_LINK_STATE_AUTO:
@@ -3249,6 +3250,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 		pfe.event_data.link_event.link_status =
 			pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP;
 		pfe.event_data.link_event.link_speed =
+			(enum virtchnl_link_speed)
 			pf->hw.phy.link_info.link_speed;
 		break;
 	case IFLA_VF_LINK_STATE_ENABLE:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 9a7d995080b6..9dec7753911c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1107,7 +1107,7 @@ void i40e_vf_parse_hw_config(struct i40e_hw *hw,
 			   VIRTCHNL_VF_OFFLOAD_L2;
 	hw->dev_caps.fcoe = 0;
 	for (i = 0; i < msg->num_vsis; i++) {
-		if (vsi_res->vsi_type == I40E_VSI_SRIOV) {
+		if (vsi_res->vsi_type == VIRTCHNL_VSI_SRIOV) {
 			ether_addr_copy(hw->mac.perm_addr,
 					vsi_res->default_mac_addr);
 			ether_addr_copy(hw->mac.addr,
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 9d8c21b36332..6cc92089fecb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -263,7 +263,7 @@ struct i40evf_adapter {
 	struct work_struct watchdog_task;
 	bool netdev_registered;
 	bool link_up;
-	enum i40e_aq_link_speed link_speed;
+	enum virtchnl_link_speed link_speed;
 	enum virtchnl_ops current_op;
 #define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
 			    (_a)->vf_res->vf_offload_flags & \
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 5d7b613e0d62..1b00274de530 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1707,13 +1707,13 @@ static void i40evf_watchdog_task(struct work_struct *work)
 	}
 
 	if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_PROMISC) {
-		i40evf_set_promiscuous(adapter, I40E_FLAG_VF_UNICAST_PROMISC |
-				       I40E_FLAG_VF_MULTICAST_PROMISC);
+		i40evf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
+				       FLAG_VF_MULTICAST_PROMISC);
 		goto watchdog_done;
 	}
 
 	if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_ALLMULTI) {
-		i40evf_set_promiscuous(adapter, I40E_FLAG_VF_MULTICAST_PROMISC);
+		i40evf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
 		goto watchdog_done;
 	}
 
@@ -1969,7 +1969,8 @@ static void i40evf_adminq_task(struct work_struct *work)
 			break; /* No event to process or error cleaning ARQ */
 
 		i40evf_virtchnl_completion(adapter, v_msg->v_opcode,
-					   v_msg->v_retval, event.msg_buf,
+					   (i40e_status)v_msg->v_retval,
+					   event.msg_buf,
 					   event.msg_len);
 		if (pending != 0)
 			memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
@@ -2410,7 +2411,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 
 	/* got VF config message back from PF, now we can parse it */
 	for (i = 0; i < vfres->num_vsis; i++) {
-		if (vfres->vsi_res[i].vsi_type == I40E_VSI_SRIOV)
+		if (vfres->vsi_res[i].vsi_type == VIRTCHNL_VSI_SRIOV)
 			adapter->vsi_res = &vfres->vsi_res[i];
 	}
 	if (!adapter->vsi_res) {
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index d9f040900373..d2bb250a71af 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -662,15 +662,15 @@ void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags)
 		return;
 	}
 
-	promisc_all = I40E_FLAG_VF_UNICAST_PROMISC |
-		      I40E_FLAG_VF_MULTICAST_PROMISC;
+	promisc_all = FLAG_VF_UNICAST_PROMISC |
+		      FLAG_VF_MULTICAST_PROMISC;
 	if ((flags & promisc_all) == promisc_all) {
 		adapter->flags |= I40EVF_FLAG_PROMISC_ON;
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_REQUEST_PROMISC;
 		dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
 	}
 
-	if (flags & I40E_FLAG_VF_MULTICAST_PROMISC) {
+	if (flags & FLAG_VF_MULTICAST_PROMISC) {
 		adapter->flags |= I40EVF_FLAG_ALLMULTI_ON;
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_REQUEST_ALLMULTI;
 		dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 73970bd439fe..6c6fbb492b5d 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -29,28 +29,29 @@
 
 /* Description:
  * This header file describes the VF-PF communication protocol used
- * by the various i40e drivers.
+ * by the drivers for all devices starting from our 40G product line
  *
  * Admin queue buffer usage:
- * desc->opcode is always i40e_aqc_opc_send_msg_to_pf
+ * desc->opcode is always aqc_opc_send_msg_to_pf
  * flags, retval, datalen, and data addr are all used normally.
- * Firmware copies the cookie fields when sending messages between the PF and
- * VF, but uses all other fields internally. Due to this limitation, we
- * must send all messages as "indirect", i.e. using an external buffer.
+ * The Firmware copies the cookie fields when sending messages between the
+ * PF and VF, but uses all other fields internally. Due to this limitation,
+ * we must send all messages as "indirect", i.e. using an external buffer.
  *
- * All the vsi indexes are relative to the VF. Each VF can have maximum of
+ * All the VSI indexes are relative to the VF. Each VF can have maximum of
  * three VSIs. All the queue indexes are relative to the VSI.  Each VF can
  * have a maximum of sixteen queues for all of its VSIs.
  *
  * The PF is required to return a status code in v_retval for all messages
- * except RESET_VF, which does not require any response. The return value is of
- * i40e_status_code type, defined in the i40e_type.h.
+ * except RESET_VF, which does not require any response. The return value
+ * is of status_code type, defined in the shared type.h.
  *
- * In general, VF driver initialization should roughly follow the order of these
- * opcodes. The VF driver must first validate the API version of the PF driver,
- * then request a reset, then get resources, then configure queues and
- * interrupts. After these operations are complete, the VF driver may start
- * its queues, optionally add MAC and VLAN filters, and process traffic.
+ * In general, VF driver initialization should roughly follow the order of
+ * these opcodes. The VF driver must first validate the API version of the
+ * PF driver, then request a reset, then get resources, then configure
+ * queues and interrupts. After these operations are complete, the VF
+ * driver may start its queues, optionally add MAC and VLAN filters, and
+ * process traffic.
  */
 
 /* START GENERIC DEFINES
@@ -68,6 +69,33 @@ enum virtchnl_status_code {
 	VIRTCHNL_STATUS_NOT_SUPPORTED			= -64,
 };
 
+#define VIRTCHNL_LINK_SPEED_100MB_SHIFT		0x1
+#define VIRTCHNL_LINK_SPEED_1000MB_SHIFT	0x2
+#define VIRTCHNL_LINK_SPEED_10GB_SHIFT		0x3
+#define VIRTCHNL_LINK_SPEED_40GB_SHIFT		0x4
+#define VIRTCHNL_LINK_SPEED_20GB_SHIFT		0x5
+#define VIRTCHNL_LINK_SPEED_25GB_SHIFT		0x6
+
+enum virtchnl_link_speed {
+	VIRTCHNL_LINK_SPEED_UNKNOWN	= 0,
+	VIRTCHNL_LINK_SPEED_100MB	= BIT(VIRTCHNL_LINK_SPEED_100MB_SHIFT),
+	VIRTCHNL_LINK_SPEED_1GB		= BIT(VIRTCHNL_LINK_SPEED_1000MB_SHIFT),
+	VIRTCHNL_LINK_SPEED_10GB	= BIT(VIRTCHNL_LINK_SPEED_10GB_SHIFT),
+	VIRTCHNL_LINK_SPEED_40GB	= BIT(VIRTCHNL_LINK_SPEED_40GB_SHIFT),
+	VIRTCHNL_LINK_SPEED_20GB	= BIT(VIRTCHNL_LINK_SPEED_20GB_SHIFT),
+	VIRTCHNL_LINK_SPEED_25GB	= BIT(VIRTCHNL_LINK_SPEED_25GB_SHIFT),
+};
+
+/* for hsplit_0 field of Rx HMC context */
+/* deprecated with AVF 1.0 */
+enum virtchnl_rx_hsplit {
+	VIRTCHNL_RX_HSPLIT_NO_SPLIT      = 0,
+	VIRTCHNL_RX_HSPLIT_SPLIT_L2      = 1,
+	VIRTCHNL_RX_HSPLIT_SPLIT_IP      = 2,
+	VIRTCHNL_RX_HSPLIT_SPLIT_TCP_UDP = 4,
+	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
+};
+
 /* END GENERIC DEFINES */
 
 /* Opcodes for VF-PF communication. These are placed in the v_opcode field
@@ -77,6 +105,8 @@ enum virtchnl_ops {
 /* The PF sends status change events to VFs using
  * the VIRTCHNL_OP_EVENT opcode.
  * VFs send requests to the PF using the other ops.
+ * Use of "advanced opcode" features must be negotiated as part of capabilities
+ * exchange and are not considered part of base mode feature set.
  */
 	VIRTCHNL_OP_UNKNOWN = 0,
 	VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
@@ -96,14 +126,13 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_GET_STATS = 15,
 	VIRTCHNL_OP_RSVD = 16,
 	VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
-	VIRTCHNL_OP_IWARP = 20,
-	VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
-	VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
+	VIRTCHNL_OP_IWARP = 20, /* advanced opcode */
+	VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, /* advanced opcode */
+	VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, /* advanced opcode */
 	VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
 	VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
 	VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
 	VIRTCHNL_OP_SET_RSS_HENA = 26,
-
 };
 
 /* Virtual channel message descriptor. This overlays the admin queue
@@ -113,7 +142,7 @@ enum virtchnl_ops {
 struct virtchnl_msg {
 	u8 pad[8];			 /* AQ flags/opcode/len/retval fields */
 	enum virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
-	i40e_status v_retval;  /* ditto for desc->retval */
+	enum virtchnl_status_code v_retval;  /* ditto for desc->retval */
 	u32 vfid;			 /* used by PF when sending to VF */
 };
 
@@ -155,6 +184,15 @@ struct virtchnl_version_info {
  * are cleared.
  */
 
+/* VSI types that use VIRTCHNL interface for VF-PF communication. VSI_SRIOV
+ * vsi_type should always be 6 for backward compatibility. Add other fields
+ * as needed.
+ */
+enum virtchnl_vsi_type {
+	VIRTCHNL_VSI_TYPE_INVALID = 0,
+	VIRTCHNL_VSI_SRIOV = 6,
+};
+
 /* VIRTCHNL_OP_GET_VF_RESOURCES
  * Version 1.0 VF sends this request to PF with no parameters
  * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities
@@ -166,14 +204,18 @@ struct virtchnl_version_info {
 struct virtchnl_vsi_resource {
 	u16 vsi_id;
 	u16 num_queue_pairs;
-	enum i40e_vsi_type vsi_type;
+	enum virtchnl_vsi_type vsi_type;
 	u16 qset_handle;
 	u8 default_mac_addr[ETH_ALEN];
 };
-/* VF offload flags */
-#define VIRTCHNL_VF_OFFLOAD_L2		0x00000001
+
+/* VF offload flags
+ * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including
+ * TX/RX Checksum offloading and TSO for non-tunnelled packets.
+ */
+#define VIRTCHNL_VF_OFFLOAD_L2			0x00000001
 #define VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
-#define VIRTCHNL_VF_OFFLOAD_FCOE		0x00000004
+#define VIRTCHNL_VF_OFFLOAD_RSVD		0x00000004
 #define VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
 #define VIRTCHNL_VF_OFFLOAD_RSS_REG		0x00000010
 #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR		0x00000020
@@ -182,11 +224,12 @@ struct virtchnl_vsi_resource {
 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
 #define VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
 #define VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
-#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00200000
+#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
+#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 
-#define I40E_VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
-				    VIRTCHNL_VF_OFFLOAD_VLAN | \
-				    VIRTCHNL_VF_OFFLOAD_RSS_PF)
+#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
+			       VIRTCHNL_VF_OFFLOAD_VLAN | \
+			       VIRTCHNL_VF_OFFLOAD_RSS_PF)
 
 struct virtchnl_vf_resource {
 	u16 num_vsis;
@@ -212,9 +255,9 @@ struct virtchnl_txq_info {
 	u16 vsi_id;
 	u16 queue_id;
 	u16 ring_len;		/* number of descriptors, multiple of 8 */
-	u16 headwb_enabled;
+	u16 headwb_enabled; /* deprecated with AVF 1.0 */
 	u64 dma_ring_addr;
-	u64 dma_headwb_addr;
+	u64 dma_headwb_addr; /* deprecated with AVF 1.0 */
 };
 
 /* VIRTCHNL_OP_CONFIG_RX_QUEUE
@@ -229,11 +272,11 @@ struct virtchnl_rxq_info {
 	u16 queue_id;
 	u32 ring_len;		/* number of descriptors, multiple of 32 */
 	u16 hdr_size;
-	u16 splithdr_enabled;
+	u16 splithdr_enabled; /* deprecated with AVF 1.0 */
 	u32 databuffer_size;
 	u32 max_pkt_size;
 	u64 dma_ring_addr;
-	enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos;
+	enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */
 };
 
 /* VIRTCHNL_OP_CONFIG_VSI_QUEUES
@@ -344,15 +387,15 @@ struct virtchnl_promisc_info {
 	u16 flags;
 };
 
-#define I40E_FLAG_VF_UNICAST_PROMISC	0x00000001
-#define I40E_FLAG_VF_MULTICAST_PROMISC	0x00000002
+#define FLAG_VF_UNICAST_PROMISC	0x00000001
+#define FLAG_VF_MULTICAST_PROMISC	0x00000002
 
 /* VIRTCHNL_OP_GET_STATS
  * VF sends this message to request stats for the selected VSI. VF uses
  * the virtchnl_queue_select struct to specify the VSI. The queue_id
  * field is ignored by the PF.
  *
- * PF replies with struct i40e_eth_stats in an external buffer.
+ * PF replies with struct eth_stats in an external buffer.
  */
 
 /* VIRTCHNL_OP_CONFIG_RSS_KEY
@@ -382,7 +425,6 @@ struct virtchnl_rss_lut {
  * By default, the PF sets these to all possible traffic types that the
  * hardware supports. The VF can query this value if it wants to change the
  * traffic types that are hashed by the hardware.
- * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h
  */
 struct virtchnl_rss_hena {
 	u64 hena;
@@ -399,14 +441,15 @@ enum virtchnl_event_codes {
 	VIRTCHNL_EVENT_RESET_IMPENDING,
 	VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
 };
-#define I40E_PF_EVENT_SEVERITY_INFO		0
-#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM	255
+
+#define PF_EVENT_SEVERITY_INFO		0
+#define PF_EVENT_SEVERITY_CERTAIN_DOOM	255
 
 struct virtchnl_pf_event {
 	enum virtchnl_event_codes event;
 	union {
 		struct {
-			enum i40e_aq_link_speed link_speed;
+			enum virtchnl_link_speed link_speed;
 			bool link_status;
 		} link_event;
 	} event_data;
@@ -426,13 +469,6 @@ struct virtchnl_pf_event {
  * PF configures interrupt mapping and returns status.
  */
 
-/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
- * In order for us to keep the interface simple, SW will define a
- * unique type value for AEQ.
- */
-#define I40E_QUEUE_TYPE_PE_AEQ  0x80
-#define I40E_QUEUE_INVALID_IDX  0xFFFF
-
 struct virtchnl_iwarp_qv_info {
 	u32 v_idx; /* msix_vector */
 	u16 ceq_idx;
@@ -446,8 +482,7 @@ struct virtchnl_iwarp_qvlist_info {
 };
 
 /* VF reset states - these are written into the RSTAT register:
- * I40E_VFGEN_RSTAT1 on the PF
- * I40E_VFGEN_RSTAT on the VF
+ * VFGEN_RSTAT on the VF
  * When the PF initiates a reset, it writes 0
  * When the reset is complete, it writes 1
  * When the PF detects that the VF has recovered, it writes 2
@@ -461,7 +496,6 @@ enum virtchnl_vfr_states {
 	VIRTCHNL_VFR_INPROGRESS = 0,
 	VIRTCHNL_VFR_COMPLETED,
 	VIRTCHNL_VFR_VFACTIVE,
-	VIRTCHNL_VFR_UNKNOWN,
 };
 
 #endif /* _VIRTCHNL_H_ */
-- 
cgit v1.2.3


From 735e35c56bbc91621942dc5111b2970beb00e75a Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 11 May 2017 11:23:17 -0700
Subject: i40e/virtchnl: move function to virtchnl

This moves a function that is needed for the virtchnl interface
from the i40e PF driver over to the virtchnl.h file.

It was manually verified that the function in question is unchanged
except for the function name and function header, which explains
the slight difference in the number of lines removed/added.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 150 +--------------------
 include/linux/avf/virtchnl.h                       | 147 ++++++++++++++++++++
 2 files changed, 148 insertions(+), 149 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 352d9d2ef3d2..6bee254d34ee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2531,154 +2531,6 @@ err:
 	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, aq_ret);
 }
 
-/**
- * i40e_vc_validate_vf_msg
- * @ver: Virtchnl version info
- * @v_opcode: Opcode for the message
- * @msg: pointer to the msg buffer
- * @msglen: msg length
- *
- * validate msg format against struct for each opcode
- **/
-static int
-i40e_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
-			u8 *msg, u16 msglen)
-{
-	bool err_msg_format = false;
-	int valid_len = 0;
-
-	/* Validate message length. */
-	switch (v_opcode) {
-	case VIRTCHNL_OP_VERSION:
-		valid_len = sizeof(struct virtchnl_version_info);
-		break;
-	case VIRTCHNL_OP_RESET_VF:
-		break;
-	case VIRTCHNL_OP_GET_VF_RESOURCES:
-		if (VF_IS_V11(ver))
-			valid_len = sizeof(u32);
-		break;
-	case VIRTCHNL_OP_CONFIG_TX_QUEUE:
-		valid_len = sizeof(struct virtchnl_txq_info);
-		break;
-	case VIRTCHNL_OP_CONFIG_RX_QUEUE:
-		valid_len = sizeof(struct virtchnl_rxq_info);
-		break;
-	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
-		valid_len = sizeof(struct virtchnl_vsi_queue_config_info);
-		if (msglen >= valid_len) {
-			struct virtchnl_vsi_queue_config_info *vqc =
-			    (struct virtchnl_vsi_queue_config_info *)msg;
-			valid_len += (vqc->num_queue_pairs *
-				      sizeof(struct
-					     virtchnl_queue_pair_info));
-			if (vqc->num_queue_pairs == 0)
-				err_msg_format = true;
-		}
-		break;
-	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
-		valid_len = sizeof(struct virtchnl_irq_map_info);
-		if (msglen >= valid_len) {
-			struct virtchnl_irq_map_info *vimi =
-			    (struct virtchnl_irq_map_info *)msg;
-			valid_len += (vimi->num_vectors *
-				      sizeof(struct virtchnl_vector_map));
-			if (vimi->num_vectors == 0)
-				err_msg_format = true;
-		}
-		break;
-	case VIRTCHNL_OP_ENABLE_QUEUES:
-	case VIRTCHNL_OP_DISABLE_QUEUES:
-		valid_len = sizeof(struct virtchnl_queue_select);
-		break;
-	case VIRTCHNL_OP_ADD_ETH_ADDR:
-	case VIRTCHNL_OP_DEL_ETH_ADDR:
-		valid_len = sizeof(struct virtchnl_ether_addr_list);
-		if (msglen >= valid_len) {
-			struct virtchnl_ether_addr_list *veal =
-			    (struct virtchnl_ether_addr_list *)msg;
-			valid_len += veal->num_elements *
-			    sizeof(struct virtchnl_ether_addr);
-			if (veal->num_elements == 0)
-				err_msg_format = true;
-		}
-		break;
-	case VIRTCHNL_OP_ADD_VLAN:
-	case VIRTCHNL_OP_DEL_VLAN:
-		valid_len = sizeof(struct virtchnl_vlan_filter_list);
-		if (msglen >= valid_len) {
-			struct virtchnl_vlan_filter_list *vfl =
-			    (struct virtchnl_vlan_filter_list *)msg;
-			valid_len += vfl->num_elements * sizeof(u16);
-			if (vfl->num_elements == 0)
-				err_msg_format = true;
-		}
-		break;
-	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
-		valid_len = sizeof(struct virtchnl_promisc_info);
-		break;
-	case VIRTCHNL_OP_GET_STATS:
-		valid_len = sizeof(struct virtchnl_queue_select);
-		break;
-	case VIRTCHNL_OP_IWARP:
-		/* These messages are opaque to us and will be validated in
-		 * the RDMA client code. We just need to check for nonzero
-		 * length. The firmware will enforce max length restrictions.
-		 */
-		if (msglen)
-			valid_len = msglen;
-		else
-			err_msg_format = true;
-		break;
-	case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
-		break;
-	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
-		valid_len = sizeof(struct virtchnl_iwarp_qvlist_info);
-		if (msglen >= valid_len) {
-			struct virtchnl_iwarp_qvlist_info *qv =
-				(struct virtchnl_iwarp_qvlist_info *)msg;
-			if (qv->num_vectors == 0) {
-				err_msg_format = true;
-				break;
-			}
-			valid_len += ((qv->num_vectors - 1) *
-				sizeof(struct virtchnl_iwarp_qv_info));
-		}
-		break;
-	case VIRTCHNL_OP_CONFIG_RSS_KEY:
-		valid_len = sizeof(struct virtchnl_rss_key);
-		if (msglen >= valid_len) {
-			struct virtchnl_rss_key *vrk =
-				(struct virtchnl_rss_key *)msg;
-			valid_len += vrk->key_len - 1;
-		}
-		break;
-	case VIRTCHNL_OP_CONFIG_RSS_LUT:
-		valid_len = sizeof(struct virtchnl_rss_lut);
-		if (msglen >= valid_len) {
-			struct virtchnl_rss_lut *vrl =
-				(struct virtchnl_rss_lut *)msg;
-			valid_len += vrl->lut_entries - 1;
-		}
-		break;
-	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
-		break;
-	case VIRTCHNL_OP_SET_RSS_HENA:
-		valid_len = sizeof(struct virtchnl_rss_hena);
-		break;
-	/* These are always errors coming from the VF. */
-	case VIRTCHNL_OP_EVENT:
-	case VIRTCHNL_OP_UNKNOWN:
-	default:
-		return VIRTCHNL_ERR_PARAM;
-	}
-	/* few more checks */
-	if ((valid_len != msglen) || (err_msg_format))
-		return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH;
-
-	return 0;
-}
-
 /**
  * i40e_vc_process_vf_msg
  * @pf: pointer to the PF structure
@@ -2708,7 +2560,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 		return I40E_ERR_PARAM;
 
 	/* perform basic checks on the msg */
-	ret = i40e_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
+	ret = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
 
 	/* perform additional checks specific to this driver */
 	if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) {
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 6c6fbb492b5d..dab76e947b9f 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -498,4 +498,151 @@ enum virtchnl_vfr_states {
 	VIRTCHNL_VFR_VFACTIVE,
 };
 
+/**
+ * virtchnl_vc_validate_vf_msg
+ * @ver: Virtchnl version info
+ * @v_opcode: Opcode for the message
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * validate msg format against struct for each opcode
+ */
+static inline int
+virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
+			    u8 *msg, u16 msglen)
+{
+	bool err_msg_format = false;
+	int valid_len = 0;
+
+	/* Validate message length. */
+	switch (v_opcode) {
+	case VIRTCHNL_OP_VERSION:
+		valid_len = sizeof(struct virtchnl_version_info);
+		break;
+	case VIRTCHNL_OP_RESET_VF:
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
+		if (VF_IS_V11(ver))
+			valid_len = sizeof(u32);
+		break;
+	case VIRTCHNL_OP_CONFIG_TX_QUEUE:
+		valid_len = sizeof(struct virtchnl_txq_info);
+		break;
+	case VIRTCHNL_OP_CONFIG_RX_QUEUE:
+		valid_len = sizeof(struct virtchnl_rxq_info);
+		break;
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		valid_len = sizeof(struct virtchnl_vsi_queue_config_info);
+		if (msglen >= valid_len) {
+			struct virtchnl_vsi_queue_config_info *vqc =
+			    (struct virtchnl_vsi_queue_config_info *)msg;
+			valid_len += (vqc->num_queue_pairs *
+				      sizeof(struct
+					     virtchnl_queue_pair_info));
+			if (vqc->num_queue_pairs == 0)
+				err_msg_format = true;
+		}
+		break;
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		valid_len = sizeof(struct virtchnl_irq_map_info);
+		if (msglen >= valid_len) {
+			struct virtchnl_irq_map_info *vimi =
+			    (struct virtchnl_irq_map_info *)msg;
+			valid_len += (vimi->num_vectors *
+				      sizeof(struct virtchnl_vector_map));
+			if (vimi->num_vectors == 0)
+				err_msg_format = true;
+		}
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		valid_len = sizeof(struct virtchnl_queue_select);
+		break;
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		valid_len = sizeof(struct virtchnl_ether_addr_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_ether_addr_list *veal =
+			    (struct virtchnl_ether_addr_list *)msg;
+			valid_len += veal->num_elements *
+			    sizeof(struct virtchnl_ether_addr);
+			if (veal->num_elements == 0)
+				err_msg_format = true;
+		}
+		break;
+	case VIRTCHNL_OP_ADD_VLAN:
+	case VIRTCHNL_OP_DEL_VLAN:
+		valid_len = sizeof(struct virtchnl_vlan_filter_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_vlan_filter_list *vfl =
+			    (struct virtchnl_vlan_filter_list *)msg;
+			valid_len += vfl->num_elements * sizeof(u16);
+			if (vfl->num_elements == 0)
+				err_msg_format = true;
+		}
+		break;
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		valid_len = sizeof(struct virtchnl_promisc_info);
+		break;
+	case VIRTCHNL_OP_GET_STATS:
+		valid_len = sizeof(struct virtchnl_queue_select);
+		break;
+	case VIRTCHNL_OP_IWARP:
+		/* These messages are opaque to us and will be validated in
+		 * the RDMA client code. We just need to check for nonzero
+		 * length. The firmware will enforce max length restrictions.
+		 */
+		if (msglen)
+			valid_len = msglen;
+		else
+			err_msg_format = true;
+		break;
+	case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+		break;
+	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+		valid_len = sizeof(struct virtchnl_iwarp_qvlist_info);
+		if (msglen >= valid_len) {
+			struct virtchnl_iwarp_qvlist_info *qv =
+				(struct virtchnl_iwarp_qvlist_info *)msg;
+			if (qv->num_vectors == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += ((qv->num_vectors - 1) *
+				sizeof(struct virtchnl_iwarp_qv_info));
+		}
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		valid_len = sizeof(struct virtchnl_rss_key);
+		if (msglen >= valid_len) {
+			struct virtchnl_rss_key *vrk =
+				(struct virtchnl_rss_key *)msg;
+			valid_len += vrk->key_len - 1;
+		}
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		valid_len = sizeof(struct virtchnl_rss_lut);
+		if (msglen >= valid_len) {
+			struct virtchnl_rss_lut *vrl =
+				(struct virtchnl_rss_lut *)msg;
+			valid_len += vrl->lut_entries - 1;
+		}
+		break;
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
+		break;
+	case VIRTCHNL_OP_SET_RSS_HENA:
+		valid_len = sizeof(struct virtchnl_rss_hena);
+		break;
+	/* These are always errors coming from the VF. */
+	case VIRTCHNL_OP_EVENT:
+	case VIRTCHNL_OP_UNKNOWN:
+	default:
+		return VIRTCHNL_ERR_PARAM;
+	}
+	/* few more checks */
+	if ((valid_len != msglen) || (err_msg_format))
+		return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH;
+
+	return 0;
+}
 #endif /* _VIRTCHNL_H_ */
-- 
cgit v1.2.3


From a33c83c4353b2efc4d883bad06a86a9ba2dde4fc Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sridhar.samudrala@intel.com>
Date: Thu, 11 May 2017 11:23:18 -0700
Subject: virtchnl: Add pad fields to a couple of structures

This removes holes and makes structure sizes consistent across
32 and 64 bit builds.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index dab76e947b9f..72466c69f749 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -275,8 +275,10 @@ struct virtchnl_rxq_info {
 	u16 splithdr_enabled; /* deprecated with AVF 1.0 */
 	u32 databuffer_size;
 	u32 max_pkt_size;
+	u32 pad1;
 	u64 dma_ring_addr;
 	enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */
+	u32 pad2;
 };
 
 /* VIRTCHNL_OP_CONFIG_VSI_QUEUES
@@ -295,6 +297,7 @@ struct virtchnl_queue_pair_info {
 struct virtchnl_vsi_queue_config_info {
 	u16 vsi_id;
 	u16 num_queue_pairs;
+	u32 pad;
 	struct virtchnl_queue_pair_info qpair[1];
 };
 
-- 
cgit v1.2.3


From 73556269aab30c39cba9cf8efafc402d0deb87b2 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sridhar.samudrala@intel.com>
Date: Thu, 11 May 2017 11:23:19 -0700
Subject: virtchnl: Add compile time static asserts to validate structure sizes

This uses preprocessor tricks to make sure that a divide by zero
occurs if a struct changes size outside the expected number of
bytes.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 50 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 72466c69f749..c893b9520a67 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -135,6 +135,14 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_SET_RSS_HENA = 26,
 };
 
+/* This macro is used to generate a compilation error if a structure
+ * is not exactly the correct length. It gives a divide by zero error if the
+ * structure is not of the correct size, otherwise it creates an enum that is
+ * never used.
+ */
+#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
+	{ virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+
 /* Virtual channel message descriptor. This overlays the admin queue
  * descriptor. All other data is passed in external buffers.
  */
@@ -146,6 +154,8 @@ struct virtchnl_msg {
 	u32 vfid;			 /* used by PF when sending to VF */
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_msg);
+
 /* Message descriptions and data structures.*/
 
 /* VIRTCHNL_OP_VERSION
@@ -169,6 +179,8 @@ struct virtchnl_version_info {
 	u32 minor;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_version_info);
+
 #define VF_IS_V10(_v) (((_v)->major == 1) && ((_v)->minor == 0))
 #define VF_IS_V11(_ver) (((_ver)->major == 1) && ((_ver)->minor == 1))
 
@@ -209,6 +221,8 @@ struct virtchnl_vsi_resource {
 	u8 default_mac_addr[ETH_ALEN];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
+
 /* VF offload flags
  * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including
  * TX/RX Checksum offloading and TSO for non-tunnelled packets.
@@ -244,6 +258,8 @@ struct virtchnl_vf_resource {
 	struct virtchnl_vsi_resource vsi_res[1];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_vf_resource);
+
 /* VIRTCHNL_OP_CONFIG_TX_QUEUE
  * VF sends this message to set up parameters for one TX queue.
  * External data buffer contains one instance of virtchnl_txq_info.
@@ -260,6 +276,8 @@ struct virtchnl_txq_info {
 	u64 dma_headwb_addr; /* deprecated with AVF 1.0 */
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info);
+
 /* VIRTCHNL_OP_CONFIG_RX_QUEUE
  * VF sends this message to set up parameters for one RX queue.
  * External data buffer contains one instance of virtchnl_rxq_info.
@@ -281,6 +299,8 @@ struct virtchnl_rxq_info {
 	u32 pad2;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_rxq_info);
+
 /* VIRTCHNL_OP_CONFIG_VSI_QUEUES
  * VF sends this message to set parameters for all active TX and RX queues
  * associated with the specified VSI.
@@ -294,6 +314,8 @@ struct virtchnl_queue_pair_info {
 	struct virtchnl_rxq_info rxq;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(64, virtchnl_queue_pair_info);
+
 struct virtchnl_vsi_queue_config_info {
 	u16 vsi_id;
 	u16 num_queue_pairs;
@@ -301,6 +323,8 @@ struct virtchnl_vsi_queue_config_info {
 	struct virtchnl_queue_pair_info qpair[1];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info);
+
 /* VIRTCHNL_OP_CONFIG_IRQ_MAP
  * VF uses this message to map vectors to queues.
  * The rxq_map and txq_map fields are bitmaps used to indicate which queues
@@ -317,11 +341,15 @@ struct virtchnl_vector_map {
 	u16 txitr_idx;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_vector_map);
+
 struct virtchnl_irq_map_info {
 	u16 num_vectors;
 	struct virtchnl_vector_map vecmap[1];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(14, virtchnl_irq_map_info);
+
 /* VIRTCHNL_OP_ENABLE_QUEUES
  * VIRTCHNL_OP_DISABLE_QUEUES
  * VF sends these message to enable or disable TX/RX queue pairs.
@@ -337,6 +365,8 @@ struct virtchnl_queue_select {
 	u32 tx_queues;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_select);
+
 /* VIRTCHNL_OP_ADD_ETH_ADDR
  * VF sends this message in order to add one or more unicast or multicast
  * address filters for the specified VSI.
@@ -354,12 +384,16 @@ struct virtchnl_ether_addr {
 	u8 pad[2];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr);
+
 struct virtchnl_ether_addr_list {
 	u16 vsi_id;
 	u16 num_elements;
 	struct virtchnl_ether_addr list[1];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list);
+
 /* VIRTCHNL_OP_ADD_VLAN
  * VF sends this message to add one or more VLAN tag filters for receives.
  * PF adds the filters and returns status.
@@ -380,6 +414,8 @@ struct virtchnl_vlan_filter_list {
 	u16 vlan_id[1];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list);
+
 /* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
  * VF sends VSI id and flags.
  * PF returns status code in retval.
@@ -390,6 +426,8 @@ struct virtchnl_promisc_info {
 	u16 flags;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_promisc_info);
+
 #define FLAG_VF_UNICAST_PROMISC	0x00000001
 #define FLAG_VF_MULTICAST_PROMISC	0x00000002
 
@@ -416,12 +454,16 @@ struct virtchnl_rss_key {
 	u8 key[1];         /* RSS hash key, packed bytes */
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
+
 struct virtchnl_rss_lut {
 	u16 vsi_id;
 	u16 lut_entries;
 	u8 lut[1];        /* RSS lookup table*/
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
+
 /* VIRTCHNL_OP_GET_RSS_HENA_CAPS
  * VIRTCHNL_OP_SET_RSS_HENA
  * VF sends these messages to get and set the hash filter enable bits for RSS.
@@ -433,6 +475,8 @@ struct virtchnl_rss_hena {
 	u64 hena;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
+
 /* VIRTCHNL_OP_EVENT
  * PF sends this message to inform the VF driver of events that may affect it.
  * No direct response is expected from the VF, though it may generate other
@@ -460,6 +504,8 @@ struct virtchnl_pf_event {
 	int severity;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event);
+
 /* VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
  * VF uses this message to request PF to map IWARP vectors to IWARP queues.
  * The request for this originates from the VF IWARP driver through
@@ -479,11 +525,15 @@ struct virtchnl_iwarp_qv_info {
 	u8 itr_idx;
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info);
+
 struct virtchnl_iwarp_qvlist_info {
 	u32 num_vectors;
 	struct virtchnl_iwarp_qv_info qv_info[1];
 };
 
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info);
+
 /* VF reset states - these are written into the RSTAT register:
  * VFGEN_RSTAT on the VF
  * When the PF initiates a reset, it writes 0
-- 
cgit v1.2.3


From 278cba7eaf5422510fc4a6b5a4d447f17b00506e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 14 Dec 2016 03:35:03 +0200
Subject: drm: omapdrm: Remove unused default display name support

The default display name is both unused and never set by platform data.
Remove default display name module parameter, platform data field and
runtime infrastructure.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/omapdrm/dss/core.c          | 18 ------------------
 drivers/gpu/drm/omapdrm/dss/omapdss.h       |  1 -
 drivers/video/fbdev/omap2/omapfb/dss/core.c |  2 --
 include/linux/platform_data/omapdss.h       |  1 -
 4 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/omapdrm/dss/core.c b/drivers/gpu/drm/omapdrm/dss/core.c
index 3351ce23f413..bc4fa4ec8557 100644
--- a/drivers/gpu/drm/omapdrm/dss/core.c
+++ b/drivers/gpu/drm/omapdrm/dss/core.c
@@ -41,20 +41,8 @@
 
 static struct {
 	struct platform_device *pdev;
-
-	const char *default_display_name;
 } core;
 
-static char *def_disp_name;
-module_param_named(def_disp, def_disp_name, charp, 0);
-MODULE_PARM_DESC(def_disp, "default display name");
-
-const char *omapdss_get_default_display_name(void)
-{
-	return core.default_display_name;
-}
-EXPORT_SYMBOL(omapdss_get_default_display_name);
-
 enum omapdss_version omapdss_get_version(void)
 {
 	struct omap_dss_board_info *pdata = core.pdev->dev.platform_data;
@@ -175,7 +163,6 @@ static void dss_disable_all_devices(void)
 
 static int __init omap_dss_probe(struct platform_device *pdev)
 {
-	struct omap_dss_board_info *pdata = pdev->dev.platform_data;
 	int r;
 
 	core.pdev = pdev;
@@ -186,11 +173,6 @@ static int __init omap_dss_probe(struct platform_device *pdev)
 	if (r)
 		goto err_debugfs;
 
-	if (def_disp_name)
-		core.default_display_name = def_disp_name;
-	else if (pdata->default_display_name)
-		core.default_display_name = pdata->default_display_name;
-
 	return 0;
 
 err_debugfs:
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss.h b/drivers/gpu/drm/omapdrm/dss/omapdss.h
index b19dae1fd6c5..3bee528380cd 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss.h
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss.h
@@ -781,7 +781,6 @@ void omap_dss_put_device(struct omap_dss_device *dssdev);
 struct omap_dss_device *omap_dss_get_next_device(struct omap_dss_device *from);
 struct omap_dss_device *omap_dss_find_device(void *data,
 		int (*match)(struct omap_dss_device *dssdev, void *data));
-const char *omapdss_get_default_display_name(void);
 
 int dss_feat_get_num_mgrs(void);
 int dss_feat_get_num_ovls(void);
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/core.c b/drivers/video/fbdev/omap2/omapfb/dss/core.c
index 29de4827589d..eecf695c16f4 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/core.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/core.c
@@ -206,8 +206,6 @@ static int __init omap_dss_probe(struct platform_device *pdev)
 
 	if (def_disp_name)
 		core.default_display_name = def_disp_name;
-	else if (pdata->default_display_name)
-		core.default_display_name = pdata->default_display_name;
 
 	register_pm_notifier(&omap_dss_pm_notif_block);
 
diff --git a/include/linux/platform_data/omapdss.h b/include/linux/platform_data/omapdss.h
index 679177929045..7feb011ed500 100644
--- a/include/linux/platform_data/omapdss.h
+++ b/include/linux/platform_data/omapdss.h
@@ -27,7 +27,6 @@ enum omapdss_version {
 
 /* Board specific data */
 struct omap_dss_board_info {
-	const char *default_display_name;
 	int (*dsi_enable_pads)(int dsi_id, unsigned int lane_mask);
 	void (*dsi_disable_pads)(int dsi_id, unsigned int lane_mask);
 	int (*set_min_bus_tput)(struct device *dev, unsigned long r);
-- 
cgit v1.2.3


From a9548c55295a4268f9187e1ec93264a0682fa745 Mon Sep 17 00:00:00 2001
From: John Youn <johnyoun@synopsys.com>
Date: Fri, 28 Apr 2017 12:55:20 +0400
Subject: usb: gadget: Allow a non-SuperSpeed gadget to support LPM

This commit allows a gadget that does not support SuperSpeed to indicate
that it supports LPM. It does this by setting the 'lpm_capable' flag in
the gadget structure.

If a gadget sets this, the composite gadget framework will set the
bcdUSB to 0x0201 to indicate that this supports BOS descriptors, and
also return a USB 2.0 Extension descriptor as part of the BOS descriptor
set.

See USB 2.0 LPM ECN Section 3.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Sevak Arakelyan <sevaka@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/composite.c | 8 ++++++--
 include/linux/usb/gadget.h     | 3 +++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index abec93ab81ee..d62f53d7f418 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1608,7 +1608,10 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 					cdev->desc.bcdUSB = cpu_to_le16(0x0210);
 				}
 			} else {
-				cdev->desc.bcdUSB = cpu_to_le16(0x0200);
+				if (gadget->lpm_capable)
+					cdev->desc.bcdUSB = cpu_to_le16(0x0201);
+				else
+					cdev->desc.bcdUSB = cpu_to_le16(0x0200);
 			}
 
 			value = min(w_length, (u16) sizeof cdev->desc);
@@ -1639,7 +1642,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 				value = min(w_length, (u16) value);
 			break;
 		case USB_DT_BOS:
-			if (gadget_is_superspeed(gadget)) {
+			if (gadget_is_superspeed(gadget) ||
+			    gadget->lpm_capable) {
 				value = bos_desc(cdev);
 				value = min(w_length, (u16) value);
 			}
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index fbc22a39e7bc..3ee5f2a7c0b4 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -352,6 +352,8 @@ struct usb_gadget_ops {
  * @deactivated: True if gadget is deactivated - in deactivated state it cannot
  *	be connected.
  * @connected: True if gadget is connected.
+ * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag
+ *	indicates that it supports LPM as per the LPM ECN & errata.
  *
  * Gadgets have a mostly-portable "gadget driver" implementing device
  * functions, handling all usb configurations and interfaces.  Gadget
@@ -404,6 +406,7 @@ struct usb_gadget {
 	unsigned			is_selfpowered:1;
 	unsigned			deactivated:1;
 	unsigned			connected:1;
+	unsigned			lpm_capable:1;
 };
 #define work_to_gadget(w)	(container_of((w), struct usb_gadget, work))
 
-- 
cgit v1.2.3


From 3a5f8997dc643a0e0e9a0895c2214b21e5e774a2 Mon Sep 17 00:00:00 2001
From: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Date: Thu, 1 Jun 2017 15:37:02 +0800
Subject: team: add macro MODULE_ALIAS_TEAM_MODE for team mode alias

Add a new macro MODULE_ALIAS_TEAM_MODE to unify and simplify the
declaration of team mode alias.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team_mode_activebackup.c | 2 +-
 drivers/net/team/team_mode_broadcast.c    | 2 +-
 drivers/net/team/team_mode_loadbalance.c  | 2 +-
 drivers/net/team/team_mode_random.c       | 2 +-
 drivers/net/team/team_mode_roundrobin.c   | 2 +-
 include/linux/if_team.h                   | 2 ++
 6 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
index 3f189823ba3b..ddd16a0c1fc1 100644
--- a/drivers/net/team/team_mode_activebackup.c
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -146,4 +146,4 @@ module_exit(ab_cleanup_module);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
 MODULE_DESCRIPTION("Active-backup mode for team");
-MODULE_ALIAS("team-mode-activebackup");
+MODULE_ALIAS_TEAM_MODE("activebackup");
diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c
index 302ff35b0cbc..e4eac3de1862 100644
--- a/drivers/net/team/team_mode_broadcast.c
+++ b/drivers/net/team/team_mode_broadcast.c
@@ -75,4 +75,4 @@ module_exit(bc_cleanup_module);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
 MODULE_DESCRIPTION("Broadcast mode for team");
-MODULE_ALIAS("team-mode-broadcast");
+MODULE_ALIAS_TEAM_MODE("broadcast");
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index b228bea7931f..1468ddf424cc 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -695,4 +695,4 @@ module_exit(lb_cleanup_module);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
 MODULE_DESCRIPTION("Load-balancing mode for team");
-MODULE_ALIAS("team-mode-loadbalance");
+MODULE_ALIAS_TEAM_MODE("loadbalance");
diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c
index 215f845782db..c20b9446e2e4 100644
--- a/drivers/net/team/team_mode_random.c
+++ b/drivers/net/team/team_mode_random.c
@@ -65,4 +65,4 @@ module_exit(rnd_cleanup_module);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
 MODULE_DESCRIPTION("Random mode for team");
-MODULE_ALIAS("team-mode-random");
+MODULE_ALIAS_TEAM_MODE("random");
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
index 0aa234118c03..66c3209dc4a6 100644
--- a/drivers/net/team/team_mode_roundrobin.c
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -77,4 +77,4 @@ module_exit(rr_cleanup_module);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
 MODULE_DESCRIPTION("Round-robin mode for team");
-MODULE_ALIAS("team-mode-roundrobin");
+MODULE_ALIAS_TEAM_MODE("roundrobin");
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index c05216a8fbac..30294603526f 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -298,4 +298,6 @@ extern void team_mode_unregister(const struct team_mode *mode);
 #define TEAM_DEFAULT_NUM_TX_QUEUES 16
 #define TEAM_DEFAULT_NUM_RX_QUEUES 16
 
+#define MODULE_ALIAS_TEAM_MODE(kind) MODULE_ALIAS("team-mode-" kind)
+
 #endif /* _LINUX_IF_TEAM_H_ */
-- 
cgit v1.2.3


From 3c5da94278026a4583320f97f6547573fb3a93aa Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 2 Jun 2017 08:58:31 +0300
Subject: qed: Share additional information with qedf

Share several new tidbits with qedf:
 - wwpn & wwnn
 - Absolute pf-id [this one is actually meant for qedi as well]
 - Number of available CQs

While we're at it, now that qedf will be aware of the available CQs
we can add some validation on the inputs it provides.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c  |  8 +++++++-
 drivers/net/ethernet/qlogic/qed/qed_fcoe.c | 14 ++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c |  2 ++
 include/linux/qed/qed_fcoe_if.h            |  5 +++++
 include/linux/qed/qed_if.h                 |  2 ++
 5 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 7649f35000db..2d88d4883483 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2071,16 +2071,22 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 							 QED_VF_L2_QUE));
 	}
 
+	if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
+		feat_num[QED_FCOE_CQ] =  min_t(u32, sb_cnt.cnt,
+					       RESC_NUM(p_hwfn,
+							QED_CMDQS_CQS));
+
 	if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
 		feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt,
 					       RESC_NUM(p_hwfn,
 							QED_CMDQS_CQS));
 	DP_VERBOSE(p_hwfn,
 		   NETIF_MSG_PROBE,
-		   "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d ISCSI_CQ=%d #SBS=%d\n",
+		   "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d FCOE_CQ=%d ISCSI_CQ=%d #SBS=%d\n",
 		   (int)FEAT_NUM(p_hwfn, QED_PF_L2_QUE),
 		   (int)FEAT_NUM(p_hwfn, QED_VF_L2_QUE),
 		   (int)FEAT_NUM(p_hwfn, QED_RDMA_CNQ),
+		   (int)FEAT_NUM(p_hwfn, QED_FCOE_CQ),
 		   (int)FEAT_NUM(p_hwfn, QED_ISCSI_CQ),
 		   (int)sb_cnt.cnt);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 3fc4ff22960e..df195c02b711 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -141,6 +141,15 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
 	p_data = &p_ramrod->init_ramrod_data;
 	fcoe_pf_params = &p_hwfn->pf_params.fcoe_pf_params;
 
+	/* Sanity */
+	if (fcoe_pf_params->num_cqs > p_hwfn->hw_info.feat_num[QED_FCOE_CQ]) {
+		DP_ERR(p_hwfn,
+		       "Cannot satisfy CQ amount. CQs requested %d, CQs available %d. Aborting function start\n",
+		       fcoe_pf_params->num_cqs,
+		       p_hwfn->hw_info.feat_num[QED_FCOE_CQ]);
+		return -EINVAL;
+	}
+
 	p_data->mtu = cpu_to_le16(fcoe_pf_params->mtu);
 	tmp = cpu_to_le16(fcoe_pf_params->sq_num_pbl_pages);
 	p_data->sq_num_pages_in_pbl = tmp;
@@ -739,6 +748,11 @@ static int qed_fill_fcoe_dev_info(struct qed_dev *cdev,
 	info->secondary_bdq_rq_addr =
 	    qed_fcoe_get_secondary_bdq_prod(hwfn, BDQ_ID_RQ);
 
+	info->wwpn = hwfn->mcp_info->func_info.wwn_port;
+	info->wwnn = hwfn->mcp_info->func_info.wwn_node;
+
+	info->num_cqs = FEAT_NUM(hwfn, QED_FCOE_CQ);
+
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index ac3bdcd9f0b6..baebd5926895 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -269,6 +269,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 		if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
 		    QED_WOL_SUPPORT_PME)
 			dev_info->wol_support = true;
+
+		dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
 	} else {
 		qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
 				      &dev_info->fw_minor, &dev_info->fw_rev,
diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h
index bd6bcb809415..1e015c50e6b8 100644
--- a/include/linux/qed/qed_fcoe_if.h
+++ b/include/linux/qed/qed_fcoe_if.h
@@ -24,6 +24,11 @@ struct qed_dev_fcoe_info {
 
 	void __iomem *primary_dbq_rq_addr;
 	void __iomem *secondary_bdq_rq_addr;
+
+	u64 wwpn;
+	u64 wwnn;
+
+	u8 num_cqs;
 };
 
 struct qed_fcoe_params_offload {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 607e1c5e185a..e29c6f74a4d4 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -360,6 +360,8 @@ struct qed_dev_info {
 	bool		vxlan_enable;
 	bool		gre_enable;
 	bool		geneve_enable;
+
+	u8		abs_pf_id;
 };
 
 enum qed_sb_type {
-- 
cgit v1.2.3


From 20675b37ee76d11430fd3d4da0851fc6a4e36abc Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 2 Jun 2017 08:58:32 +0300
Subject: qed: Support NVM-image reading API

Storage drivers require images from the nvram in boot-from-SAN
scenarios. This provides the necessary API between qed and the
protocol drivers to perform such reads.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 16 ++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 89 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 21 +++++++
 include/linux/qed/qed_if.h                 | 18 ++++++
 4 files changed, 144 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index baebd5926895..6ac10ce14e5b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1535,6 +1535,21 @@ static int qed_drain(struct qed_dev *cdev)
 	return 0;
 }
 
+static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
+			     u8 *buf, u16 len)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
+	int rc;
+
+	if (!ptt)
+		return -EAGAIN;
+
+	rc = qed_mcp_get_nvm_image(hwfn, ptt, type, buf, len);
+	qed_ptt_release(hwfn, ptt);
+	return rc;
+}
+
 static void qed_get_coalesce(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal)
 {
 	*rx_coal = cdev->rx_coalesce_usecs;
@@ -1712,6 +1727,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.dbg_all_data_size = &qed_dbg_all_data_size,
 	.chain_alloc = &qed_chain_alloc,
 	.chain_free = &qed_chain_free,
+	.nvm_get_image = &qed_nvm_get_image,
 	.get_coalesce = &qed_get_coalesce,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index e82f32950361..9da91045d167 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2310,6 +2310,95 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+static int
+qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  enum qed_nvm_images image_id,
+			  struct qed_nvm_image_att *p_image_att)
+{
+	struct bist_nvm_image_att mfw_image_att;
+	enum nvm_image_type type;
+	u32 num_images, i;
+	int rc;
+
+	/* Translate image_id into MFW definitions */
+	switch (image_id) {
+	case QED_NVM_IMAGE_ISCSI_CFG:
+		type = NVM_TYPE_ISCSI_CFG;
+		break;
+	case QED_NVM_IMAGE_FCOE_CFG:
+		type = NVM_TYPE_FCOE_CFG;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Unknown request of image_id %08x\n",
+			  image_id);
+		return -EINVAL;
+	}
+
+	/* Learn number of images, then traverse and see if one fits */
+	rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images);
+	if (rc || !num_images)
+		return -EINVAL;
+
+	for (i = 0; i < num_images; i++) {
+		rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt,
+							 &mfw_image_att, i);
+		if (rc)
+			return rc;
+
+		if (type == mfw_image_att.image_type)
+			break;
+	}
+	if (i == num_images) {
+		DP_VERBOSE(p_hwfn, QED_MSG_STORAGE,
+			   "Failed to find nvram image of type %08x\n",
+			   image_id);
+		return -EINVAL;
+	}
+
+	p_image_att->start_addr = mfw_image_att.nvm_start_addr;
+	p_image_att->length = mfw_image_att.len;
+
+	return 0;
+}
+
+int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  enum qed_nvm_images image_id,
+			  u8 *p_buffer, u32 buffer_len)
+{
+	struct qed_nvm_image_att image_att;
+	int rc;
+
+	memset(p_buffer, 0, buffer_len);
+
+	rc = qed_mcp_get_nvm_image_att(p_hwfn, p_ptt, image_id, &image_att);
+	if (rc)
+		return rc;
+
+	/* Validate sizes - both the image's and the supplied buffer's */
+	if (image_att.length <= 4) {
+		DP_VERBOSE(p_hwfn, QED_MSG_STORAGE,
+			   "Image [%d] is too small - only %d bytes\n",
+			   image_id, image_att.length);
+		return -EINVAL;
+	}
+
+	/* Each NVM image is suffixed by CRC; Upper-layer has no need for it */
+	image_att.length -= 4;
+
+	if (image_att.length > buffer_len) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_STORAGE,
+			   "Image [%d] is too big - %08x bytes where only %08x are available\n",
+			   image_id, image_att.length, buffer_len);
+		return -ENOMEM;
+	}
+
+	return qed_mcp_nvm_read(p_hwfn->cdev, image_att.start_addr,
+				p_buffer, image_att.length);
+}
+
 static enum resource_id_enum qed_mcp_get_mfw_res_id(enum qed_resources res_id)
 {
 	enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 40247593e772..af03b3651411 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -430,6 +430,27 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
  */
 int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
 
+struct qed_nvm_image_att {
+	u32 start_addr;
+	u32 length;
+};
+
+/**
+ * @brief Allows reading a whole nvram image
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param image_id - image requested for reading
+ * @param p_buffer - allocated buffer into which to fill data
+ * @param buffer_len - length of the allocated buffer.
+ *
+ * @return 0 iff p_buffer now contains the nvram image.
+ */
+int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  enum qed_nvm_images image_id,
+			  u8 *p_buffer, u32 buffer_len);
+
 /**
  * @brief Bist register test
  *
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index e29c6f74a4d4..567ea3ea6c0e 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -156,6 +156,11 @@ struct qed_dcbx_get {
 	struct qed_dcbx_admin_params local;
 };
 
+enum qed_nvm_images {
+	QED_NVM_IMAGE_ISCSI_CFG,
+	QED_NVM_IMAGE_FCOE_CFG,
+};
+
 enum qed_led_mode {
 	QED_LED_MODE_OFF,
 	QED_LED_MODE_ON,
@@ -630,6 +635,19 @@ struct qed_common_ops {
 	void		(*chain_free)(struct qed_dev *cdev,
 				      struct qed_chain *p_chain);
 
+/**
+ * @brief nvm_get_image - reads an entire image from nvram
+ *
+ * @param cdev
+ * @param type - type of the request nvram image
+ * @param buf - preallocated buffer to fill with the image
+ * @param len - length of the allocated buffer
+ *
+ * @return 0 on success, error otherwise
+ */
+	int (*nvm_get_image)(struct qed_dev *cdev,
+			     enum qed_nvm_images type, u8 *buf, u16 len);
+
 /**
  * @brief get_coalesce - Get coalesce parameters in usec
  *
-- 
cgit v1.2.3


From dc4528e9e890f82900d75ac6276aba8ce89a80b6 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 2 Jun 2017 08:58:33 +0300
Subject: qed: Add support for changing iSCSI mac

Enhance API between qedi and qed, allowing qedi to inform device's
firmware when the iSCSI mac is to be changed.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 66 +++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_sp.h    |  1 +
 include/linux/qed/qed_iscsi_if.h            |  7 +++
 3 files changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index bc8ce09d390f..6103723d7118 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -488,6 +488,54 @@ static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn,
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
+static int
+qed_sp_iscsi_mac_update(struct qed_hwfn *p_hwfn,
+			struct qed_iscsi_conn *p_conn,
+			enum spq_mode comp_mode,
+			struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_conn_mac_update *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+	u8 ucval;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_MAC_UPDATE,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_mac_update;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_MAC_UPDATE;
+	SET_FIELD(p_ramrod->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+	ucval = p_conn->remote_mac[1];
+	((u8 *)(&p_ramrod->remote_mac_addr_hi))[0] = ucval;
+	ucval = p_conn->remote_mac[0];
+	((u8 *)(&p_ramrod->remote_mac_addr_hi))[1] = ucval;
+	ucval = p_conn->remote_mac[3];
+	((u8 *)(&p_ramrod->remote_mac_addr_mid))[0] = ucval;
+	ucval = p_conn->remote_mac[2];
+	((u8 *)(&p_ramrod->remote_mac_addr_mid))[1] = ucval;
+	ucval = p_conn->remote_mac[5];
+	((u8 *)(&p_ramrod->remote_mac_addr_lo))[0] = ucval;
+	ucval = p_conn->remote_mac[4];
+	((u8 *)(&p_ramrod->remote_mac_addr_lo))[1] = ucval;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
 static int qed_sp_iscsi_conn_terminate(struct qed_hwfn *p_hwfn,
 				       struct qed_iscsi_conn *p_conn,
 				       enum spq_mode comp_mode,
@@ -1324,6 +1372,23 @@ static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
 	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
 }
 
+static int qed_iscsi_change_mac(struct qed_dev *cdev,
+				u32 handle, const u8 *mac)
+{
+	struct qed_hash_iscsi_con *hash_con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	return qed_sp_iscsi_mac_update(QED_LEADING_HWFN(cdev),
+				       hash_con->con,
+				       QED_SPQ_MODE_EBLOCK, NULL);
+}
+
 void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
 				  struct qed_mcp_iscsi_stats *stats)
 {
@@ -1358,6 +1423,7 @@ static const struct qed_iscsi_ops qed_iscsi_ops_pass = {
 	.destroy_conn = &qed_iscsi_destroy_conn,
 	.clear_sq = &qed_iscsi_clear_conn_sq,
 	.get_stats = &qed_iscsi_stats,
+	.change_mac = &qed_iscsi_change_mac,
 };
 
 const struct qed_iscsi_ops *qed_get_iscsi_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index b9464f3ab0e2..00dd50f8c42f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -120,6 +120,7 @@ union ramrod_data {
 	struct iscsi_spe_func_dstry iscsi_destroy;
 	struct iscsi_spe_conn_offload iscsi_conn_offload;
 	struct iscsi_conn_update_ramrod_params iscsi_conn_update;
+	struct iscsi_spe_conn_mac_update iscsi_conn_mac_update;
 	struct iscsi_spe_conn_termination iscsi_conn_terminate;
 
 	struct vf_start_ramrod_data vf_start;
diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h
index 3414649133d2..111e606a74c8 100644
--- a/include/linux/qed/qed_iscsi_if.h
+++ b/include/linux/qed/qed_iscsi_if.h
@@ -210,6 +210,11 @@ struct qed_iscsi_cb_ops {
  *			@param stats - pointer to struck that would be filled
  *				we stats
  *			@return 0 on success, error otherwise.
+ * @change_mac		Change MAC of interface
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param mac - new MAC to configure.
+ *			@return 0 on success, otherwise error value.
  */
 struct qed_iscsi_ops {
 	const struct qed_common_ops *common;
@@ -248,6 +253,8 @@ struct qed_iscsi_ops {
 
 	int (*get_stats)(struct qed_dev *cdev,
 			 struct qed_iscsi_stats *stats);
+
+	int (*change_mac)(struct qed_dev *cdev, u32 handle, const u8 *mac);
 };
 
 const struct qed_iscsi_ops *qed_get_iscsi_ops(void);
-- 
cgit v1.2.3


From 428c9de583921c4b699622272c04af4e362c474c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 28 Apr 2017 15:08:53 +0200
Subject: clk: Provide dummy of_clk_get_from_provider() for compile-testing

When CONFIG_ON=n, dummies are provided for of_clk_get() and
of_clk_get_by_name(), but not for of_clk_get_from_provider().

Provide a dummy for the latter, to improve the ability to do
compile-testing.  This requires removing the existing dummy in the
Lantiq clock code.

Fixes: 766e6a4ec602d0c1 ("clk: add DT clock binding support")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Thomas Langer <thomas.langer@intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 arch/mips/lantiq/clk.c | 5 -----
 include/linux/clk.h    | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
index 149f0513c4f5..a263d1b751ff 100644
--- a/arch/mips/lantiq/clk.c
+++ b/arch/mips/lantiq/clk.c
@@ -160,11 +160,6 @@ void clk_deactivate(struct clk *clk)
 }
 EXPORT_SYMBOL(clk_deactivate);
 
-struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec)
-{
-	return NULL;
-}
-
 static inline u32 get_counter_resolution(void)
 {
 	u32 res;
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 024cd07870d0..e790d19b8ffb 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -539,6 +539,10 @@ static inline struct clk *of_clk_get_by_name(struct device_node *np,
 {
 	return ERR_PTR(-ENOENT);
 }
+static inline struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec)
+{
+	return ERR_PTR(-ENOENT);
+}
 #endif
 
 #endif
-- 
cgit v1.2.3


From d2f31c49cf7cfe8f02b70614ae56a39b0c1d8a75 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 18 May 2017 23:11:39 +0200
Subject: i2c: sh_mobile: remove platform_data

No platform currently upstream makes use of this platform_data anymore.
The ones that did are converted to DT meanwhile. So, remove it. The old
platforms likely don't have the 'clks_per_cnt' feature, otherwise it
would have been implemented by now. And in the unlikely case they need
to setup a different bus speed, we should rather go for a generic i2c
platform data just for that.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-sh_mobile.c |  8 --------
 include/linux/i2c/i2c-sh_mobile.h  | 11 -----------
 2 files changed, 19 deletions(-)
 delete mode 100644 include/linux/i2c/i2c-sh_mobile.h

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 3d7559348745..d5e39eccae9b 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -24,7 +24,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
-#include <linux/i2c/i2c-sh_mobile.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -879,7 +878,6 @@ static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, struct sh_mobile
 
 static int sh_mobile_i2c_probe(struct platform_device *dev)
 {
-	struct i2c_sh_mobile_platform_data *pdata = dev_get_platdata(&dev->dev);
 	struct sh_mobile_i2c_data *pd;
 	struct i2c_adapter *adap;
 	struct resource *res;
@@ -910,7 +908,6 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 	if (IS_ERR(pd->reg))
 		return PTR_ERR(pd->reg);
 
-	/* Use platform data bus speed or STANDARD_MODE */
 	ret = of_property_read_u32(dev->dev.of_node, "clock-frequency", &bus_speed);
 	pd->bus_speed = ret ? STANDARD_MODE : bus_speed;
 
@@ -929,11 +926,6 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 			if (config->setup)
 				config->setup(pd);
 		}
-	} else {
-		if (pdata && pdata->bus_speed)
-			pd->bus_speed = pdata->bus_speed;
-		if (pdata && pdata->clks_per_count)
-			pd->clks_per_count = pdata->clks_per_count;
 	}
 
 	/* The IIC blocks on SH-Mobile ARM processors
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
deleted file mode 100644
index 06e3089795fb..000000000000
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __I2C_SH_MOBILE_H__
-#define __I2C_SH_MOBILE_H__
-
-#include <linux/platform_device.h>
-
-struct i2c_sh_mobile_platform_data {
-	unsigned long bus_speed;
-	unsigned int clks_per_count;
-};
-
-#endif /* __I2C_SH_MOBILE_H__ */
-- 
cgit v1.2.3


From 1bde33e051233f0ed93a8bc67137016ab38c3d2d Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 2 Jun 2017 14:46:13 -0700
Subject: include/linux/gfp.h: fix ___GFP_NOLOCKDEP value

Igor Stoppa has noticed that __GFP_NOLOCKDEP can use a lower bit.  At
the time commit 7e7844226f10 ("lockdep: allow to disable reclaim lockup
detection") was written we still had __GFP_OTHER_NODE but I have removed
it in commit 41b6167e8f74 ("mm: get rid of __GFP_OTHER_NODE") and forgot
to lower the bit value.

The current value is outside of __GFP_BITS_SHIFT so it cannot be used
actually.

Fixes: 7e7844226f10 ("lockdep: allow to disable reclaim lockup detection")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Igor Stoppa <igor.stoppa@nokia.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b1a44f5bdb6..a89d37e8b387 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -41,7 +41,7 @@ struct vm_area_struct;
 #define ___GFP_WRITE		0x800000u
 #define ___GFP_KSWAPD_RECLAIM	0x1000000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x4000000u
+#define ___GFP_NOLOCKDEP	0x2000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
-- 
cgit v1.2.3


From 60b0a8c3d2480f3b57282b47b7cae7ee71c48635 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Fri, 2 Jun 2017 14:46:16 -0700
Subject: frv: declare jiffies to be located in the .data section

Commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with
____cacheline_aligned_in_smp") removed a section specification from the
jiffies declaration that caused conflicts on some platforms.

Unfortunately this change broke the build for frv:

  kernel/built-in.o: In function `__do_softirq': (.text+0x6460): relocation truncated to fit: R_FRV_GPREL12 against symbol
      `jiffies' defined in *ABS* section in .tmp_vmlinux1
  kernel/built-in.o: In function `__do_softirq': (.text+0x6574): relocation truncated to fit: R_FRV_GPREL12 against symbol
      `jiffies' defined in *ABS* section in .tmp_vmlinux1
  kernel/built-in.o: In function `pwq_activate_delayed_work': workqueue.c:(.text+0x15b9c): relocation truncated to fit: R_FRV_GPREL12 against
      symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1
  ...

Add __jiffy_arch_data to the declaration of jiffies and use it on frv to
include the section specification.  For all other platforms
__jiffy_arch_data (currently) has no effect.

Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp")
Link: http://lkml.kernel.org/r/20170516221333.177280-1-mka@chromium.org
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: David Howells <dhowells@redhat.com>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/include/asm/timex.h | 6 ++++++
 include/linux/jiffies.h      | 6 +++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h
index a89bddefdacf..139093fab326 100644
--- a/arch/frv/include/asm/timex.h
+++ b/arch/frv/include/asm/timex.h
@@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void)
 #define vxtime_lock()		do {} while (0)
 #define vxtime_unlock()		do {} while (0)
 
+/* This attribute is used in include/linux/jiffies.h alongside with
+ * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
+ * for frv does not contain another section specification.
+ */
+#define __jiffy_arch_data	__attribute__((__section__(".data")))
+
 #endif
 
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 36872fbb815d..734377ad42e9 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -64,13 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
+#ifndef __jiffy_arch_data
+#define __jiffy_arch_data
+#endif
+
 /*
  * The 64-bit value is not atomic - you MUST NOT read it
  * without sampling the sequence number in jiffies_lock.
  * get_jiffies_64() will do this for you as appropriate.
  */
 extern u64 __cacheline_aligned_in_smp jiffies_64;
-extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
+extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
-- 
cgit v1.2.3


From 9a291a7c9428155e8e623e4a3989f8be47134df5 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 2 Jun 2017 14:46:46 -0700
Subject: mm/hugetlb: report -EHWPOISON not -EFAULT when FOLL_HWPOISON is
 specified

KVM uses get_user_pages() to resolve its stage2 faults.  KVM sets the
FOLL_HWPOISON flag causing faultin_page() to return -EHWPOISON when it
finds a VM_FAULT_HWPOISON.  KVM handles these hwpoison pages as a
special case.  (check_user_page_hwpoison())

When huge pages are involved, this doesn't work so well.
get_user_pages() calls follow_hugetlb_page(), which stops early if it
receives VM_FAULT_HWPOISON from hugetlb_fault(), eventually returning
-EFAULT to the caller.  The step to map this to -EHWPOISON based on the
FOLL_ flags is missing.  The hwpoison special case is skipped, and
-EFAULT is returned to user-space, causing Qemu or kvmtool to exit.

Instead, move this VM_FAULT_ to errno mapping code into a header file
and use it from faultin_page() and follow_hugetlb_page().

With this, KVM works as expected.

This isn't a problem for arm64 today as we haven't enabled
MEMORY_FAILURE, but I can't see any reason this doesn't happen on x86
too, so I think this should be a fix.  This doesn't apply earlier than
stable's v4.11.1 due to all sorts of cleanup.

[james.morse@arm.com: add vm_fault_to_errno() call to faultin_page()]
suggested.
  Link: http://lkml.kernel.org/r/20170525171035.16359-1-james.morse@arm.com
[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20170524160900.28786-1-james.morse@arm.com
Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org>	[4.11.1+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 11 +++++++++++
 mm/gup.c           | 20 ++++++++------------
 mm/hugetlb.c       |  5 +++++
 3 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cb17c6b97de..b892e95d4929 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2327,6 +2327,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 
+static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+{
+	if (vm_fault & VM_FAULT_OOM)
+		return -ENOMEM;
+	if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+		return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
+	if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
+		return -EFAULT;
+	return 0;
+}
+
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
diff --git a/mm/gup.c b/mm/gup.c
index d9e6fddcc51f..b3c7214d710d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -407,12 +407,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 
 	ret = handle_mm_fault(vma, address, fault_flags);
 	if (ret & VM_FAULT_ERROR) {
-		if (ret & VM_FAULT_OOM)
-			return -ENOMEM;
-		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
-			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
-		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
-			return -EFAULT;
+		int err = vm_fault_to_errno(ret, *flags);
+
+		if (err)
+			return err;
 		BUG();
 	}
 
@@ -723,12 +721,10 @@ retry:
 	ret = handle_mm_fault(vma, address, fault_flags);
 	major |= ret & VM_FAULT_MAJOR;
 	if (ret & VM_FAULT_ERROR) {
-		if (ret & VM_FAULT_OOM)
-			return -ENOMEM;
-		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
-			return -EHWPOISON;
-		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
-			return -EFAULT;
+		int err = vm_fault_to_errno(ret, 0);
+
+		if (err)
+			return err;
 		BUG();
 	}
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e5828875f7bb..3eedb187e549 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 			ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
 			if (ret & VM_FAULT_ERROR) {
+				int err = vm_fault_to_errno(ret, flags);
+
+				if (err)
+					return err;
+
 				remainder = 0;
 				break;
 			}
-- 
cgit v1.2.3


From 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 2 Jun 2017 14:46:49 -0700
Subject: mm: consider memblock reservations for deferred memory initialization
 sizing

We have seen an early OOM killer invocation on ppc64 systems with
crashkernel=4096M:

	kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0
	kthreadd cpuset=/ mems_allowed=7
	CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1
	Call Trace:
	  dump_stack+0xb0/0xf0 (unreliable)
	  dump_header+0xb0/0x258
	  out_of_memory+0x5f0/0x640
	  __alloc_pages_nodemask+0xa8c/0xc80
	  kmem_getpages+0x84/0x1a0
	  fallback_alloc+0x2a4/0x320
	  kmem_cache_alloc_node+0xc0/0x2e0
	  copy_process.isra.25+0x260/0x1b30
	  _do_fork+0x94/0x470
	  kernel_thread+0x48/0x60
	  kthreadd+0x264/0x330
	  ret_from_kernel_thread+0x5c/0xa4

	Mem-Info:
	active_anon:0 inactive_anon:0 isolated_anon:0
	 active_file:0 inactive_file:0 isolated_file:0
	 unevictable:0 dirty:0 writeback:0 unstable:0
	 slab_reclaimable:5 slab_unreclaimable:73
	 mapped:0 shmem:0 pagetables:0 bounce:0
	 free:0 free_pcp:0 free_cma:0
	Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
	lowmem_reserve[]: 0 0 0 0
	Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
	0 total pagecache pages
	0 pages in swap cache
	Swap cache stats: add 0, delete 0, find 0/0
	Free swap  = 0kB
	Total swap = 0kB
	819200 pages RAM
	0 pages HighMem/MovableOnly
	817481 pages reserved
	0 pages cma reserved
	0 pages hwpoisoned

the reason is that the managed memory is too low (only 110MB) while the
rest of the the 50GB is still waiting for the deferred intialization to
be done.  update_defer_init estimates the initial memoty to initialize
to 2GB at least but it doesn't consider any memory allocated in that
range.  In this particular case we've had

	Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB)

so the low 2GB is mostly depleted.

Fix this by considering memblock allocations in the initial static
initialization estimation.  Move the max_initialise to
reset_deferred_meminit and implement a simple memblock_reserved_memory
helper which iterates all reserved blocks and sums the size of all that
start below the given address.  The cumulative size is than added on top
of the initial estimation.  This is still not ideal because
reset_deferred_meminit doesn't consider holes and so reservation might
be above the initial estimation whihch we ignore but let's make the
logic simpler until we really need to handle more complicated cases.

Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>	[4.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |  8 ++++++++
 include/linux/mmzone.h   |  1 +
 mm/memblock.c            | 23 +++++++++++++++++++++++
 mm/page_alloc.c          | 33 ++++++++++++++++++++++-----------
 4 files changed, 54 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 4ce24a376262..8098695e5d8d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -425,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
 }
 #endif
 
+extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+		phys_addr_t end_addr);
 #else
 static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
 {
 	return 0;
 }
 
+static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+		phys_addr_t end_addr)
+{
+	return 0;
+}
+
 #endif /* CONFIG_HAVE_MEMBLOCK */
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ebaccd4e7d8c..ef6a13b7bd3e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -678,6 +678,7 @@ typedef struct pglist_data {
 	 * is the first PFN that needs to be initialised.
 	 */
 	unsigned long first_deferred_pfn;
+	unsigned long static_init_size;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/memblock.c b/mm/memblock.c
index b049c9b2dba8..7b8a5db76a2f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
 	}
 }
 
+extern unsigned long __init_memblock
+memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+	struct memblock_region *rgn;
+	unsigned long size = 0;
+	int idx;
+
+	for_each_memblock_type((&memblock.reserved), rgn) {
+		phys_addr_t start, end;
+
+		if (rgn->base + rgn->size < start_addr)
+			continue;
+		if (rgn->base > end_addr)
+			continue;
+
+		start = rgn->base;
+		end = start + rgn->size;
+		size += end - start;
+	}
+
+	return size;
+}
+
 void __init_memblock __memblock_dump_all(void)
 {
 	pr_info("MEMBLOCK configuration:\n");
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b7a6f583a373..2302f250d6b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly;
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static inline void reset_deferred_meminit(pg_data_t *pgdat)
 {
+	unsigned long max_initialise;
+	unsigned long reserved_lowmem;
+
+	/*
+	 * Initialise at least 2G of a node but also take into account that
+	 * two large system hashes that can take up 1GB for 0.25TB/node.
+	 */
+	max_initialise = max(2UL << (30 - PAGE_SHIFT),
+		(pgdat->node_spanned_pages >> 8));
+
+	/*
+	 * Compensate the all the memblock reservations (e.g. crash kernel)
+	 * from the initial estimation to make sure we will initialize enough
+	 * memory to boot.
+	 */
+	reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+			pgdat->node_start_pfn + max_initialise);
+	max_initialise += reserved_lowmem;
+
+	pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
 	pgdat->first_deferred_pfn = ULONG_MAX;
 }
 
@@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 				unsigned long pfn, unsigned long zone_end,
 				unsigned long *nr_initialised)
 {
-	unsigned long max_initialise;
-
 	/* Always populate low zones for address-contrained allocations */
 	if (zone_end < pgdat_end_pfn(pgdat))
 		return true;
-	/*
-	 * Initialise at least 2G of a node but also take into account that
-	 * two large system hashes that can take up 1GB for 0.25TB/node.
-	 */
-	max_initialise = max(2UL << (30 - PAGE_SHIFT),
-		(pgdat->node_spanned_pages >> 8));
-
 	(*nr_initialised)++;
-	if ((*nr_initialised > max_initialise) &&
+	if ((*nr_initialised > pgdat->static_init_size) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
 		pgdat->first_deferred_pfn = pfn;
 		return false;
@@ -6138,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
 
-	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 	pgdat->per_cpu_nodestats = NULL;
@@ -6160,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 		(unsigned long)pgdat->node_mem_map);
 #endif
 
+	reset_deferred_meminit(pgdat);
 	free_area_init_core(pgdat);
 }
 
-- 
cgit v1.2.3


From 266e4e9d9150e98141b85c7400f8aa3cd57a7f9b Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Fri, 19 May 2017 21:49:04 +0800
Subject: clk: add clk_bulk_get accessories

These helper function allows drivers to get several clk consumers in
one operation. If any of the clk cannot be acquired then any clks
that were got will be put before returning to the caller.

This can relieve the driver owners' life who needs to handle many clocks,
as well as each clock error reporting.

Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Anson Huang <anson.huang@nxp.com>
Cc: Robin Gong <yibin.gong@nxp.com>
Cc: Bai Ping <ping.bai@nxp.com>
Cc: Leonard Crestez <leonard.crestez@nxp.com>
Cc: Octavian Purdila <octavian.purdila@nxp.com>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/Makefile   |   2 +-
 drivers/clk/clk-bulk.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/clk.h    | 111 ++++++++++++++++++++++++++++++++++
 3 files changed, 269 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clk/clk-bulk.c

(limited to 'include/linux')

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index c19983afcb81..ed1e99f19ec3 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -1,5 +1,5 @@
 # common clock types
-obj-$(CONFIG_HAVE_CLK)		+= clk-devres.o
+obj-$(CONFIG_HAVE_CLK)		+= clk-devres.o clk-bulk.o
 obj-$(CONFIG_CLKDEV_LOOKUP)	+= clkdev.o
 obj-$(CONFIG_COMMON_CLK)	+= clk.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-divider.o
diff --git a/drivers/clk/clk-bulk.c b/drivers/clk/clk-bulk.c
new file mode 100644
index 000000000000..c834f5abfc49
--- /dev/null
+++ b/drivers/clk/clk-bulk.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2017 NXP
+ *
+ * Dong Aisheng <aisheng.dong@nxp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/export.h>
+
+void clk_bulk_put(int num_clks, struct clk_bulk_data *clks)
+{
+	while (--num_clks >= 0) {
+		clk_put(clks[num_clks].clk);
+		clks[num_clks].clk = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(clk_bulk_put);
+
+int __must_check clk_bulk_get(struct device *dev, int num_clks,
+			      struct clk_bulk_data *clks)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < num_clks; i++)
+		clks[i].clk = NULL;
+
+	for (i = 0; i < num_clks; i++) {
+		clks[i].clk = clk_get(dev, clks[i].id);
+		if (IS_ERR(clks[i].clk)) {
+			ret = PTR_ERR(clks[i].clk);
+			dev_err(dev, "Failed to get clk '%s': %d\n",
+				clks[i].id, ret);
+			clks[i].clk = NULL;
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	clk_bulk_put(i, clks);
+
+	return ret;
+}
+EXPORT_SYMBOL(clk_bulk_get);
+
+#ifdef CONFIG_HAVE_CLK_PREPARE
+
+/**
+ * clk_bulk_unprepare - undo preparation of a set of clock sources
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table being unprepared
+ *
+ * clk_bulk_unprepare may sleep, which differentiates it from clk_bulk_disable.
+ * Returns 0 on success, -EERROR otherwise.
+ */
+void clk_bulk_unprepare(int num_clks, const struct clk_bulk_data *clks)
+{
+	while (--num_clks >= 0)
+		clk_unprepare(clks[num_clks].clk);
+}
+EXPORT_SYMBOL_GPL(clk_bulk_unprepare);
+
+/**
+ * clk_bulk_prepare - prepare a set of clocks
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table being prepared
+ *
+ * clk_bulk_prepare may sleep, which differentiates it from clk_bulk_enable.
+ * Returns 0 on success, -EERROR otherwise.
+ */
+int __must_check clk_bulk_prepare(int num_clks,
+				  const struct clk_bulk_data *clks)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < num_clks; i++) {
+		ret = clk_prepare(clks[i].clk);
+		if (ret) {
+			pr_err("Failed to prepare clk '%s': %d\n",
+				clks[i].id, ret);
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	clk_bulk_unprepare(i, clks);
+
+	return  ret;
+}
+
+#endif /* CONFIG_HAVE_CLK_PREPARE */
+
+/**
+ * clk_bulk_disable - gate a set of clocks
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table being gated
+ *
+ * clk_bulk_disable must not sleep, which differentiates it from
+ * clk_bulk_unprepare. clk_bulk_disable must be called before
+ * clk_bulk_unprepare.
+ */
+void clk_bulk_disable(int num_clks, const struct clk_bulk_data *clks)
+{
+
+	while (--num_clks >= 0)
+		clk_disable(clks[num_clks].clk);
+}
+EXPORT_SYMBOL_GPL(clk_bulk_disable);
+
+/**
+ * clk_bulk_enable - ungate a set of clocks
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table being ungated
+ *
+ * clk_bulk_enable must not sleep
+ * Returns 0 on success, -EERROR otherwise.
+ */
+int __must_check clk_bulk_enable(int num_clks, const struct clk_bulk_data *clks)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < num_clks; i++) {
+		ret = clk_enable(clks[i].clk);
+		if (ret) {
+			pr_err("Failed to enable clk '%s': %d\n",
+				clks[i].id, ret);
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	clk_bulk_disable(i, clks);
+
+	return  ret;
+}
+EXPORT_SYMBOL_GPL(clk_bulk_enable);
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 024cd07870d0..72b0cfce9165 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -77,6 +77,21 @@ struct clk_notifier_data {
 	unsigned long		new_rate;
 };
 
+/**
+ * struct clk_bulk_data - Data used for bulk clk operations.
+ *
+ * @id: clock consumer ID
+ * @clk: struct clk * to store the associated clock
+ *
+ * The CLK APIs provide a series of clk_bulk_() API calls as
+ * a convenience to consumers which require multiple clks.  This
+ * structure is used to manage data for these calls.
+ */
+struct clk_bulk_data {
+	const char		*id;
+	struct clk		*clk;
+};
+
 #ifdef CONFIG_COMMON_CLK
 
 /**
@@ -185,12 +200,20 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
  */
 #ifdef CONFIG_HAVE_CLK_PREPARE
 int clk_prepare(struct clk *clk);
+int __must_check clk_bulk_prepare(int num_clks,
+				  const struct clk_bulk_data *clks);
 #else
 static inline int clk_prepare(struct clk *clk)
 {
 	might_sleep();
 	return 0;
 }
+
+static inline int clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks)
+{
+	might_sleep();
+	return 0;
+}
 #endif
 
 /**
@@ -204,11 +227,16 @@ static inline int clk_prepare(struct clk *clk)
  */
 #ifdef CONFIG_HAVE_CLK_PREPARE
 void clk_unprepare(struct clk *clk);
+void clk_bulk_unprepare(int num_clks, const struct clk_bulk_data *clks);
 #else
 static inline void clk_unprepare(struct clk *clk)
 {
 	might_sleep();
 }
+static inline void clk_bulk_unprepare(int num_clks, struct clk_bulk_data *clks)
+{
+	might_sleep();
+}
 #endif
 
 #ifdef CONFIG_HAVE_CLK
@@ -229,6 +257,29 @@ static inline void clk_unprepare(struct clk *clk)
  */
 struct clk *clk_get(struct device *dev, const char *id);
 
+/**
+ * clk_bulk_get - lookup and obtain a number of references to clock producer.
+ * @dev: device for clock "consumer"
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table of consumer
+ *
+ * This helper function allows drivers to get several clk consumers in one
+ * operation. If any of the clk cannot be acquired then any clks
+ * that were obtained will be freed before returning to the caller.
+ *
+ * Returns 0 if all clocks specified in clk_bulk_data table are obtained
+ * successfully, or valid IS_ERR() condition containing errno.
+ * The implementation uses @dev and @clk_bulk_data.id to determine the
+ * clock consumer, and thereby the clock producer.
+ * The clock returned is stored in each @clk_bulk_data.clk field.
+ *
+ * Drivers must assume that the clock source is not enabled.
+ *
+ * clk_bulk_get should not be called from within interrupt context.
+ */
+int __must_check clk_bulk_get(struct device *dev, int num_clks,
+			      struct clk_bulk_data *clks);
+
 /**
  * devm_clk_get - lookup and obtain a managed reference to a clock producer.
  * @dev: device for clock "consumer"
@@ -278,6 +329,18 @@ struct clk *devm_get_clk_from_child(struct device *dev,
  */
 int clk_enable(struct clk *clk);
 
+/**
+ * clk_bulk_enable - inform the system when the set of clks should be running.
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table of consumer
+ *
+ * May be called from atomic contexts.
+ *
+ * Returns success (0) or negative errno.
+ */
+int __must_check clk_bulk_enable(int num_clks,
+				 const struct clk_bulk_data *clks);
+
 /**
  * clk_disable - inform the system when the clock source is no longer required.
  * @clk: clock source
@@ -294,6 +357,24 @@ int clk_enable(struct clk *clk);
  */
 void clk_disable(struct clk *clk);
 
+/**
+ * clk_bulk_disable - inform the system when the set of clks is no
+ *		      longer required.
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table of consumer
+ *
+ * Inform the system that a set of clks is no longer required by
+ * a driver and may be shut down.
+ *
+ * May be called from atomic contexts.
+ *
+ * Implementation detail: if the set of clks is shared between
+ * multiple drivers, clk_bulk_enable() calls must be balanced by the
+ * same number of clk_bulk_disable() calls for the clock source to be
+ * disabled.
+ */
+void clk_bulk_disable(int num_clks, const struct clk_bulk_data *clks);
+
 /**
  * clk_get_rate - obtain the current clock rate (in Hz) for a clock source.
  *		  This is only valid once the clock source has been enabled.
@@ -313,6 +394,19 @@ unsigned long clk_get_rate(struct clk *clk);
  */
 void clk_put(struct clk *clk);
 
+/**
+ * clk_bulk_put	- "free" the clock source
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table of consumer
+ *
+ * Note: drivers must ensure that all clk_bulk_enable calls made on this
+ * clock source are balanced by clk_bulk_disable calls prior to calling
+ * this function.
+ *
+ * clk_bulk_put should not be called from within interrupt context.
+ */
+void clk_bulk_put(int num_clks, struct clk_bulk_data *clks);
+
 /**
  * devm_clk_put	- "free" a managed clock source
  * @dev: device used to acquire the clock
@@ -445,6 +539,12 @@ static inline struct clk *clk_get(struct device *dev, const char *id)
 	return NULL;
 }
 
+static inline int clk_bulk_get(struct device *dev, int num_clks,
+			       struct clk_bulk_data *clks)
+{
+	return 0;
+}
+
 static inline struct clk *devm_clk_get(struct device *dev, const char *id)
 {
 	return NULL;
@@ -458,6 +558,8 @@ static inline struct clk *devm_get_clk_from_child(struct device *dev,
 
 static inline void clk_put(struct clk *clk) {}
 
+static inline void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) {}
+
 static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
 
 static inline int clk_enable(struct clk *clk)
@@ -465,8 +567,17 @@ static inline int clk_enable(struct clk *clk)
 	return 0;
 }
 
+static inline int clk_bulk_enable(int num_clks, struct clk_bulk_data *clks)
+{
+	return 0;
+}
+
 static inline void clk_disable(struct clk *clk) {}
 
+
+static inline void clk_bulk_disable(int num_clks,
+				    struct clk_bulk_data *clks) {}
+
 static inline unsigned long clk_get_rate(struct clk *clk)
 {
 	return 0;
-- 
cgit v1.2.3


From 618aee02e2f57042f4cdeab228caf631e524b281 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Fri, 19 May 2017 21:49:05 +0800
Subject: clk: add managed version of clk_bulk_get

This patch introduces the managed version of clk_bulk_get.

Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Anson Huang <anson.huang@nxp.com>
Cc: Robin Gong <yibin.gong@nxp.com>
Cc: Bai Ping <ping.bai@nxp.com>
Cc: Leonard Crestez <leonard.crestez@nxp.com>
Cc: Octavian Purdila <octavian.purdila@nxp.com>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-devres.c | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/clk.h      | 21 +++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index 3a218c3a06ae..d854e26a8ddb 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -34,6 +34,42 @@ struct clk *devm_clk_get(struct device *dev, const char *id)
 }
 EXPORT_SYMBOL(devm_clk_get);
 
+struct clk_bulk_devres {
+	struct clk_bulk_data *clks;
+	int num_clks;
+};
+
+static void devm_clk_bulk_release(struct device *dev, void *res)
+{
+	struct clk_bulk_devres *devres = res;
+
+	clk_bulk_put(devres->num_clks, devres->clks);
+}
+
+int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
+		      struct clk_bulk_data *clks)
+{
+	struct clk_bulk_devres *devres;
+	int ret;
+
+	devres = devres_alloc(devm_clk_bulk_release,
+			      sizeof(*devres), GFP_KERNEL);
+	if (!devres)
+		return -ENOMEM;
+
+	ret = clk_bulk_get(dev, num_clks, clks);
+	if (!ret) {
+		devres->clks = clks;
+		devres->num_clks = num_clks;
+		devres_add(dev, devres);
+	} else {
+		devres_free(devres);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_clk_bulk_get);
+
 static int devm_clk_match(struct device *dev, void *res, void *data)
 {
 	struct clk **c = res;
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 72b0cfce9165..c673f0b91751 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -280,6 +280,21 @@ struct clk *clk_get(struct device *dev, const char *id);
 int __must_check clk_bulk_get(struct device *dev, int num_clks,
 			      struct clk_bulk_data *clks);
 
+/**
+ * devm_clk_bulk_get - managed get multiple clk consumers
+ * @dev: device for clock "consumer"
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table of consumer
+ *
+ * Return 0 on success, an errno on failure.
+ *
+ * This helper function allows drivers to get several clk
+ * consumers in one operation with management, the clks will
+ * automatically be freed when the device is unbound.
+ */
+int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
+				   struct clk_bulk_data *clks);
+
 /**
  * devm_clk_get - lookup and obtain a managed reference to a clock producer.
  * @dev: device for clock "consumer"
@@ -550,6 +565,12 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id)
 	return NULL;
 }
 
+static inline int devm_clk_bulk_get(struct device *dev, int num_clks,
+				    struct clk_bulk_data *clks)
+{
+	return 0;
+}
+
 static inline struct clk *devm_get_clk_from_child(struct device *dev,
 				struct device_node *np, const char *con_id)
 {
-- 
cgit v1.2.3


From cfe76a28e37112f471d4bcb8d5f336e3416299b7 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 12 May 2017 09:40:49 +0200
Subject: clk: versatile: delete old RealView clock implementation

The old RealView clock implementation is not used anymore
(nothing in the kernel calls realview_clk_init()) as we have
moved all clocks over to device tree. Delete it.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/versatile/Makefile             |  1 -
 drivers/clk/versatile/clk-realview.c       | 97 ------------------------------
 include/linux/platform_data/clk-realview.h |  1 -
 3 files changed, 99 deletions(-)
 delete mode 100644 drivers/clk/versatile/clk-realview.c
 delete mode 100644 include/linux/platform_data/clk-realview.h

(limited to 'include/linux')

diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile
index 794130402c8d..58b54b814a6d 100644
--- a/drivers/clk/versatile/Makefile
+++ b/drivers/clk/versatile/Makefile
@@ -1,6 +1,5 @@
 # Makefile for Versatile-specific clocks
 obj-$(CONFIG_ICST)		+= icst.o clk-icst.o clk-versatile.o
 obj-$(CONFIG_INTEGRATOR_IMPD1)	+= clk-impd1.o
-obj-$(CONFIG_ARCH_REALVIEW)	+= clk-realview.o
 obj-$(CONFIG_CLK_SP810)		+= clk-sp810.o
 obj-$(CONFIG_CLK_VEXPRESS_OSC)	+= clk-vexpress-osc.o
diff --git a/drivers/clk/versatile/clk-realview.c b/drivers/clk/versatile/clk-realview.c
deleted file mode 100644
index 6fdfee3232f4..000000000000
--- a/drivers/clk/versatile/clk-realview.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Clock driver for the ARM RealView boards
- * Copyright (C) 2012 Linus Walleij
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/clkdev.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/clk-provider.h>
-
-#include "icst.h"
-#include "clk-icst.h"
-
-#define REALVIEW_SYS_OSC0_OFFSET             0x0C
-#define REALVIEW_SYS_OSC1_OFFSET             0x10
-#define REALVIEW_SYS_OSC2_OFFSET             0x14
-#define REALVIEW_SYS_OSC3_OFFSET             0x18
-#define REALVIEW_SYS_OSC4_OFFSET             0x1C	/* OSC1 for RealView/AB */
-#define REALVIEW_SYS_LOCK_OFFSET             0x20
-
-/*
- * Implementation of the ARM RealView clock trees.
- */
-
-static const struct icst_params realview_oscvco_params = {
-	.ref		= 24000000,
-	.vco_max	= ICST307_VCO_MAX,
-	.vco_min	= ICST307_VCO_MIN,
-	.vd_min		= 4 + 8,
-	.vd_max		= 511 + 8,
-	.rd_min		= 1 + 2,
-	.rd_max		= 127 + 2,
-	.s2div		= icst307_s2div,
-	.idx2s		= icst307_idx2s,
-};
-
-static const struct clk_icst_desc realview_osc0_desc __initconst = {
-	.params = &realview_oscvco_params,
-	.vco_offset = REALVIEW_SYS_OSC0_OFFSET,
-	.lock_offset = REALVIEW_SYS_LOCK_OFFSET,
-};
-
-static const struct clk_icst_desc realview_osc4_desc __initconst = {
-	.params = &realview_oscvco_params,
-	.vco_offset = REALVIEW_SYS_OSC4_OFFSET,
-	.lock_offset = REALVIEW_SYS_LOCK_OFFSET,
-};
-
-/*
- * realview_clk_init() - set up the RealView clock tree
- */
-void __init realview_clk_init(void __iomem *sysbase, bool is_pb1176)
-{
-	struct clk *clk;
-
-	/* APB clock dummy */
-	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0);
-	clk_register_clkdev(clk, "apb_pclk", NULL);
-
-	/* 24 MHz clock */
-	clk = clk_register_fixed_rate(NULL, "clk24mhz", NULL, 0, 24000000);
-	clk_register_clkdev(clk, NULL, "dev:uart0");
-	clk_register_clkdev(clk, NULL, "dev:uart1");
-	clk_register_clkdev(clk, NULL, "dev:uart2");
-	clk_register_clkdev(clk, NULL, "fpga:kmi0");
-	clk_register_clkdev(clk, NULL, "fpga:kmi1");
-	clk_register_clkdev(clk, NULL, "fpga:mmc0");
-	clk_register_clkdev(clk, NULL, "dev:ssp0");
-	if (is_pb1176) {
-		/*
-		 * UART3 is on the dev chip in PB1176
-		 * UART4 only exists in PB1176
-		 */
-		clk_register_clkdev(clk, NULL, "dev:uart3");
-		clk_register_clkdev(clk, NULL, "dev:uart4");
-	} else
-		clk_register_clkdev(clk, NULL, "fpga:uart3");
-
-
-	/* 1 MHz clock */
-	clk = clk_register_fixed_rate(NULL, "clk1mhz", NULL, 0, 1000000);
-	clk_register_clkdev(clk, NULL, "sp804");
-
-	/* ICST VCO clock */
-	if (is_pb1176)
-		clk = icst_clk_register(NULL, &realview_osc0_desc,
-					"osc0", NULL, sysbase);
-	else
-		clk = icst_clk_register(NULL, &realview_osc4_desc,
-					"osc4", NULL, sysbase);
-
-	clk_register_clkdev(clk, NULL, "dev:clcd");
-	clk_register_clkdev(clk, NULL, "issp:clcd");
-}
diff --git a/include/linux/platform_data/clk-realview.h b/include/linux/platform_data/clk-realview.h
deleted file mode 100644
index 2e426a7dbc51..000000000000
--- a/include/linux/platform_data/clk-realview.h
+++ /dev/null
@@ -1 +0,0 @@
-void realview_clk_init(void __iomem *sysbase, bool is_pb1176);
-- 
cgit v1.2.3


From 5c82a6ae0242416cfead597bb2b42aa3481a0ba7 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 2 Jun 2017 14:15:15 +0200
Subject: rtc: remove rtc_device.name

rtc->name is only used in messages were it is superfluous. Remove it
completely from the structure.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/class.c | 7 +++----
 include/linux/rtc.h | 1 -
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 5fb439897fe1..543c64cd3df0 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -224,7 +224,6 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	rtc->pie_timer.function = rtc_pie_update_irq;
 	rtc->pie_enabled = 0;
 
-	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
 	/* Check to see if there is an ALARM already set in hw */
@@ -238,20 +237,20 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
 	if (err) {
 		dev_warn(&rtc->dev, "%s: failed to add char device %d:%d\n",
-			 rtc->name, MAJOR(rtc->dev.devt), rtc->id);
+			 name, MAJOR(rtc->dev.devt), rtc->id);
 
 		/* This will free both memory and the ID */
 		put_device(&rtc->dev);
 		goto exit;
 	} else {
-		dev_dbg(&rtc->dev, "%s: dev (%d:%d)\n", rtc->name,
+		dev_dbg(&rtc->dev, "%s: dev (%d:%d)\n", name,
 			MAJOR(rtc->dev.devt), rtc->id);
 	}
 
 	rtc_proc_add_device(rtc);
 
 	dev_info(dev, "rtc core: registered %s as %s\n",
-			rtc->name, dev_name(&rtc->dev));
+			name, dev_name(&rtc->dev));
 
 	return rtc;
 
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b693adac853b..d354f56e0cf5 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -116,7 +116,6 @@ struct rtc_device {
 	struct module *owner;
 
 	int id;
-	char name[RTC_DEVICE_NAME_SIZE];
 
 	const struct rtc_class_ops *ops;
 	struct mutex ops_lock;
-- 
cgit v1.2.3


From bab3548078237706f53baafe43ae58257225549d Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <badhri@google.com>
Date: Tue, 30 May 2017 12:39:53 -0700
Subject: usb: typec: Add a sysfs node to manage port type

User space applications in some cases have the need to enforce a
specific port type(DFP/UFP/DRP). This change allows userspace to
attempt setting the desired port type. Low level drivers can
however reject the request if the specific port type is not supported.

Signed-off-by: Badhri Jagan Sridharan <Badhri@google.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-typec |  15 ++++
 drivers/usb/typec/typec.c                   | 106 +++++++++++++++++++++++++---
 include/linux/usb/typec.h                   |   4 ++
 3 files changed, 115 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec
index d4a3d23eb09c..5be552e255e9 100644
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@@ -30,6 +30,21 @@ Description:
 
 		Valid values: source, sink
 
+What:           /sys/class/typec/<port>/port_type
+Date:           May 2017
+Contact:	Badhri Jagan Sridharan <Badhri@google.com>
+Description:
+		Indicates the type of the port. This attribute can be used for
+		requesting a change in the port type. Port type change is
+		supported as a synchronous operation, so write(2) to the
+		attribute will not return until the operation has finished.
+
+		Valid values:
+		- source (The port will behave as source only DFP port)
+		- sink (The port will behave as sink only UFP port)
+		- dual (The port will behave as dual-role-data and
+			dual-role-power port)
+
 What:		/sys/class/typec/<port>/vconn_source
 Date:		April 2017
 Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
diff --git a/drivers/usb/typec/typec.c b/drivers/usb/typec/typec.c
index 1a6822ee1a52..24e355ba109d 100644
--- a/drivers/usb/typec/typec.c
+++ b/drivers/usb/typec/typec.c
@@ -11,6 +11,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/usb/typec.h>
 
@@ -69,6 +70,8 @@ struct typec_port {
 	enum typec_role			pwr_role;
 	enum typec_role			vconn_role;
 	enum typec_pwr_opmode		pwr_opmode;
+	enum typec_port_type		port_type;
+	struct mutex			port_type_lock;
 
 	const struct typec_capability	*cap;
 };
@@ -790,6 +793,18 @@ static const char * const typec_data_roles[] = {
 	[TYPEC_HOST]	= "host",
 };
 
+static const char * const typec_port_types[] = {
+	[TYPEC_PORT_DFP] = "source",
+	[TYPEC_PORT_UFP] = "sink",
+	[TYPEC_PORT_DRP] = "dual",
+};
+
+static const char * const typec_port_types_drp[] = {
+	[TYPEC_PORT_DFP] = "dual [source] sink",
+	[TYPEC_PORT_UFP] = "dual source [sink]",
+	[TYPEC_PORT_DRP] = "[dual] source sink",
+};
+
 static ssize_t
 preferred_role_store(struct device *dev, struct device_attribute *attr,
 		     const char *buf, size_t size)
@@ -847,11 +862,6 @@ static ssize_t data_role_store(struct device *dev,
 	struct typec_port *port = to_typec_port(dev);
 	int ret;
 
-	if (port->cap->type != TYPEC_PORT_DRP) {
-		dev_dbg(dev, "data role swap only supported with DRP ports\n");
-		return -EOPNOTSUPP;
-	}
-
 	if (!port->cap->dr_set) {
 		dev_dbg(dev, "data role swapping not supported\n");
 		return -EOPNOTSUPP;
@@ -861,11 +871,22 @@ static ssize_t data_role_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&port->port_type_lock);
+	if (port->port_type != TYPEC_PORT_DRP) {
+		dev_dbg(dev, "port type fixed at \"%s\"",
+			     typec_port_types[port->port_type]);
+		ret = -EOPNOTSUPP;
+		goto unlock_and_ret;
+	}
+
 	ret = port->cap->dr_set(port->cap, ret);
 	if (ret)
-		return ret;
+		goto unlock_and_ret;
 
-	return size;
+	ret = size;
+unlock_and_ret:
+	mutex_unlock(&port->port_type_lock);
+	return ret;
 }
 
 static ssize_t data_role_show(struct device *dev,
@@ -886,7 +907,7 @@ static ssize_t power_role_store(struct device *dev,
 				const char *buf, size_t size)
 {
 	struct typec_port *port = to_typec_port(dev);
-	int ret = size;
+	int ret;
 
 	if (!port->cap->pd_revision) {
 		dev_dbg(dev, "USB Power Delivery not supported\n");
@@ -907,11 +928,22 @@ static ssize_t power_role_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&port->port_type_lock);
+	if (port->port_type != TYPEC_PORT_DRP) {
+		dev_dbg(dev, "port type fixed at \"%s\"",
+			     typec_port_types[port->port_type]);
+		ret = -EOPNOTSUPP;
+		goto unlock_and_ret;
+	}
+
 	ret = port->cap->pr_set(port->cap, ret);
 	if (ret)
-		return ret;
+		goto unlock_and_ret;
 
-	return size;
+	ret = size;
+unlock_and_ret:
+	mutex_unlock(&port->port_type_lock);
+	return ret;
 }
 
 static ssize_t power_role_show(struct device *dev,
@@ -927,6 +959,57 @@ static ssize_t power_role_show(struct device *dev,
 }
 static DEVICE_ATTR_RW(power_role);
 
+static ssize_t
+port_type_store(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	struct typec_port *port = to_typec_port(dev);
+	int ret;
+	enum typec_port_type type;
+
+	if (!port->cap->port_type_set || port->cap->type != TYPEC_PORT_DRP) {
+		dev_dbg(dev, "changing port type not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	ret = sysfs_match_string(typec_port_types, buf);
+	if (ret < 0)
+		return ret;
+
+	type = ret;
+	mutex_lock(&port->port_type_lock);
+
+	if (port->port_type == type) {
+		ret = size;
+		goto unlock_and_ret;
+	}
+
+	ret = port->cap->port_type_set(port->cap, type);
+	if (ret)
+		goto unlock_and_ret;
+
+	port->port_type = type;
+	ret = size;
+
+unlock_and_ret:
+	mutex_unlock(&port->port_type_lock);
+	return ret;
+}
+
+static ssize_t
+port_type_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct typec_port *port = to_typec_port(dev);
+
+	if (port->cap->type == TYPEC_PORT_DRP)
+		return sprintf(buf, "%s\n",
+			       typec_port_types_drp[port->port_type]);
+
+	return sprintf(buf, "[%s]\n", typec_port_types[port->cap->type]);
+}
+static DEVICE_ATTR_RW(port_type);
+
 static const char * const typec_pwr_opmodes[] = {
 	[TYPEC_PWR_MODE_USB]	= "default",
 	[TYPEC_PWR_MODE_1_5A]	= "1.5A",
@@ -1036,6 +1119,7 @@ static struct attribute *typec_attrs[] = {
 	&dev_attr_usb_power_delivery_revision.attr,
 	&dev_attr_usb_typec_revision.attr,
 	&dev_attr_vconn_source.attr,
+	&dev_attr_port_type.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(typec);
@@ -1231,6 +1315,8 @@ struct typec_port *typec_register_port(struct device *parent,
 
 	port->id = id;
 	port->cap = cap;
+	port->port_type = cap->type;
+	mutex_init(&port->port_type_lock);
 	port->prefer_role = cap->prefer_role;
 
 	port->dev.class = typec_class;
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index d1d2ebcf36ec..ffe7487886ca 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -190,6 +190,7 @@ struct typec_partner_desc {
  * @pr_set: Set Power Role
  * @vconn_set: Set VCONN Role
  * @activate_mode: Enter/exit given Alternate Mode
+ * @port_type_set: Set port type
  *
  * Static capabilities of a single USB Type-C port.
  */
@@ -214,6 +215,9 @@ struct typec_capability {
 
 	int		(*activate_mode)(const struct typec_capability *,
 					 int mode, int activate);
+	int		(*port_type_set)(const struct typec_capability *,
+					enum typec_port_type);
+
 };
 
 /* Specific to try_role(). Indicates the user want's to clear the preference. */
-- 
cgit v1.2.3


From 615ffd63149117aa5693d6672944966b490cdb66 Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@arm.com>
Date: Wed, 31 May 2017 17:59:30 +0100
Subject: arm,arm64,drivers: move externs in a new header file

Create a new header file (include/linux/arch_topology.h) and put there
declarations of interfaces used by arm, arm64 and drivers code.

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/topology.c    |  7 +------
 arch/arm64/kernel/topology.c  |  4 +---
 drivers/base/arch_topology.c  |  1 +
 include/linux/arch_topology.h | 17 +++++++++++++++++
 4 files changed, 20 insertions(+), 9 deletions(-)
 create mode 100644 include/linux/arch_topology.h

(limited to 'include/linux')

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 1e35a3265ddf..557be4f1d2d7 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -11,6 +11,7 @@
  * for more details.
  */
 
+#include <linux/arch_topology.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
@@ -45,10 +46,6 @@
  * updated during this sequence.
  */
 
-extern unsigned long
-arch_scale_cpu_capacity(struct sched_domain *sd, int cpu);
-extern void set_capacity_scale(unsigned int cpu, unsigned long capacity);
-
 #ifdef CONFIG_OF
 struct cpu_efficiency {
 	const char *compatible;
@@ -76,8 +73,6 @@ static unsigned long *__cpu_capacity;
 
 static unsigned long middle_capacity = 1;
 static bool cap_from_dt = true;
-extern void normalize_cpu_capacity(void);
-extern int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
 /*
  * Iterate all CPUs' descriptor in DT and compute the efficiency
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 7e1f6f75185b..255230c3e835 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,6 +11,7 @@
  * for more details.
  */
 
+#include <linux/arch_topology.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
@@ -27,9 +28,6 @@
 #include <asm/cputype.h>
 #include <asm/topology.h>
 
-extern void normalize_cpu_capacity(void);
-extern int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu);
-
 static int __init get_cpu_for_node(struct device_node *node)
 {
 	struct device_node *cpu_node;
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index acf99372c5cf..76c19aa0d82f 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/arch_topology.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/device.h>
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
new file mode 100644
index 000000000000..4edae9fe8cdd
--- /dev/null
+++ b/include/linux/arch_topology.h
@@ -0,0 +1,17 @@
+/*
+ * include/linux/arch_topology.h - arch specific cpu topology information
+ */
+#ifndef _LINUX_ARCH_TOPOLOGY_H_
+#define _LINUX_ARCH_TOPOLOGY_H_
+
+void normalize_cpu_capacity(void);
+
+struct device_node;
+int parse_cpu_capacity(struct device_node *cpu_node, int cpu);
+
+struct sched_domain;
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu);
+
+void set_capacity_scale(unsigned int cpu, unsigned long capacity);
+
+#endif /* _LINUX_ARCH_TOPOLOGY_H_ */
-- 
cgit v1.2.3


From 4ca4f26a9c66103ca158689b7554f07f4968a32c Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@arm.com>
Date: Wed, 31 May 2017 17:59:31 +0100
Subject: arm,arm64,drivers: add a prefix to drivers arch_topology interfaces

Now that some functions that deal with arch topology information live
under drivers, there is a clash of naming that might create confusion.

Tidy things up by creating a topology namespace for interfaces used by
arch code; achieve this by prepending a 'topology_' prefix to driver
interfaces.

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/topology.c    |  8 ++++----
 arch/arm64/kernel/topology.c  |  4 ++--
 drivers/base/arch_topology.c  | 20 ++++++++++----------
 include/linux/arch_topology.h |  8 ++++----
 4 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 557be4f1d2d7..bf949a763dbe 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -111,7 +111,7 @@ static void __init parse_dt_topology(void)
 			continue;
 		}
 
-		if (parse_cpu_capacity(cn, cpu)) {
+		if (topology_parse_cpu_capacity(cn, cpu)) {
 			of_node_put(cn);
 			continue;
 		}
@@ -160,7 +160,7 @@ static void __init parse_dt_topology(void)
 				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
 
 	if (cap_from_dt)
-		normalize_cpu_capacity();
+		topology_normalize_cpu_scale();
 }
 
 /*
@@ -173,10 +173,10 @@ static void update_cpu_capacity(unsigned int cpu)
 	if (!cpu_capacity(cpu) || cap_from_dt)
 		return;
 
-	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
 	pr_info("CPU%u: update cpu_capacity %lu\n",
-		cpu, arch_scale_cpu_capacity(NULL, cpu));
+		cpu, topology_get_cpu_scale(NULL, cpu));
 }
 
 #else
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 255230c3e835..79244c75eaec 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -39,7 +39,7 @@ static int __init get_cpu_for_node(struct device_node *node)
 
 	for_each_possible_cpu(cpu) {
 		if (of_get_cpu_node(cpu, NULL) == cpu_node) {
-			parse_cpu_capacity(cpu_node, cpu);
+			topology_parse_cpu_capacity(cpu_node, cpu);
 			of_node_put(cpu_node);
 			return cpu;
 		}
@@ -191,7 +191,7 @@ static int __init parse_dt_topology(void)
 	if (ret != 0)
 		goto out_map;
 
-	normalize_cpu_capacity();
+	topology_normalize_cpu_scale();
 
 	/*
 	 * Check that all cores are in the topology; the SMP code will
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 76c19aa0d82f..d1c33a85059e 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -25,12 +25,12 @@
 static DEFINE_MUTEX(cpu_scale_mutex);
 static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
 
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
 {
 	return per_cpu(cpu_scale, cpu);
 }
 
-void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
 {
 	per_cpu(cpu_scale, cpu) = capacity;
 }
@@ -42,7 +42,7 @@ static ssize_t cpu_capacity_show(struct device *dev,
 	struct cpu *cpu = container_of(dev, struct cpu, dev);
 
 	return sprintf(buf, "%lu\n",
-			arch_scale_cpu_capacity(NULL, cpu->dev.id));
+			topology_get_cpu_scale(NULL, cpu->dev.id));
 }
 
 static ssize_t cpu_capacity_store(struct device *dev,
@@ -67,7 +67,7 @@ static ssize_t cpu_capacity_store(struct device *dev,
 
 	mutex_lock(&cpu_scale_mutex);
 	for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
-		set_capacity_scale(i, new_capacity);
+		topology_set_cpu_scale(i, new_capacity);
 	mutex_unlock(&cpu_scale_mutex);
 
 	return count;
@@ -98,7 +98,7 @@ static u32 capacity_scale;
 static u32 *raw_capacity;
 static bool cap_parsing_failed;
 
-void normalize_cpu_capacity(void)
+void topology_normalize_cpu_scale(void)
 {
 	u64 capacity;
 	int cpu;
@@ -113,14 +113,14 @@ void normalize_cpu_capacity(void)
 			 cpu, raw_capacity[cpu]);
 		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
 			/ capacity_scale;
-		set_capacity_scale(cpu, capacity);
+		topology_set_cpu_scale(cpu, capacity);
 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
-			cpu, arch_scale_cpu_capacity(NULL, cpu));
+			cpu, topology_get_cpu_scale(NULL, cpu));
 	}
 	mutex_unlock(&cpu_scale_mutex);
 }
 
-int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
+int __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 {
 	int ret = 1;
 	u32 cpu_capacity;
@@ -185,12 +185,12 @@ init_cpu_capacity_callback(struct notifier_block *nb,
 			       cpus_to_visit,
 			       policy->related_cpus);
 		for_each_cpu(cpu, policy->related_cpus) {
-			raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
+			raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
 					    policy->cpuinfo.max_freq / 1000UL;
 			capacity_scale = max(raw_capacity[cpu], capacity_scale);
 		}
 		if (cpumask_empty(cpus_to_visit)) {
-			normalize_cpu_capacity();
+			topology_normalize_cpu_scale();
 			kfree(raw_capacity);
 			pr_debug("cpu_capacity: parsing done\n");
 			cap_parsing_done = true;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 4edae9fe8cdd..9af3c174c03a 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -4,14 +4,14 @@
 #ifndef _LINUX_ARCH_TOPOLOGY_H_
 #define _LINUX_ARCH_TOPOLOGY_H_
 
-void normalize_cpu_capacity(void);
+void topology_normalize_cpu_scale(void);
 
 struct device_node;
-int parse_cpu_capacity(struct device_node *cpu_node, int cpu);
+int topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
 struct sched_domain;
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu);
+unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu);
 
-void set_capacity_scale(unsigned int cpu, unsigned long capacity);
+void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
 
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
-- 
cgit v1.2.3


From a3b02a9c6591ce154cd44e2383406390a45b530c Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Sun, 14 May 2017 21:51:06 +0200
Subject: mux: minimal mux subsystem

Add a new minimalistic subsystem that handles multiplexer controllers.
When multiplexers are used in various places in the kernel, and the
same multiplexer controller can be used for several independent things,
there should be one place to implement support for said multiplexer
controller.

A single multiplexer controller can also be used to control several
parallel multiplexers, that are in turn used by different subsystems
in the kernel, leading to a need to coordinate multiplexer accesses.
The multiplexer subsystem handles this coordination.

Thanks go out to Lars-Peter Clausen, Jonathan Cameron, Rob Herring,
Wolfram Sang, Paul Gortmaker, Dan Carpenter, Colin Ian King, Greg
Kroah-Hartman and last but certainly not least to Philipp Zabel for
helpful comments, reviews, patches and general encouragement!

Reviewed-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Peter Rosin <peda@axentia.se>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-mux |  16 +
 Documentation/driver-model/devres.txt     |   5 +
 MAINTAINERS                               |   3 +
 drivers/Kconfig                           |   2 +
 drivers/Makefile                          |   1 +
 drivers/mux/Kconfig                       |  16 +
 drivers/mux/Makefile                      |   5 +
 drivers/mux/mux-core.c                    | 547 ++++++++++++++++++++++++++++++
 include/linux/mux/consumer.h              |  32 ++
 include/linux/mux/driver.h                | 108 ++++++
 10 files changed, 735 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-mux
 create mode 100644 drivers/mux/Kconfig
 create mode 100644 drivers/mux/Makefile
 create mode 100644 drivers/mux/mux-core.c
 create mode 100644 include/linux/mux/consumer.h
 create mode 100644 include/linux/mux/driver.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-mux b/Documentation/ABI/testing/sysfs-class-mux
new file mode 100644
index 000000000000..8715f9c7bd4f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mux
@@ -0,0 +1,16 @@
+What:		/sys/class/mux/
+Date:		April 2017
+KernelVersion:	4.13
+Contact:	Peter Rosin <peda@axentia.se>
+Description:
+		The mux/ class sub-directory belongs to the Generic MUX
+		Framework and provides a sysfs interface for using MUX
+		controllers.
+
+What:		/sys/class/mux/muxchipN/
+Date:		April 2017
+KernelVersion:	4.13
+Contact:	Peter Rosin <peda@axentia.se>
+Description:
+		A /sys/class/mux/muxchipN directory is created for each
+		probed MUX chip where N is a simple enumeration.
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index af08b4cd7968..40e4787c399c 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -337,6 +337,11 @@ MEM
 MFD
   devm_mfd_add_devices()
 
+MUX
+  devm_mux_chip_alloc()
+  devm_mux_chip_register()
+  devm_mux_control_get()
+
 PER-CPU MEM
   devm_alloc_percpu()
   devm_free_percpu()
diff --git a/MAINTAINERS b/MAINTAINERS
index b2cc32caabb9..a507bb4316ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8719,8 +8719,11 @@ F:	include/linux/spi/mmc_spi.h
 MULTIPLEXER SUBSYSTEM
 M:	Peter Rosin <peda@axentia.se>
 S:	Maintained
+F:	Documentation/ABI/testing/mux/sysfs-class-mux*
 F:	Documentation/devicetree/bindings/mux/
 F:	include/linux/dt-bindings/mux/
+F:	include/linux/mux/
+F:	drivers/mux/
 
 MULTISOUND SOUND DRIVER
 M:	Andrew Veliath <andrewtv@usa.net>
diff --git a/drivers/Kconfig b/drivers/Kconfig
index ba2901e76769..505c676fa9c7 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -206,4 +206,6 @@ source "drivers/fsi/Kconfig"
 
 source "drivers/tee/Kconfig"
 
+source "drivers/mux/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index cfabd141dba2..dfdcda00bfe3 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -181,3 +181,4 @@ obj-$(CONFIG_NVMEM)		+= nvmem/
 obj-$(CONFIG_FPGA)		+= fpga/
 obj-$(CONFIG_FSI)		+= fsi/
 obj-$(CONFIG_TEE)		+= tee/
+obj-$(CONFIG_MULTIPLEXER)	+= mux/
diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig
new file mode 100644
index 000000000000..23ab2cde83b1
--- /dev/null
+++ b/drivers/mux/Kconfig
@@ -0,0 +1,16 @@
+#
+# Multiplexer devices
+#
+
+menuconfig MULTIPLEXER
+	tristate "Multiplexer subsystem"
+	help
+	  Multiplexer controller subsystem. Multiplexers are used in a
+	  variety of settings, and this subsystem abstracts their use
+	  so that the rest of the kernel sees a common interface. When
+	  multiple parallel multiplexers are controlled by one single
+	  multiplexer controller, this subsystem also coordinates the
+	  multiplexer accesses.
+
+	  To compile the subsystem as a module, choose M here: the module will
+	  be called mux-core.
diff --git a/drivers/mux/Makefile b/drivers/mux/Makefile
new file mode 100644
index 000000000000..09f0299e109d
--- /dev/null
+++ b/drivers/mux/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for multiplexer devices.
+#
+
+obj-$(CONFIG_MULTIPLEXER)	+= mux-core.o
diff --git a/drivers/mux/mux-core.c b/drivers/mux/mux-core.c
new file mode 100644
index 000000000000..90b8995f07cb
--- /dev/null
+++ b/drivers/mux/mux-core.c
@@ -0,0 +1,547 @@
+/*
+ * Multiplexer subsystem
+ *
+ * Copyright (C) 2017 Axentia Technologies AB
+ *
+ * Author: Peter Rosin <peda@axentia.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "mux-core: " fmt
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mux/consumer.h>
+#include <linux/mux/driver.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+/*
+ * The idle-as-is "state" is not an actual state that may be selected, it
+ * only implies that the state should not be changed. So, use that state
+ * as indication that the cached state of the multiplexer is unknown.
+ */
+#define MUX_CACHE_UNKNOWN MUX_IDLE_AS_IS
+
+static struct class mux_class = {
+	.name = "mux",
+	.owner = THIS_MODULE,
+};
+
+static DEFINE_IDA(mux_ida);
+
+static int __init mux_init(void)
+{
+	ida_init(&mux_ida);
+	return class_register(&mux_class);
+}
+
+static void __exit mux_exit(void)
+{
+	class_register(&mux_class);
+	ida_destroy(&mux_ida);
+}
+
+static void mux_chip_release(struct device *dev)
+{
+	struct mux_chip *mux_chip = to_mux_chip(dev);
+
+	ida_simple_remove(&mux_ida, mux_chip->id);
+	kfree(mux_chip);
+}
+
+static struct device_type mux_type = {
+	.name = "mux-chip",
+	.release = mux_chip_release,
+};
+
+/**
+ * mux_chip_alloc() - Allocate a mux-chip.
+ * @dev: The parent device implementing the mux interface.
+ * @controllers: The number of mux controllers to allocate for this chip.
+ * @sizeof_priv: Size of extra memory area for private use by the caller.
+ *
+ * After allocating the mux-chip with the desired number of mux controllers
+ * but before registering the chip, the mux driver is required to configure
+ * the number of valid mux states in the mux_chip->mux[N].states members and
+ * the desired idle state in the returned mux_chip->mux[N].idle_state members.
+ * The default idle state is MUX_IDLE_AS_IS. The mux driver also needs to
+ * provide a pointer to the operations struct in the mux_chip->ops member
+ * before registering the mux-chip with mux_chip_register.
+ *
+ * Return: A pointer to the new mux-chip, or an ERR_PTR with a negative errno.
+ */
+struct mux_chip *mux_chip_alloc(struct device *dev,
+				unsigned int controllers, size_t sizeof_priv)
+{
+	struct mux_chip *mux_chip;
+	int i;
+
+	if (WARN_ON(!dev || !controllers))
+		return ERR_PTR(-EINVAL);
+
+	mux_chip = kzalloc(sizeof(*mux_chip) +
+			   controllers * sizeof(*mux_chip->mux) +
+			   sizeof_priv, GFP_KERNEL);
+	if (!mux_chip)
+		return ERR_PTR(-ENOMEM);
+
+	mux_chip->mux = (struct mux_control *)(mux_chip + 1);
+	mux_chip->dev.class = &mux_class;
+	mux_chip->dev.type = &mux_type;
+	mux_chip->dev.parent = dev;
+	mux_chip->dev.of_node = dev->of_node;
+	dev_set_drvdata(&mux_chip->dev, mux_chip);
+
+	mux_chip->id = ida_simple_get(&mux_ida, 0, 0, GFP_KERNEL);
+	if (mux_chip->id < 0) {
+		int err = mux_chip->id;
+
+		pr_err("muxchipX failed to get a device id\n");
+		kfree(mux_chip);
+		return ERR_PTR(err);
+	}
+	dev_set_name(&mux_chip->dev, "muxchip%d", mux_chip->id);
+
+	mux_chip->controllers = controllers;
+	for (i = 0; i < controllers; ++i) {
+		struct mux_control *mux = &mux_chip->mux[i];
+
+		mux->chip = mux_chip;
+		sema_init(&mux->lock, 1);
+		mux->cached_state = MUX_CACHE_UNKNOWN;
+		mux->idle_state = MUX_IDLE_AS_IS;
+	}
+
+	device_initialize(&mux_chip->dev);
+
+	return mux_chip;
+}
+EXPORT_SYMBOL_GPL(mux_chip_alloc);
+
+static int mux_control_set(struct mux_control *mux, int state)
+{
+	int ret = mux->chip->ops->set(mux, state);
+
+	mux->cached_state = ret < 0 ? MUX_CACHE_UNKNOWN : state;
+
+	return ret;
+}
+
+/**
+ * mux_chip_register() - Register a mux-chip, thus readying the controllers
+ *			 for use.
+ * @mux_chip: The mux-chip to register.
+ *
+ * Do not retry registration of the same mux-chip on failure. You should
+ * instead put it away with mux_chip_free() and allocate a new one, if you
+ * for some reason would like to retry registration.
+ *
+ * Return: Zero on success or a negative errno on error.
+ */
+int mux_chip_register(struct mux_chip *mux_chip)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < mux_chip->controllers; ++i) {
+		struct mux_control *mux = &mux_chip->mux[i];
+
+		if (mux->idle_state == mux->cached_state)
+			continue;
+
+		ret = mux_control_set(mux, mux->idle_state);
+		if (ret < 0) {
+			dev_err(&mux_chip->dev, "unable to set idle state\n");
+			return ret;
+		}
+	}
+
+	ret = device_add(&mux_chip->dev);
+	if (ret < 0)
+		dev_err(&mux_chip->dev,
+			"device_add failed in %s: %d\n", __func__, ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mux_chip_register);
+
+/**
+ * mux_chip_unregister() - Take the mux-chip off-line.
+ * @mux_chip: The mux-chip to unregister.
+ *
+ * mux_chip_unregister() reverses the effects of mux_chip_register().
+ * But not completely, you should not try to call mux_chip_register()
+ * on a mux-chip that has been registered before.
+ */
+void mux_chip_unregister(struct mux_chip *mux_chip)
+{
+	device_del(&mux_chip->dev);
+}
+EXPORT_SYMBOL_GPL(mux_chip_unregister);
+
+/**
+ * mux_chip_free() - Free the mux-chip for good.
+ * @mux_chip: The mux-chip to free.
+ *
+ * mux_chip_free() reverses the effects of mux_chip_alloc().
+ */
+void mux_chip_free(struct mux_chip *mux_chip)
+{
+	if (!mux_chip)
+		return;
+
+	put_device(&mux_chip->dev);
+}
+EXPORT_SYMBOL_GPL(mux_chip_free);
+
+static void devm_mux_chip_release(struct device *dev, void *res)
+{
+	struct mux_chip *mux_chip = *(struct mux_chip **)res;
+
+	mux_chip_free(mux_chip);
+}
+
+/**
+ * devm_mux_chip_alloc() - Resource-managed version of mux_chip_alloc().
+ * @dev: The parent device implementing the mux interface.
+ * @controllers: The number of mux controllers to allocate for this chip.
+ * @sizeof_priv: Size of extra memory area for private use by the caller.
+ *
+ * See mux_chip_alloc() for more details.
+ *
+ * Return: A pointer to the new mux-chip, or an ERR_PTR with a negative errno.
+ */
+struct mux_chip *devm_mux_chip_alloc(struct device *dev,
+				     unsigned int controllers,
+				     size_t sizeof_priv)
+{
+	struct mux_chip **ptr, *mux_chip;
+
+	ptr = devres_alloc(devm_mux_chip_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	mux_chip = mux_chip_alloc(dev, controllers, sizeof_priv);
+	if (IS_ERR(mux_chip)) {
+		devres_free(ptr);
+		return mux_chip;
+	}
+
+	*ptr = mux_chip;
+	devres_add(dev, ptr);
+
+	return mux_chip;
+}
+EXPORT_SYMBOL_GPL(devm_mux_chip_alloc);
+
+static void devm_mux_chip_reg_release(struct device *dev, void *res)
+{
+	struct mux_chip *mux_chip = *(struct mux_chip **)res;
+
+	mux_chip_unregister(mux_chip);
+}
+
+/**
+ * devm_mux_chip_register() - Resource-managed version mux_chip_register().
+ * @dev: The parent device implementing the mux interface.
+ * @mux_chip: The mux-chip to register.
+ *
+ * See mux_chip_register() for more details.
+ *
+ * Return: Zero on success or a negative errno on error.
+ */
+int devm_mux_chip_register(struct device *dev,
+			   struct mux_chip *mux_chip)
+{
+	struct mux_chip **ptr;
+	int res;
+
+	ptr = devres_alloc(devm_mux_chip_reg_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	res = mux_chip_register(mux_chip);
+	if (res) {
+		devres_free(ptr);
+		return res;
+	}
+
+	*ptr = mux_chip;
+	devres_add(dev, ptr);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(devm_mux_chip_register);
+
+/**
+ * mux_control_states() - Query the number of multiplexer states.
+ * @mux: The mux-control to query.
+ *
+ * Return: The number of multiplexer states.
+ */
+unsigned int mux_control_states(struct mux_control *mux)
+{
+	return mux->states;
+}
+EXPORT_SYMBOL_GPL(mux_control_states);
+
+/*
+ * The mux->lock must be down when calling this function.
+ */
+static int __mux_control_select(struct mux_control *mux, int state)
+{
+	int ret;
+
+	if (WARN_ON(state < 0 || state >= mux->states))
+		return -EINVAL;
+
+	if (mux->cached_state == state)
+		return 0;
+
+	ret = mux_control_set(mux, state);
+	if (ret >= 0)
+		return 0;
+
+	/* The mux update failed, try to revert if appropriate... */
+	if (mux->idle_state != MUX_IDLE_AS_IS)
+		mux_control_set(mux, mux->idle_state);
+
+	return ret;
+}
+
+/**
+ * mux_control_select() - Select the given multiplexer state.
+ * @mux: The mux-control to request a change of state from.
+ * @state: The new requested state.
+ *
+ * On successfully selecting the mux-control state, it will be locked until
+ * there is a call to mux_control_deselect(). If the mux-control is already
+ * selected when mux_control_select() is called, the caller will be blocked
+ * until mux_control_deselect() is called (by someone else).
+ *
+ * Therefore, make sure to call mux_control_deselect() when the operation is
+ * complete and the mux-control is free for others to use, but do not call
+ * mux_control_deselect() if mux_control_select() fails.
+ *
+ * Return: 0 when the mux-control state has the requested state or a negative
+ * errno on error.
+ */
+int mux_control_select(struct mux_control *mux, unsigned int state)
+{
+	int ret;
+
+	ret = down_killable(&mux->lock);
+	if (ret < 0)
+		return ret;
+
+	ret = __mux_control_select(mux, state);
+
+	if (ret < 0)
+		up(&mux->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mux_control_select);
+
+/**
+ * mux_control_try_select() - Try to select the given multiplexer state.
+ * @mux: The mux-control to request a change of state from.
+ * @state: The new requested state.
+ *
+ * On successfully selecting the mux-control state, it will be locked until
+ * mux_control_deselect() called.
+ *
+ * Therefore, make sure to call mux_control_deselect() when the operation is
+ * complete and the mux-control is free for others to use, but do not call
+ * mux_control_deselect() if mux_control_try_select() fails.
+ *
+ * Return: 0 when the mux-control state has the requested state or a negative
+ * errno on error. Specifically -EBUSY if the mux-control is contended.
+ */
+int mux_control_try_select(struct mux_control *mux, unsigned int state)
+{
+	int ret;
+
+	if (down_trylock(&mux->lock))
+		return -EBUSY;
+
+	ret = __mux_control_select(mux, state);
+
+	if (ret < 0)
+		up(&mux->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mux_control_try_select);
+
+/**
+ * mux_control_deselect() - Deselect the previously selected multiplexer state.
+ * @mux: The mux-control to deselect.
+ *
+ * It is required that a single call is made to mux_control_deselect() for
+ * each and every successful call made to either of mux_control_select() or
+ * mux_control_try_select().
+ *
+ * Return: 0 on success and a negative errno on error. An error can only
+ * occur if the mux has an idle state. Note that even if an error occurs, the
+ * mux-control is unlocked and is thus free for the next access.
+ */
+int mux_control_deselect(struct mux_control *mux)
+{
+	int ret = 0;
+
+	if (mux->idle_state != MUX_IDLE_AS_IS &&
+	    mux->idle_state != mux->cached_state)
+		ret = mux_control_set(mux, mux->idle_state);
+
+	up(&mux->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mux_control_deselect);
+
+static int of_dev_node_match(struct device *dev, const void *data)
+{
+	return dev->of_node == data;
+}
+
+static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
+{
+	struct device *dev;
+
+	dev = class_find_device(&mux_class, NULL, np, of_dev_node_match);
+
+	return dev ? to_mux_chip(dev) : NULL;
+}
+
+/**
+ * mux_control_get() - Get the mux-control for a device.
+ * @dev: The device that needs a mux-control.
+ * @mux_name: The name identifying the mux-control.
+ *
+ * Return: A pointer to the mux-control, or an ERR_PTR with a negative errno.
+ */
+struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
+{
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	struct mux_chip *mux_chip;
+	unsigned int controller;
+	int index = 0;
+	int ret;
+
+	if (mux_name) {
+		index = of_property_match_string(np, "mux-control-names",
+						 mux_name);
+		if (index < 0) {
+			dev_err(dev, "mux controller '%s' not found\n",
+				mux_name);
+			return ERR_PTR(index);
+		}
+	}
+
+	ret = of_parse_phandle_with_args(np,
+					 "mux-controls", "#mux-control-cells",
+					 index, &args);
+	if (ret) {
+		dev_err(dev, "%s: failed to get mux-control %s(%i)\n",
+			np->full_name, mux_name ?: "", index);
+		return ERR_PTR(ret);
+	}
+
+	mux_chip = of_find_mux_chip_by_node(args.np);
+	of_node_put(args.np);
+	if (!mux_chip)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	if (args.args_count > 1 ||
+	    (!args.args_count && (mux_chip->controllers > 1))) {
+		dev_err(dev, "%s: wrong #mux-control-cells for %s\n",
+			np->full_name, args.np->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	controller = 0;
+	if (args.args_count)
+		controller = args.args[0];
+
+	if (controller >= mux_chip->controllers) {
+		dev_err(dev, "%s: bad mux controller %u specified in %s\n",
+			np->full_name, controller, args.np->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	get_device(&mux_chip->dev);
+	return &mux_chip->mux[controller];
+}
+EXPORT_SYMBOL_GPL(mux_control_get);
+
+/**
+ * mux_control_put() - Put away the mux-control for good.
+ * @mux: The mux-control to put away.
+ *
+ * mux_control_put() reverses the effects of mux_control_get().
+ */
+void mux_control_put(struct mux_control *mux)
+{
+	put_device(&mux->chip->dev);
+}
+EXPORT_SYMBOL_GPL(mux_control_put);
+
+static void devm_mux_control_release(struct device *dev, void *res)
+{
+	struct mux_control *mux = *(struct mux_control **)res;
+
+	mux_control_put(mux);
+}
+
+/**
+ * devm_mux_control_get() - Get the mux-control for a device, with resource
+ *			    management.
+ * @dev: The device that needs a mux-control.
+ * @mux_name: The name identifying the mux-control.
+ *
+ * Return: Pointer to the mux-control, or an ERR_PTR with a negative errno.
+ */
+struct mux_control *devm_mux_control_get(struct device *dev,
+					 const char *mux_name)
+{
+	struct mux_control **ptr, *mux;
+
+	ptr = devres_alloc(devm_mux_control_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	mux = mux_control_get(dev, mux_name);
+	if (IS_ERR(mux)) {
+		devres_free(ptr);
+		return mux;
+	}
+
+	*ptr = mux;
+	devres_add(dev, ptr);
+
+	return mux;
+}
+EXPORT_SYMBOL_GPL(devm_mux_control_get);
+
+/*
+ * Using subsys_initcall instead of module_init here to try to ensure - for
+ * the non-modular case - that the subsystem is initialized when mux consumers
+ * and mux controllers start to use it.
+ * For the modular case, the ordering is ensured with module dependencies.
+ */
+subsys_initcall(mux_init);
+module_exit(mux_exit);
+
+MODULE_DESCRIPTION("Multiplexer subsystem");
+MODULE_AUTHOR("Peter Rosin <peda@axentia.se>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h
new file mode 100644
index 000000000000..5577e1b773c4
--- /dev/null
+++ b/include/linux/mux/consumer.h
@@ -0,0 +1,32 @@
+/*
+ * mux/consumer.h - definitions for the multiplexer consumer interface
+ *
+ * Copyright (C) 2017 Axentia Technologies AB
+ *
+ * Author: Peter Rosin <peda@axentia.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_MUX_CONSUMER_H
+#define _LINUX_MUX_CONSUMER_H
+
+struct device;
+struct mux_control;
+
+unsigned int mux_control_states(struct mux_control *mux);
+int __must_check mux_control_select(struct mux_control *mux,
+				    unsigned int state);
+int __must_check mux_control_try_select(struct mux_control *mux,
+					unsigned int state);
+int mux_control_deselect(struct mux_control *mux);
+
+struct mux_control *mux_control_get(struct device *dev, const char *mux_name);
+void mux_control_put(struct mux_control *mux);
+
+struct mux_control *devm_mux_control_get(struct device *dev,
+					 const char *mux_name);
+
+#endif /* _LINUX_MUX_CONSUMER_H */
diff --git a/include/linux/mux/driver.h b/include/linux/mux/driver.h
new file mode 100644
index 000000000000..35c3579c3304
--- /dev/null
+++ b/include/linux/mux/driver.h
@@ -0,0 +1,108 @@
+/*
+ * mux/driver.h - definitions for the multiplexer driver interface
+ *
+ * Copyright (C) 2017 Axentia Technologies AB
+ *
+ * Author: Peter Rosin <peda@axentia.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_MUX_DRIVER_H
+#define _LINUX_MUX_DRIVER_H
+
+#include <dt-bindings/mux/mux.h>
+#include <linux/device.h>
+#include <linux/semaphore.h>
+
+struct mux_chip;
+struct mux_control;
+
+/**
+ * struct mux_control_ops -	Mux controller operations for a mux chip.
+ * @set:			Set the state of the given mux controller.
+ */
+struct mux_control_ops {
+	int (*set)(struct mux_control *mux, int state);
+};
+
+/**
+ * struct mux_control -	Represents a mux controller.
+ * @lock:		Protects the mux controller state.
+ * @chip:		The mux chip that is handling this mux controller.
+ * @cached_state:	The current mux controller state, or -1 if none.
+ * @states:		The number of mux controller states.
+ * @idle_state:		The mux controller state to use when inactive, or one
+ *			of MUX_IDLE_AS_IS and MUX_IDLE_DISCONNECT.
+ *
+ * Mux drivers may only change @states and @idle_state, and may only do so
+ * between allocation and registration of the mux controller. Specifically,
+ * @cached_state is internal to the mux core and should never be written by
+ * mux drivers.
+ */
+struct mux_control {
+	struct semaphore lock; /* protects the state of the mux */
+
+	struct mux_chip *chip;
+	int cached_state;
+
+	unsigned int states;
+	int idle_state;
+};
+
+/**
+ * struct mux_chip -	Represents a chip holding mux controllers.
+ * @controllers:	Number of mux controllers handled by the chip.
+ * @mux:		Array of mux controllers that are handled.
+ * @dev:		Device structure.
+ * @id:			Used to identify the device internally.
+ * @ops:		Mux controller operations.
+ */
+struct mux_chip {
+	unsigned int controllers;
+	struct mux_control *mux;
+	struct device dev;
+	int id;
+
+	const struct mux_control_ops *ops;
+};
+
+#define to_mux_chip(x) container_of((x), struct mux_chip, dev)
+
+/**
+ * mux_chip_priv() - Get the extra memory reserved by mux_chip_alloc().
+ * @mux_chip: The mux-chip to get the private memory from.
+ *
+ * Return: Pointer to the private memory reserved by the allocator.
+ */
+static inline void *mux_chip_priv(struct mux_chip *mux_chip)
+{
+	return &mux_chip->mux[mux_chip->controllers];
+}
+
+struct mux_chip *mux_chip_alloc(struct device *dev,
+				unsigned int controllers, size_t sizeof_priv);
+int mux_chip_register(struct mux_chip *mux_chip);
+void mux_chip_unregister(struct mux_chip *mux_chip);
+void mux_chip_free(struct mux_chip *mux_chip);
+
+struct mux_chip *devm_mux_chip_alloc(struct device *dev,
+				     unsigned int controllers,
+				     size_t sizeof_priv);
+int devm_mux_chip_register(struct device *dev, struct mux_chip *mux_chip);
+
+/**
+ * mux_control_get_index() - Get the index of the given mux controller
+ * @mux: The mux-control to get the index for.
+ *
+ * Return: The index of the mux controller within the mux chip the mux
+ * controller is a part of.
+ */
+static inline unsigned int mux_control_get_index(struct mux_control *mux)
+{
+	return mux - mux->chip->mux;
+}
+
+#endif /* _LINUX_MUX_DRIVER_H */
-- 
cgit v1.2.3


From 8a848e754956dcbc35cd2fcf417f66dafa020c9e Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Sun, 14 May 2017 21:51:08 +0200
Subject: iio: inkern: api for manipulating ext_info of iio channels

Extend the inkern api with functions for reading and writing ext_info
of iio channels.

Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/inkern.c         | 60 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iio/consumer.h | 37 +++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 7a13535dc3e9..8292ad4435ea 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -869,3 +869,63 @@ err_unlock:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iio_write_channel_raw);
+
+unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan)
+{
+	const struct iio_chan_spec_ext_info *ext_info;
+	unsigned int i = 0;
+
+	if (!chan->channel->ext_info)
+		return i;
+
+	for (ext_info = chan->channel->ext_info; ext_info->name; ext_info++)
+		++i;
+
+	return i;
+}
+EXPORT_SYMBOL_GPL(iio_get_channel_ext_info_count);
+
+static const struct iio_chan_spec_ext_info *iio_lookup_ext_info(
+						const struct iio_channel *chan,
+						const char *attr)
+{
+	const struct iio_chan_spec_ext_info *ext_info;
+
+	if (!chan->channel->ext_info)
+		return NULL;
+
+	for (ext_info = chan->channel->ext_info; ext_info->name; ++ext_info) {
+		if (!strcmp(attr, ext_info->name))
+			return ext_info;
+	}
+
+	return NULL;
+}
+
+ssize_t iio_read_channel_ext_info(struct iio_channel *chan,
+				  const char *attr, char *buf)
+{
+	const struct iio_chan_spec_ext_info *ext_info;
+
+	ext_info = iio_lookup_ext_info(chan, attr);
+	if (!ext_info)
+		return -EINVAL;
+
+	return ext_info->read(chan->indio_dev, ext_info->private,
+			      chan->channel, buf);
+}
+EXPORT_SYMBOL_GPL(iio_read_channel_ext_info);
+
+ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr,
+				   const char *buf, size_t len)
+{
+	const struct iio_chan_spec_ext_info *ext_info;
+
+	ext_info = iio_lookup_ext_info(chan, attr);
+	if (!ext_info)
+		return -EINVAL;
+
+	return ext_info->write(chan->indio_dev, ext_info->private,
+			       chan->channel, buf, len);
+}
+EXPORT_SYMBOL_GPL(iio_write_channel_ext_info);
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 47eeec3218b5..5e347a9805fd 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -312,4 +312,41 @@ int iio_read_channel_scale(struct iio_channel *chan, int *val,
 int iio_convert_raw_to_processed(struct iio_channel *chan, int raw,
 	int *processed, unsigned int scale);
 
+/**
+ * iio_get_channel_ext_info_count() - get number of ext_info attributes
+ *				      connected to the channel.
+ * @chan:		The channel being queried
+ *
+ * Returns the number of ext_info attributes
+ */
+unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan);
+
+/**
+ * iio_read_channel_ext_info() - read ext_info attribute from a given channel
+ * @chan:		The channel being queried.
+ * @attr:		The ext_info attribute to read.
+ * @buf:		Where to store the attribute value. Assumed to hold
+ *			at least PAGE_SIZE bytes.
+ *
+ * Returns the number of bytes written to buf (perhaps w/o zero termination;
+ * it need not even be a string), or an error code.
+ */
+ssize_t iio_read_channel_ext_info(struct iio_channel *chan,
+				  const char *attr, char *buf);
+
+/**
+ * iio_write_channel_ext_info() - write ext_info attribute from a given channel
+ * @chan:		The channel being queried.
+ * @attr:		The ext_info attribute to read.
+ * @buf:		The new attribute value. Strings needs to be zero-
+ *			terminated, but the terminator should not be included
+ *			in the below len.
+ * @len:		The size of the new attribute value.
+ *
+ * Returns the number of accepted bytes, which should be the same as len.
+ * An error code can also be returned.
+ */
+ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr,
+				   const char *buf, size_t len);
+
 #endif
-- 
cgit v1.2.3


From 201d7f47f34bd7cb19161d0426f13b141e381f30 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 31 May 2017 11:58:32 +0200
Subject: genirq: Handle NOAUTOEN interrupt setup proper

If an interrupt is marked NOAUTOEN then request_irq() installs the action,
but does not enable the interrupt via startup_irq().  The interrupt is
enabled via enable_irq() later from the driver. enable_irq() calls
irq_enable().

That means that for interrupts which have a irq_startup() callback this
callback is never invoked. Neither is irq_domain_activate_irq() invoked for
such interrupts.

If an interrupt depends on irq_startup() or irq_domain_activate_irq() then
the enable via irq_enable() is not enough.

Add a status flag IRQD_IRQ_STARTED_UP and use this to select the proper
mechanism in enable_irq(). Use the flag also to avoid pointless calls into
the low level functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: dianders@chromium.org
Cc: jeffy <jeffy.chen@rock-chips.com>
Cc: Brian Norris <briannorris@chromium.org>
Cc: tfiga@chromium.org
Link: http://lkml.kernel.org/r/20170531100212.130986205@linutronix.de
---
 include/linux/irq.h |  6 +++++
 kernel/irq/chip.c   | 76 +++++++++++++++++++++++++++++++++++++----------------
 kernel/irq/manage.c | 12 ++++++---
 3 files changed, 69 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index f887351aa80e..94d1ad6ffdd4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -216,6 +216,7 @@ enum {
 	IRQD_WAKEUP_ARMED		= (1 << 19),
 	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 	IRQD_AFFINITY_MANAGED		= (1 << 21),
+	IRQD_IRQ_STARTED		= (1 << 22),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -329,6 +330,11 @@ static inline void irqd_clr_activated(struct irq_data *d)
 	__irqd_to_state(d) &= ~IRQD_ACTIVATED;
 }
 
+static inline bool irqd_is_started(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_IRQ_STARTED;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c94da688ee9b..e0051d58c909 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -185,37 +185,64 @@ static void irq_state_set_masked(struct irq_desc *desc)
 	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
+static void irq_state_clr_started(struct irq_desc *desc)
+{
+	irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED);
+}
+
+static void irq_state_set_started(struct irq_desc *desc)
+{
+	irqd_set(&desc->irq_data, IRQD_IRQ_STARTED);
+}
+
 int irq_startup(struct irq_desc *desc, bool resend)
 {
 	int ret = 0;
 
-	irq_state_clr_disabled(desc);
 	desc->depth = 0;
 
-	irq_domain_activate_irq(&desc->irq_data);
-	if (desc->irq_data.chip->irq_startup) {
-		ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
-		irq_state_clr_masked(desc);
-	} else {
+	if (irqd_is_started(&desc->irq_data)) {
 		irq_enable(desc);
+	} else {
+		irq_domain_activate_irq(&desc->irq_data);
+		if (desc->irq_data.chip->irq_startup) {
+			ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+			irq_state_clr_disabled(desc);
+			irq_state_clr_masked(desc);
+		} else {
+			irq_enable(desc);
+		}
+		irq_state_set_started(desc);
 	}
+
 	if (resend)
 		check_irq_resend(desc);
+
 	return ret;
 }
 
+static void __irq_disable(struct irq_desc *desc, bool mask);
+
 void irq_shutdown(struct irq_desc *desc)
 {
-	irq_state_set_disabled(desc);
-	desc->depth = 1;
-	if (desc->irq_data.chip->irq_shutdown)
-		desc->irq_data.chip->irq_shutdown(&desc->irq_data);
-	else if (desc->irq_data.chip->irq_disable)
-		desc->irq_data.chip->irq_disable(&desc->irq_data);
-	else
-		desc->irq_data.chip->irq_mask(&desc->irq_data);
+	if (irqd_is_started(&desc->irq_data)) {
+		desc->depth = 1;
+		if (desc->irq_data.chip->irq_shutdown) {
+			desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+			irq_state_set_disabled(desc);
+			irq_state_set_masked(desc);
+		} else {
+			__irq_disable(desc, true);
+		}
+		irq_state_clr_started(desc);
+	}
+	/*
+	 * This must be called even if the interrupt was never started up,
+	 * because the activation can happen before the interrupt is
+	 * available for request/startup. It has it's own state tracking so
+	 * it's safe to call it unconditionally.
+	 */
 	irq_domain_deactivate_irq(&desc->irq_data);
-	irq_state_set_masked(desc);
 }
 
 void irq_enable(struct irq_desc *desc)
@@ -228,6 +255,17 @@ void irq_enable(struct irq_desc *desc)
 	irq_state_clr_masked(desc);
 }
 
+static void __irq_disable(struct irq_desc *desc, bool mask)
+{
+	irq_state_set_disabled(desc);
+	if (desc->irq_data.chip->irq_disable) {
+		desc->irq_data.chip->irq_disable(&desc->irq_data);
+		irq_state_set_masked(desc);
+	} else if (mask) {
+		mask_irq(desc);
+	}
+}
+
 /**
  * irq_disable - Mark interrupt disabled
  * @desc:	irq descriptor which should be disabled
@@ -250,13 +288,7 @@ void irq_enable(struct irq_desc *desc)
  */
 void irq_disable(struct irq_desc *desc)
 {
-	irq_state_set_disabled(desc);
-	if (desc->irq_data.chip->irq_disable) {
-		desc->irq_data.chip->irq_disable(&desc->irq_data);
-		irq_state_set_masked(desc);
-	} else if (irq_settings_disable_unlazy(desc)) {
-		mask_irq(desc);
-	}
+	__irq_disable(desc, irq_settings_disable_unlazy(desc));
 }
 
 void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 070be980c37a..57056109f176 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -533,9 +533,15 @@ void __enable_irq(struct irq_desc *desc)
 			goto err_out;
 		/* Prevent probing on this irq: */
 		irq_settings_set_noprobe(desc);
-		irq_enable(desc);
-		check_irq_resend(desc);
-		/* fall-through */
+		/*
+		 * Call irq_startup() not irq_enable() here because the
+		 * interrupt might be marked NOAUTOEN. So irq_startup()
+		 * needs to be invoked when it gets enabled the first
+		 * time. If it was already started up, then irq_startup()
+		 * will invoke irq_enable() under the hood.
+		 */
+		irq_startup(desc, true);
+		break;
 	}
 	default:
 		desc->depth--;
-- 
cgit v1.2.3


From 7994200ce69a3873dfa2641254a13bb0a40056f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 21:00:59 +0200
Subject: ia64: Remove HAVE_ARCH_COPY_SIGINFO

Since ia64 defines __ARCH_SI_PREAMBLE_SIZE it can just use the generic
copy_siginfo implementation, which is identical to the architecture
specific one.

With that support for HAVE_ARCH_COPY_SIGINFO can go away entirely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-ia64@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: sparclinux@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20170603190102.28866-3-hch@lst.de
---
 arch/ia64/include/asm/siginfo.h      | 22 ----------------------
 arch/ia64/include/uapi/asm/siginfo.h |  1 -
 include/linux/signal.h               |  7 +------
 3 files changed, 1 insertion(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/siginfo.h b/arch/ia64/include/asm/siginfo.h
index 6f2e2dd0f28f..dd2f2fce976b 100644
--- a/arch/ia64/include/asm/siginfo.h
+++ b/arch/ia64/include/asm/siginfo.h
@@ -1,23 +1 @@
-/*
- * Based on <asm-i386/siginfo.h>.
- *
- * Modified 1998-2002
- *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
- */
-#ifndef _ASM_IA64_SIGINFO_H
-#define _ASM_IA64_SIGINFO_H
-
-#include <linux/string.h>
 #include <uapi/asm/siginfo.h>
-
-static inline void
-copy_siginfo (siginfo_t *to, siginfo_t *from)
-{
-	if (from->si_code < 0)
-		memcpy(to, from, sizeof(siginfo_t));
-	else
-		/* _sigchld is currently the largest know union member */
-		memcpy(to, from, 4*sizeof(int) + sizeof(from->_sifields._sigchld));
-}
-
-#endif /* _ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h
index f72bf0172bb2..4694c64252d6 100644
--- a/arch/ia64/include/uapi/asm/siginfo.h
+++ b/arch/ia64/include/uapi/asm/siginfo.h
@@ -11,7 +11,6 @@
 #define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
 
 #define HAVE_ARCH_SIGINFO_T
-#define HAVE_ARCH_COPY_SIGINFO
 #define HAVE_ARCH_COPY_SIGINFO_TO_USER
 
 #include <asm-generic/siginfo.h>
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 1f5a16620693..80c7418be359 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -3,16 +3,13 @@
 
 #include <linux/bug.h>
 #include <linux/signal_types.h>
+#include <linux/string.h>
 
 struct task_struct;
 
 /* for sysctl */
 extern int print_fatal_signals;
 
-#ifndef HAVE_ARCH_COPY_SIGINFO
-
-#include <linux/string.h>
-
 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
 {
 	if (from->si_code < 0)
@@ -22,8 +19,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
 		memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
 }
 
-#endif
-
 /*
  * Define some primitives to manipulate sigset_t.
  */
-- 
cgit v1.2.3


From 31ea70e0308b73a1b862bd17c06efc3cbcfd2016 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 21:01:00 +0200
Subject: posix-timers: Move the do_schedule_next_timer declaration

Having it in asm-generic/siginfo.h doesn't make any sense as it is in no way
architecture specific.  Move it to posix-timers.h instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-ia64@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: sparclinux@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20170603190102.28866-4-hch@lst.de
---
 include/asm-generic/siginfo.h | 1 -
 include/linux/posix-timers.h  | 3 +++
 kernel/signal.c               | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index a2508a8f9a9c..5a9394763a66 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -15,7 +15,6 @@
 #define __SI_CODE(T,N)	((T) | ((N) & 0xffff))
 
 struct siginfo;
-void do_schedule_next_timer(struct siginfo *info);
 
 extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 34e893a75771..8929f7e8f452 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -7,6 +7,7 @@
 #include <linux/timex.h>
 #include <linux/alarmtimer.h>
 
+struct siginfo;
 
 struct cpu_timer_list {
 	struct list_head entry;
@@ -120,4 +121,6 @@ long clock_nanosleep_restart(struct restart_block *restart_block);
 
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
+void do_schedule_next_timer(struct siginfo *info);
+
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index ca92bcfeb322..1f85c843be8e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -39,6 +39,7 @@
 #include <linux/compat.h>
 #include <linux/cn_proc.h>
 #include <linux/compiler.h>
+#include <linux/posix-timers.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
-- 
cgit v1.2.3


From b9253a43370e8f3c46c0ee24b04fa2ffec37b7c0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 21:01:01 +0200
Subject: signal: Move copy_siginfo_to_user to <linux/signal.h>

Having it in asm-generic/siginfo.h doesn't make any sense as it is in no way
architecture specific.  Move it to signal.h instead where several related
functions already reside.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-ia64@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: sparclinux@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20170603190102.28866-5-hch@lst.de
---
 include/asm-generic/siginfo.h | 4 ----
 include/linux/signal.h        | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 5a9394763a66..31268a5bf63e 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -14,8 +14,4 @@
 #define __SI_SYS	(7 << 16)
 #define __SI_CODE(T,N)	((T) | ((N) & 0xffff))
 
-struct siginfo;
-
-extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
-
 #endif
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 80c7418be359..a39feddd71ba 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -19,6 +19,8 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
 		memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
 }
 
+int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
+
 /*
  * Define some primitives to manipulate sigset_t.
  */
-- 
cgit v1.2.3


From 3a06c7ac24f9f24ec059cd77c2dbdf7fbfd0aaaf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:38 +0200
Subject: posix-clocks: Remove interval timer facility and mmap/fasync
 callbacks

The only user of this facility is ptp_clock, which does not implement any of
those functions.

Remove them to prevent accidental users. Especially the interval timer
interfaces are now more or less impossible to implement because the
necessary infrastructure has been confined to the core code. Aside of that
it's really complex to make these callbacks implemented according to spec
as the alarm timer implementation demonstrates. If at all then a nanosleep
callback might be a reasonable extension. For now keep just what ptp_clock
needs.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20170530211656.145036286@linutronix.de
---
 include/linux/posix-clock.h |  22 ---------
 kernel/time/posix-clock.c   | 113 --------------------------------------------
 2 files changed, 135 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 83b22ae9ae12..38d8225510f1 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -42,12 +42,6 @@ struct posix_clock;
  * @clock_gettime:  Read the current time
  * @clock_getres:   Get the clock resolution
  * @clock_settime:  Set the current time value
- * @timer_create:   Create a new timer
- * @timer_delete:   Remove a previously created timer
- * @timer_gettime:  Get remaining time and interval of a timer
- * @timer_settime: Set a timer's initial expiration and interval
- * @fasync:         Optional character device fasync method
- * @mmap:           Optional character device mmap method
  * @open:           Optional character device open method
  * @release:        Optional character device release method
  * @ioctl:          Optional character device ioctl method
@@ -66,28 +60,12 @@ struct posix_clock_operations {
 	int  (*clock_settime)(struct posix_clock *pc,
 			      const struct timespec64 *ts);
 
-	int  (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
-
-	int  (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
-
-	void (*timer_gettime)(struct posix_clock *pc,
-			      struct k_itimer *kit, struct itimerspec64 *tsp);
-
-	int  (*timer_settime)(struct posix_clock *pc,
-			      struct k_itimer *kit, int flags,
-			      struct itimerspec64 *tsp, struct itimerspec64 *old);
 	/*
 	 * Optional character device methods:
 	 */
-	int     (*fasync)  (struct posix_clock *pc,
-			    int fd, struct file *file, int on);
-
 	long    (*ioctl)   (struct posix_clock *pc,
 			    unsigned int cmd, unsigned long arg);
 
-	int     (*mmap)    (struct posix_clock *pc,
-			    struct vm_area_struct *vma);
-
 	int     (*open)    (struct posix_clock *pc, fmode_t f_mode);
 
 	uint    (*poll)    (struct posix_clock *pc,
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 7e453005e078..bd4fb785652f 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -82,38 +82,6 @@ static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
 	return result;
 }
 
-static int posix_clock_fasync(int fd, struct file *fp, int on)
-{
-	struct posix_clock *clk = get_posix_clock(fp);
-	int err = 0;
-
-	if (!clk)
-		return -ENODEV;
-
-	if (clk->ops.fasync)
-		err = clk->ops.fasync(clk, fd, fp, on);
-
-	put_posix_clock(clk);
-
-	return err;
-}
-
-static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-	struct posix_clock *clk = get_posix_clock(fp);
-	int err = -ENODEV;
-
-	if (!clk)
-		return -ENODEV;
-
-	if (clk->ops.mmap)
-		err = clk->ops.mmap(clk, vma);
-
-	put_posix_clock(clk);
-
-	return err;
-}
-
 static long posix_clock_ioctl(struct file *fp,
 			      unsigned int cmd, unsigned long arg)
 {
@@ -199,8 +167,6 @@ static const struct file_operations posix_clock_file_operations = {
 	.unlocked_ioctl	= posix_clock_ioctl,
 	.open		= posix_clock_open,
 	.release	= posix_clock_release,
-	.fasync		= posix_clock_fasync,
-	.mmap		= posix_clock_mmap,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= posix_clock_compat_ioctl,
 #endif
@@ -359,88 +325,9 @@ out:
 	return err;
 }
 
-static int pc_timer_create(struct k_itimer *kit)
-{
-	clockid_t id = kit->it_clock;
-	struct posix_clock_desc cd;
-	int err;
-
-	err = get_clock_desc(id, &cd);
-	if (err)
-		return err;
-
-	if (cd.clk->ops.timer_create)
-		err = cd.clk->ops.timer_create(cd.clk, kit);
-	else
-		err = -EOPNOTSUPP;
-
-	put_clock_desc(&cd);
-
-	return err;
-}
-
-static int pc_timer_delete(struct k_itimer *kit)
-{
-	clockid_t id = kit->it_clock;
-	struct posix_clock_desc cd;
-	int err;
-
-	err = get_clock_desc(id, &cd);
-	if (err)
-		return err;
-
-	if (cd.clk->ops.timer_delete)
-		err = cd.clk->ops.timer_delete(cd.clk, kit);
-	else
-		err = -EOPNOTSUPP;
-
-	put_clock_desc(&cd);
-
-	return err;
-}
-
-static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec64 *ts)
-{
-	clockid_t id = kit->it_clock;
-	struct posix_clock_desc cd;
-
-	if (get_clock_desc(id, &cd))
-		return;
-
-	if (cd.clk->ops.timer_gettime)
-		cd.clk->ops.timer_gettime(cd.clk, kit, ts);
-
-	put_clock_desc(&cd);
-}
-
-static int pc_timer_settime(struct k_itimer *kit, int flags,
-			    struct itimerspec64 *ts, struct itimerspec64 *old)
-{
-	clockid_t id = kit->it_clock;
-	struct posix_clock_desc cd;
-	int err;
-
-	err = get_clock_desc(id, &cd);
-	if (err)
-		return err;
-
-	if (cd.clk->ops.timer_settime)
-		err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
-	else
-		err = -EOPNOTSUPP;
-
-	put_clock_desc(&cd);
-
-	return err;
-}
-
 const struct k_clock clock_posix_dynamic = {
 	.clock_getres	= pc_clock_getres,
 	.clock_set	= pc_clock_settime,
 	.clock_get	= pc_clock_gettime,
 	.clock_adj	= pc_clock_adjtime,
-	.timer_create	= pc_timer_create,
-	.timer_set	= pc_timer_settime,
-	.timer_del	= pc_timer_delete,
-	.timer_get	= pc_timer_gettime,
 };
-- 
cgit v1.2.3


From 03676b41a8ffcbb1f6d9eb6ca754b2bfa431fd59 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:40 +0200
Subject: posix-timers: Cleanup struct k_itimer

As a preparation for further changes, cleanup the formatting of the
k_itimer structure and add kernel doc comments.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20170530211656.316574129@linutronix.de
---
 include/linux/posix-timers.h | 61 +++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 8929f7e8f452..e06062c3967b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -49,35 +49,54 @@ struct cpu_timer_list {
 #define FD_TO_CLOCKID(fd)	((~(clockid_t) (fd) << 3) | CLOCKFD)
 #define CLOCKID_TO_FD(clk)	((unsigned int) ~((clk) >> 3))
 
-/* POSIX.1b interval timer structure. */
-struct k_itimer {
-	struct list_head list;		/* free/ allocate list */
-	struct hlist_node t_hash;
-	spinlock_t it_lock;
-	clockid_t it_clock;		/* which timer type */
-	timer_t it_id;			/* timer id */
-	int it_overrun;			/* overrun on pending signal  */
-	int it_overrun_last;		/* overrun on last delivered signal */
-	int it_requeue_pending;		/* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
-	int it_sigev_notify;		/* notify word of sigevent struct */
-	struct signal_struct *it_signal;
+
+/**
+ * struct k_itimer - POSIX.1b interval timer structure.
+ * @list:		List head for binding the timer to signals->posix_timers
+ * @t_hash:		Entry in the posix timer hash table
+ * @it_lock:		Lock protecting the timer
+ * @it_clock:		The posix timer clock id
+ * @it_id:		The posix timer id for identifying the timer
+ * @it_overrun:		The overrun counter for pending signals
+ * @it_overrun_last:	The overrun at the time of the last delivered signal
+ * @it_requeue_pending:	Indicator that timer waits for being requeued on
+ *			signal delivery
+ * @it_sigev_notify:	The notify word of sigevent struct for signal delivery
+ * @it_signal:		Pointer to the creators signal struct
+ * @it_pid:		The pid of the process/task targeted by the signal
+ * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
+ * @sigq:		Pointer to preallocated sigqueue
+ * @it:			Union representing the various posix timer type
+ *			internals. Also used for rcu freeing the timer.
+ */
+struct k_itimer {
+	struct list_head	list;
+	struct hlist_node	t_hash;
+	spinlock_t		it_lock;
+	clockid_t		it_clock;
+	timer_t			it_id;
+	int			it_overrun;
+	int			it_overrun_last;
+	int			it_requeue_pending;
+	int			it_sigev_notify;
+	struct signal_struct	*it_signal;
 	union {
-		struct pid *it_pid;	/* pid of process to send signal to */
-		struct task_struct *it_process;	/* for clock_nanosleep */
+		struct pid		*it_pid;
+		struct task_struct	*it_process;
 	};
-	struct sigqueue *sigq;		/* signal queue entry. */
+	struct sigqueue		*sigq;
 	union {
 		struct {
-			struct hrtimer timer;
-			ktime_t interval;
+			struct hrtimer	timer;
+			ktime_t		interval;
 		} real;
-		struct cpu_timer_list cpu;
+		struct cpu_timer_list	cpu;
 		struct {
-			struct alarm alarmtimer;
-			ktime_t interval;
+			struct alarm	alarmtimer;
+			ktime_t		interval;
 		} alarm;
-		struct rcu_head rcu;
+		struct rcu_head		rcu;
 	} it;
 };
 
-- 
cgit v1.2.3


From bab0aae9dcba9466dcc968b8bd21914f8f691631 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:41 +0200
Subject: posix-timers: Move posix-timer internals to core

None of these declarations is required outside of kernel/time. Move them to
an internal header.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170530211656.394803853@linutronix.de
---
 include/linux/posix-timers.h   | 30 ------------------------------
 kernel/time/alarmtimer.c       |  2 ++
 kernel/time/posix-clock.c      |  2 ++
 kernel/time/posix-cpu-timers.c |  2 ++
 kernel/time/posix-timers.c     |  1 +
 kernel/time/posix-timers.h     | 29 +++++++++++++++++++++++++++++
 6 files changed, 36 insertions(+), 30 deletions(-)
 create mode 100644 kernel/time/posix-timers.h

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index e06062c3967b..a372e7e3a396 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -100,36 +100,6 @@ struct k_itimer {
 	} it;
 };
 
-struct k_clock {
-	int (*clock_getres) (const clockid_t which_clock, struct timespec64 *tp);
-	int (*clock_set) (const clockid_t which_clock,
-			  const struct timespec64 *tp);
-	int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp);
-	int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
-	int (*timer_create) (struct k_itimer *timer);
-	int (*nsleep) (const clockid_t which_clock, int flags,
-		       struct timespec64 *, struct timespec __user *);
-	long (*nsleep_restart) (struct restart_block *restart_block);
-	int (*timer_set) (struct k_itimer *timr, int flags,
-			  struct itimerspec64 *new_setting,
-			  struct itimerspec64 *old_setting);
-	int (*timer_del) (struct k_itimer *timr);
-#define TIMER_RETRY 1
-	void (*timer_get) (struct k_itimer *timr,
-			   struct itimerspec64 *cur_setting);
-};
-
-extern const struct k_clock clock_posix_cpu;
-extern const struct k_clock clock_posix_dynamic;
-extern const struct k_clock clock_process;
-extern const struct k_clock clock_thread;
-extern const struct k_clock alarm_clock;
-
-/* function to call to trigger timer event */
-int posix_timer_event(struct k_itimer *timr, int si_private);
-
-void posix_cpu_timer_schedule(struct k_itimer *timer);
-
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 2a8675f9aac5..36855d675da5 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -28,6 +28,8 @@
 #include <linux/workqueue.h>
 #include <linux/freezer.h>
 
+#include "posix-timers.h"
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/alarmtimer.h>
 
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index bd4fb785652f..17cdc554c9fe 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -25,6 +25,8 @@
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 
+#include "posix-timers.h"
+
 static void delete_clock(struct kref *kref);
 
 /*
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index c99434739fd5..a77a792f2570 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -13,6 +13,8 @@
 #include <linux/tick.h>
 #include <linux/workqueue.h>
 
+#include "posix-timers.h"
+
 /*
  * Called after updating RLIMIT_CPU to run cpu timer and update
  * tsk->signal->cputime_expires expiration cache if necessary. Needs
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b60b655dfbcd..dee6a0d911d4 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -51,6 +51,7 @@
 #include <linux/hashtable.h>
 
 #include "timekeeping.h"
+#include "posix-timers.h"
 
 /*
  * Management arrays for POSIX timers. Timers are now kept in static hash table
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
new file mode 100644
index 000000000000..ad2dbd29b389
--- /dev/null
+++ b/kernel/time/posix-timers.h
@@ -0,0 +1,29 @@
+#define TIMER_RETRY 1
+
+struct k_clock {
+	int (*clock_getres) (const clockid_t which_clock, struct timespec64 *tp);
+	int (*clock_set) (const clockid_t which_clock,
+			  const struct timespec64 *tp);
+	int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp);
+	int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
+	int (*timer_create) (struct k_itimer *timer);
+	int (*nsleep) (const clockid_t which_clock, int flags,
+		       struct timespec64 *, struct timespec __user *);
+	long (*nsleep_restart) (struct restart_block *restart_block);
+	int (*timer_set) (struct k_itimer *timr, int flags,
+			  struct itimerspec64 *new_setting,
+			  struct itimerspec64 *old_setting);
+	int (*timer_del) (struct k_itimer *timr);
+	void (*timer_get) (struct k_itimer *timr,
+			   struct itimerspec64 *cur_setting);
+};
+
+extern const struct k_clock clock_posix_cpu;
+extern const struct k_clock clock_posix_dynamic;
+extern const struct k_clock clock_process;
+extern const struct k_clock clock_thread;
+extern const struct k_clock alarm_clock;
+
+int posix_timer_event(struct k_itimer *timr, int si_private);
+
+void posix_cpu_timer_schedule(struct k_itimer *timer);
-- 
cgit v1.2.3


From 80105cd0e62ba8a2caf8eebd52f42952c7c04046 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:43 +0200
Subject: posix-timers: Move interval out of the union

Preparatory patch to unify the alarm timer and hrtimer based posix interval
timer handling.

The interval is used as a criteria for rearming decisions so moving it out
of the clock specific data structures allows later unification.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20170530211656.563922908@linutronix.de
---
 include/linux/posix-timers.h |  4 ++--
 kernel/time/alarmtimer.c     | 13 ++++++-------
 kernel/time/posix-timers.c   | 20 ++++++++++----------
 3 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a372e7e3a396..908048f488ae 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -63,6 +63,7 @@ struct cpu_timer_list {
  * @it_requeue_pending:	Indicator that timer waits for being requeued on
  *			signal delivery
  * @it_sigev_notify:	The notify word of sigevent struct for signal delivery
+ * @it_interval:	The interval for periodic timers
  * @it_signal:		Pointer to the creators signal struct
  * @it_pid:		The pid of the process/task targeted by the signal
  * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
@@ -80,6 +81,7 @@ struct k_itimer {
 	int			it_overrun_last;
 	int			it_requeue_pending;
 	int			it_sigev_notify;
+	ktime_t			it_interval;
 	struct signal_struct	*it_signal;
 	union {
 		struct pid		*it_pid;
@@ -89,12 +91,10 @@ struct k_itimer {
 	union {
 		struct {
 			struct hrtimer	timer;
-			ktime_t		interval;
 		} real;
 		struct cpu_timer_list	cpu;
 		struct {
 			struct alarm	alarmtimer;
-			ktime_t		interval;
 		} alarm;
 		struct rcu_head		rcu;
 	} it;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 36855d675da5..5b8cf4b61854 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -527,9 +527,8 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 	}
 
 	/* Re-add periodic timers */
-	if (ptr->it.alarm.interval) {
-		ptr->it_overrun += alarm_forward(alarm, now,
-						ptr->it.alarm.interval);
+	if (ptr->it_interval) {
+		ptr->it_overrun += alarm_forward(alarm, now, ptr->it_interval);
 		result = ALARMTIMER_RESTART;
 	}
 	spin_unlock_irqrestore(&ptr->it_lock, flags);
@@ -613,7 +612,7 @@ static void alarm_timer_get(struct k_itimer *timr,
 		cur_setting->it_value.tv_nsec = 0;
 	}
 
-	cur_setting->it_interval = ktime_to_timespec64(timr->it.alarm.interval);
+	cur_setting->it_interval = ktime_to_timespec64(timr->it_interval);
 }
 
 /**
@@ -662,14 +661,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 		return TIMER_RETRY;
 
 	/* start the timer */
-	timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+	timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
 
 	/*
 	 * Rate limit to the tick as a hot fix to prevent DOS. Will be
 	 * mopped up later.
 	 */
-	if (timr->it.alarm.interval < TICK_NSEC)
-		timr->it.alarm.interval = TICK_NSEC;
+	if (timr->it_interval < TICK_NSEC)
+		timr->it_interval = TICK_NSEC;
 
 	exp = timespec64_to_ktime(new_setting->it_value);
 	/* Convert (if necessary) to absolute time */
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 79a00e0f1ef9..7dd992cc7105 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -285,12 +285,12 @@ static void schedule_next_timer(struct k_itimer *timr)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
 
-	if (timr->it.real.interval == 0)
+	if (!timr->it_interval)
 		return;
 
 	timr->it_overrun += (unsigned int) hrtimer_forward(timer,
 						timer->base->get_time(),
-						timr->it.real.interval);
+						timr->it_interval);
 	hrtimer_restart(timer);
 }
 
@@ -375,7 +375,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 	timr = container_of(timer, struct k_itimer, it.real.timer);
 	spin_lock_irqsave(&timr->it_lock, flags);
 
-	if (timr->it.real.interval != 0)
+	if (timr->it_interval != 0)
 		si_private = ++timr->it_requeue_pending;
 
 	if (posix_timer_event(timr, si_private)) {
@@ -384,7 +384,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 		 * we will not get a call back to restart it AND
 		 * it should be restarted.
 		 */
-		if (timr->it.real.interval != 0) {
+		if (timr->it_interval != 0) {
 			ktime_t now = hrtimer_cb_get_time(timer);
 
 			/*
@@ -413,13 +413,13 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 			{
 				ktime_t kj = NSEC_PER_SEC / HZ;
 
-				if (timr->it.real.interval < kj)
+				if (timr->it_interval < kj)
 					now = ktime_add(now, kj);
 			}
 #endif
 			timr->it_overrun += (unsigned int)
 				hrtimer_forward(timer, now,
-						timr->it.real.interval);
+						timr->it_interval);
 			ret = HRTIMER_RESTART;
 			++timr->it_requeue_pending;
 		}
@@ -631,7 +631,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
 
 	memset(cur_setting, 0, sizeof(*cur_setting));
 
-	iv = timr->it.real.interval;
+	iv = timr->it_interval;
 
 	/* interval timer ? */
 	if (iv)
@@ -732,7 +732,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		common_timer_get(timr, old_setting);
 
 	/* disable the timer */
-	timr->it.real.interval = 0;
+	timr->it_interval = 0;
 	/*
 	 * careful here.  If smp we could be in the "fire" routine which will
 	 * be spinning as we hold the lock.  But this is ONLY an SMP issue.
@@ -755,7 +755,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 	hrtimer_set_expires(timer, timespec64_to_ktime(new_setting->it_value));
 
 	/* Convert interval */
-	timr->it.real.interval = timespec64_to_ktime(new_setting->it_interval);
+	timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
 
 	/* SIGEV_NONE timers are not queued ! See common_timer_get */
 	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
@@ -820,7 +820,7 @@ retry:
 
 static int common_timer_del(struct k_itimer *timer)
 {
-	timer->it.real.interval = 0;
+	timer->it_interval = 0;
 
 	if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
 		return TIMER_RETRY;
-- 
cgit v1.2.3


From d97bb75ddd2f38068df01da8abf26df78756253c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:44 +0200
Subject: posix-timers: Store k_clock pointer in k_itimer

Having the k_clock pointer in the k_itimer struct avoids the lookup in
several code pathes and makes the next steps of unification of the hrtimer
and alarmtimer based posix timers simpler.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20170530211656.641222072@linutronix.de
---
 include/linux/posix-timers.h   | 2 ++
 kernel/time/posix-cpu-timers.c | 2 ++
 kernel/time/posix-timers.c     | 7 ++++---
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 908048f488ae..8f9cca390cdb 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -56,6 +56,7 @@ struct cpu_timer_list {
  * @list:		List head for binding the timer to signals->posix_timers
  * @t_hash:		Entry in the posix timer hash table
  * @it_lock:		Lock protecting the timer
+ * @kclock:		Pointer to the k_clock struct handling this timer
  * @it_clock:		The posix timer clock id
  * @it_id:		The posix timer id for identifying the timer
  * @it_overrun:		The overrun counter for pending signals
@@ -75,6 +76,7 @@ struct k_itimer {
 	struct list_head	list;
 	struct hlist_node	t_hash;
 	spinlock_t		it_lock;
+	const struct k_clock	*kclock;
 	clockid_t		it_clock;
 	timer_t			it_id;
 	int			it_overrun;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 1683e503179e..0123ece6851b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -324,6 +324,8 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 	if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
 		return -EINVAL;
 
+	new_timer->kclock = &clock_posix_cpu;
+
 	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 
 	rcu_read_lock();
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 7dd992cc7105..eb007e19811d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -519,6 +519,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
 	it_id_set = IT_ID_SET;
 	new_timer->it_id = (timer_t) new_timer_id;
 	new_timer->it_clock = which_clock;
+	new_timer->kclock = kc;
 	new_timer->it_overrun = -1;
 
 	if (timer_event_spec) {
@@ -679,7 +680,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	if (!timr)
 		return -EINVAL;
 
-	kc = clockid_to_kclock(timr->it_clock);
+	kc = timr->kclock;
 	if (WARN_ON_ONCE(!kc || !kc->timer_get))
 		ret = -EINVAL;
 	else
@@ -798,7 +799,7 @@ retry:
 	if (!timr)
 		return -EINVAL;
 
-	kc = clockid_to_kclock(timr->it_clock);
+	kc = timr->kclock;
 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
 		error = -EINVAL;
 	else
@@ -829,7 +830,7 @@ static int common_timer_del(struct k_itimer *timer)
 
 static inline int timer_delete_hook(struct k_itimer *timer)
 {
-	const struct k_clock *kc = clockid_to_kclock(timer->it_clock);
+	const struct k_clock *kc = timer->kclock;
 
 	if (WARN_ON_ONCE(!kc || !kc->timer_del))
 		return -EINVAL;
-- 
cgit v1.2.3


From 96fe3b072f134e4993f829d599eaa1e0eb5a10e5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:46 +0200
Subject: posix-timers: Rename do_schedule_next_timer

That function is a misnomer. Rename it with a proper prefix to
posixtimer_rearm().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20170530211656.811362578@linutronix.de
---
 include/linux/posix-timers.h   |  2 +-
 kernel/signal.c                |  2 +-
 kernel/time/posix-cpu-timers.c |  2 +-
 kernel/time/posix-timers.c     | 10 +++++-----
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 8f9cca390cdb..771e5f788c90 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -112,6 +112,6 @@ long clock_nanosleep_restart(struct restart_block *restart_block);
 
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
-void do_schedule_next_timer(struct siginfo *info);
+void posixtimer_rearm(struct siginfo *info);
 
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 1f85c843be8e..d031cd24f8a9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -630,7 +630,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		 * about to disable them again anyway.
 		 */
 		spin_unlock(&tsk->sighand->siglock);
-		do_schedule_next_timer(info);
+		posixtimer_rearm(info);
 		spin_lock(&tsk->sighand->siglock);
 	}
 #endif
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0123ece6851b..1ba576d3151a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -981,7 +981,7 @@ static void check_process_timers(struct task_struct *tsk,
 }
 
 /*
- * This is called from the signal code (via do_schedule_next_timer)
+ * This is called from the signal code (via posixtimer_rearm)
  * when the last timer signal was delivered and we have to reload the timer.
  */
 void posix_cpu_timer_schedule(struct k_itimer *timer)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index eb007e19811d..036b7e70c65c 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -305,7 +305,7 @@ static void schedule_next_timer(struct k_itimer *timr)
  * To protect against the timer going away while the interrupt is queued,
  * we require that the it_requeue_pending flag be set.
  */
-void do_schedule_next_timer(struct siginfo *info)
+void posixtimer_rearm(struct siginfo *info)
 {
 	struct k_itimer *timr;
 	unsigned long flags;
@@ -336,12 +336,12 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	int shared, ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
-	 * dequeue_signal()->do_schedule_next_timer().
+	 * dequeue_signal()->posixtimer_rearm().
 	 *
 	 * If dequeue_signal() sees the "right" value of
-	 * si_sys_private it calls do_schedule_next_timer().
+	 * si_sys_private it calls posixtimer_rearm().
 	 * We re-queue ->sigq and drop ->it_lock().
-	 * do_schedule_next_timer() locks the timer
+	 * posixtimer_rearm() locks the timer
 	 * and re-schedules it while ->sigq is pending.
 	 * Not really bad, but not that we want.
 	 */
@@ -701,7 +701,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
  * accumulating overruns on the next timer.  The overrun is frozen when
  * the signal is delivered, either at the notify time (if the info block
  * is not queued) or at the actual delivery time (as we are informed by
- * the call back to do_schedule_next_timer().  So all we need to do is
+ * the call back to posixtimer_rearm().  So all we need to do is
  * to pick up the frozen overrun.
  */
 SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
-- 
cgit v1.2.3


From 21e55c1f83880a56360287c00f2b5cd5e5a4a912 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:48 +0200
Subject: posix-timers: Add active flag to k_itimer

Keep track of the activation state of posix timers. This is a preparatory
change for making common_timer_get() usable by both hrtimer and alarm timer
implementations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20170530211656.967783982@linutronix.de
---
 include/linux/posix-timers.h | 2 ++
 kernel/time/posix-timers.c   | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 771e5f788c90..667095dbcd37 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -59,6 +59,7 @@ struct cpu_timer_list {
  * @kclock:		Pointer to the k_clock struct handling this timer
  * @it_clock:		The posix timer clock id
  * @it_id:		The posix timer id for identifying the timer
+ * @it_active:		Marker that timer is active
  * @it_overrun:		The overrun counter for pending signals
  * @it_overrun_last:	The overrun at the time of the last delivered signal
  * @it_requeue_pending:	Indicator that timer waits for being requeued on
@@ -79,6 +80,7 @@ struct k_itimer {
 	const struct k_clock	*kclock;
 	clockid_t		it_clock;
 	timer_t			it_id;
+	int			it_active;
 	int			it_overrun;
 	int			it_overrun_last;
 	int			it_requeue_pending;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b12582a4b122..795215bba73d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -316,6 +316,7 @@ void posixtimer_rearm(struct siginfo *info)
 	if (timr->it_requeue_pending == info->si_sys_private) {
 		timr->kclock->timer_rearm(timr);
 
+		timr->it_active = 1;
 		timr->it_overrun_last = timr->it_overrun;
 		timr->it_overrun = -1;
 		++timr->it_requeue_pending;
@@ -371,6 +372,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 	timr = container_of(timer, struct k_itimer, it.real.timer);
 	spin_lock_irqsave(&timr->it_lock, flags);
 
+	timr->it_active = 0;
 	if (timr->it_interval != 0)
 		si_private = ++timr->it_requeue_pending;
 
@@ -418,6 +420,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 						timr->it_interval);
 			ret = HRTIMER_RESTART;
 			++timr->it_requeue_pending;
+			timr->it_active = 1;
 		}
 	}
 
@@ -737,7 +740,8 @@ common_timer_set(struct k_itimer *timr, int flags,
 	if (hrtimer_try_to_cancel(timer) < 0)
 		return TIMER_RETRY;
 
-	timr->it_requeue_pending = (timr->it_requeue_pending + 2) & 
+	timr->it_active = 0;
+	timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
 		~REQUEUE_PENDING;
 	timr->it_overrun_last = 0;
 
@@ -763,6 +767,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		return 0;
 	}
 
+	timr->it_active = 1;
 	hrtimer_start_expires(timer, mode);
 	return 0;
 }
@@ -821,6 +826,7 @@ static int common_timer_del(struct k_itimer *timer)
 
 	if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
 		return TIMER_RETRY;
+	timer->it_active = 0;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2387149eade25f32dcf1398811b3d0293181d005 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Sun, 4 Jun 2017 14:43:51 +0200
Subject: KVM: improve arch vcpu request defining

Marc Zyngier suggested that we define the arch specific VCPU request
base, rather than requiring each arch to remember to start from 8.
That suggestion, along with Radim Krcmar's recent VCPU request flag
addition, snowballed into defining something of an arch VCPU request
defining API.

No functional change.

(Looks like x86 is running out of arch VCPU request bits.  Maybe
 someday we'll need to extend to 64.)

Signed-off-by: Andrew Jones <drjones@redhat.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  3 ++-
 arch/arm64/include/asm/kvm_host.h   |  3 ++-
 arch/powerpc/include/asm/kvm_host.h |  4 ++--
 arch/s390/include/asm/kvm_host.h    |  6 ++---
 arch/x86/include/asm/kvm_host.h     | 47 ++++++++++++++++++++-----------------
 include/linux/kvm_host.h            |  7 ++++++
 6 files changed, 41 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 12274d46df70..c556babe467c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -44,7 +44,8 @@
 #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
 #endif
 
-#define KVM_REQ_VCPU_EXIT	(8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VCPU_EXIT \
+	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 32cbe8a3bb0d..0ff991c9c66e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -41,7 +41,8 @@
 
 #define KVM_VCPU_MAX_FEATURES 4
 
-#define KVM_REQ_VCPU_EXIT	(8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VCPU_EXIT \
+	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9c51ac4b8f36..50e0bc9723cc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,8 +52,8 @@
 #define KVM_IRQCHIP_NUM_PINS     256
 
 /* PPC-specific vcpu->requests bit members */
-#define KVM_REQ_WATCHDOG           8
-#define KVM_REQ_EPR_EXIT           9
+#define KVM_REQ_WATCHDOG	KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT	KVM_ARCH_REQ(1)
 
 #include <linux/mmu_notifier.h>
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 426614a882a9..9c3bd94204ac 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -42,9 +42,9 @@
 #define KVM_HALT_POLL_NS_DEFAULT 80000
 
 /* s390-specific vcpu->requests bit members */
-#define KVM_REQ_ENABLE_IBS         8
-#define KVM_REQ_DISABLE_IBS        9
-#define KVM_REQ_ICPT_OPEREXC       10
+#define KVM_REQ_ENABLE_IBS	KVM_ARCH_REQ(0)
+#define KVM_REQ_DISABLE_IBS	KVM_ARCH_REQ(1)
+#define KVM_REQ_ICPT_OPEREXC	KVM_ARCH_REQ(2)
 
 #define SIGP_CTRL_C		0x80
 #define SIGP_CTRL_SCN_MASK	0x3f
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c761fea0c98..563979976fab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -48,28 +48,31 @@
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
 /* x86-specific vcpu->requests bit members */
-#define KVM_REQ_MIGRATE_TIMER      8
-#define KVM_REQ_REPORT_TPR_ACCESS  9
-#define KVM_REQ_TRIPLE_FAULT      10
-#define KVM_REQ_MMU_SYNC          11
-#define KVM_REQ_CLOCK_UPDATE      12
-#define KVM_REQ_EVENT             14
-#define KVM_REQ_APF_HALT          15
-#define KVM_REQ_STEAL_UPDATE      16
-#define KVM_REQ_NMI               17
-#define KVM_REQ_PMU               18
-#define KVM_REQ_PMI               19
-#define KVM_REQ_SMI               20
-#define KVM_REQ_MASTERCLOCK_UPDATE 21
-#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_SCAN_IOAPIC       (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
-#define KVM_REQ_APIC_PAGE_RELOAD  (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_HV_CRASH          26
-#define KVM_REQ_IOAPIC_EOI_EXIT   27
-#define KVM_REQ_HV_RESET          28
-#define KVM_REQ_HV_EXIT           29
-#define KVM_REQ_HV_STIMER         30
+#define KVM_REQ_MIGRATE_TIMER		KVM_ARCH_REQ(0)
+#define KVM_REQ_REPORT_TPR_ACCESS	KVM_ARCH_REQ(1)
+#define KVM_REQ_TRIPLE_FAULT		KVM_ARCH_REQ(2)
+#define KVM_REQ_MMU_SYNC		KVM_ARCH_REQ(3)
+#define KVM_REQ_CLOCK_UPDATE		KVM_ARCH_REQ(4)
+#define KVM_REQ_EVENT			KVM_ARCH_REQ(6)
+#define KVM_REQ_APF_HALT		KVM_ARCH_REQ(7)
+#define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(8)
+#define KVM_REQ_NMI			KVM_ARCH_REQ(9)
+#define KVM_REQ_PMU			KVM_ARCH_REQ(10)
+#define KVM_REQ_PMI			KVM_ARCH_REQ(11)
+#define KVM_REQ_SMI			KVM_ARCH_REQ(12)
+#define KVM_REQ_MASTERCLOCK_UPDATE	KVM_ARCH_REQ(13)
+#define KVM_REQ_MCLOCK_INPROGRESS \
+	KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SCAN_IOAPIC \
+	KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_GLOBAL_CLOCK_UPDATE	KVM_ARCH_REQ(16)
+#define KVM_REQ_APIC_PAGE_RELOAD \
+	KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_CRASH		KVM_ARCH_REQ(18)
+#define KVM_REQ_IOAPIC_EOI_EXIT		KVM_ARCH_REQ(19)
+#define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
+#define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
+#define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8c0664309815..3724b51aab64 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -126,6 +126,13 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_PENDING_TIMER     2
 #define KVM_REQ_UNHALT            3
+#define KVM_REQUEST_ARCH_BASE     8
+
+#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
+	BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \
+	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
+})
+#define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
-- 
cgit v1.2.3


From 2fa6e1e12a024b48b2c7ea39f50205246e027da7 Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Sun, 4 Jun 2017 14:43:52 +0200
Subject: KVM: add kvm_request_pending
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A first step in vcpu->requests encapsulation.  Additionally, we now
use READ_ONCE() when accessing vcpu->requests, which ensures we
always load vcpu->requests when it's accessed.  This is important as
other threads can change it any time.  Also, READ_ONCE() documents
that vcpu->requests is used with other threads, likely requiring
memory barriers, which it does.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
[ Documented the new use of READ_ONCE() and converted another check
  in arch/mips/kvm/vz.c ]
Signed-off-by: Andrew Jones <drjones@redhat.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/mips/kvm/trap_emul.c  | 2 +-
 arch/mips/kvm/vz.c         | 2 +-
 arch/powerpc/kvm/booke.c   | 2 +-
 arch/powerpc/kvm/powerpc.c | 5 ++---
 arch/s390/kvm/kvm-s390.c   | 2 +-
 arch/x86/kvm/x86.c         | 4 ++--
 include/linux/kvm_host.h   | 5 +++++
 7 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index a563759fd142..6a0d7040d882 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu,
 	struct mm_struct *mm;
 	int i;
 
-	if (likely(!vcpu->requests))
+	if (likely(!kvm_request_pending(vcpu)))
 		return;
 
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 71d8856ade64..74805035edc8 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
 	int ret = 0;
 	int i;
 
-	if (!vcpu->requests)
+	if (!kvm_request_pending(vcpu))
 		return 0;
 
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3eaac3809977..071b87ee682f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 	kvmppc_core_check_exceptions(vcpu);
 
-	if (vcpu->requests) {
+	if (kvm_request_pending(vcpu)) {
 		/* Exception delivery raised request; start over */
 		return 1;
 	}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f7cf2cd564ef..fd64f087737c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !!(v->arch.pending_exceptions) ||
-	       v->requests;
+	return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
 }
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 		 */
 		smp_mb();
 
-		if (vcpu->requests) {
+		if (kvm_request_pending(vcpu)) {
 			/* Make sure we process requests preemptable */
 			local_irq_enable();
 			trace_kvm_check_requests(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 689ac48361c6..ad41e0fa3a21 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2440,7 +2440,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
 retry:
 	kvm_s390_vcpu_request_handled(vcpu);
-	if (!vcpu->requests)
+	if (!kvm_request_pending(vcpu))
 		return 0;
 	/*
 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 464da936c53d..f81060518635 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6710,7 +6710,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	bool req_immediate_exit = false;
 
-	if (vcpu->requests) {
+	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -6874,7 +6874,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->sync_pir_to_irr(vcpu);
 	}
 
-	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
+	if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
 	    || need_resched() || signal_pending(current)) {
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		smp_wmb();
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3724b51aab64..0b50e7b35ed4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1105,6 +1105,11 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 	set_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
 }
 
+static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
+{
+	return READ_ONCE(vcpu->requests);
+}
+
 static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
 {
 	return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
-- 
cgit v1.2.3


From f91840a32deef5cb1bf73338bc5010f843b01426 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 2 Jun 2017 21:03:52 -0700
Subject: perf, bpf: Add BPF support to all perf_event types

Allow BPF_PROG_TYPE_PERF_EVENT program types to attach to all
perf_event types, including HW_CACHE, RAW, and dynamic pmu events.
Only tracepoint/kprobe events are treated differently which require
BPF_PROG_TYPE_TRACEPOINT/BPF_PROG_TYPE_KPROBE program types accordingly.

Also add support for reading all event counters using
bpf_perf_event_read() helper.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/perf_event.h |  7 +++++--
 kernel/bpf/arraymap.c      | 28 +++++++--------------------
 kernel/events/core.c       | 47 +++++++++++++++++++++++++++-------------------
 kernel/trace/bpf_trace.c   | 22 ++++++++--------------
 4 files changed, 48 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..8fc5f0fada5e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -896,7 +896,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
-extern u64 perf_event_read_local(struct perf_event *event);
+int perf_event_read_local(struct perf_event *event, u64 *value);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
@@ -1301,7 +1301,10 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *
 {
 	return ERR_PTR(-EINVAL);
 }
-static inline u64 perf_event_read_local(struct perf_event *event)	{ return -EINVAL; }
+static inline int perf_event_read_local(struct perf_event *event, u64 *value)
+{
+	return -EINVAL;
+}
 static inline void perf_event_print_debug(void)				{ }
 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 172dc8ee0e3b..ecb43542246e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -452,38 +452,24 @@ static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
 static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
 					 struct file *map_file, int fd)
 {
-	const struct perf_event_attr *attr;
 	struct bpf_event_entry *ee;
 	struct perf_event *event;
 	struct file *perf_file;
+	u64 value;
 
 	perf_file = perf_event_get(fd);
 	if (IS_ERR(perf_file))
 		return perf_file;
 
+	ee = ERR_PTR(-EOPNOTSUPP);
 	event = perf_file->private_data;
-	ee = ERR_PTR(-EINVAL);
-
-	attr = perf_event_attrs(event);
-	if (IS_ERR(attr) || attr->inherit)
+	if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
 		goto err_out;
 
-	switch (attr->type) {
-	case PERF_TYPE_SOFTWARE:
-		if (attr->config != PERF_COUNT_SW_BPF_OUTPUT)
-			goto err_out;
-		/* fall-through */
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-		ee = bpf_event_entry_gen(perf_file, map_file);
-		if (ee)
-			return ee;
-		ee = ERR_PTR(-ENOMEM);
-		/* fall-through */
-	default:
-		break;
-	}
-
+	ee = bpf_event_entry_gen(perf_file, map_file);
+	if (ee)
+		return ee;
+	ee = ERR_PTR(-ENOMEM);
 err_out:
 	fput(perf_file);
 	return ee;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..51e40e4876c0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3636,10 +3636,10 @@ static inline u64 perf_event_count(struct perf_event *event)
  *     will not be local and we cannot read them atomically
  *   - must not have a pmu::count method
  */
-u64 perf_event_read_local(struct perf_event *event)
+int perf_event_read_local(struct perf_event *event, u64 *value)
 {
 	unsigned long flags;
-	u64 val;
+	int ret = 0;
 
 	/*
 	 * Disabling interrupts avoids all counter scheduling (context
@@ -3647,25 +3647,37 @@ u64 perf_event_read_local(struct perf_event *event)
 	 */
 	local_irq_save(flags);
 
-	/* If this is a per-task event, it must be for current */
-	WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
-		     event->hw.target != current);
-
-	/* If this is a per-CPU event, it must be for this CPU */
-	WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
-		     event->cpu != smp_processor_id());
-
 	/*
 	 * It must not be an event with inherit set, we cannot read
 	 * all child counters from atomic context.
 	 */
-	WARN_ON_ONCE(event->attr.inherit);
+	if (event->attr.inherit) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	/*
 	 * It must not have a pmu::count method, those are not
 	 * NMI safe.
 	 */
-	WARN_ON_ONCE(event->pmu->count);
+	if (event->pmu->count) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* If this is a per-task event, it must be for current */
+	if ((event->attach_state & PERF_ATTACH_TASK) &&
+	    event->hw.target != current) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If this is a per-CPU event, it must be for this CPU */
+	if (!(event->attach_state & PERF_ATTACH_TASK) &&
+	    event->cpu != smp_processor_id()) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * If the event is currently on this CPU, its either a per-task event,
@@ -3675,10 +3687,11 @@ u64 perf_event_read_local(struct perf_event *event)
 	if (event->oncpu == smp_processor_id())
 		event->pmu->read(event);
 
-	val = local64_read(&event->count);
+	*value = local64_read(&event->count);
+out:
 	local_irq_restore(flags);
 
-	return val;
+	return ret;
 }
 
 static int perf_event_read(struct perf_event *event, bool group)
@@ -8037,12 +8050,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	bool is_kprobe, is_tracepoint;
 	struct bpf_prog *prog;
 
-	if (event->attr.type == PERF_TYPE_HARDWARE ||
-	    event->attr.type == PERF_TYPE_SOFTWARE)
-		return perf_event_set_bpf_handler(event, prog_fd);
-
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
-		return -EINVAL;
+		return perf_event_set_bpf_handler(event, prog_fd);
 
 	if (event->tp_event->prog)
 		return -EEXIST;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 460a031c77e5..08eb072430b9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -234,7 +234,8 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
 	struct bpf_event_entry *ee;
-	struct perf_event *event;
+	u64 value = 0;
+	int err;
 
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
@@ -247,21 +248,14 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 	if (!ee)
 		return -ENOENT;
 
-	event = ee->event;
-	if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
-		     event->attr.type != PERF_TYPE_RAW))
-		return -EINVAL;
-
-	/* make sure event is local and doesn't have pmu::count */
-	if (unlikely(event->oncpu != cpu || event->pmu->count))
-		return -EINVAL;
-
+	err = perf_event_read_local(ee->event, &value);
 	/*
-	 * we don't know if the function is run successfully by the
-	 * return value. It can be judged in other places, such as
-	 * eBPF programs.
+	 * this api is ugly since we miss [-22..-2] range of valid
+	 * counter values, but that's uapi
 	 */
-	return perf_event_read_local(event);
+	if (err)
+		return err;
+	return value;
 }
 
 static const struct bpf_func_proto bpf_perf_event_read_proto = {
-- 
cgit v1.2.3


From 48a1df65334b74bd7531f932cca5928932abf769 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 4 Jun 2017 04:16:22 +0200
Subject: skbuff: return -EMSGSIZE in skb_to_sgvec to prevent overflow

This is a defense-in-depth measure in response to bugs like
4d6fa57b4dab ("macsec: avoid heap overflow in skb_to_sgvec"). There's
not only a potential overflow of sglist items, but also a stack overflow
potential, so we fix this by limiting the amount of recursion this function
is allowed to do. Not actually providing a bounded base case is a future
disaster that we can easily avoid here.

As a small matter of house keeping, we take this opportunity to move the
documentation comment over the actual function the documentation is for.

While this could be implemented by using an explicit stack of skbuffs,
when implementing this, the function complexity increased considerably,
and I don't think such complexity and bloat is actually worth it. So,
instead I built this and tested it on x86, x86_64, ARM, ARM64, and MIPS,
and measured the stack usage there. I also reverted the recent MIPS
changes that give it a separate IRQ stack, so that I could experience
some worst-case situations. I found that limiting it to 24 layers deep
yielded a good stack usage with room for safety, as well as being much
deeper than any driver actually ever creates.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Sabrina Dubroca <sd@queasysnail.net>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  8 +++----
 net/core/skbuff.c      | 65 ++++++++++++++++++++++++++++++++------------------
 2 files changed, 46 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 45a59c1e0cc7..d460a4cbda1c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -953,10 +953,10 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
 				     unsigned int headroom);
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
 				int newtailroom, gfp_t priority);
-int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
-			int offset, int len);
-int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
-		 int len);
+int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+				     int offset, int len);
+int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg,
+			      int offset, int len);
 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
 int skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 780b7c1563d0..bba33cf4f7cd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3508,24 +3508,18 @@ void __init skb_init(void)
 						NULL);
 }
 
-/**
- *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
- *	@skb: Socket buffer containing the buffers to be mapped
- *	@sg: The scatter-gather list to map into
- *	@offset: The offset into the buffer's contents to start mapping
- *	@len: Length of buffer space to be mapped
- *
- *	Fill the specified scatter-gather list with mappings/pointers into a
- *	region of the buffer space attached to a socket buffer.
- */
 static int
-__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
+	       unsigned int recursion_level)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
 	struct sk_buff *frag_iter;
 	int elt = 0;
 
+	if (unlikely(recursion_level >= 24))
+		return -EMSGSIZE;
+
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
@@ -3544,6 +3538,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
 		if ((copy = end - offset) > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+				return -EMSGSIZE;
 
 			if (copy > len)
 				copy = len;
@@ -3558,16 +3554,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	}
 
 	skb_walk_frags(skb, frag_iter) {
-		int end;
+		int end, ret;
 
 		WARN_ON(start > offset + len);
 
 		end = start + frag_iter->len;
 		if ((copy = end - offset) > 0) {
+			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+				return -EMSGSIZE;
+
 			if (copy > len)
 				copy = len;
-			elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
-					      copy);
+			ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
+					      copy, recursion_level + 1);
+			if (unlikely(ret < 0))
+				return ret;
+			elt += ret;
 			if ((len -= copy) == 0)
 				return elt;
 			offset += copy;
@@ -3578,6 +3580,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	return elt;
 }
 
+/**
+ *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ *	@skb: Socket buffer containing the buffers to be mapped
+ *	@sg: The scatter-gather list to map into
+ *	@offset: The offset into the buffer's contents to start mapping
+ *	@len: Length of buffer space to be mapped
+ *
+ *	Fill the specified scatter-gather list with mappings/pointers into a
+ *	region of the buffer space attached to a socket buffer. Returns either
+ *	the number of scatterlist items used, or -EMSGSIZE if the contents
+ *	could not fit.
+ */
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
+
+	if (nsg <= 0)
+		return nsg;
+
+	sg_mark_end(&sg[nsg - 1]);
+
+	return nsg;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
 /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
  * sglist without mark the sg which contain last skb data as the end.
  * So the caller can mannipulate sg list as will when padding new data after
@@ -3600,19 +3627,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
 			int offset, int len)
 {
-	return __skb_to_sgvec(skb, sg, offset, len);
+	return __skb_to_sgvec(skb, sg, offset, len, 0);
 }
 EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
 
-int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
-	int nsg = __skb_to_sgvec(skb, sg, offset, len);
 
-	sg_mark_end(&sg[nsg - 1]);
-
-	return nsg;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
 
 /**
  *	skb_cow_data - Check that a socket buffer's data buffers are writable
-- 
cgit v1.2.3


From f604b17d7fdef574792a7e0b39f1b926d6b43d9d Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 4 Jun 2017 13:31:01 +0300
Subject: qed*: L2 interface to use the SB structures directly

Part of an effort of a cleaner seperation between qed and the protocol
drivers, the L2 interface is to use the SB structure for initialization
purposes opaquely.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c     | 32 +++++++++++++++-------------
 drivers/net/ethernet/qlogic/qed/qed_l2.h     | 24 +++++++++++++++------
 drivers/net/ethernet/qlogic/qed/qed_sriov.c  | 13 +++++++++--
 drivers/net/ethernet/qlogic/qed/qed_vf.c     |  8 +++----
 drivers/net/ethernet/qlogic/qede/qede_main.c |  4 ++--
 include/linux/qed/qed_eth_if.h               |  3 +--
 6 files changed, 52 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 9d5791155fcf..262b2ba13e79 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -182,9 +182,15 @@ _qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
 	p_cid->opaque_fid = opaque_fid;
 	p_cid->cid = cid;
 	p_cid->vf_qid = vf_qid;
-	p_cid->rel = *p_params;
 	p_cid->p_owner = p_hwfn;
 
+	/* Fill in parameters */
+	p_cid->rel.vport_id = p_params->vport_id;
+	p_cid->rel.queue_id = p_params->queue_id;
+	p_cid->rel.stats_id = p_params->stats_id;
+	p_cid->sb_igu_id = p_params->p_sb->igu_sb_id;
+	p_cid->sb_idx = p_params->sb_idx;
+
 	/* Don't try calculating the absolute indices for VFs */
 	if (IS_VF(p_hwfn->cdev)) {
 		p_cid->abs = p_cid->rel;
@@ -215,10 +221,6 @@ _qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
 		p_cid->abs.stats_id = p_cid->rel.stats_id;
 	}
 
-	/* SBs relevant information was already provided as absolute */
-	p_cid->abs.sb = p_cid->rel.sb;
-	p_cid->abs.sb_idx = p_cid->rel.sb_idx;
-
 	/* This is tricky - we're actually interested in whehter this is a PF
 	 * entry meant for the VF.
 	 */
@@ -235,7 +237,7 @@ out:
 		   p_cid->rel.queue_id,
 		   p_cid->abs.queue_id,
 		   p_cid->rel.stats_id,
-		   p_cid->abs.stats_id, p_cid->abs.sb, p_cid->abs.sb_idx);
+		   p_cid->abs.stats_id, p_cid->sb_igu_id, p_cid->sb_idx);
 
 	return p_cid;
 
@@ -767,7 +769,7 @@ int qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 	DP_VERBOSE(p_hwfn, QED_MSG_SP,
 		   "opaque_fid=0x%x, cid=0x%x, rx_qzone=0x%x, vport_id=0x%x, sb_id=0x%x\n",
 		   p_cid->opaque_fid, p_cid->cid,
-		   p_cid->abs.queue_id, p_cid->abs.vport_id, p_cid->abs.sb);
+		   p_cid->abs.queue_id, p_cid->abs.vport_id, p_cid->sb_igu_id);
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -783,8 +785,8 @@ int qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 
 	p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-	p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
-	p_ramrod->sb_index = p_cid->abs.sb_idx;
+	p_ramrod->sb_id = cpu_to_le16(p_cid->sb_igu_id);
+	p_ramrod->sb_index = p_cid->sb_idx;
 	p_ramrod->vport_id = p_cid->abs.vport_id;
 	p_ramrod->stats_counter_id = p_cid->abs.stats_id;
 	p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
@@ -1001,8 +1003,8 @@ qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
 	p_ramrod = &p_ent->ramrod.tx_queue_start;
 	p_ramrod->vport_id = p_cid->abs.vport_id;
 
-	p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
-	p_ramrod->sb_index = p_cid->abs.sb_idx;
+	p_ramrod->sb_id = cpu_to_le16(p_cid->sb_igu_id);
+	p_ramrod->sb_index = p_cid->sb_idx;
 	p_ramrod->stats_counter_id = p_cid->abs.stats_id;
 
 	p_ramrod->queue_zone_id = cpu_to_le16(p_cid->abs.queue_id);
@@ -2279,9 +2281,9 @@ static int qed_start_rxq(struct qed_dev *cdev,
 	}
 
 	DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-		   "Started RX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+		   "Started RX-Q %d [rss_num %d] on V-PORT %d and SB igu %d\n",
 		   p_params->queue_id, rss_num, p_params->vport_id,
-		   p_params->sb);
+		   p_params->p_sb->igu_sb_id);
 
 	return 0;
 }
@@ -2329,9 +2331,9 @@ static int qed_start_txq(struct qed_dev *cdev,
 	}
 
 	DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-		   "Started TX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+		   "Started TX-Q %d [rss_num %d] on V-PORT %d and SB igu %d\n",
 		   p_params->queue_id, rss_num, p_params->vport_id,
-		   p_params->sb);
+		   p_params->p_sb->igu_sb_id);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 8606bbfa6612..6ad36449dae9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -279,14 +279,24 @@ void qed_reset_vport_stats(struct qed_dev *cdev);
 
 #define MAX_QUEUES_PER_QZONE    (sizeof(unsigned long) * 8)
 
+/* Almost identical to the qed_queue_start_common_params,
+ * but here we maintain the SB index in IGU CAM.
+ */
+struct qed_queue_cid_params {
+	u8 vport_id;
+	u16 queue_id;
+	u8 stats_id;
+};
+
 struct qed_queue_cid {
-	/* 'Relative' is a relative term ;-). Usually the indices [not counting
-	 * SBs] would be PF-relative, but there are some cases where that isn't
-	 * the case - specifically for a PF configuring its VF indices it's
-	 * possible some fields [E.g., stats-id] in 'rel' would already be abs.
-	 */
-	struct qed_queue_start_common_params rel;
-	struct qed_queue_start_common_params abs;
+	/* For stats-id, the `rel' is actually absolute as well */
+	struct qed_queue_cid_params rel;
+	struct qed_queue_cid_params abs;
+
+	/* These have no 'relative' meaning */
+	u16 sb_igu_id;
+	u8 sb_idx;
+
 	u32 cid;
 	u16 opaque_fid;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 5ae8827534f8..498c83ebc385 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -1951,6 +1951,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 	u8 status = PFVF_STATUS_NO_RESOURCE;
 	struct qed_vf_q_info *p_queue;
 	struct vfpf_start_rxq_tlv *req;
+	struct qed_sb_info sb_dummy;
 	bool b_legacy_vf = false;
 	int rc;
 
@@ -1968,7 +1969,10 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 	params.queue_id = p_queue->fw_rx_qid;
 	params.vport_id = vf->vport_id;
 	params.stats_id = vf->abs_vf_id + 0x10;
-	params.sb = req->hw_sb;
+	/* Since IGU index is passed via sb_info, construct a dummy one */
+	memset(&sb_dummy, 0, sizeof(sb_dummy));
+	sb_dummy.igu_sb_id = req->hw_sb;
+	params.p_sb = &sb_dummy;
 	params.sb_idx = req->sb_index;
 
 	p_queue->p_rx_cid = _qed_eth_queue_to_cid(p_hwfn,
@@ -2273,6 +2277,7 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	u8 status = PFVF_STATUS_NO_RESOURCE;
 	struct vfpf_start_txq_tlv *req;
 	struct qed_vf_q_info *p_queue;
+	struct qed_sb_info sb_dummy;
 	int rc;
 	u16 pq;
 
@@ -2290,7 +2295,11 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	params.queue_id = p_queue->fw_tx_qid;
 	params.vport_id = vf->vport_id;
 	params.stats_id = vf->abs_vf_id + 0x10;
-	params.sb = req->hw_sb;
+
+	/* Since IGU index is passed via sb_info, construct a dummy one */
+	memset(&sb_dummy, 0, sizeof(sb_dummy));
+	sb_dummy.igu_sb_id = req->hw_sb;
+	params.p_sb = &sb_dummy;
 	params.sb_idx = req->sb_index;
 
 	p_queue->p_tx_cid = _qed_eth_queue_to_cid(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 29d74074238f..877d41e456e4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -588,8 +588,8 @@ qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 	req->cqe_pbl_addr = cqe_pbl_addr;
 	req->cqe_pbl_size = cqe_pbl_size;
 	req->rxq_addr = bd_chain_phys_addr;
-	req->hw_sb = p_cid->rel.sb;
-	req->sb_index = p_cid->rel.sb_idx;
+	req->hw_sb = p_cid->sb_igu_id;
+	req->sb_index = p_cid->sb_idx;
 	req->bd_max_bytes = bd_max_bytes;
 	req->stat_id = -1;
 
@@ -697,8 +697,8 @@ qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 	/* Tx */
 	req->pbl_addr = pbl_addr;
 	req->pbl_size = pbl_size;
-	req->hw_sb = p_cid->rel.sb;
-	req->sb_index = p_cid->rel.sb_idx;
+	req->hw_sb = p_cid->sb_igu_id;
+	req->sb_index = p_cid->sb_idx;
 
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 00c70625f8a4..ad1e24962bdb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1770,7 +1770,7 @@ static int qede_start_txq(struct qede_dev *edev,
 	else
 		params.queue_id = txq->index;
 
-	params.sb = fp->sb_info->igu_sb_id;
+	params.p_sb = fp->sb_info;
 	params.sb_idx = sb_idx;
 
 	rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
@@ -1849,7 +1849,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 			memset(&q_params, 0, sizeof(q_params));
 			q_params.queue_id = rxq->rxq_id;
 			q_params.vport_id = 0;
-			q_params.sb = fp->sb_info->igu_sb_id;
+			q_params.p_sb = fp->sb_info;
 			q_params.sb_idx = RX_PI;
 
 			p_phys_table =
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index d66d16a559e1..fd72056f8d49 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -47,8 +47,7 @@ struct qed_queue_start_common_params {
 	/* Relative, but relevant only for PFs */
 	u8 stats_id;
 
-	/* These are always absolute */
-	u16 sb;
+	struct qed_sb_info *p_sb;
 	u8 sb_idx;
 };
 
-- 
cgit v1.2.3


From 08bc8f15e69cbd9f8e3d7bbba4814cec50d51cfe Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 4 Jun 2017 13:31:06 +0300
Subject: qed: Multiple qzone queues for VFs

This adds the infrastructure for supporting VFs that want to open
multiple transmission queues on the same queue-zone.
At this point, there are no VFs that actually request this functionality,
but later patches would remedy that.

 a. VF and PF would communicate the capability during ACQUIRE;
    Legacy VFs would continue on behaving as they do today

 b. PF would communicate number of supported CIDs to the VF
    and would enforce said limitation

 c. Whenever VF passes a request for a given queue configuration
    it would also pass an associated index within said queue-zone

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c   |   8 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.c    |  30 ++++--
 drivers/net/ethernet/qlogic/qed/qed_sriov.c | 136 ++++++++++++++++++++++++----
 drivers/net/ethernet/qlogic/qed/qed_sriov.h |   1 +
 drivers/net/ethernet/qlogic/qed/qed_vf.c    |  39 +++++++-
 drivers/net/ethernet/qlogic/qed/qed_vf.h    |  32 ++++++-
 include/linux/qed/qed_if.h                  |   4 +
 7 files changed, 215 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 25d5b91f7928..e201214764db 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2116,8 +2116,12 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 		struct qed_eth_pf_params *p_params =
 		    &p_hwfn->pf_params.eth_pf_params;
 
-		qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
-					    p_params->num_cons, 1);
+			if (!p_params->num_vf_cons)
+				p_params->num_vf_cons =
+				    ETH_PF_PARAMS_VF_CONS_DEFAULT;
+			qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+						    p_params->num_cons,
+						    p_params->num_vf_cons);
 		p_hwfn->p_cxt_mngr->arfs_count = p_params->num_arfs_filters;
 		break;
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 75643c322642..cffa8e7e539b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -198,10 +198,10 @@ static void qed_eth_queue_qid_usage_del(struct qed_hwfn *p_hwfn,
 void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
 			       struct qed_queue_cid *p_cid)
 {
-	/* VFs' CIDs are 0-based in PF-view, and uninitialized on VF */
-	if ((p_cid->vfid == QED_QUEUE_CID_SELF) &&
-	    IS_PF(p_hwfn->cdev))
-		qed_cxt_release_cid(p_hwfn, p_cid->cid);
+	bool b_legacy_vf = !!(p_cid->vf_legacy & QED_QCID_LEGACY_VF_CID);
+
+	if (IS_PF(p_hwfn->cdev) && !b_legacy_vf)
+		_qed_cxt_release_cid(p_hwfn, p_cid->cid, p_cid->vfid);
 
 	/* For PF's VFs we maintain the index inside queue-zone in IOV */
 	if (p_cid->vfid == QED_QUEUE_CID_SELF)
@@ -319,18 +319,30 @@ qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
 		     struct qed_queue_cid_vf_params *p_vf_params)
 {
 	struct qed_queue_cid *p_cid;
+	u8 vfid = QED_CXT_PF_CID;
 	bool b_legacy_vf = false;
 	u32 cid = 0;
 
-	/* Currently, PF doesn't need to allocate CIDs for any VF */
-	if (p_vf_params)
-		b_legacy_vf = true;
+	/* In case of legacy VFs, The CID can be derived from the additional
+	 * VF parameters - the VF assumes queue X uses CID X, so we can simply
+	 * use the vf_qid for this purpose as well.
+	 */
+	if (p_vf_params) {
+		vfid = p_vf_params->vfid;
+
+		if (p_vf_params->vf_legacy & QED_QCID_LEGACY_VF_CID) {
+			b_legacy_vf = true;
+			cid = p_vf_params->vf_qid;
+		}
+	}
+
 	/* Get a unique firmware CID for this queue, in case it's a PF.
 	 * VF's don't need a CID as the queue configuration will be done
 	 * by PF.
 	 */
 	if (IS_PF(p_hwfn->cdev) && !b_legacy_vf) {
-		if (qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &cid)) {
+		if (_qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
+					 &cid, vfid)) {
 			DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
 			return NULL;
 		}
@@ -339,7 +351,7 @@ qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
 	p_cid = _qed_eth_queue_to_cid(p_hwfn, opaque_fid, cid,
 				      p_params, b_is_rx, p_vf_params);
 	if (!p_cid && IS_PF(p_hwfn->cdev) && !b_legacy_vf)
-		qed_cxt_release_cid(p_hwfn, cid);
+		_qed_cxt_release_cid(p_hwfn, cid, vfid);
 
 	return p_cid;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index e6fb5684b8fd..c620a5fa250b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -47,12 +47,16 @@
 
 static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf)
 {
-	u8 legacy = QED_QCID_LEGACY_VF_CID;
+	u8 legacy = 0;
 
 	if (p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
 	    ETH_HSI_VER_NO_PKT_LEN_TUNN)
 		legacy |= QED_QCID_LEGACY_VF_RX_PROD;
 
+	if (!(p_vf->acquire.vfdev_info.capabilities &
+	      VFPF_ACQUIRE_CAP_QUEUE_QIDS))
+		legacy |= QED_QCID_LEGACY_VF_CID;
+
 	return legacy;
 }
 
@@ -1413,6 +1417,10 @@ static u8 qed_iov_vf_mbx_acquire_resc(struct qed_hwfn *p_hwfn,
 	p_resp->num_vlan_filters = min_t(u8, p_vf->num_vlan_filters,
 					 p_req->num_vlan_filters);
 
+	p_resp->num_cids =
+	    min_t(u8, p_req->num_cids,
+		  p_hwfn->pf_params.eth_pf_params.num_vf_cons);
+
 	/* This isn't really needed/enforced, but some legacy VFs might depend
 	 * on the correct filling of this field.
 	 */
@@ -1424,10 +1432,11 @@ static u8 qed_iov_vf_mbx_acquire_resc(struct qed_hwfn *p_hwfn,
 	    p_resp->num_sbs < p_req->num_sbs ||
 	    p_resp->num_mac_filters < p_req->num_mac_filters ||
 	    p_resp->num_vlan_filters < p_req->num_vlan_filters ||
-	    p_resp->num_mc_filters < p_req->num_mc_filters) {
+	    p_resp->num_mc_filters < p_req->num_mc_filters ||
+	    p_resp->num_cids < p_req->num_cids) {
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_IOV,
-			   "VF[%d] - Insufficient resources: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x]\n",
+			   "VF[%d] - Insufficient resources: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x] cids [%02x/%02x]\n",
 			   p_vf->abs_vf_id,
 			   p_req->num_rxqs,
 			   p_resp->num_rxqs,
@@ -1439,7 +1448,9 @@ static u8 qed_iov_vf_mbx_acquire_resc(struct qed_hwfn *p_hwfn,
 			   p_resp->num_mac_filters,
 			   p_req->num_vlan_filters,
 			   p_resp->num_vlan_filters,
-			   p_req->num_mc_filters, p_resp->num_mc_filters);
+			   p_req->num_mc_filters,
+			   p_resp->num_mc_filters,
+			   p_req->num_cids, p_resp->num_cids);
 
 		/* Some legacy OSes are incapable of correctly handling this
 		 * failure.
@@ -1555,6 +1566,12 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
 	if (p_hwfn->cdev->num_hwfns > 1)
 		pfdev_info->capabilities |= PFVF_ACQUIRE_CAP_100G;
 
+	/* Share our ability to use multiple queue-ids only with VFs
+	 * that request it.
+	 */
+	if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_QUEUE_QIDS)
+		pfdev_info->capabilities |= PFVF_ACQUIRE_CAP_QUEUE_QIDS;
+
 	qed_iov_vf_mbx_acquire_stats(p_hwfn, &pfdev_info->stats_info);
 
 	memcpy(pfdev_info->port_mac, p_hwfn->hw_info.hw_mac_addr, ETH_ALEN);
@@ -1977,10 +1994,37 @@ static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn,
 static u8 qed_iov_vf_mbx_qid(struct qed_hwfn *p_hwfn,
 			     struct qed_vf_info *p_vf, bool b_is_tx)
 {
-	if (b_is_tx)
-		return QED_IOV_LEGACY_QID_TX;
-	else
-		return QED_IOV_LEGACY_QID_RX;
+	struct qed_iov_vf_mbx *p_mbx = &p_vf->vf_mbx;
+	struct vfpf_qid_tlv *p_qid_tlv;
+
+	/* Search for the qid if the VF published its going to provide it */
+	if (!(p_vf->acquire.vfdev_info.capabilities &
+	      VFPF_ACQUIRE_CAP_QUEUE_QIDS)) {
+		if (b_is_tx)
+			return QED_IOV_LEGACY_QID_TX;
+		else
+			return QED_IOV_LEGACY_QID_RX;
+	}
+
+	p_qid_tlv = (struct vfpf_qid_tlv *)
+		    qed_iov_search_list_tlvs(p_hwfn, p_mbx->req_virt,
+					     CHANNEL_TLV_QID);
+	if (!p_qid_tlv) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "VF[%2x]: Failed to provide qid\n",
+			   p_vf->relative_vf_id);
+
+		return QED_IOV_QID_INVALID;
+	}
+
+	if (p_qid_tlv->qid >= MAX_QUEUES_PER_QZONE) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "VF[%02x]: Provided qid out-of-bounds %02x\n",
+			   p_vf->relative_vf_id, p_qid_tlv->qid);
+		return QED_IOV_QID_INVALID;
+	}
+
+	return p_qid_tlv->qid;
 }
 
 static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
@@ -2006,7 +2050,12 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 		goto out;
 
 	qid_usage_idx = qed_iov_vf_mbx_qid(p_hwfn, vf, false);
+	if (qid_usage_idx == QED_IOV_QID_INVALID)
+		goto out;
+
 	p_queue = &vf->vf_queues[req->rx_qid];
+	if (p_queue->cids[qid_usage_idx].p_cid)
+		goto out;
 
 	vf_legacy = qed_vf_calculate_legacy(vf);
 
@@ -2332,12 +2381,17 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	req = &mbx->req_virt->start_txq;
 
 	if (!qed_iov_validate_txq(p_hwfn, vf, req->tx_qid,
-				  QED_IOV_VALIDATE_Q_DISABLE) ||
+				  QED_IOV_VALIDATE_Q_NA) ||
 	    !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
 		goto out;
 
 	qid_usage_idx = qed_iov_vf_mbx_qid(p_hwfn, vf, true);
+	if (qid_usage_idx == QED_IOV_QID_INVALID)
+		goto out;
+
 	p_queue = &vf->vf_queues[req->tx_qid];
+	if (p_queue->cids[qid_usage_idx].p_cid)
+		goto out;
 
 	vf_legacy = qed_vf_calculate_legacy(vf);
 
@@ -2388,17 +2442,33 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
 	struct qed_vf_queue *p_queue;
 	int rc = 0;
 
-	if (!qed_iov_validate_rxq(p_hwfn, vf, rxq_id,
-				  QED_IOV_VALIDATE_Q_ENABLE)) {
+	if (!qed_iov_validate_rxq(p_hwfn, vf, rxq_id, QED_IOV_VALIDATE_Q_NA)) {
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_IOV,
-			   "VF[%d] Tried Closing Rx 0x%04x which is inactive\n",
-			   vf->relative_vf_id, rxq_id);
+			   "VF[%d] Tried Closing Rx 0x%04x.%02x which is inactive\n",
+			   vf->relative_vf_id, rxq_id, qid_usage_idx);
 		return -EINVAL;
 	}
 
 	p_queue = &vf->vf_queues[rxq_id];
 
+	/* We've validated the index and the existence of the active RXQ -
+	 * now we need to make sure that it's using the correct qid.
+	 */
+	if (!p_queue->cids[qid_usage_idx].p_cid ||
+	    p_queue->cids[qid_usage_idx].b_is_tx) {
+		struct qed_queue_cid *p_cid;
+
+		p_cid = qed_iov_get_vf_rx_queue_cid(p_queue);
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_IOV,
+			   "VF[%d] - Tried Closing Rx 0x%04x.%02x, but Rx is at %04x.%02x\n",
+			   vf->relative_vf_id,
+			   rxq_id, qid_usage_idx, rxq_id, p_cid->qid_usage_idx);
+		return -EINVAL;
+	}
+
+	/* Now that we know we have a valid Rx-queue - close it */
 	rc = qed_eth_rx_queue_stop(p_hwfn,
 				   p_queue->cids[qid_usage_idx].p_cid,
 				   false, cqe_completion);
@@ -2418,11 +2488,13 @@ static int qed_iov_vf_stop_txqs(struct qed_hwfn *p_hwfn,
 	struct qed_vf_queue *p_queue;
 	int rc = 0;
 
-	if (!qed_iov_validate_txq(p_hwfn, vf, txq_id,
-				  QED_IOV_VALIDATE_Q_ENABLE))
+	if (!qed_iov_validate_txq(p_hwfn, vf, txq_id, QED_IOV_VALIDATE_Q_NA))
 		return -EINVAL;
 
 	p_queue = &vf->vf_queues[txq_id];
+	if (!p_queue->cids[qid_usage_idx].p_cid ||
+	    !p_queue->cids[qid_usage_idx].b_is_tx)
+		return -EINVAL;
 
 	rc = qed_eth_tx_queue_stop(p_hwfn, p_queue->cids[qid_usage_idx].p_cid);
 	if (rc)
@@ -2458,6 +2530,8 @@ static void qed_iov_vf_mbx_stop_rxqs(struct qed_hwfn *p_hwfn,
 
 	/* Find which qid-index is associated with the queue */
 	qid_usage_idx = qed_iov_vf_mbx_qid(p_hwfn, vf, false);
+	if (qid_usage_idx == QED_IOV_QID_INVALID)
+		goto out;
 
 	rc = qed_iov_vf_stop_rxqs(p_hwfn, vf, req->rx_qid,
 				  qid_usage_idx, req->cqe_completion);
@@ -2494,6 +2568,8 @@ static void qed_iov_vf_mbx_stop_txqs(struct qed_hwfn *p_hwfn,
 
 	/* Find which qid-index is associated with the queue */
 	qid_usage_idx = qed_iov_vf_mbx_qid(p_hwfn, vf, true);
+	if (qid_usage_idx == QED_IOV_QID_INVALID)
+		goto out;
 
 	rc = qed_iov_vf_stop_txqs(p_hwfn, vf, req->tx_qid, qid_usage_idx);
 	if (!rc)
@@ -2524,15 +2600,35 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
 	complete_event_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG);
 
 	qid_usage_idx = qed_iov_vf_mbx_qid(p_hwfn, vf, false);
+	if (qid_usage_idx == QED_IOV_QID_INVALID)
+		goto out;
 
-	/* Validate inputs */
-	for (i = req->rx_qid; i < req->rx_qid + req->num_rxqs; i++)
+	/* There shouldn't exist a VF that uses queue-qids yet uses this
+	 * API with multiple Rx queues. Validate this.
+	 */
+	if ((vf->acquire.vfdev_info.capabilities &
+	     VFPF_ACQUIRE_CAP_QUEUE_QIDS) && req->num_rxqs != 1) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "VF[%d] supports QIDs but sends multiple queues\n",
+			   vf->relative_vf_id);
+		goto out;
+	}
+
+	/* Validate inputs - for the legacy case this is still true since
+	 * qid_usage_idx for each Rx queue would be LEGACY_QID_RX.
+	 */
+	for (i = req->rx_qid; i < req->rx_qid + req->num_rxqs; i++) {
 		if (!qed_iov_validate_rxq(p_hwfn, vf, i,
-					  QED_IOV_VALIDATE_Q_ENABLE)) {
-			DP_INFO(p_hwfn, "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
-				vf->relative_vf_id, req->rx_qid, req->num_rxqs);
+					  QED_IOV_VALIDATE_Q_NA) ||
+		    !vf->vf_queues[i].cids[qid_usage_idx].p_cid ||
+		    vf->vf_queues[i].cids[qid_usage_idx].b_is_tx) {
+			DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+				   "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
+				   vf->relative_vf_id, req->rx_qid,
+				   req->num_rxqs);
 			goto out;
 		}
+	}
 
 	/* Prepare the handlers */
 	for (i = 0; i < req->num_rxqs; i++) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index 480cd99c69b5..95f55ae2ee8b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -151,6 +151,7 @@ struct qed_iov_vf_mbx {
 
 #define QED_IOV_LEGACY_QID_RX (0)
 #define QED_IOV_LEGACY_QID_TX (1)
+#define QED_IOV_QID_INVALID (0xFE)
 
 struct qed_vf_queue_cid {
 	bool b_is_tx;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 877d41e456e4..c0d2febad86a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -153,6 +153,22 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size)
 	return rc;
 }
 
+static void qed_vf_pf_add_qid(struct qed_hwfn *p_hwfn,
+			      struct qed_queue_cid *p_cid)
+{
+	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+	struct vfpf_qid_tlv *p_qid_tlv;
+
+	/* Only add QIDs for the queue if it was negotiated with PF */
+	if (!(p_iov->acquire_resp.pfdev_info.capabilities &
+	      PFVF_ACQUIRE_CAP_QUEUE_QIDS))
+		return;
+
+	p_qid_tlv = qed_add_tlv(p_hwfn, &p_iov->offset,
+				CHANNEL_TLV_QID, sizeof(*p_qid_tlv));
+	p_qid_tlv->qid = p_cid->qid_usage_idx;
+}
+
 #define VF_ACQUIRE_THRESH 3
 static void qed_vf_pf_acquire_reduce_resc(struct qed_hwfn *p_hwfn,
 					  struct vf_pf_resc_request *p_req,
@@ -160,7 +176,7 @@ static void qed_vf_pf_acquire_reduce_resc(struct qed_hwfn *p_hwfn,
 {
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_IOV,
-		   "PF unwilling to fullill resource request: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x]. Try PF recommended amount\n",
+		   "PF unwilling to fullill resource request: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x] cids [%02x/%02x]. Try PF recommended amount\n",
 		   p_req->num_rxqs,
 		   p_resp->num_rxqs,
 		   p_req->num_rxqs,
@@ -171,7 +187,8 @@ static void qed_vf_pf_acquire_reduce_resc(struct qed_hwfn *p_hwfn,
 		   p_resp->num_mac_filters,
 		   p_req->num_vlan_filters,
 		   p_resp->num_vlan_filters,
-		   p_req->num_mc_filters, p_resp->num_mc_filters);
+		   p_req->num_mc_filters,
+		   p_resp->num_mc_filters, p_req->num_cids, p_resp->num_cids);
 
 	/* humble our request */
 	p_req->num_txqs = p_resp->num_txqs;
@@ -180,6 +197,7 @@ static void qed_vf_pf_acquire_reduce_resc(struct qed_hwfn *p_hwfn,
 	p_req->num_mac_filters = p_resp->num_mac_filters;
 	p_req->num_vlan_filters = p_resp->num_vlan_filters;
 	p_req->num_mc_filters = p_resp->num_mc_filters;
+	p_req->num_cids = p_resp->num_cids;
 }
 
 static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
@@ -204,6 +222,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
 	p_resc->num_sbs = QED_MAX_VF_CHAINS_PER_PF;
 	p_resc->num_mac_filters = QED_ETH_VF_NUM_MAC_FILTERS;
 	p_resc->num_vlan_filters = QED_ETH_VF_NUM_VLAN_FILTERS;
+	p_resc->num_cids = QED_ETH_VF_DEFAULT_NUM_CIDS;
 
 	req->vfdev_info.os_type = VFPF_ACQUIRE_OS_LINUX;
 	req->vfdev_info.fw_major = FW_MAJOR_VERSION;
@@ -307,6 +326,13 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
 	if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_PRE_FP_HSI)
 		p_iov->b_pre_fp_hsi = true;
 
+	/* In case PF doesn't support multi-queue Tx, update the number of
+	 * CIDs to reflect the number of queues [older PFs didn't fill that
+	 * field].
+	 */
+	if (!(resp->pfdev_info.capabilities & PFVF_ACQUIRE_CAP_QUEUE_QIDS))
+		resp->resc.num_cids = resp->resc.num_rxqs + resp->resc.num_txqs;
+
 	/* Update bulletin board size with response from PF */
 	p_iov->bulletin.size = resp->bulletin_size;
 
@@ -609,6 +635,9 @@ qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
 				  (u32 *)(&init_prod_val));
 	}
+
+	qed_vf_pf_add_qid(p_hwfn, p_cid);
+
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
 		    CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv));
@@ -657,6 +686,8 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
 	req->num_rxqs = 1;
 	req->cqe_completion = cqe_completion;
 
+	qed_vf_pf_add_qid(p_hwfn, p_cid);
+
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
 		    CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv));
@@ -700,6 +731,8 @@ qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 	req->hw_sb = p_cid->sb_igu_id;
 	req->sb_index = p_cid->sb_idx;
 
+	qed_vf_pf_add_qid(p_hwfn, p_cid);
+
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
 		    CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv));
@@ -749,6 +782,8 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 	req->tx_qid = p_cid->rel.queue_id;
 	req->num_txqs = 1;
 
+	qed_vf_pf_add_qid(p_hwfn, p_cid);
+
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
 		    CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv));
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index d7b9c90b2f60..9588ae779267 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -46,7 +46,8 @@ struct vf_pf_resc_request {
 	u8 num_mac_filters;
 	u8 num_vlan_filters;
 	u8 num_mc_filters;
-	u16 padding;
+	u8 num_cids;
+	u8 padding;
 };
 
 struct hw_sb_info {
@@ -113,6 +114,11 @@ struct vfpf_acquire_tlv {
 	struct vf_pf_vfdev_info {
 #define VFPF_ACQUIRE_CAP_PRE_FP_HSI     (1 << 0) /* VF pre-FP hsi version */
 #define VFPF_ACQUIRE_CAP_100G		(1 << 1) /* VF can support 100g */
+	/* A requirement for supporting multi-Tx queues on a single queue-zone,
+	 * VF would pass qids as additional information whenever passing queue
+	 * references.
+	 */
+#define VFPF_ACQUIRE_CAP_QUEUE_QIDS     BIT(2)
 		u64 capabilities;
 		u8 fw_major;
 		u8 fw_minor;
@@ -185,6 +191,9 @@ struct pfvf_acquire_resp_tlv {
  */
 #define PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE	BIT(2)
 
+	/* PF expects queues to be received with additional qids */
+#define PFVF_ACQUIRE_CAP_QUEUE_QIDS             BIT(3)
+
 		u16 db_size;
 		u8 indices_per_sb;
 		u8 os_type;
@@ -221,7 +230,8 @@ struct pfvf_acquire_resp_tlv {
 		u8 num_mac_filters;
 		u8 num_vlan_filters;
 		u8 num_mc_filters;
-		u8 padding[2];
+		u8 num_cids;
+		u8 padding;
 	} resc;
 
 	u32 bulletin_size;
@@ -234,6 +244,16 @@ struct pfvf_start_queue_resp_tlv {
 	u8 padding[4];
 };
 
+/* Extended queue information - additional index for reference inside qzone.
+ * If commmunicated between VF/PF, each TLV relating to queues should be
+ * extended by one such [or have a future base TLV that already contains info].
+ */
+struct vfpf_qid_tlv {
+	struct channel_tlv tl;
+	u8 qid;
+	u8 padding[3];
+};
+
 /* Setup Queue */
 struct vfpf_start_rxq_tlv {
 	struct vfpf_first_tlv first_tlv;
@@ -597,6 +617,8 @@ enum {
 	CHANNEL_TLV_VPORT_UPDATE_ACCEPT_ANY_VLAN,
 	CHANNEL_TLV_VPORT_UPDATE_SGE_TPA,
 	CHANNEL_TLV_UPDATE_TUNN_PARAM,
+	CHANNEL_TLV_RESERVED,
+	CHANNEL_TLV_QID,
 	CHANNEL_TLV_MAX,
 
 	/* Required for iterating over vport-update tlvs.
@@ -605,6 +627,12 @@ enum {
 	CHANNEL_TLV_VPORT_UPDATE_MAX = CHANNEL_TLV_VPORT_UPDATE_SGE_TPA + 1,
 };
 
+/* Default number of CIDs [total of both Rx and Tx] to be requested
+ * by default, and maximum possible number.
+ */
+#define QED_ETH_VF_DEFAULT_NUM_CIDS (32)
+#define QED_ETH_VF_MAX_NUM_CIDS (250)
+
 /* This data is held in the qed_hwfn structure for VFs only. */
 struct qed_vf_iov {
 	union vfpf_tlvs *vf2pf_request;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 567ea3ea6c0e..74f6b99754aa 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -185,6 +185,10 @@ struct qed_eth_pf_params {
 	 */
 	u16 num_cons;
 
+	/* per-VF number of CIDs */
+	u8 num_vf_cons;
+#define ETH_PF_PARAMS_VF_CONS_DEFAULT	(32)
+
 	/* To enable arfs, previous to HW-init a positive number needs to be
 	 * set [as filters require allocated searcher ILT memory].
 	 * This will set the maximal number of configured steering-filters.
-- 
cgit v1.2.3


From cbb8a12c089c7f04b86d08d89bdab71ec9bff1f5 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 4 Jun 2017 13:31:08 +0300
Subject: qed: VF XDP support

The final addition on the qed front -
 - VFs would now require their PFs to provide multiple CIDs
 - Based on the availability of connections from PF, determine whether
   XDP is feasible and share it with qede via dev_info.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c | 19 +++++++++++++++----
 drivers/net/ethernet/qlogic/qed/qed_vf.c | 13 ++++++++++---
 drivers/net/ethernet/qlogic/qed/qed_vf.h | 12 ++++++++++++
 include/linux/qed/qed_eth_if.h           |  3 +++
 4 files changed, 40 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index cffa8e7e539b..cb8b05dbfc6e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2119,15 +2119,26 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 
 		ether_addr_copy(info->port_mac,
 				cdev->hwfns[0].hw_info.hw_mac_addr);
+
+		info->xdp_supported = true;
 	} else {
-		qed_vf_get_num_rxqs(QED_LEADING_HWFN(cdev), &info->num_queues);
-		if (cdev->num_hwfns > 1) {
-			u8 queues = 0;
+		u16 total_cids = 0;
+
+		/* Determine queues &  XDP support */
+		for_each_hwfn(cdev, i) {
+			struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+			u8 queues, cids;
 
-			qed_vf_get_num_rxqs(&cdev->hwfns[1], &queues);
+			qed_vf_get_num_cids(p_hwfn, &cids);
+			qed_vf_get_num_rxqs(p_hwfn, &queues);
 			info->num_queues += queues;
+			total_cids += cids;
 		}
 
+		/* Enable VF XDP in case PF guarntees sufficient connections */
+		if (total_cids >= info->num_queues * 3)
+			info->xdp_supported = true;
+
 		qed_vf_get_num_vlan_filters(&cdev->hwfns[0],
 					    (u8 *)&info->num_vlan_filters);
 		qed_vf_get_num_mac_filters(&cdev->hwfns[0],
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index cb81c357bf62..1926d1ed439f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -291,9 +291,11 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
 	req->vfdev_info.capabilities |= VFPF_ACQUIRE_CAP_100G;
 
 	/* If we've mapped the doorbell bar, try using queue qids */
-	if (p_iov->b_doorbell_bar)
+	if (p_iov->b_doorbell_bar) {
 		req->vfdev_info.capabilities |= VFPF_ACQUIRE_CAP_PHYSICAL_BAR |
 						VFPF_ACQUIRE_CAP_QUEUE_QIDS;
+		p_resc->num_cids = QED_ETH_VF_MAX_NUM_CIDS;
+	}
 
 	/* pf 2 vf bulletin board address */
 	req->bulletin_addr = p_iov->bulletin.phys;
@@ -884,8 +886,8 @@ qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-		   "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
-		   qid, *pp_doorbell, resp->offset);
+		   "Txq[0x%02x.%02x]: doorbell at %p [offset 0x%08x]\n",
+		   qid, p_cid->qid_usage_idx, *pp_doorbell, resp->offset);
 exit:
 	qed_vf_pf_req_end(p_hwfn, rc);
 
@@ -1478,6 +1480,11 @@ void qed_vf_get_num_txqs(struct qed_hwfn *p_hwfn, u8 *num_txqs)
 	*num_txqs = p_hwfn->vf_iov_info->acquire_resp.resc.num_txqs;
 }
 
+void qed_vf_get_num_cids(struct qed_hwfn *p_hwfn, u8 *num_cids)
+{
+	*num_cids = p_hwfn->vf_iov_info->acquire_resp.resc.num_cids;
+}
+
 void qed_vf_get_port_mac(struct qed_hwfn *p_hwfn, u8 *port_mac)
 {
 	memcpy(port_mac,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index b8ce4bef8c94..b65bbc54a097 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -731,6 +731,14 @@ void qed_vf_get_num_rxqs(struct qed_hwfn *p_hwfn, u8 *num_rxqs);
  */
 void qed_vf_get_num_txqs(struct qed_hwfn *p_hwfn, u8 *num_txqs);
 
+/**
+ * @brief Get number of available connections [both Rx and Tx] for VF
+ *
+ * @param p_hwfn
+ * @param num_cids - allocated number of connections
+ */
+void qed_vf_get_num_cids(struct qed_hwfn *p_hwfn, u8 *num_cids);
+
 /**
  * @brief Get port mac address for VF
  *
@@ -1010,6 +1018,10 @@ static inline void qed_vf_get_num_txqs(struct qed_hwfn *p_hwfn, u8 *num_txqs)
 {
 }
 
+static inline void qed_vf_get_num_cids(struct qed_hwfn *p_hwfn, u8 *num_cids)
+{
+}
+
 static inline void qed_vf_get_port_mac(struct qed_hwfn *p_hwfn, u8 *port_mac)
 {
 }
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index fd72056f8d49..0eef0a2b1901 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -73,6 +73,9 @@ struct qed_dev_eth_info {
 
 	/* Legacy VF - this affects the datapath, so qede has to know */
 	bool is_legacy;
+
+	/* Might depend on available resources [in case of VF] */
+	bool xdp_supported;
 };
 
 struct qed_update_vport_rss_params {
-- 
cgit v1.2.3


From 6dc06c08bef1c746ff8da33dab677cfbacdcad32 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Sun, 4 Jun 2017 14:30:07 +0300
Subject: net/mlx4: Fix the check in attaching steering rules

Our previous patch (cited below) introduced a regression
for RAW Eth QPs.

Fix it by checking if the QP number provided by user-space
exists, hence allowing steering rules to be added for valid
QPs only.

Fixes: 89c557687a32 ("net/mlx4_en: Avoid adding steering rules with invalid ring")
Reported-by: Or Gerlitz <gerlitz.or@gmail.com>
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c |  5 -----
 drivers/net/ethernet/mellanox/mlx4/mcg.c        | 15 +++++++++++----
 drivers/net/ethernet/mellanox/mlx4/qp.c         | 13 +++++++++++++
 include/linux/mlx4/qp.h                         |  1 +
 4 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ae5fdc2df654..ffbcb27c05e5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1562,11 +1562,6 @@ static int mlx4_en_flow_replace(struct net_device *dev,
 		qpn = priv->drop_qp.qpn;
 	else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
 		qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
-		if (qpn < priv->rss_map.base_qpn ||
-		    qpn >= priv->rss_map.base_qpn + priv->rx_ring_num) {
-			en_warn(priv, "rxnfc: QP (0x%x) doesn't exist\n", qpn);
-			return -EINVAL;
-		}
 	} else {
 		if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
 			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1a670b681555..0710b3677464 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -35,6 +35,7 @@
 #include <linux/etherdevice.h>
 
 #include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
 #include <linux/export.h>
 
 #include "mlx4.h"
@@ -985,16 +986,21 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
+	if (!mlx4_qp_lookup(dev, rule->qpn)) {
+		mlx4_err_rule(dev, "QP doesn't exist\n", rule);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	trans_rule_ctrl_to_hw(rule, mailbox->buf);
 
 	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
 
 	list_for_each_entry(cur, &rule->list, list) {
 		ret = parse_trans_rule(dev, cur, mailbox->buf + size);
-		if (ret < 0) {
-			mlx4_free_cmd_mailbox(dev, mailbox);
-			return ret;
-		}
+		if (ret < 0)
+			goto out;
+
 		size += ret;
 	}
 
@@ -1021,6 +1027,7 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
 		}
 	}
 
+out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2d6abd4662b1..ad92d2311478 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -384,6 +384,19 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 		__mlx4_qp_free_icm(dev, qpn);
 }
 
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
+{
+	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+	struct mlx4_qp *qp;
+
+	spin_lock(&qp_table->lock);
+
+	qp = __mlx4_qp_lookup(dev, qpn);
+
+	spin_unlock(&qp_table->lock);
+	return qp;
+}
+
 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index b4ee8f62ce8d..8e2828d48d7f 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -470,6 +470,7 @@ struct mlx4_update_qp_params {
 	u16	rate_val;
 };
 
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn);
 int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params);
-- 
cgit v1.2.3


From 9b01d43170aa70a435105f6413759e2ab7e00219 Mon Sep 17 00:00:00 2001
From: Perr Zhang <strongbox8@zoho.com>
Date: Fri, 2 Jun 2017 11:59:53 +0800
Subject: sched/header: Remove leftover, obsolete comment

There is no more set_task_vxid() helper, remove its description.

Signed-off-by: Perr Zhang <strongbox8@zoho.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170602035953.28949-1-strongbox8@zoho.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b69fc650201..1abaa3728bf7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1096,8 +1096,6 @@ static inline struct pid *task_session(struct task_struct *task)
  *                     current.
  * task_xid_nr_ns()  : id seen from the ns specified;
  *
- * set_task_vxid()   : assigns a virtual id to a task;
- *
  * see also pid_nr() etc in include/linux/pid.h
  */
 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
-- 
cgit v1.2.3


From 45ca7df7c345465dbd2426a33012c9c33d27de62 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Thu, 27 Apr 2017 15:08:51 +0100
Subject: firmware: arm_scpi: add support to populate OPPs and get transition
 latency

Currently only CPU devices use the transition latency and the OPPs
populated in the SCPI driver. scpi-cpufreq has logic to handle these.
However, even GPU and other users of SCPI DVFS will need the same logic.

In order to avoid duplication, this patch adds support to get DVFS
transition latency and add all the OPPs to the device using OPP library
helper functions. The helper functions added here can be used for any
device whose DVFS are managed by SCPI.

Also, we also have incorrect dependency on the cluster identifier for
the CPUs. It's fundamentally wrong as the domain id need not match the
cluster id. This patch gets rid of that dependency by making use of the
clock bindings which are already in place.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scpi.c   | 63 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/scpi_protocol.h |  3 +++
 2 files changed, 66 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index f6cfc31d34c7..8043e51de897 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -39,6 +39,7 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/printk.h>
+#include <linux/pm_opp.h>
 #include <linux/scpi_protocol.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
@@ -684,6 +685,65 @@ static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain)
 	return info;
 }
 
+static int scpi_dev_domain_id(struct device *dev)
+{
+	struct of_phandle_args clkspec;
+
+	if (of_parse_phandle_with_args(dev->of_node, "clocks", "#clock-cells",
+				       0, &clkspec))
+		return -EINVAL;
+
+	return clkspec.args[0];
+}
+
+static struct scpi_dvfs_info *scpi_dvfs_info(struct device *dev)
+{
+	int domain = scpi_dev_domain_id(dev);
+
+	if (domain < 0)
+		return ERR_PTR(domain);
+
+	return scpi_dvfs_get_info(domain);
+}
+
+static int scpi_dvfs_get_transition_latency(struct device *dev)
+{
+	struct scpi_dvfs_info *info = scpi_dvfs_info(dev);
+
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	if (!info->latency)
+		return 0;
+
+	return info->latency;
+}
+
+static int scpi_dvfs_add_opps_to_device(struct device *dev)
+{
+	int idx, ret;
+	struct scpi_opp *opp;
+	struct scpi_dvfs_info *info = scpi_dvfs_info(dev);
+
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	if (!info->opps)
+		return -EIO;
+
+	for (opp = info->opps, idx = 0; idx < info->count; idx++, opp++) {
+		ret = dev_pm_opp_add(dev, opp->freq, opp->m_volt * 1000);
+		if (ret) {
+			dev_warn(dev, "failed to add opp %uHz %umV\n",
+				 opp->freq, opp->m_volt);
+			while (idx-- > 0)
+				dev_pm_opp_remove(dev, (--opp)->freq);
+			return ret;
+		}
+	}
+	return 0;
+}
+
 static int scpi_sensor_get_capability(u16 *sensors)
 {
 	struct sensor_capabilities cap_buf;
@@ -765,6 +825,9 @@ static struct scpi_ops scpi_ops = {
 	.dvfs_get_idx = scpi_dvfs_get_idx,
 	.dvfs_set_idx = scpi_dvfs_set_idx,
 	.dvfs_get_info = scpi_dvfs_get_info,
+	.device_domain_id = scpi_dev_domain_id,
+	.get_transition_latency = scpi_dvfs_get_transition_latency,
+	.add_opps_to_device = scpi_dvfs_add_opps_to_device,
 	.sensor_get_capability = scpi_sensor_get_capability,
 	.sensor_get_info = scpi_sensor_get_info,
 	.sensor_get_value = scpi_sensor_get_value,
diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
index dc5f989be226..327d65663dbf 100644
--- a/include/linux/scpi_protocol.h
+++ b/include/linux/scpi_protocol.h
@@ -67,6 +67,9 @@ struct scpi_ops {
 	int (*dvfs_get_idx)(u8);
 	int (*dvfs_set_idx)(u8, u8);
 	struct scpi_dvfs_info *(*dvfs_get_info)(u8);
+	int (*device_domain_id)(struct device *);
+	int (*get_transition_latency)(struct device *);
+	int (*add_opps_to_device)(struct device *);
 	int (*sensor_get_capability)(u16 *sensors);
 	int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *);
 	int (*sensor_get_value)(u16, u64 *);
-- 
cgit v1.2.3


From 4722974d90e06d0164ca1b73a6b34cec6bdb64ad Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 5 Jun 2017 14:30:49 +0100
Subject: rxrpc: Implement service upgrade

Implement AuriStor's service upgrade facility.  There are three problems
that this is meant to deal with:

 (1) Various of the standard AFS RPC calls have IPv4 addresses in their
     requests and/or replies - but there's no room for including IPv6
     addresses.

 (2) Definition of IPv6-specific RPC operations in the standard operation
     sets has not yet been achieved.

 (3) One could envision the creation a new service on the same port that as
     the original service.  The new service could implement improved
     operations - and the client could try this first, falling back to the
     original service if it's not there.

     Unfortunately, certain servers ignore packets addressed to a service
     they don't implement and don't respond in any way - not even with an
     ABORT.  This means that the client must then wait for the call timeout
     to occur.

What service upgrade does is to see if the connection is marked as being
'upgradeable' and if so, change the service ID in the server and thus the
request and reply formats.  Note that the upgrade isn't mandatory - a
server that supports only the original call set will ignore the upgrade
request.

In the protocol, the procedure is then as follows:

 (1) To request an upgrade, the first DATA packet in a new connection must
     have the userStatus set to 1 (this is normally 0).  The userStatus
     value is normally ignored by the server.

 (2) If the server doesn't support upgrading, the reply packets will
     contain the same service ID as for the first request packet.

 (3) If the server does support upgrading, all future reply packets on that
     connection will contain the new service ID and the new service ID will
     be applied to *all* further calls on that connection as well.

 (4) The RPC op used to probe the upgrade must take the same request data
     as the shadow call in the upgrade set (but may return a different
     reply).  GetCapability RPC ops were added to all standard sets for
     just this purpose.  Ops where the request formats differ cannot be
     used for probing.

 (5) The client must wait for completion of the probe before sending any
     further RPC ops to the same destination.  It should then use the
     service ID that recvmsg() reported back in all future calls.

 (6) The shadow service must have call definitions for all the operation
     IDs defined by the original service.


To support service upgrading, a server should:

 (1) Call bind() twice on its AF_RXRPC socket before calling listen().
     Each bind() should supply a different service ID, but the transport
     addresses must be the same.  This allows the server to receive
     requests with either service ID.

 (2) Enable automatic upgrading by calling setsockopt(), specifying
     RXRPC_UPGRADEABLE_SERVICE and passing in a two-member array of
     unsigned shorts as the argument:

	unsigned short optval[2];

     This specifies a pair of service IDs.  They must be different and must
     match the service IDs bound to the socket.  Member 0 is the service ID
     to upgrade from and member 1 is the service ID to upgrade to.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/networking/rxrpc.txt | 34 ++++++++++++++++++++++++++--------
 include/linux/rxrpc.h              |  1 +
 include/rxrpc/packet.h             |  2 ++
 net/rxrpc/af_rxrpc.c               | 23 +++++++++++++++++++++++
 net/rxrpc/ar-internal.h            | 10 ++++++++--
 net/rxrpc/call_accept.c            |  2 +-
 net/rxrpc/conn_service.c           | 11 ++++++++++-
 7 files changed, 71 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index b7115ec55e04..2a1662760450 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -433,6 +433,13 @@ AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
 	 Encrypted checksum plus entire packet padded and encrypted, including
 	 actual packet length.
 
+ (*) RXRPC_UPGRADEABLE_SERVICE
+
+     This is used to indicate that a service socket with two bindings may
+     upgrade one bound service to the other if requested by the client.  optval
+     must point to an array of two unsigned short ints.  The first is the
+     service ID to upgrade from and the second the service ID to upgrade to.
+
 
 ========
 SECURITY
@@ -588,7 +595,7 @@ A server would be set up to accept operations in the following manner:
      The keyring can be manipulated after it has been given to the socket. This
      permits the server to add more keys, replace keys, etc. whilst it is live.
 
- (2) A local address must then be bound:
+ (3) A local address must then be bound:
 
 	struct sockaddr_rxrpc srx = {
 		.srx_family	= AF_RXRPC,
@@ -604,11 +611,22 @@ A server would be set up to accept operations in the following manner:
      parameters are the same.  The limit is currently two.  To do this, bind()
      should be called twice.
 
- (3) The server is then set to listen out for incoming calls:
+ (4) If service upgrading is required, first two service IDs must have been
+     bound and then the following option must be set:
+
+	unsigned short service_ids[2] = { from_ID, to_ID };
+	setsockopt(server, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
+		   service_ids, sizeof(service_ids));
+
+     This will automatically upgrade connections on service from_ID to service
+     to_ID if they request it.  This will be reflected in msg_name obtained
+     through recvmsg() when the request data is delivered to userspace.
+
+ (5) The server is then set to listen out for incoming calls:
 
 	listen(server, 100);
 
- (4) The kernel notifies the server of pending incoming connections by sending
+ (6) The kernel notifies the server of pending incoming connections by sending
      it a message for each.  This is received with recvmsg() on the server
      socket.  It has no data, and has a single dataless control message
      attached:
@@ -620,13 +638,13 @@ A server would be set up to accept operations in the following manner:
      the time it is accepted - in which case the first call still on the queue
      will be accepted.
 
- (5) The server then accepts the new call by issuing a sendmsg() with two
+ (7) The server then accepts the new call by issuing a sendmsg() with two
      pieces of control data and no actual data:
 
 	RXRPC_ACCEPT		- indicate connection acceptance
 	RXRPC_USER_CALL_ID	- specify user ID for this call
 
- (6) The first request data packet will then be posted to the server socket for
+ (8) The first request data packet will then be posted to the server socket for
      recvmsg() to pick up.  At that point, the RxRPC address for the call can
      be read from the address fields in the msghdr struct.
 
@@ -638,7 +656,7 @@ A server would be set up to accept operations in the following manner:
 
 	RXRPC_USER_CALL_ID	- specifies the user ID for this call
 
- (8) The reply data should then be posted to the server socket using a series
+ (9) The reply data should then be posted to the server socket using a series
      of sendmsg() calls, each with the following control messages attached:
 
 	RXRPC_USER_CALL_ID	- specifies the user ID for this call
@@ -646,7 +664,7 @@ A server would be set up to accept operations in the following manner:
      MSG_MORE should be set in msghdr::msg_flags on all but the last message
      for a particular call.
 
- (9) The final ACK from the client will be posted for retrieval by recvmsg()
+(10) The final ACK from the client will be posted for retrieval by recvmsg()
      when it is received.  It will take the form of a dataless message with two
      control messages attached:
 
@@ -656,7 +674,7 @@ A server would be set up to accept operations in the following manner:
      MSG_EOR will be flagged to indicate that this is the final message for
      this call.
 
-(10) Up to the point the final packet of reply data is sent, the call can be
+(11) Up to the point the final packet of reply data is sent, the call can be
      aborted by calling sendmsg() with a dataless message with the following
      control messages attached:
 
diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h
index c68307bc306f..634116561a6a 100644
--- a/include/linux/rxrpc.h
+++ b/include/linux/rxrpc.h
@@ -37,6 +37,7 @@ struct sockaddr_rxrpc {
 #define RXRPC_SECURITY_KEYRING		2	/* [srvr] set ring of server security keys */
 #define RXRPC_EXCLUSIVE_CONNECTION	3	/* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */
 #define RXRPC_MIN_SECURITY_LEVEL	4	/* minimum security level */
+#define RXRPC_UPGRADEABLE_SERVICE	5	/* Upgrade service[0] -> service[1] */
 
 /*
  * RxRPC control messages
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index 703a64b4681a..a2dcfb850b9f 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -58,6 +58,8 @@ struct rxrpc_wire_header {
 #define RXRPC_SLOW_START_OK	0x20		/* [ACK] slow start supported */
 
 	uint8_t		userStatus;	/* app-layer defined status */
+#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01	/* AuriStor service upgrade request */
+	
 	uint8_t		securityIndex;	/* security protocol ID */
 	union {
 		__be16	_rsvd;		/* reserved */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 3b982bca7d22..0c4dc4a7832c 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -490,6 +490,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 	unsigned int min_sec_level;
+	u16 service_upgrade[2];
 	int ret;
 
 	_enter(",%d,%d,,%d", level, optname, optlen);
@@ -546,6 +547,28 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
 			rx->min_sec_level = min_sec_level;
 			goto success;
 
+		case RXRPC_UPGRADEABLE_SERVICE:
+			ret = -EINVAL;
+			if (optlen != sizeof(service_upgrade) ||
+			    rx->service_upgrade.from != 0)
+				goto error;
+			ret = -EISCONN;
+			if (rx->sk.sk_state != RXRPC_SERVER_BOUND2)
+				goto error;
+			ret = -EFAULT;
+			if (copy_from_user(service_upgrade, optval,
+					   sizeof(service_upgrade)) != 0)
+				goto error;
+			ret = -EINVAL;
+			if ((service_upgrade[0] != rx->srx.srx_service ||
+			     service_upgrade[1] != rx->second_service) &&
+			    (service_upgrade[0] != rx->second_service ||
+			     service_upgrade[1] != rx->srx.srx_service))
+				goto error;
+			rx->service_upgrade.from = service_upgrade[0];
+			rx->service_upgrade.to = service_upgrade[1];
+			goto success;
+
 		default:
 			break;
 		}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 781fbc253b5a..c1ebd886a53f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -144,8 +144,13 @@ struct rxrpc_sock {
 #define RXRPC_SECURITY_MAX	RXRPC_SECURITY_ENCRYPT
 	bool			exclusive;	/* Exclusive connection for a client socket */
 	u16			second_service;	/* Additional service bound to the endpoint */
+	struct {
+		/* Service upgrade information */
+		u16		from;		/* Service ID to upgrade (if not 0) */
+		u16		to;		/* service ID to upgrade to */
+	} service_upgrade;
 	sa_family_t		family;		/* Protocol family created with */
-	struct sockaddr_rxrpc	srx;		/* local address */
+	struct sockaddr_rxrpc	srx;		/* Primary Service/local addresses */
 	struct sockaddr_rxrpc	connect_srx;	/* Default client address from connect() */
 };
 
@@ -861,7 +866,8 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
 struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
 						     struct sk_buff *);
 struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t);
-void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *);
+void rxrpc_new_incoming_connection(struct rxrpc_sock *,
+				   struct rxrpc_connection *, struct sk_buff *);
 void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 
 /*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 544df53ccf79..0d4d84e8c074 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -296,7 +296,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 		conn->params.local = local;
 		conn->params.peer = peer;
 		rxrpc_see_connection(conn);
-		rxrpc_new_incoming_connection(conn, skb);
+		rxrpc_new_incoming_connection(rx, conn, skb);
 	} else {
 		rxrpc_get_connection(conn);
 	}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index c7f8682a55b2..e60fcd2a4a02 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -150,7 +150,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
  * Set up an incoming connection.  This is called in BH context with the RCU
  * read lock held.
  */
-void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
+void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
+				   struct rxrpc_connection *conn,
 				   struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -168,6 +169,14 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
 	else
 		conn->state	= RXRPC_CONN_SERVICE;
 
+	/* See if we should upgrade the service.  This can only happen on the
+	 * first packet on a new connection.  Once done, it applies to all
+	 * subsequent calls on that connection.
+	 */
+	if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE &&
+	    conn->service_id == rx->service_upgrade.from)
+		conn->service_id = rx->service_upgrade.to;
+
 	/* Make the connection a target for incoming packets. */
 	rxrpc_publish_service_conn(conn->params.peer, conn);
 
-- 
cgit v1.2.3


From 4e255721d1575a766ada06dc7eb03acdcd34eaaf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 5 Jun 2017 14:30:49 +0100
Subject: rxrpc: Add service upgrade support for client connections

Make it possible for a client to use AuriStor's service upgrade facility.

The client does this by adding an RXRPC_UPGRADE_SERVICE control message to
the first sendmsg() of a call.  This takes no parameters.

When recvmsg() starts returning data from the call, the service ID field in
the returned msg_name will reflect the result of the upgrade attempt.  If
the upgrade was ignored, srx_service will match what was set in the
sendmsg(); if the upgrade happened the srx_service will be altered to
indicate the service the server upgraded to.

Note that:

 (1) The choice of upgrade service is up to the server

 (2) Further client calls to the same server that would share a connection
     are blocked if an upgrade probe is in progress.

 (3) This should only be used to probe the service.  Clients should then
     use the returned service ID in all subsequent communications with that
     server (and not set the upgrade).  Note that the kernel will not
     retain this information should the connection expire from its cache.

 (4) If a server that supports upgrading is replaced by one that doesn't,
     whilst a connection is live, and if the replacement is running, say,
     OpenAFS 1.6.4 or older or an older IBM AFS, then the replacement
     server will not respond to packets sent to the upgraded connection.

     At this point, calls will time out and the server must be reprobed.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/networking/rxrpc.txt | 30 ++++++++++++++++++++++++++
 include/linux/rxrpc.h              |  1 +
 include/trace/events/rxrpc.h       |  1 +
 net/rxrpc/ar-internal.h            |  3 +++
 net/rxrpc/conn_client.c            | 43 +++++++++++++++++++++++++++++++-------
 net/rxrpc/input.c                  | 17 +++++++++++++++
 net/rxrpc/output.c                 |  4 ++++
 net/rxrpc/sendmsg.c                | 19 +++++++++++++----
 8 files changed, 106 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 2a1662760450..18078e630a63 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -325,6 +325,8 @@ calls, to invoke certain actions and to report certain conditions.  These are:
 	RXRPC_LOCAL_ERROR	-rt error num	Local error encountered
 	RXRPC_NEW_CALL		-r- n/a		New call received
 	RXRPC_ACCEPT		s-- n/a		Accept new call
+	RXRPC_EXCLUSIVE_CALL	s-- n/a		Make an exclusive client call
+	RXRPC_UPGRADE_SERVICE	s-- n/a		Client call can be upgraded
 
 	(SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
 
@@ -387,6 +389,23 @@ calls, to invoke certain actions and to report certain conditions.  These are:
      return error ENODATA.  If the user ID is already in use by another call,
      then error EBADSLT will be returned.
 
+ (*) RXRPC_EXCLUSIVE_CALL
+
+     This is used to indicate that a client call should be made on a one-off
+     connection.  The connection is discarded once the call has terminated.
+
+ (*) RXRPC_UPGRADE_SERVICE
+
+     This is used to make a client call to probe if the specified service ID
+     may be upgraded by the server.  The caller must check msg_name returned to
+     recvmsg() for the service ID actually in use.  The operation probed must
+     be one that takes the same arguments in both services.
+
+     Once this has been used to establish the upgrade capability (or lack
+     thereof) of the server, the service ID returned should be used for all
+     future communication to that server and RXRPC_UPGRADE_SERVICE should no
+     longer be set.
+
 
 ==============
 SOCKET OPTIONS
@@ -566,6 +585,17 @@ A client would issue an operation by:
      buffer instead, and MSG_EOR will be flagged to indicate the end of that
      call.
 
+A client may ask for a service ID it knows and ask that this be upgraded to a
+better service if one is available by supplying RXRPC_UPGRADE_SERVICE on the
+first sendmsg() of a call.  The client should then check srx_service in the
+msg_name filled in by recvmsg() when collecting the result.  srx_service will
+hold the same value as given to sendmsg() if the upgrade request was ignored by
+the service - otherwise it will be altered to indicate the service ID the
+server upgraded to.  Note that the upgraded service ID is chosen by the server.
+The caller has to wait until it sees the service ID in the reply before sending
+any more calls (further calls to the same destination will be blocked until the
+probe is concluded).
+
 
 ====================
 EXAMPLE SERVER USAGE
diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h
index 634116561a6a..707910c6c6c5 100644
--- a/include/linux/rxrpc.h
+++ b/include/linux/rxrpc.h
@@ -54,6 +54,7 @@ struct sockaddr_rxrpc {
 #define RXRPC_NEW_CALL		8	/* -r: [Service] new incoming call notification */
 #define RXRPC_ACCEPT		9	/* s-: [Service] accept request */
 #define RXRPC_EXCLUSIVE_CALL	10	/* s-: Call should be on exclusive connection */
+#define RXRPC_UPGRADE_SERVICE	11	/* s-: Request service upgrade for client call */
 
 /*
  * RxRPC security levels
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 29a3d53a4015..ebe96796027a 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -233,6 +233,7 @@ enum rxrpc_congest_change {
 	EM(RXRPC_CONN_CLIENT_INACTIVE,		"Inac") \
 	EM(RXRPC_CONN_CLIENT_WAITING,		"Wait") \
 	EM(RXRPC_CONN_CLIENT_ACTIVE,		"Actv") \
+	EM(RXRPC_CONN_CLIENT_UPGRADE,		"Upgd") \
 	EM(RXRPC_CONN_CLIENT_CULLED,		"Cull") \
 	E_(RXRPC_CONN_CLIENT_IDLE,		"Idle") \
 
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index c1ebd886a53f..e9b536cb0acf 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -320,6 +320,7 @@ struct rxrpc_conn_parameters {
 	struct rxrpc_peer	*peer;		/* Remote endpoint */
 	struct key		*key;		/* Security details */
 	bool			exclusive;	/* T if conn is exclusive */
+	bool			upgrade;	/* T if service ID can be upgraded */
 	u16			service_id;	/* Service ID for this connection */
 	u32			security_level;	/* Security level selected */
 };
@@ -334,6 +335,7 @@ enum rxrpc_conn_flag {
 	RXRPC_CONN_EXPOSED,		/* Conn has extra ref for exposure */
 	RXRPC_CONN_DONT_REUSE,		/* Don't reuse this connection */
 	RXRPC_CONN_COUNTED,		/* Counted by rxrpc_nr_client_conns */
+	RXRPC_CONN_PROBING_FOR_UPGRADE,	/* Probing for service upgrade */
 };
 
 /*
@@ -350,6 +352,7 @@ enum rxrpc_conn_cache_state {
 	RXRPC_CONN_CLIENT_INACTIVE,	/* Conn is not yet listed */
 	RXRPC_CONN_CLIENT_WAITING,	/* Conn is on wait list, waiting for capacity */
 	RXRPC_CONN_CLIENT_ACTIVE,	/* Conn is on active list, doing calls */
+	RXRPC_CONN_CLIENT_UPGRADE,	/* Conn is on active list, probing for upgrade */
 	RXRPC_CONN_CLIENT_CULLED,	/* Conn is culled and delisted, doing calls */
 	RXRPC_CONN_CLIENT_IDLE,		/* Conn is on idle list, doing mostly nothing */
 	RXRPC_CONN__NR_CACHE_STATES
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3f358bf424ad..dd8bb919c15a 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -36,12 +36,15 @@
  *
  *	rxrpc_nr_active_client_conns is held incremented also.
  *
- *  (4) CULLED - The connection got summarily culled to try and free up
+ *  (4) UPGRADE - As for ACTIVE, but only one call may be in progress and is
+ *      being used to probe for service upgrade.
+ *
+ *  (5) CULLED - The connection got summarily culled to try and free up
  *      capacity.  Calls currently in progress on the connection are allowed to
  *      continue, but new calls will have to wait.  There can be no waiters in
  *      this state - the conn would have to go to the WAITING state instead.
  *
- *  (5) IDLE - The connection has no calls in progress upon it and must have
+ *  (6) IDLE - The connection has no calls in progress upon it and must have
  *      been exposed to the world (ie. the EXPOSED flag must be set).  When it
  *      expires, the EXPOSED flag is cleared and the connection transitions to
  *      the INACTIVE state.
@@ -184,6 +187,8 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 	atomic_set(&conn->usage, 1);
 	if (cp->exclusive)
 		__set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+	if (cp->upgrade)
+		__set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
 
 	conn->params		= *cp;
 	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
@@ -300,7 +305,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
 #define cmp(X) ((long)conn->params.X - (long)cp->X)
 			diff = (cmp(peer) ?:
 				cmp(key) ?:
-				cmp(security_level));
+				cmp(security_level) ?:
+				cmp(upgrade));
 #undef cmp
 			if (diff < 0) {
 				p = p->rb_left;
@@ -365,7 +371,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
 #define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
 		diff = (cmp(peer) ?:
 			cmp(key) ?:
-			cmp(security_level));
+			cmp(security_level) ?:
+			cmp(upgrade));
 #undef cmp
 		if (diff < 0) {
 			pp = &(*pp)->rb_left;
@@ -436,8 +443,13 @@ error:
 static void rxrpc_activate_conn(struct rxrpc_net *rxnet,
 				struct rxrpc_connection *conn)
 {
-	trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
-	conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) {
+		trace_rxrpc_client(conn, -1, rxrpc_client_to_upgrade);
+		conn->cache_state = RXRPC_CONN_CLIENT_UPGRADE;
+	} else {
+		trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
+		conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+	}
 	rxnet->nr_active_client_conns++;
 	list_move_tail(&conn->cache_link, &rxnet->active_client_conns);
 }
@@ -461,7 +473,8 @@ static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet,
 
 	_enter("%d,%d", conn->debug_id, conn->cache_state);
 
-	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE ||
+	    conn->cache_state == RXRPC_CONN_CLIENT_UPGRADE)
 		goto out;
 
 	spin_lock(&rxnet->client_conn_cache_lock);
@@ -474,6 +487,7 @@ static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet,
 
 	switch (conn->cache_state) {
 	case RXRPC_CONN_CLIENT_ACTIVE:
+	case RXRPC_CONN_CLIENT_UPGRADE:
 	case RXRPC_CONN_CLIENT_WAITING:
 		break;
 
@@ -577,6 +591,9 @@ static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn)
 	case RXRPC_CONN_CLIENT_ACTIVE:
 		mask = RXRPC_ACTIVE_CHANS_MASK;
 		break;
+	case RXRPC_CONN_CLIENT_UPGRADE:
+		mask = 0x01;
+		break;
 	default:
 		return;
 	}
@@ -787,6 +804,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	spin_lock(&rxnet->client_conn_cache_lock);
 
 	switch (conn->cache_state) {
+	case RXRPC_CONN_CLIENT_UPGRADE:
+		/* Deal with termination of a service upgrade probe. */
+		if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+			clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
+			trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
+			conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+			rxrpc_activate_channels_locked(conn);
+		}
+		/* fall through */
 	case RXRPC_CONN_CLIENT_ACTIVE:
 		if (list_empty(&conn->waiting_calls)) {
 			rxrpc_deactivate_one_channel(conn, channel);
@@ -941,7 +967,8 @@ static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet)
 		ASSERT(!list_empty(&rxnet->active_client_conns));
 		conn = list_entry(rxnet->active_client_conns.next,
 				  struct rxrpc_connection, cache_link);
-		ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+		ASSERTIFCMP(conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE,
+			    conn->cache_state, ==, RXRPC_CONN_CLIENT_UPGRADE);
 
 		if (list_empty(&conn->waiting_calls)) {
 			trace_rxrpc_client(conn, -1, rxrpc_client_to_culled);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 45dba732a3b4..e56e23ed2229 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1142,6 +1142,13 @@ void rxrpc_data_ready(struct sock *udp_sk)
 		if (sp->hdr.securityIndex != conn->security_ix)
 			goto wrong_security;
 
+		if (sp->hdr.serviceId != conn->service_id) {
+			if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) ||
+			    conn->service_id != conn->params.service_id)
+				goto reupgrade;
+			conn->service_id = sp->hdr.serviceId;
+		}
+		
 		if (sp->hdr.callNumber == 0) {
 			/* Connection-level packet */
 			_debug("CONN %p {%d}", conn, conn->debug_id);
@@ -1194,6 +1201,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
 				rxrpc_input_implicit_end_call(conn, call);
 			call = NULL;
 		}
+
+		if (call && sp->hdr.serviceId != call->service_id)
+			call->service_id = sp->hdr.serviceId;
 	} else {
 		skew = 0;
 		call = NULL;
@@ -1237,11 +1247,18 @@ wrong_security:
 	skb->priority = RXKADINCONSISTENCY;
 	goto post_abort;
 
+reupgrade:
+	rcu_read_unlock();
+	trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+			  RX_PROTOCOL_ERROR, EBADMSG);
+	goto protocol_error;
+
 bad_message_unlock:
 	rcu_read_unlock();
 bad_message:
 	trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
 	skb->priority = RX_PROTOCOL_ERROR;
 post_abort:
 	skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5dab1ff3a6c2..5bd2d0fa4a03 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -292,6 +292,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	whdr._rsvd	= htons(sp->hdr._rsvd);
 	whdr.serviceId	= htons(call->service_id);
 
+	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
+	    sp->hdr.seq == 1)
+		whdr.userStatus	= RXRPC_USERSTATUS_SERVICE_UPGRADE;
+
 	iov[0].iov_base = &whdr;
 	iov[0].iov_len = sizeof(whdr);
 	iov[1].iov_base = skb->head;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 96ffa5d5733b..5a4801e7f560 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -366,7 +366,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
 			      unsigned long *user_call_ID,
 			      enum rxrpc_command *command,
 			      u32 *abort_code,
-			      bool *_exclusive)
+			      bool *_exclusive,
+			      bool *_upgrade)
 {
 	struct cmsghdr *cmsg;
 	bool got_user_ID = false;
@@ -429,6 +430,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
 			if (len != 0)
 				return -EINVAL;
 			break;
+
+		case RXRPC_UPGRADE_SERVICE:
+			*_upgrade = true;
+			if (len != 0)
+				return -EINVAL;
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -447,7 +455,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
  */
 static struct rxrpc_call *
 rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
-				  unsigned long user_call_ID, bool exclusive)
+				  unsigned long user_call_ID, bool exclusive,
+				  bool upgrade)
 	__releases(&rx->sk.sk_lock.slock)
 {
 	struct rxrpc_conn_parameters cp;
@@ -472,6 +481,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 	cp.key			= rx->key;
 	cp.security_level	= rx->min_sec_level;
 	cp.exclusive		= rx->exclusive | exclusive;
+	cp.upgrade		= upgrade;
 	cp.service_id		= srx->srx_service;
 	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
 	/* The socket is now unlocked */
@@ -493,13 +503,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	struct rxrpc_call *call;
 	unsigned long user_call_ID = 0;
 	bool exclusive = false;
+	bool upgrade = true;
 	u32 abort_code = 0;
 	int ret;
 
 	_enter("");
 
 	ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
-				 &exclusive);
+				 &exclusive, &upgrade);
 	if (ret < 0)
 		goto error_release_sock;
 
@@ -521,7 +532,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		if (cmd != RXRPC_CMD_SEND_DATA)
 			goto error_release_sock;
 		call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
-							 exclusive);
+							 exclusive, upgrade);
 		/* The socket is now unlocked... */
 		if (IS_ERR(call))
 			return PTR_ERR(call);
-- 
cgit v1.2.3


From 41bb26f8db3ad33b083e57eb9fc5828796110e77 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 28 May 2017 08:56:46 +0300
Subject: uuid,afs: move struct uuid_v1 back into afs

This essentially is a partial revert of commit ff548773
("afs: Move UUID struct to linux/uuid.h") and moves struct uuid_v1 back into
fs/afs as struct afs_uuid.  It however keeps it as big endian structure
so that we can use the normal uuid generation helpers when casting to/from
struct afs_uuid.

The V1 uuid intrepretation in struct form isn't really useful to the
rest of the kernel, and not really compatible to it either, so move it
back to AFS instead of polluting the global uuid.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c   | 16 ++++++++--------
 fs/afs/internal.h    | 11 ++++++++++-
 fs/afs/main.c        |  2 +-
 include/linux/uuid.h | 24 ------------------------
 4 files changed, 19 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3062cceb5c2a..782d4d05a53b 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
 	struct sockaddr_rxrpc srx;
 	struct afs_server *server;
-	struct uuid_v1 *r;
+	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
@@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 		}
 
 		_debug("unmarshall UUID");
-		call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
 
@@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 {
 	struct afs_call *call = container_of(work, struct afs_call, work);
-	struct uuid_v1 *r = call->request;
+	struct afs_uuid *r = call->request;
 
 	struct {
 		__be32	match;
@@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
  */
 static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 {
-	struct uuid_v1 *r;
+	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
@@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 		}
 
 		_debug("unmarshall UUID");
-		call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
 
 		b = call->buffer;
 		r = call->request;
-		r->time_low			= b[0];
-		r->time_mid			= htons(ntohl(b[1]));
-		r->time_hi_and_version		= htons(ntohl(b[2]));
+		r->time_low			= ntohl(b[0]);
+		r->time_mid			= ntohl(b[1]);
+		r->time_hi_and_version		= ntohl(b[2]);
 		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
 		r->clock_seq_low		= ntohl(b[4]);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 393672997cc2..4e2556606623 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -410,6 +410,15 @@ struct afs_interface {
 	unsigned	mtu;		/* MTU of interface */
 };
 
+struct afs_uuid {
+	__be32		time_low;			/* low part of timestamp */
+	__be16		time_mid;			/* mid part of timestamp */
+	__be16		time_hi_and_version;		/* high part of timestamp and version  */
+	__u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+	__u8		clock_seq_low;			/* clock seq low */
+	__u8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*****************************************************************************/
 /*
  * cache.c
@@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *);
  * main.c
  */
 extern struct workqueue_struct *afs_wq;
-extern struct uuid_v1 afs_uuid;
+extern struct afs_uuid afs_uuid;
 
 /*
  * misc.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 51d7d17bca57..9944770849da 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,7 +31,7 @@ static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-struct uuid_v1 afs_uuid;
+struct afs_uuid afs_uuid;
 struct workqueue_struct *afs_wq;
 
 /*
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 4dff73a89758..2d095fc60204 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,30 +18,6 @@
 
 #include <uapi/linux/uuid.h>
 
-/*
- * V1 (time-based) UUID definition [RFC 4122].
- * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
- *   increments since midnight 15th October 1582
- *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
- *     time
- * - the clock sequence is a 14-bit counter to avoid duplicate times
- */
-struct uuid_v1 {
-	__be32		time_low;			/* low part of timestamp */
-	__be16		time_mid;			/* mid part of timestamp */
-	__be16		time_hi_and_version;		/* high part of timestamp and version  */
-#define UUID_TO_UNIX_TIME	0x01b21dd213814000ULL
-#define UUID_TIMEHI_MASK	0x0fff
-#define UUID_VERSION_TIME	0x1000	/* time-based UUID */
-#define UUID_VERSION_NAME	0x3000	/* name-based UUID */
-#define UUID_VERSION_RANDOM	0x4000	/* (pseudo-)random generated UUID */
-	u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
-#define UUID_CLOCKHI_MASK	0x3f
-#define UUID_VARIANT_STD	0x80
-	u8		clock_seq_low;			/* clock seq low */
-	u8		node[6];			/* spatially unique node ID (MAC addr) */
-};
-
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
  * not including trailing NUL.
-- 
cgit v1.2.3


From 60927bc314363f91616c1f4577541c2a2e27aba3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 May 2017 09:56:45 +0200
Subject: uuid: remove uuid_be defintions from the uapi header

We don't use uuid_be and the UUID_BE constants in any uapi headers, so make
them private to the kernel.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/uuid.h      | 15 +++++++++++++++
 include/uapi/linux/uuid.h | 16 ----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 2d095fc60204..30fb13018e29 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,6 +18,21 @@
 
 #include <uapi/linux/uuid.h>
 
+typedef struct {
+	__u8 b[16];
+} uuid_be;
+
+#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
+((uuid_be)								\
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+   ((b) >> 8) & 0xff, (b) & 0xff,					\
+   ((c) >> 8) & 0xff, (c) & 0xff,					\
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+#define NULL_UUID_BE							\
+	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
+		0x00, 0x00, 0x00, 0x00)
+
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
  * not including trailing NUL.
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index 3738e5fb6a4d..0099756c4bac 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -24,10 +24,6 @@ typedef struct {
 	__u8 b[16];
 } uuid_le;
 
-typedef struct {
-	__u8 b[16];
-} uuid_be;
-
 #define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
 ((uuid_le)								\
 {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
@@ -35,20 +31,8 @@ typedef struct {
    (c) & 0xff, ((c) >> 8) & 0xff,					\
    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
-#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_be)								\
-{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
-   ((b) >> 8) & 0xff, (b) & 0xff,					\
-   ((c) >> 8) & 0xff, (c) & 0xff,					\
-   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
-
 #define NULL_UUID_LE							\
 	UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
 		0x00, 0x00, 0x00, 0x00)
 
-#define NULL_UUID_BE							\
-	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
-
-
 #endif /* _UAPI_LINUX_UUID_H_ */
-- 
cgit v1.2.3


From f9727a17db9bab71ddae91f74f11a8a2f9a0ece6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 May 2017 10:02:48 +0200
Subject: uuid: rename uuid types

Our "little endian" UUID really is a Wintel GUID, so rename it and its
helpers such (guid_t).  The big endian UUID is the only true one, so
give it the name uuid_t.  The uuid_le and uuid_be names are retained for
now, but will hopefully go away soon.  The exception to that are the _cmp
helpers that will be replaced by better primitives ASAP and thus don't
get the new names.

Also the _to_bin helpers are named to match the better named uuid_parse
routine in userspace.

Also remove the existing typedef in XFS that's now been superceeded by
the generic type name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
[andy: also update the UUID_LE/UUID_BE macros including fallout]
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_linux.h        |  2 --
 include/linux/uuid.h      | 55 +++++++++++++++++++++++++++--------------------
 include/uapi/linux/uuid.h | 12 +++++++----
 lib/test_uuid.c           | 32 +++++++++++++--------------
 lib/uuid.c                | 30 +++++++++++++-------------
 lib/vsprintf.c            |  4 ++--
 6 files changed, 73 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 89ee5ec66837..2c33d915e550 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -39,8 +39,6 @@ typedef __s64			xfs_daddr_t;	/* <disk address> type */
 typedef __u32			xfs_dev_t;
 typedef __u32			xfs_nlink_t;
 
-typedef uuid_be			uuid_t;
-
 #include "xfs_types.h"
 
 #include "kmem.h"
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 30fb13018e29..c2adb8046095 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -20,46 +20,55 @@
 
 typedef struct {
 	__u8 b[16];
-} uuid_be;
+} uuid_t;
 
-#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_be)								\
+#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
+((uuid_t)								\
 {{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
    ((b) >> 8) & 0xff, (b) & 0xff,					\
    ((c) >> 8) & 0xff, (c) & 0xff,					\
    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
-#define NULL_UUID_BE							\
-	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
-
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
  * not including trailing NUL.
  */
 #define	UUID_STRING_LEN		36
 
-static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
-{
-	return memcmp(&u1, &u2, sizeof(uuid_le));
-}
-
-static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2)
-{
-	return memcmp(&u1, &u2, sizeof(uuid_be));
-}
-
 void generate_random_uuid(unsigned char uuid[16]);
 
-extern void uuid_le_gen(uuid_le *u);
-extern void uuid_be_gen(uuid_be *u);
+extern void guid_gen(guid_t *u);
+extern void uuid_gen(uuid_t *u);
 
 bool __must_check uuid_is_valid(const char *uuid);
 
-extern const u8 uuid_le_index[16];
-extern const u8 uuid_be_index[16];
+extern const u8 guid_index[16];
+extern const u8 uuid_index[16];
+
+int guid_parse(const char *uuid, guid_t *u);
+int uuid_parse(const char *uuid, uuid_t *u);
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u);
-int uuid_be_to_bin(const char *uuid, uuid_be *u);
+/* backwards compatibility, don't use in new code */
+typedef uuid_t uuid_be;
+#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+	UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7)
+#define NULL_UUID_BE 							\
+	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
+	     0x00, 0x00, 0x00, 0x00)
+
+#define uuid_le_gen(u)		guid_gen(u)
+#define uuid_be_gen(u)		uuid_gen(u)
+#define uuid_le_to_bin(guid, u)	guid_parse(guid, u)
+#define uuid_be_to_bin(uuid, u)	uuid_parse(uuid, u)
+
+static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
+{
+	return memcmp(&u1, &u2, sizeof(guid_t));
+}
+
+static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2)
+{
+	return memcmp(&u1, &u2, sizeof(uuid_t));
+}
 
 #endif
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index 0099756c4bac..8ef82f433877 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -22,17 +22,21 @@
 
 typedef struct {
 	__u8 b[16];
-} uuid_le;
+} guid_t;
 
-#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_le)								\
+#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
+((guid_t)								\
 {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
    (b) & 0xff, ((b) >> 8) & 0xff,					\
    (c) & 0xff, ((c) >> 8) & 0xff,					\
    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
+/* backwards compatibility, don't use in new code */
+typedef guid_t uuid_le;
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
+	GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
 #define NULL_UUID_LE							\
 	UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
+	     0x00, 0x00, 0x00, 0x00)
 
 #endif /* _UAPI_LINUX_UUID_H_ */
diff --git a/lib/test_uuid.c b/lib/test_uuid.c
index 547d3127a3cf..ff36f3240e90 100644
--- a/lib/test_uuid.c
+++ b/lib/test_uuid.c
@@ -11,25 +11,25 @@
 
 struct test_uuid_data {
 	const char *uuid;
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 };
 
 static const struct test_uuid_data test_uuid_test_data[] = {
 	{
 		.uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
-		.le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
-		.be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+		.le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+		.be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
 	},
 	{
 		.uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
-		.le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
-		.be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+		.le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+		.be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
 	},
 	{
 		.uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
-		.le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
-		.be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+		.le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+		.be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
 	},
 };
 
@@ -61,13 +61,13 @@ static void __init test_uuid_failed(const char *prefix, bool wrong, bool be,
 
 static void __init test_uuid_test(const struct test_uuid_data *data)
 {
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 	char buf[48];
 
 	/* LE */
 	total_tests++;
-	if (uuid_le_to_bin(data->uuid, &le))
+	if (guid_parse(data->uuid, &le))
 		test_uuid_failed("conversion", false, false, data->uuid, NULL);
 
 	total_tests++;
@@ -78,7 +78,7 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 
 	/* BE */
 	total_tests++;
-	if (uuid_be_to_bin(data->uuid, &be))
+	if (uuid_parse(data->uuid, &be))
 		test_uuid_failed("conversion", false, true, data->uuid, NULL);
 
 	total_tests++;
@@ -90,17 +90,17 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 
 static void __init test_uuid_wrong(const char *data)
 {
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 
 	/* LE */
 	total_tests++;
-	if (!uuid_le_to_bin(data, &le))
+	if (!guid_parse(data, &le))
 		test_uuid_failed("negative", true, false, data, NULL);
 
 	/* BE */
 	total_tests++;
-	if (!uuid_be_to_bin(data, &be))
+	if (!uuid_parse(data, &be))
 		test_uuid_failed("negative", true, true, data, NULL);
 }
 
diff --git a/lib/uuid.c b/lib/uuid.c
index 37687af77ff8..90bee73f7bd7 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -21,10 +21,10 @@
 #include <linux/uuid.h>
 #include <linux/random.h>
 
-const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_le_index);
-const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_be_index);
+const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+EXPORT_SYMBOL(guid_index);
+const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+EXPORT_SYMBOL(uuid_index);
 
 /***************************************************************
  * Random UUID interface
@@ -53,21 +53,21 @@ static void __uuid_gen_common(__u8 b[16])
 	b[8] = (b[8] & 0x3F) | 0x80;
 }
 
-void uuid_le_gen(uuid_le *lu)
+void guid_gen(guid_t *lu)
 {
 	__uuid_gen_common(lu->b);
 	/* version 4 : random generation */
 	lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_le_gen);
+EXPORT_SYMBOL_GPL(guid_gen);
 
-void uuid_be_gen(uuid_be *bu)
+void uuid_gen(uuid_t *bu)
 {
 	__uuid_gen_common(bu->b);
 	/* version 4 : random generation */
 	bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_be_gen);
+EXPORT_SYMBOL_GPL(uuid_gen);
 
 /**
   * uuid_is_valid - checks if UUID string valid
@@ -97,7 +97,7 @@ bool uuid_is_valid(const char *uuid)
 }
 EXPORT_SYMBOL(uuid_is_valid);
 
-static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16])
 {
 	static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
 	unsigned int i;
@@ -115,14 +115,14 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
 	return 0;
 }
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u)
+int guid_parse(const char *uuid, guid_t *u)
 {
-	return __uuid_to_bin(uuid, u->b, uuid_le_index);
+	return __uuid_parse(uuid, u->b, guid_index);
 }
-EXPORT_SYMBOL(uuid_le_to_bin);
+EXPORT_SYMBOL(guid_parse);
 
-int uuid_be_to_bin(const char *uuid, uuid_be *u)
+int uuid_parse(const char *uuid, uuid_t *u)
 {
-	return __uuid_to_bin(uuid, u->b, uuid_be_index);
+	return __uuid_parse(uuid, u->b, uuid_index);
 }
-EXPORT_SYMBOL(uuid_be_to_bin);
+EXPORT_SYMBOL(uuid_parse);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 2d41de3f98a1..9f37d6208e99 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1308,14 +1308,14 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	char uuid[UUID_STRING_LEN + 1];
 	char *p = uuid;
 	int i;
-	const u8 *index = uuid_be_index;
+	const u8 *index = uuid_index;
 	bool uc = false;
 
 	switch (*(++fmt)) {
 	case 'L':
 		uc = true;		/* fall-through */
 	case 'l':
-		index = uuid_le_index;
+		index = guid_index;
 		break;
 	case 'B':
 		uc = true;
-- 
cgit v1.2.3


From df33767d9fe0ca93c606cc9042df05e5045c8158 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 11 May 2017 14:00:57 +0200
Subject: uuid: hoist helpers uuid_equal() and uuid_copy() from xfs

These helper are used to compare and copy two uuid_t type objects.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
[hch: also provide the respective guid_ versions]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 fs/xfs/uuid.c        |  6 ------
 fs/xfs/uuid.h        |  7 -------
 include/linux/uuid.h | 20 ++++++++++++++++++++
 lib/test_uuid.c      |  4 ++--
 4 files changed, 22 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
index b83f76b6d410..29ed78c8637b 100644
--- a/fs/xfs/uuid.c
+++ b/fs/xfs/uuid.c
@@ -55,9 +55,3 @@ uuid_is_nil(uuid_t *uuid)
 		if (*cp++) return 0;	/* not nil */
 	return 1;	/* is nil */
 }
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
index 4f1441ba4fa5..86bbed071e79 100644
--- a/fs/xfs/uuid.h
+++ b/fs/xfs/uuid.h
@@ -19,13 +19,6 @@
 #define __XFS_SUPPORT_UUID_H__
 
 extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
 extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
 
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
-	memcpy(dst, src, sizeof(uuid_t));
-}
-
 #endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index c2adb8046095..777f9cb01eb1 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -35,6 +35,26 @@ typedef struct {
  */
 #define	UUID_STRING_LEN		36
 
+static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
+{
+	return memcmp(u1, u2, sizeof(guid_t)) == 0;
+}
+
+static inline void guid_copy(guid_t *dst, const guid_t *src)
+{
+	memcpy(dst, src, sizeof(guid_t));
+}
+
+static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
+{
+	return memcmp(u1, u2, sizeof(uuid_t)) == 0;
+}
+
+static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
+{
+	memcpy(dst, src, sizeof(uuid_t));
+}
+
 void generate_random_uuid(unsigned char uuid[16]);
 
 extern void guid_gen(guid_t *u);
diff --git a/lib/test_uuid.c b/lib/test_uuid.c
index ff36f3240e90..478c049630b5 100644
--- a/lib/test_uuid.c
+++ b/lib/test_uuid.c
@@ -71,7 +71,7 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 		test_uuid_failed("conversion", false, false, data->uuid, NULL);
 
 	total_tests++;
-	if (uuid_le_cmp(data->le, le)) {
+	if (!guid_equal(&data->le, &le)) {
 		sprintf(buf, "%pUl", &le);
 		test_uuid_failed("cmp", false, false, data->uuid, buf);
 	}
@@ -82,7 +82,7 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 		test_uuid_failed("conversion", false, true, data->uuid, NULL);
 
 	total_tests++;
-	if (uuid_be_cmp(data->be, be)) {
+	if (uuid_equal(&data->be, &be)) {
 		sprintf(buf, "%pUb", &be);
 		test_uuid_failed("cmp", false, true, data->uuid, buf);
 	}
-- 
cgit v1.2.3


From ef40dda5bbc310f6517082c0ff002913104358cd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 11 May 2017 09:01:42 +0200
Subject: uuid: hoist uuid_is_null() helper from libnvdimm

Hoist the libnvdimm helper as an inline helper to linux/uuid.h
using an auxiliary const variable uuid_null in lib/uuid.c.

[hch: also add the guid variant.  Both do the same but I'd like
to keep casts to a minimum]

The common helper uses the new abstract type uuid_t * instead of
u8 *.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
[hch: added guid_is_null]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/nvdimm/btt_devs.c |  9 +--------
 include/linux/uuid.h      | 13 +++++++++++++
 lib/uuid.c                |  5 +++++
 3 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index ae00dc0d9791..4c989bb9a8a0 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -222,13 +222,6 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 	return dev;
 }
 
-static bool uuid_is_null(u8 *uuid)
-{
-	static const u8 null_uuid[16];
-
-	return (memcmp(uuid, null_uuid, 16) == 0);
-}
-
 /**
  * nd_btt_arena_is_valid - check if the metadata layout is valid
  * @nd_btt:	device with BTT geometry and backing device info
@@ -249,7 +242,7 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 	if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
 		return false;
 
-	if (!uuid_is_null(super->parent_uuid))
+	if (!guid_is_null((guid_t *)&super->parent_uuid))
 		if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
 			return false;
 
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 777f9cb01eb1..75f7182d5360 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -35,6 +35,9 @@ typedef struct {
  */
 #define	UUID_STRING_LEN		36
 
+extern const guid_t guid_null;
+extern const uuid_t uuid_null;
+
 static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
 {
 	return memcmp(u1, u2, sizeof(guid_t)) == 0;
@@ -45,6 +48,11 @@ static inline void guid_copy(guid_t *dst, const guid_t *src)
 	memcpy(dst, src, sizeof(guid_t));
 }
 
+static inline bool guid_is_null(guid_t *guid)
+{
+	return guid_equal(guid, &guid_null);
+}
+
 static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
 {
 	return memcmp(u1, u2, sizeof(uuid_t)) == 0;
@@ -55,6 +63,11 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
 	memcpy(dst, src, sizeof(uuid_t));
 }
 
+static inline bool uuid_is_null(uuid_t *uuid)
+{
+	return uuid_equal(uuid, &uuid_null);
+}
+
 void generate_random_uuid(unsigned char uuid[16]);
 
 extern void guid_gen(guid_t *u);
diff --git a/lib/uuid.c b/lib/uuid.c
index f7116ed88e01..680b9fb9ba09 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -21,6 +21,11 @@
 #include <linux/uuid.h>
 #include <linux/random.h>
 
+const guid_t guid_null;
+EXPORT_SYMBOL(guid_null);
+const uuid_t uuid_null;
+EXPORT_SYMBOL(uuid_null);
+
 const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
 const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
-- 
cgit v1.2.3


From 1dd771eb0b09fe9c12ea58b18c676b32a528be39 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 May 2017 15:16:44 +0200
Subject: block: remove blk_part_pack_uuid

This helper was only used by IMA of all things, which would get spurious
errors if CONFIG_BLOCK is disabled.  Just opencode the call there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/genhd.h               | 11 -----------
 security/integrity/ima/ima_policy.c |  3 +--
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index acff9437e5c3..e619fae2f037 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -219,12 +219,6 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part)
 	return NULL;
 }
 
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-	uuid_be_to_bin(uuid_str, (uuid_be *)to);
-	return 0;
-}
-
 static inline int disk_max_parts(struct gendisk *disk)
 {
 	if (disk->flags & GENHD_FL_EXT_DEVT)
@@ -736,11 +730,6 @@ static inline dev_t blk_lookup_devt(const char *name, int partno)
 	dev_t devt = MKDEV(0, 0);
 	return devt;
 }
-
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-	return -EINVAL;
-}
 #endif /* CONFIG_BLOCK */
 
 #endif /* _LINUX_GENHD_H */
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 3ab1067db624..1431ada649e5 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -717,8 +717,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 				break;
 			}
 
-			result = blk_part_pack_uuid(args[0].from,
-						    entry->fsuuid);
+			result = uuid_parse(args[0].from, (uuid_t *)&entry->fsuuid);
 			if (!result)
 				entry->flags |= IMA_FSUUID;
 			break;
-- 
cgit v1.2.3


From 85787090a21eb749d8b347eaf9ff1a455637473c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 May 2017 15:06:33 +0200
Subject: fs: switch ->s_uuid to uuid_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For some file systems we still memcpy into it, but in various places this
already allows us to use the proper uuid helpers.  More to come..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com> (Changes to IMA/EVM)
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/xen/tmem.c                  |  6 +++---
 fs/ext4/super.c                     |  2 +-
 fs/f2fs/super.c                     |  2 +-
 fs/gfs2/ops_fstype.c                |  2 +-
 fs/gfs2/sys.c                       | 22 +++++-----------------
 fs/ocfs2/super.c                    |  2 +-
 fs/overlayfs/copy_up.c              |  5 ++---
 fs/overlayfs/namei.c                |  2 +-
 fs/xfs/xfs_mount.c                  |  3 +--
 include/linux/cleancache.h          |  2 +-
 include/linux/fs.h                  |  5 +++--
 mm/cleancache.c                     |  2 +-
 security/integrity/evm/evm_crypto.c |  2 +-
 security/integrity/ima/ima_policy.c |  2 +-
 14 files changed, 23 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 4ac2ca8a7656..bf13d1ec51f3 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -233,12 +233,12 @@ static int tmem_cleancache_init_fs(size_t pagesize)
 	return xen_tmem_new_pool(uuid_private, 0, pagesize);
 }
 
-static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize)
 {
 	struct tmem_pool_uuid shared_uuid;
 
-	shared_uuid.uuid_lo = *(u64 *)uuid;
-	shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
+	shared_uuid.uuid_lo = *(u64 *)&uuid->b[0];
+	shared_uuid.uuid_hi = *(u64 *)&uuid->b[8];
 	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0b177da9ea82..6e3b4186a22f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3952,7 +3952,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_qcop = &ext4_qctl_operations;
 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
-	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83355ec4a92c..0b89b0b7b9f7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1937,7 +1937,7 @@ try_onemore:
 	sb->s_time_gran = 1;
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
 		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
-	memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+	memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
 
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ed67548b286c..b92135c202c2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
 
 	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
 	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-	memcpy(s->s_uuid, str->sb_uuid, 16);
+	memcpy(&s->s_uuid, str->sb_uuid, 16);
 }
 
 /**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7a515345610c..e77bc52b468f 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
 	return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
 }
 
-static int gfs2_uuid_valid(const u8 *uuid)
-{
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		if (uuid[i])
-			return 1;
-	}
-	return 0;
-}
-
 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 {
 	struct super_block *s = sdp->sd_vfs;
-	const u8 *uuid = s->s_uuid;
+
 	buf[0] = '\0';
-	if (!gfs2_uuid_valid(uuid))
+	if (uuid_is_null(&s->s_uuid))
 		return 0;
-	return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
+	return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
 }
 
 static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 {
 	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
 	struct super_block *s = sdp->sd_vfs;
-	const u8 *uuid = s->s_uuid;
 
 	add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
 	add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
 	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
 		add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
-	if (gfs2_uuid_valid(uuid))
-		add_uevent_var(env, "UUID=%pUB", uuid);
+	if (!uuid_is_null(&s->s_uuid))
+		add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
 	return 0;
 }
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ca1646fbcaef..83005f486451 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
 	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
 	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
-	memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+	memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
 	       sizeof(di->id2.i_super.s_uuid));
 
 	osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 7a44533f4bbf..d55fceb4e414 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -284,7 +284,6 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 			  struct dentry *upper)
 {
 	struct super_block *sb = lower->d_sb;
-	uuid_be *uuid = (uuid_be *) &sb->s_uuid;
 	const struct ovl_fh *fh = NULL;
 	int err;
 
@@ -294,8 +293,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * up and a pure upper inode.
 	 */
 	if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-	    uuid_be_cmp(*uuid, NULL_UUID_BE)) {
-		fh = ovl_encode_fh(lower, uuid);
+	    !uuid_is_null(&sb->s_uuid)) {
+		fh = ovl_encode_fh(lower, &sb->s_uuid);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index f3136c31e72a..de0d4f742f36 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -135,7 +135,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	 * Make sure that the stored uuid matches the uuid of the lower
 	 * layer where file handle will be decoded.
 	 */
-	if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
 		goto out;
 
 	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 54452967e35e..d249546da15e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -74,8 +74,7 @@ xfs_uuid_mount(
 	int			hole, i;
 
 	/* Publish UUID in struct super_block */
-	BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
-	memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
+	uuid_copy(&mp->m_super->s_uuid, uuid);
 
 	if (mp->m_flags & XFS_MOUNT_NOUUID)
 		return 0;
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index fccf7f44139d..bbb3712dd892 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -27,7 +27,7 @@ struct cleancache_filekey {
 
 struct cleancache_ops {
 	int (*init_fs)(size_t);
-	int (*init_shared_fs)(char *uuid, size_t);
+	int (*init_shared_fs)(uuid_t *uuid, size_t);
 	int (*get_page)(int, struct cleancache_filekey,
 			pgoff_t, struct page *);
 	void (*put_page)(int, struct cleancache_filekey,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..3e68cabb8457 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -30,6 +30,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
 #include <linux/delayed_call.h>
+#include <linux/uuid.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1328,8 +1329,8 @@ struct super_block {
 
 	struct sb_writers	s_writers;
 
-	char s_id[32];				/* Informational name */
-	u8 s_uuid[16];				/* UUID */
+	char			s_id[32];	/* Informational name */
+	uuid_t			s_uuid;		/* UUID */
 
 	void 			*s_fs_info;	/* Filesystem private info */
 	unsigned int		s_max_links;
diff --git a/mm/cleancache.c b/mm/cleancache.c
index ba5d8f3e6d68..f7b9fdc79d97 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -130,7 +130,7 @@ void __cleancache_init_shared_fs(struct super_block *sb)
 	int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
 
 	if (cleancache_ops) {
-		pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+		pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
 		if (pool_id < 0)
 			pool_id = CLEANCACHE_NO_POOL;
 	}
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index d7f282d75cc1..1d32cd20009a 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -164,7 +164,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
 	hmac_misc.mode = inode->i_mode;
 	crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
 	if (evm_hmac_attrs & EVM_ATTR_FSUUID)
-		crypto_shash_update(desc, inode->i_sb->s_uuid,
+		crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0],
 				    sizeof(inode->i_sb->s_uuid));
 	crypto_shash_final(desc, digest);
 }
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 9a7c7cbdbe7c..6f885fab9d84 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -244,7 +244,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
 	    && rule->fsmagic != inode->i_sb->s_magic)
 		return false;
 	if ((rule->flags & IMA_FSUUID) &&
-	    memcmp(&rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
+	    !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid))
 		return false;
 	if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid))
 		return false;
-- 
cgit v1.2.3


From 8e41226324e7c00f2087bfbc9f470d665e92df18 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 May 2017 09:54:27 +0200
Subject: nvme: switch to uuid_t

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/nvme/host/fabrics.c | 8 ++++----
 drivers/nvme/host/fabrics.h | 2 +-
 drivers/nvme/host/fc.c      | 3 +--
 drivers/nvme/target/nvmet.h | 1 +
 include/linux/nvme-fc.h     | 3 +--
 include/linux/nvme.h        | 3 ++-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 990e6fb32a63..c190d7e36900 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
 
 	kref_init(&host->ref);
 	memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
-	uuid_be_gen(&host->id);
+	uuid_gen(&host->id);
 
 	list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void)
 		return NULL;
 
 	kref_init(&host->ref);
-	uuid_be_gen(&host->id);
+	uuid_gen(&host->id);
 	snprintf(host->nqn, NVMF_NQN_SIZE,
 		"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
 
@@ -395,7 +395,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+	uuid_copy(&data->hostid, &ctrl->opts->host->id);
 	data->cntlid = cpu_to_le16(0xffff);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -454,7 +454,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+	uuid_copy(&data->hostid, &ctrl->opts->host->id);
 	data->cntlid = cpu_to_le16(ctrl->cntlid);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index f5a9c1fb186f..29be7600689d 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -36,7 +36,7 @@ struct nvmf_host {
 	struct kref		ref;
 	struct list_head	list;
 	char			nqn[NVMF_NQN_SIZE];
-	uuid_be			id;
+	uuid_t			id;
 };
 
 /**
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5b14cbefb724..96b983bb44bd 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -878,8 +878,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
 	/* Linux supports only Dynamic controllers */
 	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
-	memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id,
-		min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be)));
+	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
 	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
 		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
 	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index cfc5c7fb0ab7..8ff6e430b30a 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/uuid.h>
 #include <linux/nvme.h>
 #include <linux/configfs.h>
 #include <linux/rcupdate.h>
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index e997c4a49a88..bc711a10be05 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -177,7 +177,6 @@ struct fcnvme_lsdesc_rjt {
 };
 
 
-#define FCNVME_ASSOC_HOSTID_LEN		16
 #define FCNVME_ASSOC_HOSTNQN_LEN	256
 #define FCNVME_ASSOC_SUBNQN_LEN		256
 
@@ -191,7 +190,7 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
 	__be16	cntlid;
 	__be16	sqsize;
 	__be32	rsvd52;
-	u8	hostid[FCNVME_ASSOC_HOSTID_LEN];
+	uuid_t	hostid;
 	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
 	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
 	u8	rsvd632[384];
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b625bacf37ef..e400a69fa1d3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -16,6 +16,7 @@
 #define _LINUX_NVME_H
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /* NQN names in commands fields specified one size */
 #define NVMF_NQN_FIELD_LEN	256
@@ -843,7 +844,7 @@ struct nvmf_connect_command {
 };
 
 struct nvmf_connect_data {
-	__u8		hostid[16];
+	uuid_t		hostid;
 	__le16		cntlid;
 	char		resv4[238];
 	char		subsysnqn[NVMF_NQN_FIELD_LEN];
-- 
cgit v1.2.3


From 82c01a84d5a9bd3b9347bb03eed2f05bbccef933 Mon Sep 17 00:00:00 2001
From: "yuval.shaia@oracle.com" <yuval.shaia@oracle.com>
Date: Sun, 4 Jun 2017 20:22:00 +0300
Subject: net/{mii, smsc}: Make mii_ethtool_get_link_ksettings and
 smc_netdev_get_ecmd return void

Make return value void since functions never returns meaningfull value.

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cris/eth_v10.c                              |  5 ++---
 drivers/net/ethernet/3com/3c59x.c                       |  4 +++-
 drivers/net/ethernet/amd/pcnet32.c                      |  5 +----
 drivers/net/ethernet/cirrus/ep93xx_eth.c                |  5 ++++-
 drivers/net/ethernet/dec/tulip/winbond-840.c            |  5 ++---
 drivers/net/ethernet/faraday/ftmac100.c                 |  5 ++++-
 drivers/net/ethernet/fealnx.c                           |  5 ++---
 drivers/net/ethernet/intel/e100.c                       |  5 ++++-
 drivers/net/ethernet/jme.c                              |  5 ++---
 drivers/net/ethernet/korina.c                           |  5 ++---
 drivers/net/ethernet/micrel/ks8851.c                    |  5 ++++-
 drivers/net/ethernet/micrel/ks8851_mll.c                |  5 ++++-
 drivers/net/ethernet/nuvoton/w90p910_ether.c            |  5 ++++-
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c |  6 +++---
 drivers/net/ethernet/realtek/8139cp.c                   |  5 ++---
 drivers/net/ethernet/realtek/r8169.c                    |  4 +++-
 drivers/net/ethernet/sgi/ioc3-eth.c                     |  5 ++---
 drivers/net/ethernet/sis/sis190.c                       |  4 +++-
 drivers/net/ethernet/smsc/epic100.c                     |  5 ++---
 drivers/net/ethernet/smsc/smc911x.c                     |  7 +++----
 drivers/net/ethernet/smsc/smc91c92_cs.c                 | 13 +++++--------
 drivers/net/ethernet/smsc/smc91x.c                      |  7 ++-----
 drivers/net/ethernet/tundra/tsi108_eth.c                |  5 ++---
 drivers/net/ethernet/via/via-rhine.c                    |  5 ++---
 drivers/net/mii.c                                       |  8 ++------
 drivers/net/usb/ax88179_178a.c                          |  5 ++++-
 drivers/net/usb/r8152.c                                 |  2 +-
 drivers/net/usb/usbnet.c                                |  4 +++-
 include/linux/mii.h                                     |  2 +-
 29 files changed, 78 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index da020418a652..017f48cdcab9 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1417,10 +1417,9 @@ static int e100_get_link_ksettings(struct net_device *dev,
 {
 	struct net_local *np = netdev_priv(dev);
 	u32 supported;
-	int err;
 
 	spin_lock_irq(&np->lock);
-	err = mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
 	spin_unlock_irq(&np->lock);
 
 	/* The PHY may support 1000baseT, but the Etrax100 does not.  */
@@ -1432,7 +1431,7 @@ static int e100_get_link_ksettings(struct net_device *dev,
 	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
 						supported);
 
-	return err;
+	return 0;
 }
 
 static int e100_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index e41245a54f8b..14cff6017756 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -2912,7 +2912,9 @@ static int vortex_get_link_ksettings(struct net_device *dev,
 {
 	struct vortex_private *vp = netdev_priv(dev);
 
-	return mii_ethtool_get_link_ksettings(&vp->mii, cmd);
+	mii_ethtool_get_link_ksettings(&vp->mii, cmd);
+
+	return 0;
 }
 
 static int vortex_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 86369d7c9a0f..7f60d17819ce 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -731,12 +731,10 @@ static int pcnet32_get_link_ksettings(struct net_device *dev,
 {
 	struct pcnet32_private *lp = netdev_priv(dev);
 	unsigned long flags;
-	int r = -EOPNOTSUPP;
 
 	spin_lock_irqsave(&lp->lock, flags);
 	if (lp->mii) {
 		mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
-		r = 0;
 	} else if (lp->chip_version == PCNET32_79C970A) {
 		if (lp->autoneg) {
 			cmd->base.autoneg = AUTONEG_ENABLE;
@@ -753,10 +751,9 @@ static int pcnet32_get_link_ksettings(struct net_device *dev,
 		ethtool_convert_legacy_u32_to_link_mode(
 						cmd->link_modes.supported,
 						SUPPORTED_TP | SUPPORTED_AUI);
-		r = 0;
 	}
 	spin_unlock_irqrestore(&lp->lock, flags);
-	return r;
+	return 0;
 }
 
 static int pcnet32_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 7a7c02f1f8b9..e2a702996db4 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -702,7 +702,10 @@ static int ep93xx_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct ep93xx_priv *ep = netdev_priv(dev);
-	return mii_ethtool_get_link_ksettings(&ep->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&ep->mii, cmd);
+
+	return 0;
 }
 
 static int ep93xx_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index d1f2f3cc7cfa..32d7229544fa 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -1395,13 +1395,12 @@ static int netdev_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct netdev_private *np = netdev_priv(dev);
-	int rc;
 
 	spin_lock_irq(&np->lock);
-	rc = mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
 	spin_unlock_irq(&np->lock);
 
-	return rc;
+	return 0;
 }
 
 static int netdev_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 1536356e2ea8..66928a922824 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -829,7 +829,10 @@ static int ftmac100_get_link_ksettings(struct net_device *netdev,
 				       struct ethtool_link_ksettings *cmd)
 {
 	struct ftmac100 *priv = netdev_priv(netdev);
-	return mii_ethtool_get_link_ksettings(&priv->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&priv->mii, cmd);
+
+	return 0;
 }
 
 static int ftmac100_set_link_ksettings(struct net_device *netdev,
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 766636a7c25e..610f9c07c21d 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -1821,13 +1821,12 @@ static int netdev_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct netdev_private *np = netdev_priv(dev);
-	int rc;
 
 	spin_lock_irq(&np->lock);
-	rc = mii_ethtool_get_link_ksettings(&np->mii, cmd);
+	mii_ethtool_get_link_ksettings(&np->mii, cmd);
 	spin_unlock_irq(&np->lock);
 
-	return rc;
+	return 0;
 }
 
 static int netdev_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 2b7323d392dc..4d10270ddf8f 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2430,7 +2430,10 @@ static int e100_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct nic *nic = netdev_priv(netdev);
-	return mii_ethtool_get_link_ksettings(&nic->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&nic->mii, cmd);
+
+	return 0;
 }
 
 static int e100_set_link_ksettings(struct net_device *netdev,
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 0e5083a48937..62d848df26ef 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -2610,12 +2610,11 @@ jme_get_link_ksettings(struct net_device *netdev,
 		       struct ethtool_link_ksettings *cmd)
 {
 	struct jme_adapter *jme = netdev_priv(netdev);
-	int rc;
 
 	spin_lock_bh(&jme->phy_lock);
-	rc = mii_ethtool_get_link_ksettings(&jme->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&jme->mii_if, cmd);
 	spin_unlock_bh(&jme->phy_lock);
-	return rc;
+	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 9fae98caf83a..3c0a6451273d 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -699,13 +699,12 @@ static int netdev_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct korina_private *lp = netdev_priv(dev);
-	int rc;
 
 	spin_lock_irq(&lp->lock);
-	rc = mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
 	spin_unlock_irq(&lp->lock);
 
-	return rc;
+	return 0;
 }
 
 static int netdev_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 20358f87de57..2fe96f1f3fe5 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1071,7 +1071,10 @@ static int ks8851_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
-	return mii_ethtool_get_link_ksettings(&ks->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&ks->mii, cmd);
+
+	return 0;
 }
 
 static int ks8851_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 7647f7bdbcb8..f3e9dd47b56f 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -1315,7 +1315,10 @@ static int ks_get_link_ksettings(struct net_device *netdev,
 				 struct ethtool_link_ksettings *cmd)
 {
 	struct ks_net *ks = netdev_priv(netdev);
-	return mii_ethtool_get_link_ksettings(&ks->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&ks->mii, cmd);
+
+	return 0;
 }
 
 static int ks_set_link_ksettings(struct net_device *netdev,
diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c
index 159564d8dcdb..89ab786da25f 100644
--- a/drivers/net/ethernet/nuvoton/w90p910_ether.c
+++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c
@@ -868,7 +868,10 @@ static int w90p910_get_link_ksettings(struct net_device *dev,
 				      struct ethtool_link_ksettings *cmd)
 {
 	struct w90p910_ether *ether = netdev_priv(dev);
-	return mii_ethtool_get_link_ksettings(&ether->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&ether->mii, cmd);
+
+	return 0;
 }
 
 static int w90p910_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index 21093276d2b7..731ce1e419e4 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -85,9 +85,8 @@ static int pch_gbe_get_link_ksettings(struct net_device *netdev,
 {
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
 	u32 supported, advertising;
-	int ret;
 
-	ret = mii_ethtool_get_link_ksettings(&adapter->mii, ecmd);
+	mii_ethtool_get_link_ksettings(&adapter->mii, ecmd);
 
 	ethtool_convert_link_mode_to_legacy_u32(&supported,
 						ecmd->link_modes.supported);
@@ -104,7 +103,8 @@ static int pch_gbe_get_link_ksettings(struct net_device *netdev,
 
 	if (!netif_carrier_ok(adapter->netdev))
 		ecmd->base.speed = SPEED_UNKNOWN;
-	return ret;
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 72233ab9474b..e7ab23e87de2 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -1410,14 +1410,13 @@ static int cp_get_link_ksettings(struct net_device *dev,
 				 struct ethtool_link_ksettings *cmd)
 {
 	struct cp_private *cp = netdev_priv(dev);
-	int rc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&cp->lock, flags);
-	rc = mii_ethtool_get_link_ksettings(&cp->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&cp->mii_if, cmd);
 	spin_unlock_irqrestore(&cp->lock, flags);
 
-	return rc;
+	return 0;
 }
 
 static int cp_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0a8f2817ea60..bd07a15d3b7c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -2148,7 +2148,9 @@ static int rtl8169_get_link_ksettings_xmii(struct net_device *dev,
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	return mii_ethtool_get_link_ksettings(&tp->mii, cmd);
+	mii_ethtool_get_link_ksettings(&tp->mii, cmd);
+
+	return 0;
 }
 
 static int rtl8169_get_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 52ead5524de7..b607936e1b3e 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -1562,13 +1562,12 @@ static int ioc3_get_link_ksettings(struct net_device *dev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	int rc;
 
 	spin_lock_irq(&ip->ioc3_lock);
-	rc = mii_ethtool_get_link_ksettings(&ip->mii, cmd);
+	mii_ethtool_get_link_ksettings(&ip->mii, cmd);
 	spin_unlock_irq(&ip->ioc3_lock);
 
-	return rc;
+	return 0;
 }
 
 static int ioc3_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index 02da106c6e04..445109bd6910 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1739,7 +1739,9 @@ static int sis190_get_link_ksettings(struct net_device *dev,
 {
 	struct sis190_private *tp = netdev_priv(dev);
 
-	return mii_ethtool_get_link_ksettings(&tp->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&tp->mii_if, cmd);
+
+	return 0;
 }
 
 static int sis190_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index db6dcb06193d..6a0e1d4b597c 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -1391,13 +1391,12 @@ static int netdev_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct epic_private *np = netdev_priv(dev);
-	int rc;
 
 	spin_lock_irq(&np->lock);
-	rc = mii_ethtool_get_link_ksettings(&np->mii, cmd);
+	mii_ethtool_get_link_ksettings(&np->mii, cmd);
 	spin_unlock_irq(&np->lock);
 
-	return rc;
+	return 0;
 }
 
 static int netdev_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 36307d34f641..05157442a980 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1450,7 +1450,7 @@ smc911x_ethtool_get_link_ksettings(struct net_device *dev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
-	int ret, status;
+	int status;
 	unsigned long flags;
 	u32 supported;
 
@@ -1458,7 +1458,7 @@ smc911x_ethtool_get_link_ksettings(struct net_device *dev,
 
 	if (lp->phy_type != 0) {
 		spin_lock_irqsave(&lp->lock, flags);
-		ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
+		mii_ethtool_get_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 	} else {
 		supported = SUPPORTED_10baseT_Half |
@@ -1480,10 +1480,9 @@ smc911x_ethtool_get_link_ksettings(struct net_device *dev,
 		ethtool_convert_legacy_u32_to_link_mode(
 			cmd->link_modes.supported, supported);
 
-		ret = 0;
 	}
 
-	return ret;
+	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index 976aa876789a..92c927aec66d 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -1843,8 +1843,8 @@ static int smc_link_ok(struct net_device *dev)
     }
 }
 
-static int smc_netdev_get_ecmd(struct net_device *dev,
-			       struct ethtool_link_ksettings *ecmd)
+static void smc_netdev_get_ecmd(struct net_device *dev,
+				struct ethtool_link_ksettings *ecmd)
 {
 	u16 tmp;
 	unsigned int ioaddr = dev->base_addr;
@@ -1865,8 +1865,6 @@ static int smc_netdev_get_ecmd(struct net_device *dev,
 
 	ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported,
 						supported);
-
-	return 0;
 }
 
 static int smc_netdev_set_ecmd(struct net_device *dev,
@@ -1918,18 +1916,17 @@ static int smc_get_link_ksettings(struct net_device *dev,
 	struct smc_private *smc = netdev_priv(dev);
 	unsigned int ioaddr = dev->base_addr;
 	u16 saved_bank = inw(ioaddr + BANK_SELECT);
-	int ret;
 	unsigned long flags;
 
 	spin_lock_irqsave(&smc->lock, flags);
 	SMC_SELECT_BANK(3);
 	if (smc->cfg & CFG_MII_SELECT)
-		ret = mii_ethtool_get_link_ksettings(&smc->mii_if, ecmd);
+		mii_ethtool_get_link_ksettings(&smc->mii_if, ecmd);
 	else
-		ret = smc_netdev_get_ecmd(dev, ecmd);
+		smc_netdev_get_ecmd(dev, ecmd);
 	SMC_SELECT_BANK(saved_bank);
 	spin_unlock_irqrestore(&smc->lock, flags);
-	return ret;
+	return 0;
 }
 
 static int smc_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 91e9bd7159ab..0d230b125c6c 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1539,11 +1539,10 @@ smc_ethtool_get_link_ksettings(struct net_device *dev,
 			       struct ethtool_link_ksettings *cmd)
 {
 	struct smc_local *lp = netdev_priv(dev);
-	int ret;
 
 	if (lp->phy_type != 0) {
 		spin_lock_irq(&lp->lock);
-		ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
+		mii_ethtool_get_link_ksettings(&lp->mii, cmd);
 		spin_unlock_irq(&lp->lock);
 	} else {
 		u32 supported = SUPPORTED_10baseT_Half |
@@ -1562,11 +1561,9 @@ smc_ethtool_get_link_ksettings(struct net_device *dev,
 
 		ethtool_convert_legacy_u32_to_link_mode(
 			cmd->link_modes.supported, supported);
-
-		ret = 0;
 	}
 
-	return ret;
+	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 5ac6eaa9e785..c2d15d9c0c33 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1504,13 +1504,12 @@ static int tsi108_get_link_ksettings(struct net_device *dev,
 {
 	struct tsi108_prv_data *data = netdev_priv(dev);
 	unsigned long flags;
-	int rc;
 
 	spin_lock_irqsave(&data->txlock, flags);
-	rc = mii_ethtool_get_link_ksettings(&data->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&data->mii_if, cmd);
 	spin_unlock_irqrestore(&data->txlock, flags);
 
-	return rc;
+	return 0;
 }
 
 static int tsi108_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 4cf41f779d0e..acd29d60174a 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -2307,13 +2307,12 @@ static int netdev_get_link_ksettings(struct net_device *dev,
 				     struct ethtool_link_ksettings *cmd)
 {
 	struct rhine_private *rp = netdev_priv(dev);
-	int rc;
 
 	mutex_lock(&rp->task_lock);
-	rc = mii_ethtool_get_link_ksettings(&rp->mii_if, cmd);
+	mii_ethtool_get_link_ksettings(&rp->mii_if, cmd);
 	mutex_unlock(&rp->task_lock);
 
-	return rc;
+	return 0;
 }
 
 static int netdev_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index 6d953c53eed6..44612122338b 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -141,11 +141,9 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
  *
  * The @cmd parameter is expected to have been cleared before calling
  * mii_ethtool_get_link_ksettings().
- *
- * Returns 0 for success, negative on error.
  */
-int mii_ethtool_get_link_ksettings(struct mii_if_info *mii,
-				   struct ethtool_link_ksettings *cmd)
+void mii_ethtool_get_link_ksettings(struct mii_if_info *mii,
+				    struct ethtool_link_ksettings *cmd)
 {
 	struct net_device *dev = mii->dev;
 	u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0;
@@ -227,8 +225,6 @@ int mii_ethtool_get_link_ksettings(struct mii_if_info *mii,
 						lp_advertising);
 
 	/* ignore maxtxpkt, maxrxpkt for now */
-
-	return 0;
 }
 
 /**
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 51cf60092a18..793ce900dffa 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -624,7 +624,10 @@ static int ax88179_get_link_ksettings(struct net_device *net,
 				      struct ethtool_link_ksettings *cmd)
 {
 	struct usbnet *dev = netdev_priv(net);
-	return mii_ethtool_get_link_ksettings(&dev->mii, cmd);
+
+	mii_ethtool_get_link_ksettings(&dev->mii, cmd);
+
+	return 0;
 }
 
 static int ax88179_set_link_ksettings(struct net_device *net,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index e902df9595b9..fd31fab2a9da 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3835,7 +3835,7 @@ int rtl8152_get_link_ksettings(struct net_device *netdev,
 
 	mutex_lock(&tp->control);
 
-	ret = mii_ethtool_get_link_ksettings(&tp->mii, cmd);
+	mii_ethtool_get_link_ksettings(&tp->mii, cmd);
 
 	mutex_unlock(&tp->control);
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 79048e72c1bd..6510e5cc1817 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -956,7 +956,9 @@ int usbnet_get_link_ksettings(struct net_device *net,
 	if (!dev->mii.mdio_read)
 		return -EOPNOTSUPP;
 
-	return mii_ethtool_get_link_ksettings(&dev->mii, cmd);
+	mii_ethtool_get_link_ksettings(&dev->mii, cmd);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings);
 
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 1629a0c32679..e870bfa6abfe 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -31,7 +31,7 @@ struct mii_if_info {
 extern int mii_link_ok (struct mii_if_info *mii);
 extern int mii_nway_restart (struct mii_if_info *mii);
 extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
-extern int mii_ethtool_get_link_ksettings(
+extern void mii_ethtool_get_link_ksettings(
 	struct mii_if_info *mii, struct ethtool_link_ksettings *cmd);
 extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
 extern int mii_ethtool_set_link_ksettings(
-- 
cgit v1.2.3


From 3fabd628d5ea24b02ddb1230ffca1df0f779f84e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 2 Jun 2017 13:52:02 +0000
Subject: efi/capsule-loader: Redirect calls to efi_capsule_setup_info() via
 weak alias

To allow platform specific code to hook into the capsule loading
routines, indirect calls to efi_capsule_setup_info() via a weak alias
of __efi_capsule_setup_info(), allowing platforms to redefine the former
but still use the latter.

Tested-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20170602135207.21708-9-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/capsule-loader.c | 56 +++++++++++++++++------------------
 include/linux/efi.h                   | 12 ++++++++
 2 files changed, 39 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index 2357bcdcb44d..cbc3526953d5 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -20,16 +20,6 @@
 
 #define NO_FURTHER_WRITE_ACTION -1
 
-struct capsule_info {
-	efi_capsule_header_t	header;
-	int			reset_type;
-	long			index;
-	size_t			count;
-	size_t			total_size;
-	struct page		**pages;
-	size_t			page_bytes_remain;
-};
-
 /**
  * efi_free_all_buff_pages - free all previous allocated buffer pages
  * @cap_info: pointer to current instance of capsule_info structure
@@ -46,28 +36,13 @@ static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
 
-/**
- * efi_capsule_setup_info - obtain the efi capsule header in the binary and
- *			    setup capsule_info structure
- * @cap_info: pointer to current instance of capsule_info structure
- * @kbuff: a mapped first page buffer pointer
- * @hdr_bytes: the total received number of bytes for efi header
- **/
-static int efi_capsule_setup_info(struct capsule_info *cap_info,
-				  void *kbuff, size_t hdr_bytes)
+int __efi_capsule_setup_info(struct capsule_info *cap_info)
 {
 	size_t pages_needed;
 	int ret;
 	void *temp_page;
 
-	/* Only process data block that is larger than efi header size */
-	if (hdr_bytes < sizeof(efi_capsule_header_t))
-		return 0;
-
-	/* Reset back to the correct offset of header */
-	kbuff -= cap_info->count;
-	memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
-	pages_needed = ALIGN(cap_info->header.imagesize, PAGE_SIZE) / PAGE_SIZE;
+	pages_needed = ALIGN(cap_info->total_size, PAGE_SIZE) / PAGE_SIZE;
 
 	if (pages_needed == 0) {
 		pr_err("invalid capsule size");
@@ -84,7 +59,6 @@ static int efi_capsule_setup_info(struct capsule_info *cap_info,
 		return ret;
 	}
 
-	cap_info->total_size = cap_info->header.imagesize;
 	temp_page = krealloc(cap_info->pages,
 			     pages_needed * sizeof(void *),
 			     GFP_KERNEL | __GFP_ZERO);
@@ -96,6 +70,30 @@ static int efi_capsule_setup_info(struct capsule_info *cap_info,
 	return 0;
 }
 
+/**
+ * efi_capsule_setup_info - obtain the efi capsule header in the binary and
+ *			    setup capsule_info structure
+ * @cap_info: pointer to current instance of capsule_info structure
+ * @kbuff: a mapped first page buffer pointer
+ * @hdr_bytes: the total received number of bytes for efi header
+ *
+ * Platforms with non-standard capsule update mechanisms can override
+ * this __weak function so they can perform any required capsule
+ * image munging. See quark_quirk_function() for an example.
+ **/
+int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+				  size_t hdr_bytes)
+{
+	/* Only process data block that is larger than efi header size */
+	if (hdr_bytes < sizeof(efi_capsule_header_t))
+		return 0;
+
+	memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+	cap_info->total_size = cap_info->header.imagesize;
+
+	return __efi_capsule_setup_info(cap_info);
+}
+
 /**
  * efi_capsule_submit_update - invoke the efi_capsule_update API once binary
  *			       upload done
@@ -182,7 +180,7 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 
 	/* Setup capsule binary info structure */
 	if (cap_info->header.headersize == 0) {
-		ret = efi_capsule_setup_info(cap_info, kbuff,
+		ret = efi_capsule_setup_info(cap_info, kbuff - cap_info->count,
 					     cap_info->count + write_byte);
 		if (ret)
 			goto fail_unmap;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec36f42a2add..a7379a2b5680 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -137,6 +137,18 @@ struct efi_boot_memmap {
 #define EFI_CAPSULE_POPULATE_SYSTEM_TABLE	0x00020000
 #define EFI_CAPSULE_INITIATE_RESET		0x00040000
 
+struct capsule_info {
+	efi_capsule_header_t	header;
+	int			reset_type;
+	long			index;
+	size_t			count;
+	size_t			total_size;
+	struct page		**pages;
+	size_t			page_bytes_remain;
+};
+
+int __efi_capsule_setup_info(struct capsule_info *cap_info);
+
 /*
  * Allocation types for calls to boottime->allocate_pages.
  */
-- 
cgit v1.2.3


From 2a457fb31df62c6b482f78e4f74aaed99271f44d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 2 Jun 2017 13:52:03 +0000
Subject: efi/capsule-loader: Use page addresses rather than struct page
 pointers

To give some leeway to code that handles non-standard capsule headers,
let's keep an array of page addresses rather than struct page pointers.

This gives special implementations of efi_capsule_setup_info() the
opportunity to mangle the payload a bit before it is presented to the
firmware, without putting any knowledge of the nature of such quirks
into the generic code.

Tested-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20170602135207.21708-10-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/capsule-loader.c | 12 ++++++++----
 drivers/firmware/efi/capsule.c        |  7 ++++---
 include/linux/efi.h                   |  4 ++--
 3 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index cbc3526953d5..ec8ac5c4dd84 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -20,6 +20,10 @@
 
 #define NO_FURTHER_WRITE_ACTION -1
 
+#ifndef phys_to_page
+#define phys_to_page(x)		pfn_to_page((x) >> PAGE_SHIFT)
+#endif
+
 /**
  * efi_free_all_buff_pages - free all previous allocated buffer pages
  * @cap_info: pointer to current instance of capsule_info structure
@@ -31,7 +35,7 @@
 static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 {
 	while (cap_info->index > 0)
-		__free_page(cap_info->pages[--cap_info->index]);
+		__free_page(phys_to_page(cap_info->pages[--cap_info->index]));
 
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
@@ -161,12 +165,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 			goto failed;
 		}
 
-		cap_info->pages[cap_info->index++] = page;
+		cap_info->pages[cap_info->index++] = page_to_phys(page);
 		cap_info->page_bytes_remain = PAGE_SIZE;
+	} else {
+		page = phys_to_page(cap_info->pages[cap_info->index - 1]);
 	}
 
-	page = cap_info->pages[cap_info->index - 1];
-
 	kbuff = kmap(page);
 	kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
 
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index e603ccf39d80..901b9306bf94 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -214,7 +214,7 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
  *
  * Return 0 on success, a converted EFI status code on failure.
  */
-int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages)
 {
 	u32 imagesize = capsule->imagesize;
 	efi_guid_t guid = capsule->guid;
@@ -249,10 +249,11 @@ int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
 		sglist = kmap(sg_pages[i]);
 
 		for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
-			u64 sz = min_t(u64, imagesize, PAGE_SIZE);
+			u64 sz = min_t(u64, imagesize,
+				       PAGE_SIZE - (u64)*pages % PAGE_SIZE);
 
 			sglist[j].length = sz;
-			sglist[j].data = page_to_phys(*pages++);
+			sglist[j].data = *pages++;
 
 			imagesize -= sz;
 			count--;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a7379a2b5680..8269bcb8ccf7 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -143,7 +143,7 @@ struct capsule_info {
 	long			index;
 	size_t			count;
 	size_t			total_size;
-	struct page		**pages;
+	phys_addr_t		*pages;
 	size_t			page_bytes_remain;
 };
 
@@ -1415,7 +1415,7 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
 				 size_t size, int *reset);
 
 extern int efi_capsule_update(efi_capsule_header_t *capsule,
-			      struct page **pages);
+			      phys_addr_t *pages);
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
-- 
cgit v1.2.3


From 41c8bdb3ab10c1fefcac61d081e2fd9aaf8694b8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 5 Jun 2017 19:40:42 +0300
Subject: acpi, nfit: Switch to use new generic UUID API

There are new types and helpers that are supposed to be used in new code.

As a preparation to get rid of legacy types and API functions do
the conversion here.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/acpi/nfit/core.c | 54 ++++++++++++++++++++++++------------------------
 drivers/acpi/nfit/nfit.h |  3 +--
 include/linux/acpi.h     |  1 +
 3 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 656acb5d7166..d9b39d0e9d6a 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -74,11 +74,11 @@ struct nfit_table_prev {
 	struct list_head flushes;
 };
 
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
+static guid_t nfit_uuid[NFIT_UUID_MAX];
 
-const u8 *to_nfit_uuid(enum nfit_uuids id)
+const guid_t *to_nfit_uuid(enum nfit_uuids id)
 {
-	return nfit_uuid[id];
+	return &nfit_uuid[id];
 }
 EXPORT_SYMBOL(to_nfit_uuid);
 
@@ -222,7 +222,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	u32 offset, fw_status = 0;
 	acpi_handle handle;
 	unsigned int func;
-	const u8 *uuid;
+	const guid_t *guid;
 	int rc, i;
 
 	func = cmd;
@@ -245,7 +245,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_mask = nvdimm_cmd_mask(nvdimm);
 		dsm_mask = nfit_mem->dsm_mask;
 		desc = nd_cmd_dimm_desc(cmd);
-		uuid = to_nfit_uuid(nfit_mem->family);
+		guid = to_nfit_uuid(nfit_mem->family);
 		handle = adev->handle;
 	} else {
 		struct acpi_device *adev = to_acpi_dev(acpi_desc);
@@ -254,7 +254,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_mask = nd_desc->cmd_mask;
 		dsm_mask = cmd_mask;
 		desc = nd_cmd_bus_desc(cmd);
-		uuid = to_nfit_uuid(NFIT_DEV_BUS);
+		guid = to_nfit_uuid(NFIT_DEV_BUS);
 		handle = adev->handle;
 		dimm_name = "bus";
 	}
@@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
-	out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+	out_obj = acpi_evaluate_dsm(handle, guid.b, 1, func, &in_obj);
 	if (!out_obj) {
 		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
 				cmd_name);
@@ -409,7 +409,7 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
 	int i;
 
 	for (i = 0; i < NFIT_UUID_MAX; i++)
-		if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+		if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
 			return i;
 	return -1;
 }
@@ -1415,7 +1415,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
 	unsigned long dsm_mask;
-	const u8 *uuid;
+	const guid_t *guid;
 	int i;
 	int family = -1;
 
@@ -1444,7 +1444,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	/*
 	 * Until standardization materializes we need to consider 4
 	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this uuid.
+	 * tells us if any commands are reachable through this GUID.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
 		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1474,9 +1474,9 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 	}
 
-	uuid = to_nfit_uuid(nfit_mem->family);
+	guid = to_nfit_uuid(nfit_mem->family);
 	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+		if (acpi_check_dsm(adev_dimm->handle, guid.b, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
 	return 0;
@@ -1611,7 +1611,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-	const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+	const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
 	struct acpi_device *adev;
 	int i;
 
@@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 		return;
 
 	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+		if (acpi_check_dsm(adev->handle, guid.b, 1, 1ULL << i))
 			set_bit(i, &nd_desc->cmd_mask);
 }
 
@@ -3051,19 +3051,19 @@ static __init int nfit_init(void)
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
 
-	acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
-	acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
-	acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
-	acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
-	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
-	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
-	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
-	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
-	acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+	guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
+	guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
+	guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]);
+	guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]);
+	guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]);
+	guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]);
+	guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]);
+	guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]);
+	guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]);
+	guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]);
+	guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+	guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+	guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
 	if (!nfit_wq)
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 58fb7d68e04a..29bdd959517f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -18,7 +18,6 @@
 #include <linux/libnvdimm.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
-#include <linux/uuid.h>
 #include <linux/acpi.h>
 #include <acpi/acuuid.h>
 
@@ -237,7 +236,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
 }
 
-const u8 *to_nfit_uuid(enum nfit_uuids id);
+const guid_t *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_shutdown(void *data);
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 137e4a3d89c5..b0e1636ca5c3 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -39,6 +39,7 @@
 #include <linux/dynamic_debug.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/uuid.h>
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
-- 
cgit v1.2.3


From 1d51d5f3907abf86ef0521971bcddf5853564263 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 4 Jun 2017 14:42:22 +0200
Subject: libata: clarify log page naming / grouping

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c | 10 +++++-----
 include/linux/ata.h       | 10 +++++++---
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index e79085809791..bf6b40335598 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2226,7 +2226,7 @@ static void ata_dev_config_ncq_prio(struct ata_device *dev)
 	}
 
 	err_mask = ata_read_log_page(dev,
-				     ATA_LOG_SATA_ID_DEV_DATA,
+				     ATA_LOG_IDENTIFY_DEVICE,
 				     ATA_LOG_SATA_SETTINGS,
 				     ap->sector_buf,
 				     1);
@@ -2346,7 +2346,7 @@ static void ata_dev_config_zac(struct ata_device *dev)
 	if (!(dev->flags & ATA_DFLAG_ZAC))
 		return;
 
-	if (!ata_log_supported(dev, ATA_LOG_SATA_ID_DEV_DATA)) {
+	if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE)) {
 		ata_dev_warn(dev, "ATA Identify Device Log not supported\n");
 		return;
 	}
@@ -2355,7 +2355,7 @@ static void ata_dev_config_zac(struct ata_device *dev)
 	 * Read IDENTIFY DEVICE data log, page 0, to figure out
 	 * if page 9 is supported.
 	 */
-	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_ID_DEV_DATA, 0,
+	err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, 0,
 				     identify_buf, 1);
 	if (err_mask) {
 		ata_dev_info(dev,
@@ -2379,7 +2379,7 @@ static void ata_dev_config_zac(struct ata_device *dev)
 	/*
 	 * Read IDENTIFY DEVICE data log, page 9 (Zoned-device information)
 	 */
-	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_ID_DEV_DATA,
+	err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE,
 				     ATA_LOG_ZONED_INFORMATION,
 				     identify_buf, 1);
 	if (!err_mask) {
@@ -2608,7 +2608,7 @@ int ata_dev_configure(struct ata_device *dev)
 
 			dev->flags |= ATA_DFLAG_DEVSLP;
 			err_mask = ata_read_log_page(dev,
-						     ATA_LOG_SATA_ID_DEV_DATA,
+						     ATA_LOG_IDENTIFY_DEVICE,
 						     ATA_LOG_SATA_SETTINGS,
 						     sata_setting,
 						     1);
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 73fe18edfdaf..c14bdcf31fdb 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -336,11 +336,15 @@ enum {
 	/* READ_LOG_EXT pages */
 	ATA_LOG_DIRECTORY	= 0x0,
 	ATA_LOG_SATA_NCQ	= 0x10,
-	ATA_LOG_NCQ_NON_DATA	  = 0x12,
-	ATA_LOG_NCQ_SEND_RECV	  = 0x13,
-	ATA_LOG_SATA_ID_DEV_DATA  = 0x30,
+	ATA_LOG_NCQ_NON_DATA	= 0x12,
+	ATA_LOG_NCQ_SEND_RECV	= 0x13,
+	ATA_LOG_IDENTIFY_DEVICE	= 0x30,
+
+	/* Identify device log pages: */
 	ATA_LOG_SATA_SETTINGS	  = 0x08,
 	ATA_LOG_ZONED_INFORMATION = 0x09,
+
+	/* Identify device SATA settings log:*/
 	ATA_LOG_DEVSLP_OFFSET	  = 0x30,
 	ATA_LOG_DEVSLP_SIZE	  = 0x08,
 	ATA_LOG_DEVSLP_MDAT	  = 0x00,
-- 
cgit v1.2.3


From 818831c8b22f75353f59a63a484e20736c0567c9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 4 Jun 2017 14:42:24 +0200
Subject: libata: implement SECURITY PROTOCOL IN/OUT

This allows us to use the generic OPAL code with ATA devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c | 32 ++++++++++++++++++++
 drivers/ata/libata-scsi.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ata.h       |  1 +
 include/linux/libata.h    |  1 +
 4 files changed, 110 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 61c97818568c..a846c29f3248 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2405,6 +2405,37 @@ static void ata_dev_config_zac(struct ata_device *dev)
 	}
 }
 
+static void ata_dev_config_trusted(struct ata_device *dev)
+{
+	struct ata_port *ap = dev->link->ap;
+	u64 trusted_cap;
+	unsigned int err;
+
+	if (!ata_identify_page_supported(dev, ATA_LOG_SECURITY)) {
+		ata_dev_warn(dev,
+			     "Security Log not supported\n");
+		return;
+	}
+
+	err = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, ATA_LOG_SECURITY,
+			ap->sector_buf, 1);
+	if (err) {
+		ata_dev_dbg(dev,
+			    "failed to read Security Log, Emask 0x%x\n", err);
+		return;
+	}
+
+	trusted_cap = get_unaligned_le64(&ap->sector_buf[40]);
+	if (!(trusted_cap & (1ULL << 63))) {
+		ata_dev_dbg(dev,
+			    "Trusted Computing capability qword not valid!\n");
+		return;
+	}
+
+	if (trusted_cap & (1 << 0))
+		dev->flags |= ATA_DFLAG_TRUSTED;
+}
+
 /**
  *	ata_dev_configure - Configure the specified ATA/ATAPI device
  *	@dev: Target device to configure
@@ -2629,6 +2660,7 @@ int ata_dev_configure(struct ata_device *dev)
 		}
 		ata_dev_config_sense_reporting(dev);
 		ata_dev_config_zac(dev);
+		ata_dev_config_trusted(dev);
 		dev->cdb_len = 16;
 	}
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b0866f040d1f..8dc84fd77369 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3564,6 +3564,11 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
 		    dev->class == ATA_DEV_ZAC)
 			supported = 3;
 		break;
+	case SECURITY_PROTOCOL_IN:
+	case SECURITY_PROTOCOL_OUT:
+		if (dev->flags & ATA_DFLAG_TRUSTED)
+			supported = 3;
+		break;
 	default:
 		break;
 	}
@@ -4068,6 +4073,71 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc)
 	return 1;
 }
 
+static u8 ata_scsi_trusted_op(u32 len, bool send, bool dma)
+{
+	if (len == 0)
+		return ATA_CMD_TRUSTED_NONDATA;
+	else if (send)
+		return dma ? ATA_CMD_TRUSTED_SND_DMA : ATA_CMD_TRUSTED_SND;
+	else
+		return dma ? ATA_CMD_TRUSTED_RCV_DMA : ATA_CMD_TRUSTED_RCV;
+}
+
+static unsigned int ata_scsi_security_inout_xlat(struct ata_queued_cmd *qc)
+{
+	struct scsi_cmnd *scmd = qc->scsicmd;
+	const u8 *cdb = scmd->cmnd;
+	struct ata_taskfile *tf = &qc->tf;
+	u8 secp = cdb[1];
+	bool send = (cdb[0] == SECURITY_PROTOCOL_OUT);
+	u16 spsp = get_unaligned_be16(&cdb[2]);
+	u32 len = get_unaligned_be32(&cdb[6]);
+	bool dma = !(qc->dev->flags & ATA_DFLAG_PIO);
+
+	/*
+	 * We don't support the ATA "security" protocol.
+	 */
+	if (secp == 0xef) {
+		ata_scsi_set_invalid_field(qc->dev, scmd, 1, 0);
+		return 1;
+	}
+
+	if (cdb[4] & 7) { /* INC_512 */
+		if (len > 0xffff) {
+			ata_scsi_set_invalid_field(qc->dev, scmd, 6, 0);
+			return 1;
+		}
+	} else {
+		if (len > 0x01fffe00) {
+			ata_scsi_set_invalid_field(qc->dev, scmd, 6, 0);
+			return 1;
+		}
+
+		/* convert to the sector-based ATA addressing */
+		len = (len + 511) / 512;
+	}
+
+	tf->protocol = dma ? ATA_PROT_DMA : ATA_PROT_PIO;
+	tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR | ATA_TFLAG_LBA;
+	if (send)
+		tf->flags |= ATA_TFLAG_WRITE;
+	tf->command = ata_scsi_trusted_op(len, send, dma);
+	tf->feature = secp;
+	tf->lbam = spsp & 0xff;
+	tf->lbah = spsp >> 8;
+
+	if (len) {
+		tf->nsect = len & 0xff;
+		tf->lbal = len >> 8;
+	} else {
+		if (!send)
+			tf->lbah = (1 << 7);
+	}
+
+	ata_qc_set_pc_nbytes(qc);
+	return 0;
+}
+
 /**
  *	ata_get_xlat_func - check if SCSI to ATA translation is possible
  *	@dev: ATA device
@@ -4119,6 +4189,12 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
 	case ZBC_OUT:
 		return ata_scsi_zbc_out_xlat;
 
+	case SECURITY_PROTOCOL_IN:
+	case SECURITY_PROTOCOL_OUT:
+		if (!(dev->flags & ATA_DFLAG_TRUSTED))
+			break;
+		return ata_scsi_security_inout_xlat;
+
 	case START_STOP:
 		return ata_scsi_start_stop_xlat;
 	}
diff --git a/include/linux/ata.h b/include/linux/ata.h
index c14bdcf31fdb..e65ae4b2ed48 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -341,6 +341,7 @@ enum {
 	ATA_LOG_IDENTIFY_DEVICE	= 0x30,
 
 	/* Identify device log pages: */
+	ATA_LOG_SECURITY	  = 0x06,
 	ATA_LOG_SATA_SETTINGS	  = 0x08,
 	ATA_LOG_ZONED_INFORMATION = 0x09,
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9e6633235ad7..55de3da58b1c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -156,6 +156,7 @@ enum {
 	ATA_DFLAG_ACPI_PENDING	= (1 << 5), /* ACPI resume action pending */
 	ATA_DFLAG_ACPI_FAILED	= (1 << 6), /* ACPI on devcfg has failed */
 	ATA_DFLAG_AN		= (1 << 7), /* AN configured */
+	ATA_DFLAG_TRUSTED	= (1 << 8), /* device supports trusted send/recv */
 	ATA_DFLAG_DMADIR	= (1 << 10), /* device requires DMADIR */
 	ATA_DFLAG_CFG_MASK	= (1 << 12) - 1,
 
-- 
cgit v1.2.3


From 844af950da946cfab227a04b950614da04cb6275 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 24 Nov 2015 19:49:23 -0800
Subject: platform/x86: wmi: Turn WMI into a bus driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

WMI is logically a bus: the WMI driver binds to an ACPI node (or
more than one), and each instance of the WMI driver enumerates its
children and hopes that drivers will attach to the children that are
useful.

This patch gives WMI a driver model bus type and the ability to
match to drivers. The bus itself is a device in the new "wmi_bus"
class, and all of the individual WMI devices are slotted into the
device hierarchy correctly.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Cc: Pali Rohár <pali.rohar@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/wmi.c | 197 +++++++++++++++++++++++++++++++++++----------
 include/linux/wmi.h        |  47 +++++++++++
 2 files changed, 201 insertions(+), 43 deletions(-)
 create mode 100644 include/linux/wmi.h

(limited to 'include/linux')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index faaa9a7c9c2e..f06b7c00339d 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -37,6 +37,7 @@
 #include <linux/acpi.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/wmi.h>
 #include <linux/uuid.h>
 
 ACPI_MODULE_NAME("wmi");
@@ -44,8 +45,6 @@ MODULE_AUTHOR("Carlos Corbacho");
 MODULE_DESCRIPTION("ACPI-WMI Mapping Driver");
 MODULE_LICENSE("GPL");
 
-#define ACPI_WMI_CLASS "wmi"
-
 static LIST_HEAD(wmi_block_list);
 
 struct guid_block {
@@ -62,12 +61,12 @@ struct guid_block {
 };
 
 struct wmi_block {
+	struct wmi_device dev;
 	struct list_head list;
 	struct guid_block gblock;
 	struct acpi_device *acpi_device;
 	wmi_notify_handler handler;
 	void *handler_data;
-	struct device dev;
 };
 
 
@@ -102,8 +101,8 @@ static const struct acpi_device_id wmi_device_ids[] = {
 MODULE_DEVICE_TABLE(acpi, wmi_device_ids);
 
 static struct acpi_driver acpi_wmi_driver = {
-	.name = "wmi",
-	.class = ACPI_WMI_CLASS,
+	.name = "acpi-wmi",
+	.owner = THIS_MODULE,
 	.ids = wmi_device_ids,
 	.ops = {
 		.add = acpi_wmi_add,
@@ -545,77 +544,146 @@ bool wmi_has_guid(const char *guid_string)
 }
 EXPORT_SYMBOL_GPL(wmi_has_guid);
 
+static struct wmi_block *dev_to_wblock(struct device *dev)
+{
+	return container_of(dev, struct wmi_block, dev.dev);
+}
+
+static struct wmi_device *dev_to_wdev(struct device *dev)
+{
+	return container_of(dev, struct wmi_device, dev);
+}
+
 /*
  * sysfs interface
  */
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct wmi_block *wblock;
-
-	wblock = dev_get_drvdata(dev);
-	if (!wblock) {
-		strcat(buf, "\n");
-		return strlen(buf);
-	}
+	struct wmi_block *wblock = dev_to_wblock(dev);
 
 	return sprintf(buf, "wmi:%pUL\n", wblock->gblock.guid);
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t guid_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct wmi_block *wblock = dev_to_wblock(dev);
+
+	return sprintf(buf, "%pUL\n", wblock->gblock.guid);
+}
+static DEVICE_ATTR_RO(guid);
+
 static struct attribute *wmi_attrs[] = {
 	&dev_attr_modalias.attr,
+	&dev_attr_guid.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(wmi);
 
 static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-	char guid_string[37];
-
-	struct wmi_block *wblock;
+	struct wmi_block *wblock = dev_to_wblock(dev);
 
-	if (add_uevent_var(env, "MODALIAS="))
+	if (add_uevent_var(env, "MODALIAS=wmi:%pUL", wblock->gblock.guid))
 		return -ENOMEM;
 
-	wblock = dev_get_drvdata(dev);
-	if (!wblock)
+	if (add_uevent_var(env, "WMI_GUID=%pUL", wblock->gblock.guid))
 		return -ENOMEM;
 
-	sprintf(guid_string, "%pUL", wblock->gblock.guid);
+	return 0;
+}
+
+static void wmi_dev_release(struct device *dev)
+{
+	struct wmi_block *wblock = dev_to_wblock(dev);
+
+	kfree(wblock);
+}
+
+static int wmi_dev_match(struct device *dev, struct device_driver *driver)
+{
+	struct wmi_driver *wmi_driver =
+		container_of(driver, struct wmi_driver, driver);
+	struct wmi_block *wblock = dev_to_wblock(dev);
+	const struct wmi_device_id *id = wmi_driver->id_table;
 
-	strcpy(&env->buf[env->buflen - 1], "wmi:");
-	memcpy(&env->buf[env->buflen - 1 + 4], guid_string, 36);
-	env->buflen += 40;
+	while (id->guid_string) {
+		uuid_le driver_guid;
+
+		if (WARN_ON(uuid_le_to_bin(id->guid_string, &driver_guid)))
+			continue;
+		if (!memcmp(&driver_guid, wblock->gblock.guid, 16))
+			return 1;
+
+		id++;
+	}
 
 	return 0;
 }
 
-static void wmi_dev_free(struct device *dev)
+static int wmi_dev_probe(struct device *dev)
 {
-	struct wmi_block *wmi_block = container_of(dev, struct wmi_block, dev);
+	struct wmi_block *wblock = dev_to_wblock(dev);
+	struct wmi_driver *wdriver =
+		container_of(dev->driver, struct wmi_driver, driver);
+	int ret = 0;
+
+	if (ACPI_FAILURE(wmi_method_enable(wblock, 1)))
+		dev_warn(dev, "failed to enable device -- probing anyway\n");
+
+	if (wdriver->probe) {
+		ret = wdriver->probe(dev_to_wdev(dev));
+		if (ret != 0 && ACPI_FAILURE(wmi_method_enable(wblock, 0)))
+			dev_warn(dev, "failed to disable device\n");
+	}
+
+	return ret;
+}
+
+static int wmi_dev_remove(struct device *dev)
+{
+	struct wmi_block *wblock = dev_to_wblock(dev);
+	struct wmi_driver *wdriver =
+		container_of(dev->driver, struct wmi_driver, driver);
+	int ret = 0;
+
+	if (wdriver->remove)
+		ret = wdriver->remove(dev_to_wdev(dev));
+
+	if (ACPI_FAILURE(wmi_method_enable(wblock, 0)))
+		dev_warn(dev, "failed to disable device\n");
 
-	kfree(wmi_block);
+	return ret;
 }
 
-static struct class wmi_class = {
+static struct class wmi_bus_class = {
+	.name = "wmi_bus",
+};
+
+static struct bus_type wmi_bus_type = {
 	.name = "wmi",
-	.dev_release = wmi_dev_free,
-	.dev_uevent = wmi_dev_uevent,
 	.dev_groups = wmi_groups,
+	.match = wmi_dev_match,
+	.uevent = wmi_dev_uevent,
+	.probe = wmi_dev_probe,
+	.remove = wmi_dev_remove,
 };
 
-static int wmi_create_device(const struct guid_block *gblock,
+static int wmi_create_device(struct device *wmi_bus_dev,
+			     const struct guid_block *gblock,
 			     struct wmi_block *wblock,
 			     struct acpi_device *device)
 {
-	wblock->dev.class = &wmi_class;
+	wblock->dev.dev.bus = &wmi_bus_type;
+	wblock->dev.dev.parent = wmi_bus_dev;
 
-	dev_set_name(&wblock->dev, "%pUL", gblock->guid);
+	dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid);
 
-	dev_set_drvdata(&wblock->dev, wblock);
+	wblock->dev.dev.release = wmi_dev_release;
 
-	return device_register(&wblock->dev);
+	return device_register(&wblock->dev.dev);
 }
 
 static void wmi_free_devices(struct acpi_device *device)
@@ -626,8 +694,8 @@ static void wmi_free_devices(struct acpi_device *device)
 	list_for_each_entry_safe(wblock, next, &wmi_block_list, list) {
 		if (wblock->acpi_device == device) {
 			list_del(&wblock->list);
-			if (wblock->dev.class)
-				device_unregister(&wblock->dev);
+			if (wblock->dev.dev.bus)
+				device_unregister(&wblock->dev.dev);
 			else
 				kfree(wblock);
 		}
@@ -659,7 +727,7 @@ static bool guid_already_parsed(struct acpi_device *device,
 /*
  * Parse the _WDG method for the GUID data blocks
  */
-static int parse_wdg(struct acpi_device *device)
+static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
 {
 	struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL};
 	union acpi_object *obj;
@@ -703,7 +771,8 @@ static int parse_wdg(struct acpi_device *device)
 		  for device creation.
 		*/
 		if (!guid_already_parsed(device, gblock[i].guid)) {
-			retval = wmi_create_device(&gblock[i], wblock, device);
+			retval = wmi_create_device(wmi_bus_dev, &gblock[i],
+						   wblock, device);
 			if (retval) {
 				wmi_free_devices(device);
 				goto out_free_pointer;
@@ -803,12 +872,15 @@ static int acpi_wmi_remove(struct acpi_device *device)
 	acpi_remove_address_space_handler(device->handle,
 				ACPI_ADR_SPACE_EC, &acpi_wmi_ec_space_handler);
 	wmi_free_devices(device);
+	device_unregister((struct device *)acpi_driver_data(device));
+	device->driver_data = NULL;
 
 	return 0;
 }
 
 static int acpi_wmi_add(struct acpi_device *device)
 {
+	struct device *wmi_bus_dev;
 	acpi_status status;
 	int error;
 
@@ -821,14 +893,25 @@ static int acpi_wmi_add(struct acpi_device *device)
 		return -ENODEV;
 	}
 
-	error = parse_wdg(device);
+	wmi_bus_dev = device_create(&wmi_bus_class, &device->dev, MKDEV(0, 0),
+				    NULL, "wmi_bus-%s", dev_name(&device->dev));
+	if (IS_ERR(wmi_bus_dev)) {
+		error = PTR_ERR(wmi_bus_dev);
+		goto err_remove_handler;
+	}
+	device->driver_data = wmi_bus_dev;
+
+	error = parse_wdg(wmi_bus_dev, device);
 	if (error) {
 		pr_err("Failed to parse WDG method\n");
-		goto err_remove_handler;
+		goto err_remove_busdev;
 	}
 
 	return 0;
 
+err_remove_busdev:
+	device_unregister(wmi_bus_dev);
+
 err_remove_handler:
 	acpi_remove_address_space_handler(device->handle,
 					  ACPI_ADR_SPACE_EC,
@@ -837,6 +920,22 @@ err_remove_handler:
 	return error;
 }
 
+int __must_check __wmi_driver_register(struct wmi_driver *driver,
+				       struct module *owner)
+{
+	driver->driver.owner = owner;
+	driver->driver.bus = &wmi_bus_type;
+
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL(__wmi_driver_register);
+
+void wmi_driver_unregister(struct wmi_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL(wmi_driver_unregister);
+
 static int __init acpi_wmi_init(void)
 {
 	int error;
@@ -844,24 +943,36 @@ static int __init acpi_wmi_init(void)
 	if (acpi_disabled)
 		return -ENODEV;
 
-	error = class_register(&wmi_class);
+	error = class_register(&wmi_bus_class);
 	if (error)
 		return error;
 
+	error = bus_register(&wmi_bus_type);
+	if (error)
+		goto err_unreg_class;
+
 	error = acpi_bus_register_driver(&acpi_wmi_driver);
 	if (error) {
 		pr_err("Error loading mapper\n");
-		class_unregister(&wmi_class);
-		return error;
+		goto err_unreg_bus;
 	}
 
 	return 0;
+
+err_unreg_class:
+	class_unregister(&wmi_bus_class);
+
+err_unreg_bus:
+	bus_unregister(&wmi_bus_type);
+
+	return error;
 }
 
 static void __exit acpi_wmi_exit(void)
 {
 	acpi_bus_unregister_driver(&acpi_wmi_driver);
-	class_unregister(&wmi_class);
+	class_unregister(&wmi_bus_class);
+	bus_unregister(&wmi_bus_type);
 }
 
 subsys_initcall(acpi_wmi_init);
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
new file mode 100644
index 000000000000..29ed34b4dae1
--- /dev/null
+++ b/include/linux/wmi.h
@@ -0,0 +1,47 @@
+/*
+ * wmi.h - ACPI WMI interface
+ *
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _LINUX_WMI_H
+#define _LINUX_WMI_H
+
+#include <linux/device.h>
+#include <linux/acpi.h>
+
+struct wmi_device {
+	struct device dev;
+};
+
+struct wmi_device_id {
+	const char *guid_string;
+};
+
+struct wmi_driver {
+	struct device_driver driver;
+	const struct wmi_device_id *id_table;
+
+	int (*probe)(struct wmi_device *wdev);
+	int (*remove)(struct wmi_device *wdev);
+};
+
+extern int __must_check __wmi_driver_register(struct wmi_driver *driver,
+					      struct module *owner);
+extern void wmi_driver_unregister(struct wmi_driver *driver);
+#define wmi_driver_register(driver) __wmi_driver_register((driver), THIS_MODULE)
+
+#define module_wmi_driver(__wmi_driver) \
+	module_driver(__wmi_driver, wmi_driver_register, \
+		      wmi_driver_unregister)
+
+#endif
-- 
cgit v1.2.3


From d4fc91adfde11c41295d1cf001bdbec5d6879016 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 25 Nov 2015 14:03:43 -0800
Subject: platform/x86: wmi: Probe data objects for read and write capabilities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Dell XPS 13 9350 has one RW data object, one RO data object, and one
totally inaccessible data object. Check for the existence of the
accessor methods and report in sysfs.

The docs also permit WQxx getters for single-instance objects to
take no parameters. Probe for that as well to avoid ACPICA warnings
about mismatched signatures.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Cc: Pali Rohár <pali.rohar@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/wmi.c | 101 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/wmi.h        |   6 +++
 2 files changed, 103 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 33a3609d54db..651693a5e0ea 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -67,6 +67,8 @@ struct wmi_block {
 	struct acpi_device *acpi_device;
 	wmi_notify_handler handler;
 	void *handler_data;
+
+	bool read_takes_no_args;	/* only defined if readable */
 };
 
 
@@ -138,6 +140,30 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
 	return false;
 }
 
+static int get_subobj_info(acpi_handle handle, const char *pathname,
+			   struct acpi_device_info **info)
+{
+	struct acpi_device_info *dummy_info, **info_ptr;
+	acpi_handle subobj_handle;
+	acpi_status status;
+
+	status = acpi_get_handle(handle, (char *)pathname, &subobj_handle);
+	if (status == AE_NOT_FOUND)
+		return -ENOENT;
+	else if (ACPI_FAILURE(status))
+		return -EIO;
+
+	info_ptr = info ? info : &dummy_info;
+	status = acpi_get_object_info(subobj_handle, info_ptr);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	if (!info)
+		kfree(dummy_info);
+
+	return 0;
+}
+
 static acpi_status wmi_method_enable(struct wmi_block *wblock, int enable)
 {
 	struct guid_block *block = NULL;
@@ -261,6 +287,9 @@ struct acpi_buffer *out)
 	wq_params[0].type = ACPI_TYPE_INTEGER;
 	wq_params[0].integer.value = instance;
 
+	if (instance == 0 && wblock->read_takes_no_args)
+		input.count = 0;
+
 	/*
 	 * If ACPI_WMI_EXPENSIVE, call the relevant WCxx method first to
 	 * enable collection.
@@ -628,11 +657,37 @@ static ssize_t object_id_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(object_id);
 
-static struct attribute *wmi_data_or_method_attrs[] = {
+static ssize_t readable_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct wmi_device *wdev = dev_to_wdev(dev);
+
+	return sprintf(buf, "%d\n", (int)wdev->readable);
+}
+static DEVICE_ATTR_RO(readable);
+
+static ssize_t writeable_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct wmi_device *wdev = dev_to_wdev(dev);
+
+	return sprintf(buf, "%d\n", (int)wdev->writeable);
+}
+static DEVICE_ATTR_RO(writeable);
+
+static struct attribute *wmi_data_attrs[] = {
+	&dev_attr_object_id.attr,
+	&dev_attr_readable.attr,
+	&dev_attr_writeable.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(wmi_data);
+
+static struct attribute *wmi_method_attrs[] = {
 	&dev_attr_object_id.attr,
 	NULL,
 };
-ATTRIBUTE_GROUPS(wmi_data_or_method);
+ATTRIBUTE_GROUPS(wmi_method);
 
 static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
@@ -731,13 +786,13 @@ static struct device_type wmi_type_event = {
 
 static struct device_type wmi_type_method = {
 	.name = "method",
-	.groups = wmi_data_or_method_groups,
+	.groups = wmi_method_groups,
 	.release = wmi_dev_release,
 };
 
 static struct device_type wmi_type_data = {
 	.name = "data",
-	.groups = wmi_data_or_method_groups,
+	.groups = wmi_data_groups,
 	.release = wmi_dev_release,
 };
 
@@ -756,7 +811,45 @@ static int wmi_create_device(struct device *wmi_bus_dev,
 	} else if (gblock->flags & ACPI_WMI_METHOD) {
 		wblock->dev.dev.type = &wmi_type_method;
 	} else {
+		struct acpi_device_info *info;
+		char method[5];
+		int result;
+
 		wblock->dev.dev.type = &wmi_type_data;
+
+		strcpy(method, "WQ");
+		strncat(method, wblock->gblock.object_id, 2);
+		result = get_subobj_info(device->handle, method, &info);
+
+		if (result == 0) {
+			wblock->dev.readable = true;
+
+			/*
+			 * The Microsoft documentation specifically states:
+			 *
+			 *   Data blocks registered with only a single instance
+			 *   can ignore the parameter.
+			 *
+			 * ACPICA will get mad at us if we call the method
+			 * with the wrong number of arguments, so check what
+			 * our method expects.  (On some Dell laptops, WQxx
+			 * may not be a method at all.)
+			 */
+			if (info->type != ACPI_TYPE_METHOD ||
+			    info->param_count == 0)
+				wblock->read_takes_no_args = true;
+
+			kfree(info);
+		}
+
+		strcpy(method, "WS");
+		strncat(method, wblock->gblock.object_id, 2);
+		result = get_subobj_info(device->handle, method, NULL);
+
+		if (result == 0) {
+			wblock->dev.writeable = true;
+		}
+
 	}
 
 	return device_register(&wblock->dev.dev);
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 29ed34b4dae1..53095006821e 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -21,6 +21,12 @@
 
 struct wmi_device {
 	struct device dev;
+
+	/*
+	 * These are true for data objects that support reads and writes,
+	 * respectively.
+	 */
+	bool readable, writeable;
 };
 
 struct wmi_device_id {
-- 
cgit v1.2.3


From 1686f5444546c3b53547aa8736afcf05833ed31a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 25 Nov 2015 17:33:25 -0800
Subject: platform/x86: wmi: Incorporate acpi_install_notify_handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a platform driver, acpi_driver.notify will not be available,
so use acpi_install_notify_handler as we will be converting to a
platform driver.

This gives event drivers a simple way to handle events. It
also seems closer to what the Windows docs suggest that Windows
does: it sounds like, in Windows, the mapper is responsible for
called _WED before dispatching to the subdriver.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
[dvhart: merge two development commits and update commit message]
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Cc: Pali Rohár <pali.rohar@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/wmi.c | 89 +++++++++++++++++++++++++++++++++++++---------
 include/linux/wmi.h        |  1 +
 2 files changed, 73 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index fbce8765e222..4395d83ba9cc 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -93,7 +93,6 @@ MODULE_PARM_DESC(debug_dump_wdg,
 
 static int acpi_wmi_remove(struct acpi_device *device);
 static int acpi_wmi_add(struct acpi_device *device);
-static void acpi_wmi_notify(struct acpi_device *device, u32 event);
 
 static const struct acpi_device_id wmi_device_ids[] = {
 	{"PNP0C14", 0},
@@ -109,7 +108,6 @@ static struct acpi_driver acpi_wmi_driver = {
 	.ops = {
 		.add = acpi_wmi_add,
 		.remove = acpi_wmi_remove,
-		.notify = acpi_wmi_notify,
 	},
 };
 
@@ -1019,36 +1017,80 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address,
 	}
 }
 
-static void acpi_wmi_notify(struct acpi_device *device, u32 event)
+static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
+				    void *context)
 {
 	struct guid_block *block;
 	struct wmi_block *wblock;
 	struct list_head *p;
+	bool found_it = false;
 
 	list_for_each(p, &wmi_block_list) {
 		wblock = list_entry(p, struct wmi_block, list);
 		block = &wblock->gblock;
 
-		if (wblock->acpi_device == device &&
+		if (wblock->acpi_device->handle == handle &&
 		    (block->flags & ACPI_WMI_EVENT) &&
-		    (block->notify_id == event)) {
-			if (wblock->handler)
-				wblock->handler(event, wblock->handler_data);
-			if (debug_event) {
-				pr_info("DEBUG Event GUID: %pUL\n",
-					wblock->gblock.guid);
-			}
-
-			acpi_bus_generate_netlink_event(
-				device->pnp.device_class, dev_name(&device->dev),
-				event, 0);
+		    (block->notify_id == event))
+		{
+			found_it = true;
 			break;
 		}
 	}
+
+	if (!found_it)
+		return;
+
+	/* If a driver is bound, then notify the driver. */
+	if (wblock->dev.dev.driver) {
+		struct wmi_driver *driver;
+		struct acpi_object_list input;
+		union acpi_object params[1];
+		struct acpi_buffer evdata = { ACPI_ALLOCATE_BUFFER, NULL };
+		acpi_status status;
+
+		driver = container_of(wblock->dev.dev.driver,
+				      struct wmi_driver, driver);
+
+		input.count = 1;
+		input.pointer = params;
+		params[0].type = ACPI_TYPE_INTEGER;
+		params[0].integer.value = event;
+
+		status = acpi_evaluate_object(wblock->acpi_device->handle,
+					      "_WED", &input, &evdata);
+		if (ACPI_FAILURE(status)) {
+			dev_warn(&wblock->dev.dev,
+				 "failed to get event data\n");
+			return;
+		}
+
+		if (driver->notify)
+			driver->notify(&wblock->dev,
+				       (union acpi_object *)evdata.pointer);
+
+		kfree(evdata.pointer);
+	} else if (wblock->handler) {
+		/* Legacy handler */
+		wblock->handler(event, wblock->handler_data);
+	}
+
+	if (debug_event) {
+		pr_info("DEBUG Event GUID: %pUL\n",
+			wblock->gblock.guid);
+	}
+
+	acpi_bus_generate_netlink_event(
+		wblock->acpi_device->pnp.device_class,
+		dev_name(&wblock->dev.dev),
+		event, 0);
+
 }
 
 static int acpi_wmi_remove(struct acpi_device *device)
 {
+	acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+				   acpi_wmi_notify_handler);
 	acpi_remove_address_space_handler(device->handle,
 				ACPI_ADR_SPACE_EC, &acpi_wmi_ec_space_handler);
 	wmi_free_devices(device);
@@ -1073,11 +1115,20 @@ static int acpi_wmi_add(struct acpi_device *device)
 		return -ENODEV;
 	}
 
+	status = acpi_install_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+					     acpi_wmi_notify_handler,
+					     NULL);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&device->dev, "Error installing notify handler\n");
+		error = -ENODEV;
+		goto err_remove_ec_handler;
+	}
+
 	wmi_bus_dev = device_create(&wmi_bus_class, &device->dev, MKDEV(0, 0),
 				    NULL, "wmi_bus-%s", dev_name(&device->dev));
 	if (IS_ERR(wmi_bus_dev)) {
 		error = PTR_ERR(wmi_bus_dev);
-		goto err_remove_handler;
+		goto err_remove_notify_handler;
 	}
 	device->driver_data = wmi_bus_dev;
 
@@ -1092,7 +1143,11 @@ static int acpi_wmi_add(struct acpi_device *device)
 err_remove_busdev:
 	device_unregister(wmi_bus_dev);
 
-err_remove_handler:
+err_remove_notify_handler:
+	acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+				   acpi_wmi_notify_handler);
+
+err_remove_ec_handler:
 	acpi_remove_address_space_handler(device->handle,
 					  ACPI_ADR_SPACE_EC,
 					  &acpi_wmi_ec_space_handler);
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 53095006821e..c6eedfd94e7d 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -39,6 +39,7 @@ struct wmi_driver {
 
 	int (*probe)(struct wmi_device *wdev);
 	int (*remove)(struct wmi_device *wdev);
+	void (*notify)(struct wmi_device *device, union acpi_object *data);
 };
 
 extern int __must_check __wmi_driver_register(struct wmi_driver *driver,
-- 
cgit v1.2.3


From 56a370259db4f6204d3514431d1629e0a7135b53 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 25 Nov 2015 18:19:26 -0800
Subject: platform/x86: wmi: Add a new interface to read block data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

wmi_query_block is unnecessarily indirect. Add a straightforward
method for wmi bus drivers to use to read block data.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Cc: Pali Rohár <pali.rohar@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/wmi.c | 54 ++++++++++++++++++++++++++++++++--------------
 include/linux/wmi.h        |  4 ++++
 2 files changed, 42 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 4395d83ba9cc..f8fdd48b78c4 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -244,19 +244,10 @@ u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out)
 }
 EXPORT_SYMBOL_GPL(wmi_evaluate_method);
 
-/**
- * wmi_query_block - Return contents of a WMI block
- * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
- * @instance: Instance index
- * &out: Empty buffer to return the contents of the data block to
- *
- * Return the contents of an ACPI-WMI data block to a buffer
- */
-acpi_status wmi_query_block(const char *guid_string, u8 instance,
-struct acpi_buffer *out)
+static acpi_status __query_block(struct wmi_block *wblock, u8 instance,
+				 struct acpi_buffer *out)
 {
 	struct guid_block *block = NULL;
-	struct wmi_block *wblock = NULL;
 	acpi_handle handle;
 	acpi_status status, wc_status = AE_ERROR;
 	struct acpi_object_list input;
@@ -264,12 +255,9 @@ struct acpi_buffer *out)
 	char method[5];
 	char wc_method[5] = "WC";
 
-	if (!guid_string || !out)
+	if (!out)
 		return AE_BAD_PARAMETER;
 
-	if (!find_guid(guid_string, &wblock))
-		return AE_ERROR;
-
 	block = &wblock->gblock;
 	handle = wblock->acpi_device->handle;
 
@@ -320,8 +308,42 @@ struct acpi_buffer *out)
 
 	return status;
 }
+
+/**
+ * wmi_query_block - Return contents of a WMI block (deprecated)
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @instance: Instance index
+ * &out: Empty buffer to return the contents of the data block to
+ *
+ * Return the contents of an ACPI-WMI data block to a buffer
+ */
+acpi_status wmi_query_block(const char *guid_string, u8 instance,
+			    struct acpi_buffer *out)
+{
+	struct wmi_block *wblock;
+
+	if (!guid_string)
+		return AE_BAD_PARAMETER;
+
+	if (!find_guid(guid_string, &wblock))
+		return AE_ERROR;
+
+	return __query_block(wblock, instance, out);
+}
 EXPORT_SYMBOL_GPL(wmi_query_block);
 
+union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance)
+{
+	struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct wmi_block *wblock = container_of(wdev, struct wmi_block, dev);
+
+	if (ACPI_FAILURE(__query_block(wblock, instance, &out)))
+		return NULL;
+
+	return (union acpi_object *)out.pointer;
+}
+EXPORT_SYMBOL_GPL(wmidev_block_query);
+
 /**
  * wmi_set_block - Write to a WMI block
  * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
@@ -331,7 +353,7 @@ EXPORT_SYMBOL_GPL(wmi_query_block);
  * Write the contents of the input buffer to an ACPI-WMI data block
  */
 acpi_status wmi_set_block(const char *guid_string, u8 instance,
-const struct acpi_buffer *in)
+			  const struct acpi_buffer *in)
 {
 	struct guid_block *block = NULL;
 	struct wmi_block *wblock = NULL;
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index c6eedfd94e7d..0ab254019488 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -29,6 +29,10 @@ struct wmi_device {
 	bool readable, writeable;
 };
 
+/* Caller must kfree the result. */
+extern union acpi_object *wmidev_block_query(struct wmi_device *wdev,
+					     u8 instance);
+
 struct wmi_device_id {
 	const char *guid_string;
 };
-- 
cgit v1.2.3


From f63019861cd1192e546397b13f926876a93450fd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 29 Dec 2015 22:53:51 -0800
Subject: platform/x86: wmi: Add an interface for subdrivers to access sibling
 devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some subdrivers need to access sibling devices. This gives them a
clean way to do so.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Cc: Pali Rohár <pali.rohar@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/wmi.c | 17 +++++++++++++++++
 include/linux/wmi.h        |  4 ++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 52452ec5099b..250d7b2398b4 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -344,6 +344,23 @@ union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance)
 }
 EXPORT_SYMBOL_GPL(wmidev_block_query);
 
+struct wmi_device *wmidev_get_other_guid(struct wmi_device *wdev,
+					 const char *guid_string)
+{
+	struct wmi_block *this_wb = container_of(wdev, struct wmi_block, dev);
+	struct wmi_block *other_wb;
+
+	if (!find_guid(guid_string, &other_wb))
+		return NULL;
+
+	if (other_wb->acpi_device != this_wb->acpi_device)
+		return NULL;
+
+	get_device(&other_wb->dev.dev);
+	return &other_wb->dev;
+}
+EXPORT_SYMBOL_GPL(wmidev_get_other_guid);
+
 /**
  * wmi_set_block - Write to a WMI block
  * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 0ab254019488..a283768afb7e 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -33,6 +33,10 @@ struct wmi_device {
 extern union acpi_object *wmidev_block_query(struct wmi_device *wdev,
 					     u8 instance);
 
+/* Gets another device on the same bus.  Caller must put_device the result. */
+extern struct wmi_device *wmidev_get_other_guid(struct wmi_device *wdev,
+						const char *guid_string);
+
 struct wmi_device_id {
 	const char *guid_string;
 };
-- 
cgit v1.2.3


From fd70da6a6267c91fbdda9c560f098cfd52fba00f Mon Sep 17 00:00:00 2001
From: "Darren Hart (VMware)" <dvhart@infradead.org>
Date: Fri, 19 May 2017 19:28:36 -0700
Subject: platform/x86: wmi: Require query for data blocks, rename writable to
 setable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Microsoft WMI documentation requires all data blocks to implement
the Query Control Method (WQxx). If we encounter a data block not
implementing this control method, issue a warning, and ignore the data
block. Remove the "readable" attribute as all data blocks must be
readable (query-able).

Be consistent with the language in the documentation, replace the
"writable" attribute with "setable".

Simplify (flatten) the control flow of wmi_create_device a bit while
we are updating it for the above changes.

Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Cc: Pali Rohár <pali.rohar@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/platform/x86/wmi.c | 117 +++++++++++++++++++++++----------------------
 include/linux/wmi.h        |   7 +--
 2 files changed, 63 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 250d7b2398b4..37f6651d1bf7 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -69,7 +69,7 @@ struct wmi_block {
 	wmi_notify_handler handler;
 	void *handler_data;
 
-	bool read_takes_no_args;	/* only defined if readable */
+	bool read_takes_no_args;
 };
 
 
@@ -694,28 +694,18 @@ static ssize_t object_id_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(object_id);
 
-static ssize_t readable_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
+static ssize_t setable_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
 {
 	struct wmi_device *wdev = dev_to_wdev(dev);
 
-	return sprintf(buf, "%d\n", (int)wdev->readable);
+	return sprintf(buf, "%d\n", (int)wdev->setable);
 }
-static DEVICE_ATTR_RO(readable);
-
-static ssize_t writeable_show(struct device *dev, struct device_attribute *attr,
-			      char *buf)
-{
-	struct wmi_device *wdev = dev_to_wdev(dev);
-
-	return sprintf(buf, "%d\n", (int)wdev->writeable);
-}
-static DEVICE_ATTR_RO(writeable);
+static DEVICE_ATTR_RO(setable);
 
 static struct attribute *wmi_data_attrs[] = {
 	&dev_attr_object_id.attr,
-	&dev_attr_readable.attr,
-	&dev_attr_writeable.attr,
+	&dev_attr_setable.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(wmi_data);
@@ -833,63 +823,74 @@ static struct device_type wmi_type_data = {
 	.release = wmi_dev_release,
 };
 
-static void wmi_create_device(struct device *wmi_bus_dev,
+static int wmi_create_device(struct device *wmi_bus_dev,
 			     const struct guid_block *gblock,
 			     struct wmi_block *wblock,
 			     struct acpi_device *device)
 {
-	wblock->dev.dev.bus = &wmi_bus_type;
-	wblock->dev.dev.parent = wmi_bus_dev;
-
-	dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid);
+	struct acpi_device_info *info;
+	char method[5];
+	int result;
 
 	if (gblock->flags & ACPI_WMI_EVENT) {
 		wblock->dev.dev.type = &wmi_type_event;
-	} else if (gblock->flags & ACPI_WMI_METHOD) {
+		goto out_init;
+	}
+
+	if (gblock->flags & ACPI_WMI_METHOD) {
 		wblock->dev.dev.type = &wmi_type_method;
-	} else {
-		struct acpi_device_info *info;
-		char method[5];
-		int result;
+		goto out_init;
+	}
 
-		wblock->dev.dev.type = &wmi_type_data;
+	/*
+	 * Data Block Query Control Method (WQxx by convention) is
+	 * required per the WMI documentation. If it is not present,
+	 * we ignore this data block.
+	 */
+	strcpy(method, "WQ");
+	strncat(method, wblock->gblock.object_id, 2);
+	result = get_subobj_info(device->handle, method, &info);
+
+	if (result) {
+		dev_warn(wmi_bus_dev,
+			 "%s data block query control method not found",
+			 method);
+		return result;
+	}
 
-		strcpy(method, "WQ");
-		strncat(method, wblock->gblock.object_id, 2);
-		result = get_subobj_info(device->handle, method, &info);
+	wblock->dev.dev.type = &wmi_type_data;
 
-		if (result == 0) {
-			wblock->dev.readable = true;
+	/*
+	 * The Microsoft documentation specifically states:
+	 *
+	 *   Data blocks registered with only a single instance
+	 *   can ignore the parameter.
+	 *
+	 * ACPICA will get mad at us if we call the method with the wrong number
+	 * of arguments, so check what our method expects.  (On some Dell
+	 * laptops, WQxx may not be a method at all.)
+	 */
+	if (info->type != ACPI_TYPE_METHOD || info->param_count == 0)
+		wblock->read_takes_no_args = true;
 
-			/*
-			 * The Microsoft documentation specifically states:
-			 *
-			 *   Data blocks registered with only a single instance
-			 *   can ignore the parameter.
-			 *
-			 * ACPICA will get mad at us if we call the method
-			 * with the wrong number of arguments, so check what
-			 * our method expects.  (On some Dell laptops, WQxx
-			 * may not be a method at all.)
-			 */
-			if (info->type != ACPI_TYPE_METHOD ||
-			    info->param_count == 0)
-				wblock->read_takes_no_args = true;
+	kfree(info);
 
-			kfree(info);
-		}
+	strcpy(method, "WS");
+	strncat(method, wblock->gblock.object_id, 2);
+	result = get_subobj_info(device->handle, method, NULL);
 
-		strcpy(method, "WS");
-		strncat(method, wblock->gblock.object_id, 2);
-		result = get_subobj_info(device->handle, method, NULL);
+	if (result == 0)
+		wblock->dev.setable = true;
 
-		if (result == 0) {
-			wblock->dev.writeable = true;
-		}
+ out_init:
+	wblock->dev.dev.bus = &wmi_bus_type;
+	wblock->dev.dev.parent = wmi_bus_dev;
 
-	}
+	dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid);
 
 	device_initialize(&wblock->dev.dev);
+
+	return 0;
 }
 
 static void wmi_free_devices(struct acpi_device *device)
@@ -978,7 +979,11 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
 		wblock->acpi_device = device;
 		wblock->gblock = gblock[i];
 
-		wmi_create_device(wmi_bus_dev, &gblock[i], wblock, device);
+		retval = wmi_create_device(wmi_bus_dev, &gblock[i], wblock, device);
+		if (retval) {
+			kfree(wblock);
+			continue;
+		}
 
 		list_add_tail(&wblock->list, &wmi_block_list);
 
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index a283768afb7e..cd0d7734dc49 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -22,11 +22,8 @@
 struct wmi_device {
 	struct device dev;
 
-	/*
-	 * These are true for data objects that support reads and writes,
-	 * respectively.
-	 */
-	bool readable, writeable;
+	 /* True for data blocks implementing the Set Control Method */
+	bool setable;
 };
 
 /* Caller must kfree the result. */
-- 
cgit v1.2.3


From 9bd2bbc01d17ddd567cc0f81f77fe1163e497462 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 2 Jun 2017 20:35:51 -0700
Subject: elevator: fix truncation of icq_cache_name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc 7.1 reports the following warning:

    block/elevator.c: In function ‘elv_register’:
    block/elevator.c:898:5: warning: ‘snprintf’ output may be truncated before the last format character [-Wformat-truncation=]
         "%s_io_cq", e->elevator_name);
         ^~~~~~~~~~
    block/elevator.c:897:3: note: ‘snprintf’ output between 7 and 22 bytes into a destination of size 21
       snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         "%s_io_cq", e->elevator_name);
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The bug is that the name of the icq_cache is 6 characters longer than
the elevator name, but only ELV_NAME_MAX + 5 characters were reserved
for it --- so in the case of a maximum-length elevator name, the 'q'
character in "_io_cq" would be truncated by snprintf().  Fix it by
reserving ELV_NAME_MAX + 6 characters instead.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/elevator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 9ec5e22846e0..0e306c5a86d6 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -153,7 +153,7 @@ struct elevator_type
 #endif
 
 	/* managed by elevator core */
-	char icq_cache_name[ELV_NAME_MAX + 5];	/* elvname + "_io_cq" */
+	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
 	struct list_head list;
 };
 
-- 
cgit v1.2.3


From cc5d0db390b0ff0f5da95b643a2b070da15a9c3e Mon Sep 17 00:00:00 2001
From: "Alex A. Mihaylov" <minimumlaw@rambler.ru>
Date: Fri, 2 Jun 2017 10:06:27 +0300
Subject: regmap: Add 1-Wire bus support

Add basic support regmap (register map access) API for 1-Wire bus

Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/Kconfig     |   6 +-
 drivers/base/regmap/Makefile    |   1 +
 drivers/base/regmap/regmap-w1.c | 245 ++++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h          |  34 ++++++
 4 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 drivers/base/regmap/regmap-w1.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index db9d00c36a3e..413af5f94058 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -3,7 +3,7 @@
 # subsystems should select the appropriate symbols.
 
 config REGMAP
-	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ)
+	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ)
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
 	select IRQ_DOMAIN if REGMAP_IRQ
@@ -24,6 +24,10 @@ config REGMAP_SPMI
 	tristate
 	depends on SPMI
 
+config REGMAP_W1
+	tristate
+	depends on W1
+
 config REGMAP_MMIO
 	tristate
 
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 609e4c84f485..17741ae14ef4 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
 obj-$(CONFIG_REGMAP_SPMI) += regmap-spmi.o
 obj-$(CONFIG_REGMAP_MMIO) += regmap-mmio.o
 obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
+obj-$(CONFIG_REGMAP_W1) += regmap-w1.o
diff --git a/drivers/base/regmap/regmap-w1.c b/drivers/base/regmap/regmap-w1.c
new file mode 100644
index 000000000000..5f04e7bf063e
--- /dev/null
+++ b/drivers/base/regmap/regmap-w1.c
@@ -0,0 +1,245 @@
+/*
+ * Register map access API - W1 (1-Wire) support
+ *
+ * Copyright (C) 2017 OAO Radioavionica
+ * Author: Alex A. Mihaylov <minimumlaw@rambler.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation
+ */
+
+#include <linux/regmap.h>
+#include <linux/module.h>
+#include "../../w1/w1.h"
+
+#include "internal.h"
+
+#define W1_CMD_READ_DATA	0x69
+#define W1_CMD_WRITE_DATA	0x6C
+
+/*
+ * 1-Wire slaves registers with addess 8 bit and data 8 bit
+ */
+
+static int w1_reg_a8_v8_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct device *dev = context;
+	struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+	int ret = 0;
+
+	if (reg > 255)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	if (!w1_reset_select_slave(sl)) {
+		w1_write_8(sl->master, W1_CMD_READ_DATA);
+		w1_write_8(sl->master, reg);
+		*val = w1_read_8(sl->master);
+	} else {
+		ret = -ENODEV;
+	}
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
+static int w1_reg_a8_v8_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct device *dev = context;
+	struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+	int ret = 0;
+
+	if (reg > 255)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	if (!w1_reset_select_slave(sl)) {
+		w1_write_8(sl->master, W1_CMD_WRITE_DATA);
+		w1_write_8(sl->master, reg);
+		w1_write_8(sl->master, val);
+	} else {
+		ret = -ENODEV;
+	}
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
+/*
+ * 1-Wire slaves registers with addess 8 bit and data 16 bit
+ */
+
+static int w1_reg_a8_v16_read(void *context, unsigned int reg,
+				unsigned int *val)
+{
+	struct device *dev = context;
+	struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+	int ret = 0;
+
+	if (reg > 255)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	if (!w1_reset_select_slave(sl)) {
+		w1_write_8(sl->master, W1_CMD_READ_DATA);
+		w1_write_8(sl->master, reg);
+		*val = w1_read_8(sl->master);
+		*val |= w1_read_8(sl->master)<<8;
+	} else {
+		ret = -ENODEV;
+	}
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
+static int w1_reg_a8_v16_write(void *context, unsigned int reg,
+				unsigned int val)
+{
+	struct device *dev = context;
+	struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+	int ret = 0;
+
+	if (reg > 255)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	if (!w1_reset_select_slave(sl)) {
+		w1_write_8(sl->master, W1_CMD_WRITE_DATA);
+		w1_write_8(sl->master, reg);
+		w1_write_8(sl->master, val & 0x00FF);
+		w1_write_8(sl->master, val>>8 & 0x00FF);
+	} else {
+		ret = -ENODEV;
+	}
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
+/*
+ * 1-Wire slaves registers with addess 16 bit and data 16 bit
+ */
+
+static int w1_reg_a16_v16_read(void *context, unsigned int reg,
+				unsigned int *val)
+{
+	struct device *dev = context;
+	struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+	int ret = 0;
+
+	if (reg > 65535)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	if (!w1_reset_select_slave(sl)) {
+		w1_write_8(sl->master, W1_CMD_READ_DATA);
+		w1_write_8(sl->master, reg & 0x00FF);
+		w1_write_8(sl->master, reg>>8 & 0x00FF);
+		*val = w1_read_8(sl->master);
+		*val |= w1_read_8(sl->master)<<8;
+	} else {
+		ret = -ENODEV;
+	}
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
+static int w1_reg_a16_v16_write(void *context, unsigned int reg,
+				unsigned int val)
+{
+	struct device *dev = context;
+	struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+	int ret = 0;
+
+	if (reg > 65535)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	if (!w1_reset_select_slave(sl)) {
+		w1_write_8(sl->master, W1_CMD_WRITE_DATA);
+		w1_write_8(sl->master, reg & 0x00FF);
+		w1_write_8(sl->master, reg>>8 & 0x00FF);
+		w1_write_8(sl->master, val & 0x00FF);
+		w1_write_8(sl->master, val>>8 & 0x00FF);
+	} else {
+		ret = -ENODEV;
+	}
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
+/*
+ * Various types of supported bus addressing
+ */
+
+static struct regmap_bus regmap_w1_bus_a8_v8 = {
+	.reg_read = w1_reg_a8_v8_read,
+	.reg_write = w1_reg_a8_v8_write,
+};
+
+static struct regmap_bus regmap_w1_bus_a8_v16 = {
+	.reg_read = w1_reg_a8_v16_read,
+	.reg_write = w1_reg_a8_v16_write,
+};
+
+static struct regmap_bus regmap_w1_bus_a16_v16 = {
+	.reg_read = w1_reg_a16_v16_read,
+	.reg_write = w1_reg_a16_v16_write,
+};
+
+static const struct regmap_bus *regmap_get_w1_bus(struct device *w1_dev,
+					const struct regmap_config *config)
+{
+	if (config->reg_bits == 8 && config->val_bits == 8)
+		return &regmap_w1_bus_a8_v8;
+
+	if (config->reg_bits == 8 && config->val_bits == 16)
+		return &regmap_w1_bus_a8_v16;
+
+	if (config->reg_bits == 16 && config->val_bits == 16)
+		return &regmap_w1_bus_a16_v16;
+
+	return ERR_PTR(-ENOTSUPP);
+}
+
+struct regmap *__regmap_init_w1(struct device *w1_dev,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name)
+{
+
+	const struct regmap_bus *bus = regmap_get_w1_bus(w1_dev, config);
+
+	if (IS_ERR(bus))
+		return ERR_CAST(bus);
+
+	return __regmap_init(w1_dev, bus, w1_dev, config,
+			 lock_key, lock_name);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__regmap_init_w1);
+
+struct regmap *__devm_regmap_init_w1(struct device *w1_dev,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name)
+{
+
+	const struct regmap_bus *bus = regmap_get_w1_bus(w1_dev, config);
+
+	if (IS_ERR(bus))
+		return ERR_CAST(bus);
+
+	return __devm_regmap_init(w1_dev, bus, w1_dev, config,
+				 lock_key, lock_name);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__devm_regmap_init_w1);
+
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index e88649225a60..86eeacc1425a 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -461,6 +461,10 @@ struct regmap *__regmap_init_spmi_ext(struct spmi_device *dev,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
 				      const char *lock_name);
+struct regmap *__regmap_init_w1(struct device *w1_dev,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name);
 struct regmap *__regmap_init_mmio_clk(struct device *dev, const char *clk_id,
 				      void __iomem *regs,
 				      const struct regmap_config *config,
@@ -493,6 +497,10 @@ struct regmap *__devm_regmap_init_spmi_ext(struct spmi_device *dev,
 					   const struct regmap_config *config,
 					   struct lock_class_key *lock_key,
 					   const char *lock_name);
+struct regmap *__devm_regmap_init_w1(struct device *w1_dev,
+				      const struct regmap_config *config,
+				      struct lock_class_key *lock_key,
+				      const char *lock_name);
 struct regmap *__devm_regmap_init_mmio_clk(struct device *dev,
 					   const char *clk_id,
 					   void __iomem *regs,
@@ -596,6 +604,19 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 	__regmap_lockdep_wrapper(__regmap_init_spmi_ext, #config,	\
 				dev, config)
 
+/**
+ * regmap_init_w1() - Initialise register map
+ *
+ * @w1_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_w1(w1_dev, config)					\
+	__regmap_lockdep_wrapper(__regmap_init_w1, #config,		\
+				w1_dev, config)
+
 /**
  * regmap_init_mmio_clk() - Initialise register map with register clock
  *
@@ -711,6 +732,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__devm_regmap_init_spmi_ext, #config,	\
 				dev, config)
 
+/**
+ * devm_regmap_init_w1() - Initialise managed register map
+ *
+ * @w1_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_w1(w1_dev, config)				\
+	__regmap_lockdep_wrapper(__devm_regmap_init_w1, #config,	\
+				w1_dev, config)
 /**
  * devm_regmap_init_mmio_clk() - Initialise managed register map with clock
  *
-- 
cgit v1.2.3


From fc0b2acc754a183aa79e2abb8bca8fd915832694 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Tue, 30 May 2017 17:31:21 +0300
Subject: spi: pxa2xx: Add support for Intel Cannonlake

Intel Cannonlake LPSS SPI has up to four chip selects per port like in
Broxton and is clocked like Sunrisepoint and Kaby Lake. Add a new type
LPSS_CNL_SSP and configuration that enable runtime chip select detection
and use the same FIFO thresholds than in Sunrisepoint.

Patch adds support for both Cannonlake SoC and PCH.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c   | 22 ++++++++++++++++++++++
 include/linux/pxa2xx_ssp.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 47b65d7c4072..38d053682892 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -151,6 +151,18 @@ static const struct lpss_config lpss_platforms[] = {
 		.cs_sel_shift = 8,
 		.cs_sel_mask = 3 << 8,
 	},
+	{	/* LPSS_CNL_SSP */
+		.offset = 0x200,
+		.reg_general = -1,
+		.reg_ssp = 0x20,
+		.reg_cs_ctrl = 0x24,
+		.reg_capabilities = 0xfc,
+		.rx_threshold = 1,
+		.tx_threshold_lo = 32,
+		.tx_threshold_hi = 56,
+		.cs_sel_shift = 8,
+		.cs_sel_mask = 3 << 8,
+	},
 };
 
 static inline const struct lpss_config
@@ -167,6 +179,7 @@ static bool is_lpss_ssp(const struct driver_data *drv_data)
 	case LPSS_BSW_SSP:
 	case LPSS_SPT_SSP:
 	case LPSS_BXT_SSP:
+	case LPSS_CNL_SSP:
 		return true;
 	default:
 		return false;
@@ -1275,6 +1288,7 @@ static int setup(struct spi_device *spi)
 	case LPSS_BSW_SSP:
 	case LPSS_SPT_SSP:
 	case LPSS_BXT_SSP:
+	case LPSS_CNL_SSP:
 		config = lpss_get_config(drv_data);
 		tx_thres = config->tx_threshold_lo;
 		tx_hi_thres = config->tx_threshold_hi;
@@ -1470,6 +1484,14 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = {
 	{ PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x5ac6), LPSS_BXT_SSP },
+	/* CNL-LP */
+	{ PCI_VDEVICE(INTEL, 0x9daa), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x9dab), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x9dfb), LPSS_CNL_SSP },
+	/* CNL-H */
+	{ PCI_VDEVICE(INTEL, 0xa32a), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa32b), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa37b), LPSS_CNL_SSP },
 	{ },
 };
 
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index a0522328d7aa..8461b18e4608 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -196,6 +196,7 @@ enum pxa_ssp_type {
 	LPSS_BSW_SSP,
 	LPSS_SPT_SSP,
 	LPSS_BXT_SSP,
+	LPSS_CNL_SSP,
 };
 
 struct ssp_device {
-- 
cgit v1.2.3


From f8fe99754673719ab791713a676bf27dae616fbc Mon Sep 17 00:00:00 2001
From: "yuval.shaia@oracle.com" <yuval.shaia@oracle.com>
Date: Mon, 5 Jun 2017 10:18:40 +0300
Subject: net: phy: Delete unused function phy_ethtool_gset

It's unused, so remove it.

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.txt |  1 -
 drivers/net/phy/phy.c            | 24 ------------------------
 include/linux/phy.h              |  1 -
 3 files changed, 26 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index 16f90d817224..bdec0f700bc1 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -295,7 +295,6 @@ Doing it all yourself
    settings in the PHY.
 
  int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
- int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 
    Ethtool convenience functions.
 
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 82ab8fb82587..40f4c6a2ef6c 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -484,30 +484,6 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 }
 EXPORT_SYMBOL(phy_ethtool_ksettings_set);
 
-int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd)
-{
-	cmd->supported = phydev->supported;
-
-	cmd->advertising = phydev->advertising;
-	cmd->lp_advertising = phydev->lp_advertising;
-
-	ethtool_cmd_speed_set(cmd, phydev->speed);
-	cmd->duplex = phydev->duplex;
-	if (phydev->interface == PHY_INTERFACE_MODE_MOCA)
-		cmd->port = PORT_BNC;
-	else
-		cmd->port = PORT_MII;
-	cmd->phy_address = phydev->mdio.addr;
-	cmd->transceiver = phy_is_internal(phydev) ?
-		XCVR_INTERNAL : XCVR_EXTERNAL;
-	cmd->autoneg = phydev->autoneg;
-	cmd->eth_tp_mdix_ctrl = phydev->mdix_ctrl;
-	cmd->eth_tp_mdix = phydev->mdix;
-
-	return 0;
-}
-EXPORT_SYMBOL(phy_ethtool_gset);
-
 int phy_ethtool_ksettings_get(struct phy_device *phydev,
 			      struct ethtool_link_ksettings *cmd)
 {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 58f1b45a4c44..748e526c0698 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -854,7 +854,6 @@ void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
 void phy_trigger_machine(struct phy_device *phydev, bool sync);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
-int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_ethtool_ksettings_get(struct phy_device *phydev,
 			      struct ethtool_link_ksettings *cmd);
 int phy_ethtool_ksettings_set(struct phy_device *phydev,
-- 
cgit v1.2.3


From dc4bb0e2356149aee4cdae061936f3bbdd45595c Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 5 Jun 2017 12:15:46 -0700
Subject: bpf: Introduce bpf_prog ID

This patch generates an unique ID for each BPF_PROG_LOAD-ed prog.
It is worth to note that each BPF_PROG_LOAD-ed prog will have
a different ID even they have the same bpf instructions.

The ID is generated by the existing idr_alloc_cyclic().
The ID is ranged from [1, INT_MAX).  It is allocated in cyclic manner,
so an ID will get reused every 2 billion BPF_PROG_LOAD.

The bpf_prog_alloc_id() is done after bpf_prog_select_runtime()
because the jit process may have allocated a new prog.  Hence,
we need to ensure the value of pointer 'prog' will not be changed
any more before storing the prog to the prog_idr.

After bpf_prog_select_runtime(), the prog is read-only.  Hence,
the id is stored in 'struct bpf_prog_aux'.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h  |  1 +
 kernel/bpf/syscall.c | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fcc80ca11045..c5946d19f2ca 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -172,6 +172,7 @@ struct bpf_prog_aux {
 	u32 used_map_cnt;
 	u32 max_ctx_offset;
 	u32 stack_depth;
+	u32 id;
 	struct latch_tree_node ksym_tnode;
 	struct list_head ksym_lnode;
 	const struct bpf_verifier_ops *ops;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 59da103adb85..2a1b32b470f1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -22,8 +22,11 @@
 #include <linux/filter.h>
 #include <linux/version.h>
 #include <linux/kernel.h>
+#include <linux/idr.h>
 
 DEFINE_PER_CPU(int, bpf_prog_active);
+static DEFINE_IDR(prog_idr);
+static DEFINE_SPINLOCK(prog_idr_lock);
 
 int sysctl_unprivileged_bpf_disabled __read_mostly;
 
@@ -650,6 +653,34 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 	free_uid(user);
 }
 
+static int bpf_prog_alloc_id(struct bpf_prog *prog)
+{
+	int id;
+
+	spin_lock_bh(&prog_idr_lock);
+	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
+	if (id > 0)
+		prog->aux->id = id;
+	spin_unlock_bh(&prog_idr_lock);
+
+	/* id is in [1, INT_MAX) */
+	if (WARN_ON_ONCE(!id))
+		return -ENOSPC;
+
+	return id > 0 ? 0 : id;
+}
+
+static void bpf_prog_free_id(struct bpf_prog *prog)
+{
+	/* cBPF to eBPF migrations are currently not in the idr store. */
+	if (!prog->aux->id)
+		return;
+
+	spin_lock_bh(&prog_idr_lock);
+	idr_remove(&prog_idr, prog->aux->id);
+	spin_unlock_bh(&prog_idr_lock);
+}
+
 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 {
 	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
@@ -663,6 +694,7 @@ void bpf_prog_put(struct bpf_prog *prog)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
 		trace_bpf_prog_put_rcu(prog);
+		bpf_prog_free_id(prog);
 		bpf_prog_kallsyms_del(prog);
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
@@ -857,15 +889,21 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (err < 0)
 		goto free_used_maps;
 
+	err = bpf_prog_alloc_id(prog);
+	if (err)
+		goto free_used_maps;
+
 	err = bpf_prog_new_fd(prog);
 	if (err < 0)
 		/* failed to allocate fd */
-		goto free_used_maps;
+		goto free_id;
 
 	bpf_prog_kallsyms_add(prog);
 	trace_bpf_prog_load(prog, err);
 	return err;
 
+free_id:
+	bpf_prog_free_id(prog);
 free_used_maps:
 	free_used_maps(prog->aux);
 free_prog:
-- 
cgit v1.2.3


From f3f1c054c288bb6e503005e6d73611151ed20e91 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 5 Jun 2017 12:15:47 -0700
Subject: bpf: Introduce bpf_map ID

This patch generates an unique ID for each created bpf_map.
The approach is similar to the earlier patch for bpf_prog ID.

It is worth to note that the bpf_map's ID and bpf_prog's ID
are in two independent ID spaces and both have the same valid range:
[1, INT_MAX).

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h  |  1 +
 kernel/bpf/syscall.c | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c5946d19f2ca..c32bace66d3d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,6 +46,7 @@ struct bpf_map {
 	u32 max_entries;
 	u32 map_flags;
 	u32 pages;
+	u32 id;
 	struct user_struct *user;
 	const struct bpf_map_ops *ops;
 	struct work_struct work;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2a1b32b470f1..4c3075b5d840 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -27,6 +27,8 @@
 DEFINE_PER_CPU(int, bpf_prog_active);
 static DEFINE_IDR(prog_idr);
 static DEFINE_SPINLOCK(prog_idr_lock);
+static DEFINE_IDR(map_idr);
+static DEFINE_SPINLOCK(map_idr_lock);
 
 int sysctl_unprivileged_bpf_disabled __read_mostly;
 
@@ -117,6 +119,29 @@ static void bpf_map_uncharge_memlock(struct bpf_map *map)
 	free_uid(user);
 }
 
+static int bpf_map_alloc_id(struct bpf_map *map)
+{
+	int id;
+
+	spin_lock_bh(&map_idr_lock);
+	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
+	if (id > 0)
+		map->id = id;
+	spin_unlock_bh(&map_idr_lock);
+
+	if (WARN_ON_ONCE(!id))
+		return -ENOSPC;
+
+	return id > 0 ? 0 : id;
+}
+
+static void bpf_map_free_id(struct bpf_map *map)
+{
+	spin_lock_bh(&map_idr_lock);
+	idr_remove(&map_idr, map->id);
+	spin_unlock_bh(&map_idr_lock);
+}
+
 /* called from workqueue */
 static void bpf_map_free_deferred(struct work_struct *work)
 {
@@ -141,6 +166,7 @@ static void bpf_map_put_uref(struct bpf_map *map)
 void bpf_map_put(struct bpf_map *map)
 {
 	if (atomic_dec_and_test(&map->refcnt)) {
+		bpf_map_free_id(map);
 		INIT_WORK(&map->work, bpf_map_free_deferred);
 		schedule_work(&map->work);
 	}
@@ -239,14 +265,20 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		goto free_map_nouncharge;
 
+	err = bpf_map_alloc_id(map);
+	if (err)
+		goto free_map;
+
 	err = bpf_map_new_fd(map);
 	if (err < 0)
 		/* failed to allocate fd */
-		goto free_map;
+		goto free_id;
 
 	trace_bpf_map_create(map, err);
 	return err;
 
+free_id:
+	bpf_map_free_id(map);
 free_map:
 	bpf_map_uncharge_memlock(map);
 free_map_nouncharge:
-- 
cgit v1.2.3


From 783d28dd11f68fb25d1f2e0de7c42336394ef128 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 5 Jun 2017 12:15:51 -0700
Subject: bpf: Add jited_len to struct bpf_prog

Add jited_len to struct bpf_prog.  It will be
useful for the struct bpf_prog_info which will
be added in the later patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/net/bpf_jit_comp.c     | 1 +
 arch/powerpc/net/bpf_jit_comp64.c | 1 +
 arch/s390/net/bpf_jit_comp.c      | 1 +
 arch/sparc/net/bpf_jit_comp_64.c  | 1 +
 arch/x86/net/bpf_jit_comp.c       | 1 +
 include/linux/filter.h            | 1 +
 6 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index b1d38eeb24f6..4f95873d7142 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -900,6 +900,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	bpf_jit_binary_lock_ro(header);
 	prog->bpf_func = (void *)ctx.image;
 	prog->jited = 1;
+	prog->jited_len = image_size;
 
 out_off:
 	kfree(ctx.offset);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index a01366584a4b..861c5af1c9c4 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -1052,6 +1052,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 
 	fp->bpf_func = (void *)image;
 	fp->jited = 1;
+	fp->jited_len = alloclen;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
 
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 42ad3832586c..01c6fbc3e85b 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1329,6 +1329,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	bpf_jit_binary_lock_ro(header);
 	fp->bpf_func = (void *) jit.prg_buf;
 	fp->jited = 1;
+	fp->jited_len = jit.size;
 free_addrs:
 	kfree(jit.addrs);
 out:
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 098874a81f6e..8799ae9a8788 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1560,6 +1560,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	prog->bpf_func = (void *)ctx.image;
 	prog->jited = 1;
+	prog->jited_len = image_size;
 
 out_off:
 	kfree(ctx.offset);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 617eac9c4511..e1324f280e06 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1167,6 +1167,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		bpf_jit_binary_lock_ro(header);
 		prog->bpf_func = (void *)image;
 		prog->jited = 1;
+		prog->jited_len = proglen;
 	} else {
 		prog = orig_prog;
 	}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a20ba40fcb73..1e2dddf21f3b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -432,6 +432,7 @@ struct bpf_prog {
 	kmemcheck_bitfield_end(meta);
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	u32			len;		/* Number of filter blocks */
+	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
-- 
cgit v1.2.3


From 1e270976908686ec25fb91b8a34145be54137976 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 5 Jun 2017 12:15:52 -0700
Subject: bpf: Add BPF_OBJ_GET_INFO_BY_FD

A single BPF_OBJ_GET_INFO_BY_FD cmd is used to obtain the info
for both bpf_prog and bpf_map.  The kernel can figure out the
fd is associated with a bpf_prog or bpf_map.

The suggested struct bpf_prog_info and struct bpf_map_info are
not meant to be a complete list and it is not the goal of this patch.
New fields can be added in the future patch.

The focus of this patch is to create the interface,
BPF_OBJ_GET_INFO_BY_FD cmd for exposing the bpf_prog's and
bpf_map's info.

The obj's info, which will be extended (and get bigger) over time, is
separated from the bpf_attr to avoid bloating the bpf_attr.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h   |   2 -
 include/uapi/linux/bpf.h |  28 ++++++++
 kernel/bpf/syscall.c     | 163 ++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 174 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1e2dddf21f3b..1fa26dc562ce 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -69,8 +69,6 @@ struct bpf_prog_aux;
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-#define BPF_TAG_SIZE	8
-
 /* Helper macros for filter block array initializers. */
 
 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd23f47ff00c..9b2c10b45733 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -86,6 +86,7 @@ enum bpf_cmd {
 	BPF_MAP_GET_NEXT_ID,
 	BPF_PROG_GET_FD_BY_ID,
 	BPF_MAP_GET_FD_BY_ID,
+	BPF_OBJ_GET_INFO_BY_FD,
 };
 
 enum bpf_map_type {
@@ -222,6 +223,12 @@ union bpf_attr {
 		};
 		__u32		next_id;
 	};
+
+	struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+		__u32		bpf_fd;
+		__u32		info_len;
+		__aligned_u64	info;
+	} info;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -686,4 +693,25 @@ struct xdp_md {
 	__u32 data_end;
 };
 
+#define BPF_TAG_SIZE	8
+
+struct bpf_prog_info {
+	__u32 type;
+	__u32 id;
+	__u8  tag[BPF_TAG_SIZE];
+	__u32 jited_prog_len;
+	__u32 xlated_prog_len;
+	__aligned_u64 jited_prog_insns;
+	__aligned_u64 xlated_prog_insns;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+	__u32 type;
+	__u32 id;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 map_flags;
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1802bb9c47d9..8942c820d620 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1239,6 +1239,145 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 	return fd;
 }
 
+static int check_uarg_tail_zero(void __user *uaddr,
+				size_t expected_size,
+				size_t actual_size)
+{
+	unsigned char __user *addr;
+	unsigned char __user *end;
+	unsigned char val;
+	int err;
+
+	if (actual_size <= expected_size)
+		return 0;
+
+	addr = uaddr + expected_size;
+	end  = uaddr + actual_size;
+
+	for (; addr < end; addr++) {
+		err = get_user(val, addr);
+		if (err)
+			return err;
+		if (val)
+			return -E2BIG;
+	}
+
+	return 0;
+}
+
+static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
+				   const union bpf_attr *attr,
+				   union bpf_attr __user *uattr)
+{
+	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+	struct bpf_prog_info info = {};
+	u32 info_len = attr->info.info_len;
+	char __user *uinsns;
+	u32 ulen;
+	int err;
+
+	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	if (err)
+		return err;
+	info_len = min_t(u32, sizeof(info), info_len);
+
+	if (copy_from_user(&info, uinfo, info_len))
+		return err;
+
+	info.type = prog->type;
+	info.id = prog->aux->id;
+
+	memcpy(info.tag, prog->tag, sizeof(prog->tag));
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		info.jited_prog_len = 0;
+		info.xlated_prog_len = 0;
+		goto done;
+	}
+
+	ulen = info.jited_prog_len;
+	info.jited_prog_len = prog->jited_len;
+	if (info.jited_prog_len && ulen) {
+		uinsns = u64_to_user_ptr(info.jited_prog_insns);
+		ulen = min_t(u32, info.jited_prog_len, ulen);
+		if (copy_to_user(uinsns, prog->bpf_func, ulen))
+			return -EFAULT;
+	}
+
+	ulen = info.xlated_prog_len;
+	info.xlated_prog_len = bpf_prog_size(prog->len);
+	if (info.xlated_prog_len && ulen) {
+		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
+		ulen = min_t(u32, info.xlated_prog_len, ulen);
+		if (copy_to_user(uinsns, prog->insnsi, ulen))
+			return -EFAULT;
+	}
+
+done:
+	if (copy_to_user(uinfo, &info, info_len) ||
+	    put_user(info_len, &uattr->info.info_len))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int bpf_map_get_info_by_fd(struct bpf_map *map,
+				  const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+	struct bpf_map_info info = {};
+	u32 info_len = attr->info.info_len;
+	int err;
+
+	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	if (err)
+		return err;
+	info_len = min_t(u32, sizeof(info), info_len);
+
+	info.type = map->map_type;
+	info.id = map->id;
+	info.key_size = map->key_size;
+	info.value_size = map->value_size;
+	info.max_entries = map->max_entries;
+	info.map_flags = map->map_flags;
+
+	if (copy_to_user(uinfo, &info, info_len) ||
+	    put_user(info_len, &uattr->info.info_len))
+		return -EFAULT;
+
+	return 0;
+}
+
+#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
+
+static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	int ufd = attr->info.bpf_fd;
+	struct fd f;
+	int err;
+
+	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
+		return -EINVAL;
+
+	f = fdget(ufd);
+	if (!f.file)
+		return -EBADFD;
+
+	if (f.file->f_op == &bpf_prog_fops)
+		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
+					      uattr);
+	else if (f.file->f_op == &bpf_map_fops)
+		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
+					     uattr);
+	else
+		err = -EINVAL;
+
+	fdput(f);
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -1258,23 +1397,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	 * user-space does not rely on any kernel feature
 	 * extensions we dont know about yet.
 	 */
-	if (size > sizeof(attr)) {
-		unsigned char __user *addr;
-		unsigned char __user *end;
-		unsigned char val;
-
-		addr = (void __user *)uattr + sizeof(attr);
-		end  = (void __user *)uattr + size;
-
-		for (; addr < end; addr++) {
-			err = get_user(val, addr);
-			if (err)
-				return err;
-			if (val)
-				return -E2BIG;
-		}
-		size = sizeof(attr);
-	}
+	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
+	if (err)
+		return err;
+	size = min_t(u32, size, sizeof(attr));
 
 	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
 	if (copy_from_user(&attr, uattr, size) != 0)
@@ -1330,6 +1456,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_MAP_GET_FD_BY_ID:
 		err = bpf_map_get_fd_by_id(&attr);
 		break;
+	case BPF_OBJ_GET_INFO_BY_FD:
+		err = bpf_obj_get_info_by_fd(&attr, uattr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3


From abb2ea7dfd82451d85ce669b811310c05ab5ca46 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 6 Jun 2017 13:36:24 -0700
Subject: compiler, clang: suppress warning for unused static inline functions

GCC explicitly does not warn for unused static inline functions for
-Wunused-function.  The manual states:

	Warn whenever a static function is declared but not defined or
	a non-inline static function is unused.

Clang does warn for static inline functions that are unused.

It turns out that suppressing the warnings avoids potentially complex
#ifdef directives, which also reduces LOC.

Suppress the warning for clang.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index de179993e039..ea9126006a69 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -15,3 +15,10 @@
  * with any version that can compile the kernel
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+/*
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well.
+ */
+#define inline inline __attribute__((unused))
-- 
cgit v1.2.3


From f3b7eaae1b35eb8077610eb7c7db042c9b0645e1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 7 Jun 2017 00:57:37 +0200
Subject: Revert "ACPI / sleep: Ignore spurious SCI wakeups from
 suspend-to-idle"

Revert commit eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups
from suspend-to-idle) as it turned out to be premature and triggered
a number of different issues on various systems.

That includes, but is not limited to, premature suspend-to-RAM aborts
on Dell XPS 13 (9343) reported by Dominik.

The issue the commit in question attempted to address is real and
will need to be taken care of going forward, but evidently more work
is needed for this purpose.

Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c      |  2 +-
 drivers/acpi/button.c       |  5 ++---
 drivers/acpi/device_pm.c    |  3 +--
 drivers/acpi/sleep.c        | 28 ----------------------------
 drivers/base/power/main.c   |  5 +++++
 drivers/base/power/wakeup.c | 18 ++++++------------
 include/linux/suspend.h     |  7 ++-----
 kernel/power/process.c      |  2 +-
 kernel/power/suspend.c      | 29 ++++-------------------------
 9 files changed, 22 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 83ab17e4a795..4ef1e4624b2b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -776,7 +776,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume)
 	if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) ||
 	    (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) &&
             (battery->capacity_now <= battery->alarm)))
-		pm_wakeup_hard_event(&battery->device->dev);
+		pm_wakeup_event(&battery->device->dev, 0);
 
 	return result;
 }
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index b7c2a06963d6..668137e4a069 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -216,7 +216,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state)
 	}
 
 	if (state)
-		pm_wakeup_hard_event(&device->dev);
+		pm_wakeup_event(&device->dev, 0);
 
 	ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
 	if (ret == NOTIFY_DONE)
@@ -398,7 +398,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
 		} else {
 			int keycode;
 
-			pm_wakeup_hard_event(&device->dev);
+			pm_wakeup_event(&device->dev, 0);
 			if (button->suspended)
 				break;
 
@@ -530,7 +530,6 @@ static int acpi_button_add(struct acpi_device *device)
 		lid_device = device;
 	}
 
-	device_init_wakeup(&device->dev, true);
 	printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device));
 	return 0;
 
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 798d5003a039..993fd31394c8 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -24,7 +24,6 @@
 #include <linux/pm_qos.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
-#include <linux/suspend.h>
 
 #include "internal.h"
 
@@ -400,7 +399,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
-		pm_wakeup_ws_event(adev->wakeup.ws, 0, true);
+		__pm_wakeup_event(adev->wakeup.ws, 0);
 		if (adev->wakeup.context.work.func)
 			queue_pm_work(&adev->wakeup.context.work);
 	}
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index e84005d642e6..a4327af676fe 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -662,40 +662,14 @@ static int acpi_freeze_prepare(void)
 	acpi_os_wait_events_complete();
 	if (acpi_sci_irq_valid())
 		enable_irq_wake(acpi_sci_irq);
-
 	return 0;
 }
 
-static void acpi_freeze_wake(void)
-{
-	/*
-	 * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means
-	 * that the SCI has triggered while suspended, so cancel the wakeup in
-	 * case it has not been a wakeup event (the GPEs will be checked later).
-	 */
-	if (acpi_sci_irq_valid() &&
-	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
-		pm_system_cancel_wakeup();
-}
-
-static void acpi_freeze_sync(void)
-{
-	/*
-	 * Process all pending events in case there are any wakeup ones.
-	 *
-	 * The EC driver uses the system workqueue, so that one needs to be
-	 * flushed too.
-	 */
-	acpi_os_wait_events_complete();
-	flush_scheduled_work();
-}
-
 static void acpi_freeze_restore(void)
 {
 	acpi_disable_wakeup_devices(ACPI_STATE_S0);
 	if (acpi_sci_irq_valid())
 		disable_irq_wake(acpi_sci_irq);
-
 	acpi_enable_all_runtime_gpes();
 }
 
@@ -707,8 +681,6 @@ static void acpi_freeze_end(void)
 static const struct platform_freeze_ops acpi_freeze_ops = {
 	.begin = acpi_freeze_begin,
 	.prepare = acpi_freeze_prepare,
-	.wake = acpi_freeze_wake,
-	.sync = acpi_freeze_sync,
 	.restore = acpi_freeze_restore,
 	.end = acpi_freeze_end,
 };
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e987a6f55d36..9faee1c893e5 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1091,6 +1091,11 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 	if (async_error)
 		goto Complete;
 
+	if (pm_wakeup_pending()) {
+		async_error = -EBUSY;
+		goto Complete;
+	}
+
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 9c36b27996fc..c313b600d356 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly;
 /* First wakeup IRQ seen by the kernel in the last cycle. */
 unsigned int pm_wakeup_irq __read_mostly;
 
-/* If greater than 0 and the system is suspending, terminate the suspend. */
-static atomic_t pm_abort_suspend __read_mostly;
+/* If set and the system is suspending, terminate the suspend. */
+static bool pm_abort_suspend __read_mostly;
 
 /*
  * Combined counters of registered wakeup events and wakeup events in progress.
@@ -855,26 +855,20 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
-	return ret || atomic_read(&pm_abort_suspend) > 0;
+	return ret || pm_abort_suspend;
 }
 
 void pm_system_wakeup(void)
 {
-	atomic_inc(&pm_abort_suspend);
+	pm_abort_suspend = true;
 	freeze_wake();
 }
 EXPORT_SYMBOL_GPL(pm_system_wakeup);
 
-void pm_system_cancel_wakeup(void)
-{
-	atomic_dec(&pm_abort_suspend);
-}
-
-void pm_wakeup_clear(bool reset)
+void pm_wakeup_clear(void)
 {
+	pm_abort_suspend = false;
 	pm_wakeup_irq = 0;
-	if (reset)
-		atomic_set(&pm_abort_suspend, 0);
 }
 
 void pm_system_irq_wakeup(unsigned int irq_number)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0b1cf32edfd7..d9718378a8be 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -189,8 +189,6 @@ struct platform_suspend_ops {
 struct platform_freeze_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
-	void (*wake)(void);
-	void (*sync)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
@@ -430,8 +428,7 @@ extern unsigned int pm_wakeup_irq;
 
 extern bool pm_wakeup_pending(void);
 extern void pm_system_wakeup(void);
-extern void pm_system_cancel_wakeup(void);
-extern void pm_wakeup_clear(bool reset);
+extern void pm_wakeup_clear(void);
 extern void pm_system_irq_wakeup(unsigned int irq_number);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
@@ -481,7 +478,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 
 static inline bool pm_wakeup_pending(void) { return false; }
 static inline void pm_system_wakeup(void) {}
-static inline void pm_wakeup_clear(bool reset) {}
+static inline void pm_wakeup_clear(void) {}
 static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 
 static inline void lock_system_sleep(void) {}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 78672d324a6e..c7209f060eeb 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -132,7 +132,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
-	pm_wakeup_clear(true);
+	pm_wakeup_clear();
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c0248c74d6d4..15e6baef5c73 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -72,8 +72,6 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
-
 	spin_lock_irq(&suspend_freeze_lock);
 	if (pm_wakeup_pending())
 		goto out;
@@ -100,27 +98,6 @@ static void freeze_enter(void)
  out:
 	suspend_freeze_state = FREEZE_STATE_NONE;
 	spin_unlock_irq(&suspend_freeze_lock);
-
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
-}
-
-static void s2idle_loop(void)
-{
-	do {
-		freeze_enter();
-
-		if (freeze_ops && freeze_ops->wake)
-			freeze_ops->wake();
-
-		dpm_resume_noirq(PMSG_RESUME);
-		if (freeze_ops && freeze_ops->sync)
-			freeze_ops->sync();
-
-		if (pm_wakeup_pending())
-			break;
-
-		pm_wakeup_clear(false);
-	} while (!dpm_suspend_noirq(PMSG_SUSPEND));
 }
 
 void freeze_wake(void)
@@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
-		s2idle_loop();
-		goto Platform_early_resume;
+		trace_suspend_resume(TPS("machine_suspend"), state, true);
+		freeze_enter();
+		trace_suspend_resume(TPS("machine_suspend"), state, false);
+		goto Platform_wake;
 	}
 
 	error = disable_nonboot_cpus();
-- 
cgit v1.2.3


From 5acde34a5a420ffe7441bb7d3909dc2618025c3c Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Mon, 5 Jun 2017 12:22:50 +0100
Subject: net: phy: add 802.3 clause 45 support to phylib

Add generic helpers for 802.3 clause 45 PHYs for >= 10Gbps support.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Makefile     |   2 +-
 drivers/net/phy/phy-c45.c    | 234 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/phy/phy_device.c |  20 ++--
 include/linux/phy.h          |  12 +++
 4 files changed, 253 insertions(+), 15 deletions(-)
 create mode 100644 drivers/net/phy/phy-c45.c

(limited to 'include/linux')

diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index e2fde094f63d..ae58f507aba9 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -1,6 +1,6 @@
 # Makefile for Linux PHY drivers and MDIO bus drivers
 
-libphy-y			:= phy.o phy-core.o phy_device.o
+libphy-y			:= phy.o phy-c45.o phy-core.o phy_device.o
 mdio-bus-y			+= mdio_bus.o mdio_device.o
 
 ifdef CONFIG_MDIO_DEVICE
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
new file mode 100644
index 000000000000..d311d6e6141c
--- /dev/null
+++ b/drivers/net/phy/phy-c45.c
@@ -0,0 +1,234 @@
+/*
+ * Clause 45 PHY support
+ */
+#include <linux/ethtool.h>
+#include <linux/export.h>
+#include <linux/mdio.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+/**
+ * genphy_c45_setup_forced - configures a forced speed
+ * @phydev: target phy_device struct
+ */
+int genphy_c45_pma_setup_forced(struct phy_device *phydev)
+{
+	int ctrl1, ctrl2, ret;
+
+	/* Half duplex is not supported */
+	if (phydev->duplex != DUPLEX_FULL)
+		return -EINVAL;
+
+	ctrl1 = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1);
+	if (ctrl1 < 0)
+		return ctrl1;
+
+	ctrl2 = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL2);
+	if (ctrl2 < 0)
+		return ctrl2;
+
+	ctrl1 &= ~MDIO_CTRL1_SPEEDSEL;
+	/*
+	 * PMA/PMD type selection is 1.7.5:0 not 1.7.3:0.  See 45.2.1.6.1
+	 * in 802.3-2012 and 802.3-2015.
+	 */
+	ctrl2 &= ~(MDIO_PMA_CTRL2_TYPE | 0x30);
+
+	switch (phydev->speed) {
+	case SPEED_10:
+		ctrl2 |= MDIO_PMA_CTRL2_10BT;
+		break;
+	case SPEED_100:
+		ctrl1 |= MDIO_PMA_CTRL1_SPEED100;
+		ctrl2 |= MDIO_PMA_CTRL2_100BTX;
+		break;
+	case SPEED_1000:
+		ctrl1 |= MDIO_PMA_CTRL1_SPEED1000;
+		/* Assume 1000base-T */
+		ctrl2 |= MDIO_PMA_CTRL2_1000BT;
+		break;
+	case SPEED_10000:
+		ctrl1 |= MDIO_CTRL1_SPEED10G;
+		/* Assume 10Gbase-T */
+		ctrl2 |= MDIO_PMA_CTRL2_10GBT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1, ctrl1);
+	if (ret < 0)
+		return ret;
+
+	return phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL2, ctrl2);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced);
+
+/**
+ * genphy_c45_an_disable_aneg - disable auto-negotiation
+ * @phydev: target phy_device struct
+ *
+ * Disable auto-negotiation in the Clause 45 PHY. The link parameters
+ * parameters are controlled through the PMA/PMD MMD registers.
+ *
+ * Returns zero on success, negative errno code on failure.
+ */
+int genphy_c45_an_disable_aneg(struct phy_device *phydev)
+{
+	int val;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1);
+	if (val < 0)
+		return val;
+
+	val &= ~(MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART);
+
+	return phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, val);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_an_disable_aneg);
+
+/**
+ * genphy_c45_restart_aneg - Enable and restart auto-negotiation
+ * @phydev: target phy_device struct
+ *
+ * This assumes that the auto-negotiation MMD is present.
+ *
+ * Enable and restart auto-negotiation.
+ */
+int genphy_c45_restart_aneg(struct phy_device *phydev)
+{
+	int val;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1);
+	if (val < 0)
+		return val;
+
+	val |= MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART;
+
+	return phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, val);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg);
+
+/**
+ * genphy_c45_aneg_done - return auto-negotiation complete status
+ * @phydev: target phy_device struct
+ *
+ * This assumes that the auto-negotiation MMD is present.
+ *
+ * Reads the status register from the auto-negotiation MMD, returning:
+ * - positive if auto-negotiation is complete
+ * - negative errno code on error
+ * - zero otherwise
+ */
+int genphy_c45_aneg_done(struct phy_device *phydev)
+{
+	int val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
+
+	return val < 0 ? val : val & MDIO_AN_STAT1_COMPLETE ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_aneg_done);
+
+/**
+ * genphy_c45_read_link - read the overall link status from the MMDs
+ * @phydev: target phy_device struct
+ * @mmd_mask: MMDs to read status from
+ *
+ * Read the link status from the specified MMDs, and if they all indicate
+ * that the link is up, return positive.  If an error is encountered,
+ * a negative errno will be returned, otherwise zero.
+ */
+int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
+{
+	int val, devad;
+	bool link = true;
+
+	while (mmd_mask) {
+		devad = __ffs(mmd_mask);
+		mmd_mask &= ~BIT(devad);
+
+		/* The link state is latched low so that momentary link
+		 * drops can be detected.  Do not double-read the status
+		 * register if the link is down.
+		 */
+		val = phy_read_mmd(phydev, devad, MDIO_STAT1);
+		if (val < 0)
+			return val;
+
+		if (!(val & MDIO_STAT1_LSTATUS))
+			link = false;
+	}
+
+	return link;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_read_link);
+
+/**
+ * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * @phydev: target phy_device struct
+ *
+ * Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
+ * filling in the link partner advertisment, pause and asym_pause members
+ * in @phydev.  This assumes that the auto-negotiation MMD is present, and
+ * the backplane bit (7.48.0) is clear.  Clause 45 PHY drivers are expected
+ * to fill in the remainder of the link partner advert from vendor registers.
+ */
+int genphy_c45_read_lpa(struct phy_device *phydev)
+{
+	int val;
+
+	/* Read the link partner's base page advertisment */
+	val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
+	if (val < 0)
+		return val;
+
+	phydev->lp_advertising = mii_lpa_to_ethtool_lpa_t(val);
+	phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
+	phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
+
+	/* Read the link partner's 10G advertisment */
+	val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
+	if (val < 0)
+		return val;
+
+	if (val & MDIO_AN_10GBT_STAT_LP10G)
+		phydev->lp_advertising |= ADVERTISED_10000baseT_Full;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_read_lpa);
+
+/**
+ * genphy_c45_read_pma - read link speed etc from PMA
+ * @phydev: target phy_device struct
+ */
+int genphy_c45_read_pma(struct phy_device *phydev)
+{
+	int val;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1);
+	if (val < 0)
+		return val;
+
+	switch (val & MDIO_CTRL1_SPEEDSEL) {
+	case 0:
+		phydev->speed = SPEED_10;
+		break;
+	case MDIO_PMA_CTRL1_SPEED100:
+		phydev->speed = SPEED_100;
+		break;
+	case MDIO_PMA_CTRL1_SPEED1000:
+		phydev->speed = SPEED_1000;
+		break;
+	case MDIO_CTRL1_SPEED10G:
+		phydev->speed = SPEED_10000;
+		break;
+	default:
+		phydev->speed = SPEED_UNKNOWN;
+		break;
+	}
+
+	phydev->duplex = DUPLEX_FULL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_read_pma);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 37a1e98908e3..6a79e24fa312 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1508,27 +1508,19 @@ EXPORT_SYMBOL(genphy_read_status);
 
 static int gen10g_read_status(struct phy_device *phydev)
 {
-	int devad, reg;
 	u32 mmd_mask = phydev->c45_ids.devices_in_package;
-
-	phydev->link = 1;
+	int ret;
 
 	/* For now just lie and say it's 10G all the time */
 	phydev->speed = SPEED_10000;
 	phydev->duplex = DUPLEX_FULL;
 
-	for (devad = 0; mmd_mask; devad++, mmd_mask = mmd_mask >> 1) {
-		if (!(mmd_mask & 1))
-			continue;
+	/* Avoid reading the vendor MMDs */
+	mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2));
 
-		/* Read twice because link state is latched and a
-		 * read moves the current state into the register
-		 */
-		phy_read_mmd(phydev, devad, MDIO_STAT1);
-		reg = phy_read_mmd(phydev, devad, MDIO_STAT1);
-		if (reg < 0 || !(reg & MDIO_STAT1_LSTATUS))
-			phydev->link = 0;
-	}
+	ret = genphy_c45_read_link(phydev, mmd_mask);
+
+	phydev->link = ret > 0 ? 1 : 0;
 
 	return 0;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 748e526c0698..a47eb5e841d2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -827,6 +827,8 @@ static inline const char *phydev_name(const struct phy_device *phydev)
 void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 	__printf(2, 3);
 void phy_attached_info(struct phy_device *phydev);
+
+/* Clause 22 PHY */
 int genphy_config_init(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
@@ -841,6 +843,16 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
 	return 0;
 }
+
+/* Clause 45 PHY */
+int genphy_c45_restart_aneg(struct phy_device *phydev);
+int genphy_c45_aneg_done(struct phy_device *phydev);
+int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask);
+int genphy_c45_read_lpa(struct phy_device *phydev);
+int genphy_c45_read_pma(struct phy_device *phydev);
+int genphy_c45_pma_setup_forced(struct phy_device *phydev);
+int genphy_c45_an_disable_aneg(struct phy_device *phydev);
+
 void phy_driver_unregister(struct phy_driver *drv);
 void phy_drivers_unregister(struct phy_driver *drv, int n);
 int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
-- 
cgit v1.2.3


From 002ba7058a7f141cf22d37967a4ef78239c50e9e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Mon, 5 Jun 2017 12:23:00 +0100
Subject: net: phy: hook up clause 45 autonegotiation restart

genphy_restart_aneg() can only restart autonegotiation on clause 22
PHYs.  Add a phy_restart_aneg() function which selects between the
clause 22 and clause 45 restart functionality depending on the PHY
type and whether the Clause 45 PHY supports the Clause 22 register set.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 23 +++++++++++++++++++++--
 include/linux/phy.h   |  1 +
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c232ee04754b..12548e5b6037 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -149,6 +149,25 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts)
 	return 0;
 }
 
+/**
+ * phy_restart_aneg - restart auto-negotiation
+ * @phydev: target phy_device struct
+ *
+ * Restart the autonegotiation on @phydev.  Returns >= 0 on success or
+ * negative errno on error.
+ */
+int phy_restart_aneg(struct phy_device *phydev)
+{
+	int ret;
+
+	if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0)))
+		ret = genphy_c45_restart_aneg(phydev);
+	else
+		ret = genphy_restart_aneg(phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_restart_aneg);
 
 /**
  * phy_aneg_done - return auto-negotiation status
@@ -1397,7 +1416,7 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
 		/* Restart autonegotiation so the new modes get sent to the
 		 * link partner.
 		 */
-		ret = genphy_restart_aneg(phydev);
+		ret = phy_restart_aneg(phydev);
 		if (ret < 0)
 			return ret;
 	}
@@ -1456,6 +1475,6 @@ int phy_ethtool_nway_reset(struct net_device *ndev)
 	if (!phydev->drv)
 		return -EIO;
 
-	return genphy_restart_aneg(phydev);
+	return phy_restart_aneg(phydev);
 }
 EXPORT_SYMBOL(phy_ethtool_nway_reset);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a47eb5e841d2..b24de9ddc886 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -804,6 +804,7 @@ int phy_start_aneg(struct phy_device *phydev);
 int phy_aneg_done(struct phy_device *phydev);
 
 int phy_stop_interrupts(struct phy_device *phydev);
+int phy_restart_aneg(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From c125ca091873f2e848cc31c2371a3a66c2fd4dd8 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Mon, 5 Jun 2017 12:23:10 +0100
Subject: net: phy: add XAUI and 10GBASE-KR PHY connection types

XAUI allows XGMII to reach an extended distance by using a XGXS layer at
each end of the MAC to PHY link, operating over four Serdes lanes.

10GBASE-KR is a single lane Serdes backplane ethernet connection method
with autonegotiation on the link.  Some PHYs use this to connect to the
ethernet interface at 10G speeds, switching to other connection types
when utilising slower speeds.

10GBASE-KR is also used for XFI and SFI to connect to XFP and SFP fiber
modules.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net-phydev   | 2 +-
 Documentation/devicetree/bindings/net/ethernet.txt | 2 ++
 include/linux/phy.h                                | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index c768d5fd8496..6ebabfb27912 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -32,5 +32,5 @@ Description:
 		<empty> (not available), mii, gmii, sgmii, tbi, rev-mii,
 		rmii, rgmii, rgmii-id, rgmii-rxid, rgmii-txid, rtbi, smii
 		xgmii, moca, qsgmii, trgmii, 1000base-x, 2500base-x, rxaui,
-		unknown
+		xaui, 10gbase-kr, unknown
 
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index 3a6916909d90..d4abe9a98109 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -32,6 +32,8 @@ The following properties are common to the Ethernet controllers:
   * "2000base-x",
   * "2500base-x",
   * "rxaui"
+  * "xaui"
+  * "10gbase-kr" (10GBASE-KR, XFI, SFI)
 - phy-connection-type: the same as "phy-mode" property but described in ePAPR;
 - phy-handle: phandle, specifies a reference to a node representing a PHY
   device; this property is described in ePAPR and so preferred;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index b24de9ddc886..414242200a90 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -83,6 +83,9 @@ typedef enum {
 	PHY_INTERFACE_MODE_1000BASEX,
 	PHY_INTERFACE_MODE_2500BASEX,
 	PHY_INTERFACE_MODE_RXAUI,
+	PHY_INTERFACE_MODE_XAUI,
+	/* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */
+	PHY_INTERFACE_MODE_10GKR,
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
@@ -149,6 +152,10 @@ static inline const char *phy_modes(phy_interface_t interface)
 		return "2500base-x";
 	case PHY_INTERFACE_MODE_RXAUI:
 		return "rxaui";
+	case PHY_INTERFACE_MODE_XAUI:
+		return "xaui";
+	case PHY_INTERFACE_MODE_10GKR:
+		return "10gbase-kr";
 	default:
 		return "unknown";
 	}
-- 
cgit v1.2.3


From 6f428096a4d1e1809b162ca40dec5f7d09f3f1d1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 6 Jun 2017 15:32:03 +0200
Subject: driver core: remove CLASS_ATTR usage

There was only 2 remaining users of CLASS_ATTR() so let's finally get
rid of them and force everyone to use the correct RW/RO/WO versions
instead.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/dlpar.c    | 2 +-
 arch/powerpc/platforms/pseries/mobility.c | 7 ++++---
 include/linux/device.h                    | 2 --
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index bda18d8e1674..39187696ee74 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -588,7 +588,7 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 	return sprintf(buf, "%s\n", "memory,cpu");
 }
 
-static CLASS_ATTR(dlpar, S_IWUSR | S_IRUSR, dlpar_show, dlpar_store);
+static CLASS_ATTR_RW(dlpar);
 
 static int __init pseries_dlpar_init(void)
 {
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 5a0c7ba429ce..2da4851eff99 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -349,8 +349,9 @@ void post_mobility_fixup(void)
 	return;
 }
 
-static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
-			     const char *buf, size_t count)
+static ssize_t migration_store(struct class *class,
+			       struct class_attribute *attr, const char *buf,
+			       size_t count)
 {
 	u64 streamid;
 	int rc;
@@ -380,7 +381,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
  */
 #define MIGRATION_API_VERSION	1
 
-static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store);
+static CLASS_ATTR_WO(migration);
 static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
 
 static int __init mobility_sysfs_init(void)
diff --git a/include/linux/device.h b/include/linux/device.h
index 9ef518af5515..9a902ae33932 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -465,8 +465,6 @@ struct class_attribute {
 			const char *buf, size_t count);
 };
 
-#define CLASS_ATTR(_name, _mode, _show, _store) \
-	struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store)
 #define CLASS_ATTR_RW(_name) \
 	struct class_attribute class_attr_##_name = __ATTR_RW(_name)
 #define CLASS_ATTR_RO(_name) \
-- 
cgit v1.2.3


From 94116f8126de9762751fd92731581b73b56292e5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 5 Jun 2017 19:40:46 +0300
Subject: ACPI: Switch to use generic guid_t in acpi_evaluate_dsm()

acpi_evaluate_dsm() and friends take a pointer to a raw buffer of 16
bytes. Instead we convert them to use guid_t type. At the same time we
convert current users.

acpi_str_to_uuid() becomes useless after the conversion and it's safe to
get rid of it.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Yisen Zhuang <yisen.zhuang@huawei.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/acpi/acpi_extlog.c                         |  4 ++--
 drivers/acpi/bus.c                                 | 23 ----------------------
 drivers/acpi/nfit/core.c                           |  6 +++---
 drivers/acpi/utils.c                               | 16 +++++++--------
 drivers/char/tpm/tpm_crb.c                         |  9 ++++-----
 drivers/char/tpm/tpm_ppi.c                         | 20 ++++++++-----------
 drivers/gpu/drm/i915/intel_acpi.c                  | 14 +++++--------
 drivers/gpu/drm/nouveau/nouveau_acpi.c             | 20 +++++++++----------
 drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c     |  9 ++++-----
 drivers/hid/i2c-hid/i2c-hid.c                      |  9 ++++-----
 drivers/iommu/dmar.c                               | 11 +++++------
 drivers/mmc/host/sdhci-pci-core.c                  |  9 ++++-----
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 15 +++++++-------
 drivers/pci/pci-acpi.c                             | 13 ++++++------
 drivers/pci/pci-label.c                            |  4 ++--
 drivers/usb/dwc3/dwc3-pci.c                        | 10 +++++-----
 drivers/usb/host/xhci-pci.c                        |  9 ++++-----
 drivers/usb/misc/ucsi.c                            |  6 +++---
 drivers/usb/typec/typec_wcove.c                    |  8 ++++----
 include/acpi/acpi_bus.h                            | 11 ++++++-----
 include/linux/acpi.h                               |  3 +--
 include/linux/pci-acpi.h                           |  2 +-
 sound/soc/intel/skylake/skl-nhlt.c                 |  7 ++++---
 tools/testing/nvdimm/test/iomap.c                  |  6 +++---
 tools/testing/nvdimm/test/nfit.c                   |  2 +-
 tools/testing/nvdimm/test/nfit_test.h              |  4 +++-
 26 files changed, 106 insertions(+), 144 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 193529417cc3..560fdae8cc59 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -190,9 +190,9 @@ static bool __init extlog_get_l1addr(void)
 		return false;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
 		return false;
-	if (!acpi_check_dsm(handle, guid.b, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
+	if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
 		return false;
-	obj = acpi_evaluate_dsm_typed(handle, guid.b, EXTLOG_DSM_REV,
+	obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
 				      EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
 	if (!obj) {
 		return false;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 042cd16265b3..5a6fbe0fcaf2 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -196,29 +196,6 @@ static void acpi_print_osc_error(acpi_handle handle,
 	pr_debug("\n");
 }
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
-{
-	int i;
-	static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
-		24, 26, 28, 30, 32, 34};
-
-	if (strlen(str) != 36)
-		return AE_BAD_PARAMETER;
-	for (i = 0; i < 36; i++) {
-		if (i == 8 || i == 13 || i == 18 || i == 23) {
-			if (str[i] != '-')
-				return AE_BAD_PARAMETER;
-		} else if (!isxdigit(str[i]))
-			return AE_BAD_PARAMETER;
-	}
-	for (i = 0; i < 16; i++) {
-		uuid[i] = hex_to_bin(str[opc_map_to_uuid[i]]) << 4;
-		uuid[i] |= hex_to_bin(str[opc_map_to_uuid[i] + 1]);
-	}
-	return AE_OK;
-}
-EXPORT_SYMBOL_GPL(acpi_str_to_uuid);
-
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 {
 	acpi_status status;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index d9b39d0e9d6a..097eff0b963d 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
-	out_obj = acpi_evaluate_dsm(handle, guid.b, 1, func, &in_obj);
+	out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj);
 	if (!out_obj) {
 		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
 				cmd_name);
@@ -1476,7 +1476,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 
 	guid = to_nfit_uuid(nfit_mem->family);
 	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-		if (acpi_check_dsm(adev_dimm->handle, guid.b, 1, 1ULL << i))
+		if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
 	return 0;
@@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 		return;
 
 	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-		if (acpi_check_dsm(adev->handle, guid.b, 1, 1ULL << i))
+		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nd_desc->cmd_mask);
 }
 
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 27d0dcfcf47d..b9d956c916f5 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -613,19 +613,19 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock)
 /**
  * acpi_evaluate_dsm - evaluate device's _DSM method
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes
+ * @guid: GUID of requested functions, should be 16 bytes
  * @rev: revision number of requested function
  * @func: requested function number
  * @argv4: the function specific parameter
  *
- * Evaluate device's _DSM method with specified UUID, revision id and
+ * Evaluate device's _DSM method with specified GUID, revision id and
  * function number. Caller needs to free the returned object.
  *
  * Though ACPI defines the fourth parameter for _DSM should be a package,
  * some old BIOSes do expect a buffer or an integer etc.
  */
 union acpi_object *
-acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
+acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func,
 		  union acpi_object *argv4)
 {
 	acpi_status ret;
@@ -638,7 +638,7 @@ acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
 
 	params[0].type = ACPI_TYPE_BUFFER;
 	params[0].buffer.length = 16;
-	params[0].buffer.pointer = (char *)uuid;
+	params[0].buffer.pointer = (u8 *)guid;
 	params[1].type = ACPI_TYPE_INTEGER;
 	params[1].integer.value = rev;
 	params[2].type = ACPI_TYPE_INTEGER;
@@ -666,7 +666,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
 /**
  * acpi_check_dsm - check if _DSM method supports requested functions.
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes at least
+ * @guid: GUID of requested functions, should be 16 bytes at least
  * @rev: revision number of requested functions
  * @funcs: bitmap of requested functions
  *
@@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
  * functions. Currently only support 64 functions at maximum, should be
  * enough for now.
  */
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs)
 {
 	int i;
 	u64 mask = 0;
@@ -683,7 +683,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	if (funcs == 0)
 		return false;
 
-	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
+	obj = acpi_evaluate_dsm(handle, guid, rev, 0, NULL);
 	if (!obj)
 		return false;
 
@@ -697,7 +697,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
-	 * function 0 for the specified UUID and revision.
+	 * function 0 for the specified GUID and revision.
 	 */
 	if ((mask & 0x1) && (mask & funcs) == funcs)
 		return true;
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index b917b9d5f710..c378c7b15d49 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -27,10 +27,9 @@
 
 #define ACPI_SIG_TPM2 "TPM2"
 
-static const u8 CRB_ACPI_START_UUID[] = {
-	/* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47,
-	/* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4
-};
+static const guid_t crb_acpi_start_guid =
+	GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
+		  0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4);
 
 enum crb_defaults {
 	CRB_ACPI_START_REVISION_ID = 1,
@@ -266,7 +265,7 @@ static int crb_do_acpi_start(struct tpm_chip *chip)
 	int rc;
 
 	obj = acpi_evaluate_dsm(chip->acpi_dev_handle,
-				CRB_ACPI_START_UUID,
+				&crb_acpi_start_guid,
 				CRB_ACPI_START_REVISION_ID,
 				CRB_ACPI_START_INDEX,
 				NULL);
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c
index 692a2c6ae036..86dd8521feef 100644
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -32,20 +32,16 @@
 #define PPI_VS_REQ_START	128
 #define PPI_VS_REQ_END		255
 
-static const u8 tpm_ppi_uuid[] = {
-	0xA6, 0xFA, 0xDD, 0x3D,
-	0x1B, 0x36,
-	0xB4, 0x4E,
-	0xA4, 0x24,
-	0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53
-};
+static const guid_t tpm_ppi_guid =
+	GUID_INIT(0x3DDDFAA6, 0x361B, 0x4EB4,
+		  0xA4, 0x24, 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53);
 
 static inline union acpi_object *
 tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type,
 	     union acpi_object *argv4)
 {
 	BUG_ON(!ppi_handle);
-	return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid,
+	return acpi_evaluate_dsm_typed(ppi_handle, &tpm_ppi_guid,
 				       TPM_PPI_REVISION_ID,
 				       func, argv4, type);
 }
@@ -107,7 +103,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
 	 * is updated with function index from SUBREQ to SUBREQ2 since PPI
 	 * version 1.1
 	 */
-	if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+	if (acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
 			   TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2))
 		func = TPM_PPI_FN_SUBREQ2;
 
@@ -268,7 +264,7 @@ static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start,
 		"User not required",
 	};
 
-	if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
+	if (!acpi_check_dsm(dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID,
 			    1 << TPM_PPI_FN_GETOPR))
 		return -EPERM;
 
@@ -341,12 +337,12 @@ void tpm_add_ppi(struct tpm_chip *chip)
 	if (!chip->acpi_dev_handle)
 		return;
 
-	if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+	if (!acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
 			    TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION))
 		return;
 
 	/* Cache PPI version string. */
-	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid,
+	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, &tpm_ppi_guid,
 				      TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
 				      NULL, ACPI_TYPE_STRING);
 	if (obj) {
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index eb638a1e69d2..42fb436f6cdc 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -15,13 +15,9 @@ static struct intel_dsm_priv {
 	acpi_handle dhandle;
 } intel_dsm_priv;
 
-static const u8 intel_dsm_guid[] = {
-	0xd3, 0x73, 0xd8, 0x7e,
-	0xd0, 0xc2,
-	0x4f, 0x4e,
-	0xa8, 0x54,
-	0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c
-};
+static const guid_t intel_dsm_guid =
+	GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f,
+		  0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c);
 
 static char *intel_dsm_port_name(u8 id)
 {
@@ -80,7 +76,7 @@ static void intel_dsm_platform_mux_info(void)
 	int i;
 	union acpi_object *pkg, *connector_count;
 
-	pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid,
+	pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, &intel_dsm_guid,
 			INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO,
 			NULL, ACPI_TYPE_PACKAGE);
 	if (!pkg) {
@@ -118,7 +114,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
 	if (!dhandle)
 		return false;
 
-	if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID,
+	if (!acpi_check_dsm(dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID,
 			    1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) {
 		DRM_DEBUG_KMS("no _DSM method for intel device\n");
 		return false;
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 39468c218027..7459ef9943ec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -60,15 +60,13 @@ bool nouveau_is_v1_dsm(void) {
 }
 
 #ifdef CONFIG_VGA_SWITCHEROO
-static const char nouveau_dsm_muid[] = {
-	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
-	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-};
+static const guid_t nouveau_dsm_muid =
+	GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48,
+		  0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4);
 
-static const char nouveau_op_dsm_muid[] = {
-	0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47,
-	0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0,
-};
+static const guid_t nouveau_op_dsm_muid =
+	GUID_INIT(0xA486D8F8, 0x0BDA, 0x471B,
+		  0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0);
 
 static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
 {
@@ -86,7 +84,7 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *
 		args_buff[i] = (arg >> i * 8) & 0xFF;
 
 	*result = 0;
-	obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100,
+	obj = acpi_evaluate_dsm_typed(handle, &nouveau_op_dsm_muid, 0x00000100,
 				      func, &argv4, ACPI_TYPE_BUFFER);
 	if (!obj) {
 		acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -138,7 +136,7 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg)
 		.integer.value = arg,
 	};
 
-	obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102,
+	obj = acpi_evaluate_dsm_typed(handle, &nouveau_dsm_muid, 0x00000102,
 				      func, &argv4, ACPI_TYPE_INTEGER);
 	if (!obj) {
 		acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -259,7 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
 	if (!acpi_has_method(dhandle, "_DSM"))
 		return;
 
-	supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
+	supports_mux = acpi_check_dsm(dhandle, &nouveau_dsm_muid, 0x00000102,
 				      1 << NOUVEAU_DSM_POWER);
 	optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
index e3e2f5e83815..f44682d62f75 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
@@ -81,10 +81,9 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 {
 	struct nvkm_subdev *subdev = &mxm->subdev;
 	struct nvkm_device *device = subdev->device;
-	static char muid[] = {
-		0x00, 0xA4, 0x04, 0x40, 0x7D, 0x91, 0xF2, 0x4C,
-		0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65
-	};
+	static guid_t muid =
+		GUID_INIT(0x4004A400, 0x917D, 0x4CF2,
+			  0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65);
 	u32 mxms_args[] = { 0x00000000 };
 	union acpi_object argv4 = {
 		.buffer.type = ACPI_TYPE_BUFFER,
@@ -105,7 +104,7 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 	 * unless you pass in exactly the version it supports..
 	 */
 	rev = (version & 0xf0) << 4 | (version & 0x0f);
-	obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4);
+	obj = acpi_evaluate_dsm(handle, &muid, rev, 0x00000010, &argv4);
 	if (!obj) {
 		nvkm_debug(subdev, "DSM MXMS failed\n");
 		return false;
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index fb55fb4c39fc..04015032a35a 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -872,10 +872,9 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid)
 static int i2c_hid_acpi_pdata(struct i2c_client *client,
 		struct i2c_hid_platform_data *pdata)
 {
-	static u8 i2c_hid_guid[] = {
-		0xF7, 0xF6, 0xDF, 0x3C, 0x67, 0x42, 0x55, 0x45,
-		0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE,
-	};
+	static guid_t i2c_hid_guid =
+		GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+			  0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
 	union acpi_object *obj;
 	struct acpi_device *adev;
 	acpi_handle handle;
@@ -884,7 +883,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
 	if (!handle || acpi_bus_get_device(handle, &adev))
 		return -ENODEV;
 
-	obj = acpi_evaluate_dsm_typed(handle, i2c_hid_guid, 1, 1, NULL,
+	obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
 				      ACPI_TYPE_INTEGER);
 	if (!obj) {
 		dev_err(&client->dev, "device _DSM execution failed\n");
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index cbf7763d8091..c8b0329c85d2 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1808,10 +1808,9 @@ IOMMU_INIT_POST(detect_intel_iommu);
  * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
  * "Remapping Hardware Unit Hot Plug".
  */
-static u8 dmar_hp_uuid[] = {
-	/* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
-	/* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
-};
+static guid_t dmar_hp_guid =
+	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
+		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
 
 /*
  * Currently there's only one revision and BIOS will not check the revision id,
@@ -1824,7 +1823,7 @@ static u8 dmar_hp_uuid[] = {
 
 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
 {
-	return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
 }
 
 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
@@ -1843,7 +1842,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func,
 	if (!dmar_detect_dsm(handle, func))
 		return 0;
 
-	obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
 				      func, NULL, ACPI_TYPE_BUFFER);
 	if (!obj)
 		return -ENODEV;
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 92fc3f7c538d..9577beb278e7 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -404,10 +404,9 @@ struct intel_host {
 	bool	d3_retune;
 };
 
-const u8 intel_dsm_uuid[] = {
-	0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46,
-	0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61,
-};
+const guid_t intel_dsm_guid =
+	GUID_INIT(0xF6C13EA5, 0x65CD, 0x461F,
+		  0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61);
 
 static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
 		       unsigned int fn, u32 *result)
@@ -416,7 +415,7 @@ static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
 	int err = 0;
 	size_t len;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL);
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL);
 	if (!obj)
 		return -EOPNOTSUPP;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index e13aa064a8e9..6b15a507999c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -29,10 +29,9 @@ enum _dsm_rst_type {
 	HNS_ROCE_RESET_FUNC     = 0x7,
 };
 
-const u8 hns_dsaf_acpi_dsm_uuid[] = {
-	0x1A, 0xAA, 0x85, 0x1A, 0x93, 0xE2, 0x5E, 0x41,
-	0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A
-};
+const guid_t hns_dsaf_acpi_dsm_guid =
+	GUID_INIT(0x1A85AA1A, 0xE293, 0x415E,
+		  0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A);
 
 static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val)
 {
@@ -151,7 +150,7 @@ static void hns_dsaf_acpi_srst_by_port(struct dsaf_device *dsaf_dev, u8 op_type,
 	argv4.package.elements = obj_args;
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(dsaf_dev->dev),
-				hns_dsaf_acpi_dsm_uuid, 0, op_type, &argv4);
+				&hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
 	if (!obj) {
 		dev_warn(dsaf_dev->dev, "reset port_type%d port%d fail!",
 			 port_type, port);
@@ -434,7 +433,7 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb)
 	argv4.package.elements = &obj_args,
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_GET_PORT_TYPE_FUNC, &argv4);
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -474,7 +473,7 @@ int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
 	argv4.package.elements = &obj_args,
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_GET_SFP_STAT_FUNC, &argv4);
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -565,7 +564,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en)
 	argv4.package.elements = obj_args;
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dsaf_dev->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_SERDES_LP_FUNC, &argv4);
 	if (!obj) {
 		dev_warn(mac_cb->dsaf_dev->dev, "set port%d serdes lp fail!",
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 001860361434..47070cff508c 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -21,13 +21,12 @@
 #include "pci.h"
 
 /*
- * The UUID is defined in the PCI Firmware Specification available here:
+ * The GUID is defined in the PCI Firmware Specification available here:
  * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
  */
-const u8 pci_acpi_dsm_uuid[] = {
-	0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
-	0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
-};
+const guid_t pci_acpi_dsm_guid =
+	GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+		  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
 
 #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
 static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res)
@@ -680,7 +679,7 @@ void acpi_pci_add_bus(struct pci_bus *bus)
 	if (!pci_is_root_bus(bus))
 		return;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3,
 				RESET_DELAY_DSM, NULL);
 	if (!obj)
 		return;
@@ -745,7 +744,7 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev,
 	if (bridge->ignore_reset_delay)
 		pdev->d3cold_delay = 0;
 
-	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3,
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3,
 				FUNCTION_DELAY_DSM, NULL);
 	if (!obj)
 		return;
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 51357377efbc..2d8db3ead6e8 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -172,7 +172,7 @@ static int dsm_get_label(struct device *dev, char *buf,
 	if (!handle)
 		return -1;
 
-	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2,
 				DEVICE_LABEL_DSM, NULL);
 	if (!obj)
 		return -1;
@@ -212,7 +212,7 @@ static bool device_has_dsm(struct device *dev)
 	if (!handle)
 		return false;
 
-	return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+	return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
 				1 << DEVICE_LABEL_DSM);
 }
 
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 84a2cebfc712..fe851544d7fb 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -42,7 +42,7 @@
 #define PCI_DEVICE_ID_INTEL_CNPLP		0x9dee
 #define PCI_DEVICE_ID_INTEL_CNPH		0xa36e
 
-#define PCI_INTEL_BXT_DSM_UUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+#define PCI_INTEL_BXT_DSM_GUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
 #define PCI_INTEL_BXT_FUNC_PMU_PWR	4
 #define PCI_INTEL_BXT_STATE_D0		0
 #define PCI_INTEL_BXT_STATE_D3		3
@@ -51,14 +51,14 @@
  * struct dwc3_pci - Driver private structure
  * @dwc3: child dwc3 platform_device
  * @pci: our link to PCI bus
- * @uuid: _DSM UUID
+ * @guid: _DSM GUID
  * @has_dsm_for_pm: true for devices which need to run _DSM on runtime PM
  */
 struct dwc3_pci {
 	struct platform_device *dwc3;
 	struct pci_dev *pci;
 
-	u8 uuid[16];
+	guid_t guid;
 
 	unsigned int has_dsm_for_pm:1;
 };
@@ -120,7 +120,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
 
 		if (pdev->device == PCI_DEVICE_ID_INTEL_BXT ||
 				pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) {
-			acpi_str_to_uuid(PCI_INTEL_BXT_DSM_UUID, dwc->uuid);
+			guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid);
 			dwc->has_dsm_for_pm = true;
 		}
 
@@ -292,7 +292,7 @@ static int dwc3_pci_dsm(struct dwc3_pci *dwc, int param)
 	tmp.type = ACPI_TYPE_INTEGER;
 	tmp.integer.value = param;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), dwc->uuid,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), &dwc->guid,
 			1, PCI_INTEL_BXT_FUNC_PMU_PWR, &argv4);
 	if (!obj) {
 		dev_err(&dwc->pci->dev, "failed to evaluate _DSM\n");
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index fcf1f3f63e7a..4842be5687a7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -213,13 +213,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 #ifdef CONFIG_ACPI
 static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
 {
-	static const u8 intel_dsm_uuid[] = {
-		0xb7, 0x0c, 0x34, 0xac,	0x01, 0xe9, 0xbf, 0x45,
-		0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
-	};
+	static const guid_t intel_dsm_guid =
+		GUID_INIT(0xac340cb7, 0xe901, 0x45bf,
+			  0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23);
 	union acpi_object *obj;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), &intel_dsm_guid, 3, 1,
 				NULL);
 	ACPI_FREE(obj);
 }
diff --git a/drivers/usb/misc/ucsi.c b/drivers/usb/misc/ucsi.c
index 07397bddefa3..81251aaa20f9 100644
--- a/drivers/usb/misc/ucsi.c
+++ b/drivers/usb/misc/ucsi.c
@@ -55,13 +55,13 @@ struct ucsi {
 
 static int ucsi_acpi_cmd(struct ucsi *ucsi, struct ucsi_control *ctrl)
 {
-	uuid_le uuid = UUID_LE(0x6f8398c2, 0x7ca4, 0x11e4,
-			       0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
+	guid_t guid = GUID_INIT(0x6f8398c2, 0x7ca4, 0x11e4,
+				0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
 	union acpi_object *obj;
 
 	ucsi->data->ctrl.raw_cmd = ctrl->raw_cmd;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), uuid.b, 1, 1, NULL);
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), &guid, 1, 1, NULL);
 	if (!obj) {
 		dev_err(ucsi->dev, "%s: failed to evaluate _DSM\n", __func__);
 		return -EIO;
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c
index d5a7b21fa3f1..c2ce25289027 100644
--- a/drivers/usb/typec/typec_wcove.c
+++ b/drivers/usb/typec/typec_wcove.c
@@ -105,8 +105,8 @@ enum wcove_typec_role {
 	WCOVE_ROLE_DEVICE,
 };
 
-static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49,
-			      0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
+static guid_t guid = GUID_INIT(0x482383f0, 0x2876, 0x4e49,
+			       0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
 
 static int wcove_typec_func(struct wcove_typec *wcove,
 			    enum wcove_typec_func func, int param)
@@ -118,7 +118,7 @@ static int wcove_typec_func(struct wcove_typec *wcove,
 	tmp.type = ACPI_TYPE_INTEGER;
 	tmp.integer.value = param;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), &guid, 1, func,
 				&argv4);
 	if (!obj) {
 		dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__);
@@ -314,7 +314,7 @@ static int wcove_typec_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) {
+	if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), &guid, 0, 0x1f)) {
 		dev_err(&pdev->dev, "Missing _DSM functions\n");
 		return -ENODEV;
 	}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 197f3fffc9a7..ea7df16e71a7 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -61,17 +61,18 @@ bool acpi_ata_match(acpi_handle handle);
 bool acpi_bay_match(acpi_handle handle);
 bool acpi_dock_match(acpi_handle handle);
 
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs);
-union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs);
+union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
 			u64 rev, u64 func, union acpi_object *argv4);
 
 static inline union acpi_object *
-acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
-			union acpi_object *argv4, acpi_object_type type)
+acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+			u64 func, union acpi_object *argv4,
+			acpi_object_type type)
 {
 	union acpi_object *obj;
 
-	obj = acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+	obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4);
 	if (obj && obj->type != type) {
 		ACPI_FREE(obj);
 		obj = NULL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b0e1636ca5c3..ab19365c905f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -458,7 +458,6 @@ struct acpi_osc_context {
 	struct acpi_buffer ret;		/* free by caller if success */
 };
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
@@ -742,7 +741,7 @@ static inline bool acpi_driver_match_device(struct device *dev,
 }
 
 static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
-						   const u8 *uuid,
+						   const guid_t *guid,
 						   int rev, int func,
 						   union acpi_object *argv4)
 {
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 7a4e83a8c89c..dd86c97f2454 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -105,7 +105,7 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { }
 static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
 #endif
 
-extern const u8 pci_acpi_dsm_uuid[];
+extern const guid_t pci_acpi_dsm_guid;
 #define DEVICE_LABEL_DSM	0x07
 #define RESET_DELAY_DSM		0x08
 #define FUNCTION_DELAY_DSM	0x09
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index e3f06672fd6d..e7d766d56c8e 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -21,8 +21,9 @@
 #include "skl.h"
 
 /* Unique identification for getting NHLT blobs */
-static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45,
-				0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53};
+static guid_t osc_guid =
+	GUID_INIT(0xA69F886E, 0x6CEB, 0x4594,
+		  0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53);
 
 struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 {
@@ -37,7 +38,7 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 		return NULL;
 	}
 
-	obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
+	obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL);
 	if (obj && obj->type == ACPI_TYPE_BUFFER) {
 		nhlt_ptr = (struct nhlt_resource_desc  *)obj->buffer.pointer;
 		nhlt_table = (struct nhlt_acpi_table *)
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 64cae1a5deff..e1f75a1914a1 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -370,7 +370,7 @@ acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
 
-union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
 		u64 rev, u64 func, union acpi_object *argv4)
 {
 	union acpi_object *obj = ERR_PTR(-ENXIO);
@@ -379,11 +379,11 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
 	rcu_read_lock();
 	ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
 	if (ops)
-		obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4);
+		obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
 	rcu_read_unlock();
 
 	if (IS_ERR(obj))
-		return acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+		return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
 	return obj;
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index c2187178fb13..28859da78edf 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1559,7 +1559,7 @@ static unsigned long nfit_ctl_handle;
 union acpi_object *result;
 
 static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
-		const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4)
+		const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
 {
 	if (handle != &nfit_ctl_handle)
 		return ERR_PTR(-ENXIO);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index f54c0032c6ff..d3d63dd5ed38 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -13,6 +13,7 @@
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
 #include <linux/list.h>
+#include <linux/uuid.h>
 #include <linux/ioport.h>
 #include <linux/spinlock_types.h>
 
@@ -36,7 +37,8 @@ typedef void *acpi_handle;
 
 typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
 typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
-		const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4);
+		 const guid_t *guid, u64 rev, u64 func,
+		 union acpi_object *argv4);
 void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 		unsigned long size);
 void __wrap_iounmap(volatile void __iomem *addr);
-- 
cgit v1.2.3


From 22833a9165a1c72a54ddc696a3765bd6f87fbb92 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 17 May 2017 09:40:30 +0200
Subject: clk: divider: Make divider_round_rate take the parent clock

So far, divider_round_rate only considers the parent clock returned by
clk_hw_get_parent.

This works fine on clocks that have a single parents, this doesn't work on
muxes, since we will only consider the first parent, while other parents
may totally be able to provide a better combination.

Clocks in that case cannot use divider_round_rate, so would have to come up
with a very similar logic to work around it. Instead of having to do
something like this, and duplicate that logic everywhere, create a
divider_round_rate parent to allow caller to give an additional parameter
for the parent clock to consider.

Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 drivers/clk/clk-divider.c    | 19 ++++++++++---------
 include/linux/clk-provider.h | 16 +++++++++++++---
 2 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 96386ffc8483..9bb472cccca6 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -275,7 +275,8 @@ static int _next_div(const struct clk_div_table *table, int div,
 	return div;
 }
 
-static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
+static int clk_divider_bestdiv(struct clk_hw *hw, struct clk_hw *parent,
+			       unsigned long rate,
 			       unsigned long *best_parent_rate,
 			       const struct clk_div_table *table, u8 width,
 			       unsigned long flags)
@@ -314,8 +315,7 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 			*best_parent_rate = parent_rate_saved;
 			return i;
 		}
-		parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw),
-					       rate * i);
+		parent_rate = clk_hw_round_rate(parent, rate * i);
 		now = DIV_ROUND_UP_ULL((u64)parent_rate, i);
 		if (_is_best_div(rate, now, best, flags)) {
 			bestdiv = i;
@@ -326,23 +326,24 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 
 	if (!bestdiv) {
 		bestdiv = _get_maxdiv(table, width, flags);
-		*best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), 1);
+		*best_parent_rate = clk_hw_round_rate(parent, 1);
 	}
 
 	return bestdiv;
 }
 
-long divider_round_rate(struct clk_hw *hw, unsigned long rate,
-			unsigned long *prate, const struct clk_div_table *table,
-			u8 width, unsigned long flags)
+long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
+			       unsigned long rate, unsigned long *prate,
+			       const struct clk_div_table *table,
+			       u8 width, unsigned long flags)
 {
 	int div;
 
-	div = clk_divider_bestdiv(hw, rate, prate, table, width, flags);
+	div = clk_divider_bestdiv(hw, parent, rate, prate, table, width, flags);
 
 	return DIV_ROUND_UP_ULL((u64)*prate, div);
 }
-EXPORT_SYMBOL_GPL(divider_round_rate);
+EXPORT_SYMBOL_GPL(divider_round_rate_parent);
 
 static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
 				unsigned long *prate)
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index a428aec36ace..c59c62571e4f 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -412,9 +412,10 @@ extern const struct clk_ops clk_divider_ro_ops;
 unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate,
 		unsigned int val, const struct clk_div_table *table,
 		unsigned long flags);
-long divider_round_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *prate, const struct clk_div_table *table,
-		u8 width, unsigned long flags);
+long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
+			       unsigned long rate, unsigned long *prate,
+			       const struct clk_div_table *table,
+			       u8 width, unsigned long flags);
 int divider_get_val(unsigned long rate, unsigned long parent_rate,
 		const struct clk_div_table *table, u8 width,
 		unsigned long flags);
@@ -757,6 +758,15 @@ static inline void __clk_hw_set_clk(struct clk_hw *dst, struct clk_hw *src)
 	dst->core = src->core;
 }
 
+static inline long divider_round_rate(struct clk_hw *hw, unsigned long rate,
+				      unsigned long *prate,
+				      const struct clk_div_table *table,
+				      u8 width, unsigned long flags)
+{
+	return divider_round_rate_parent(hw, clk_hw_get_parent(hw),
+					 rate, prate, table, width, flags);
+}
+
 /*
  * FIXME clock api without lock protection
  */
-- 
cgit v1.2.3


From 515559ca21713218595f3a4dad44a4e7eea2fcfb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 7 Jun 2017 16:27:15 +0100
Subject: rxrpc: Provide a getsockopt call to query what cmsgs types are
 supported

Provide a getsockopt() call that can query what cmsg types are supported by
AF_RXRPC.
---
 Documentation/networking/rxrpc.txt |  9 +++++++++
 include/linux/rxrpc.h              | 24 ++++++++++++++----------
 net/rxrpc/af_rxrpc.c               | 30 +++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 18078e630a63..bce8e10a2a8e 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -406,6 +406,10 @@ calls, to invoke certain actions and to report certain conditions.  These are:
      future communication to that server and RXRPC_UPGRADE_SERVICE should no
      longer be set.
 
+The symbol RXRPC__SUPPORTED is defined as one more than the highest control
+message type supported.  At run time this can be queried by means of the
+RXRPC_SUPPORTED_CMSG socket option (see below).
+
 
 ==============
 SOCKET OPTIONS
@@ -459,6 +463,11 @@ AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
      must point to an array of two unsigned short ints.  The first is the
      service ID to upgrade from and the second the service ID to upgrade to.
 
+ (*) RXRPC_SUPPORTED_CMSG
+
+     This is a read-only option that writes an int into the buffer indicating
+     the highest control message type supported.
+
 
 ========
 SECURITY
diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h
index 707910c6c6c5..bdd3175b9a48 100644
--- a/include/linux/rxrpc.h
+++ b/include/linux/rxrpc.h
@@ -38,6 +38,7 @@ struct sockaddr_rxrpc {
 #define RXRPC_EXCLUSIVE_CONNECTION	3	/* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */
 #define RXRPC_MIN_SECURITY_LEVEL	4	/* minimum security level */
 #define RXRPC_UPGRADEABLE_SERVICE	5	/* Upgrade service[0] -> service[1] */
+#define RXRPC_SUPPORTED_CMSG		6	/* Get highest supported control message type */
 
 /*
  * RxRPC control messages
@@ -45,16 +46,19 @@ struct sockaddr_rxrpc {
  * - terminal messages mean that a user call ID tag can be recycled
  * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg()
  */
-#define RXRPC_USER_CALL_ID	1	/* sr: user call ID specifier */
-#define RXRPC_ABORT		2	/* sr: abort request / notification [terminal] */
-#define RXRPC_ACK		3	/* -r: [Service] RPC op final ACK received [terminal] */
-#define RXRPC_NET_ERROR		5	/* -r: network error received [terminal] */
-#define RXRPC_BUSY		6	/* -r: server busy received [terminal] */
-#define RXRPC_LOCAL_ERROR	7	/* -r: local error generated [terminal] */
-#define RXRPC_NEW_CALL		8	/* -r: [Service] new incoming call notification */
-#define RXRPC_ACCEPT		9	/* s-: [Service] accept request */
-#define RXRPC_EXCLUSIVE_CALL	10	/* s-: Call should be on exclusive connection */
-#define RXRPC_UPGRADE_SERVICE	11	/* s-: Request service upgrade for client call */
+enum rxrpc_cmsg_type {
+	RXRPC_USER_CALL_ID	= 1,	/* sr: user call ID specifier */
+	RXRPC_ABORT		= 2,	/* sr: abort request / notification [terminal] */
+	RXRPC_ACK		= 3,	/* -r: [Service] RPC op final ACK received [terminal] */
+	RXRPC_NET_ERROR		= 5,	/* -r: network error received [terminal] */
+	RXRPC_BUSY		= 6,	/* -r: server busy received [terminal] */
+	RXRPC_LOCAL_ERROR	= 7,	/* -r: local error generated [terminal] */
+	RXRPC_NEW_CALL		= 8,	/* -r: [Service] new incoming call notification */
+	RXRPC_ACCEPT		= 9,	/* s-: [Service] accept request */
+	RXRPC_EXCLUSIVE_CALL	= 10,	/* s-: Call should be on exclusive connection */
+	RXRPC_UPGRADE_SERVICE	= 11,	/* s-: Request service upgrade for client call */
+	RXRPC__SUPPORTED
+};
 
 /*
  * RxRPC security levels
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0c4dc4a7832c..44a52b82bb5d 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -581,6 +581,34 @@ error:
 	return ret;
 }
 
+/*
+ * Get socket options.
+ */
+static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
+			    char __user *optval, int __user *_optlen)
+{
+	int optlen;
+	
+	if (level != SOL_RXRPC)
+		return -EOPNOTSUPP;
+
+	if (get_user(optlen, _optlen))
+		return -EFAULT;
+	
+	switch (optname) {
+	case RXRPC_SUPPORTED_CMSG:
+		if (optlen < sizeof(int))
+			return -ETOOSMALL;
+		if (put_user(RXRPC__SUPPORTED - 1, (int __user *)optval) ||
+		    put_user(sizeof(int), _optlen))
+			return -EFAULT;
+		return 0;
+		
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 /*
  * permit an RxRPC socket to be polled
  */
@@ -784,7 +812,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
 	.listen		= rxrpc_listen,
 	.shutdown	= rxrpc_shutdown,
 	.setsockopt	= rxrpc_setsockopt,
-	.getsockopt	= sock_no_getsockopt,
+	.getsockopt	= rxrpc_getsockopt,
 	.sendmsg	= rxrpc_sendmsg,
 	.recvmsg	= rxrpc_recvmsg,
 	.mmap		= sock_no_mmap,
-- 
cgit v1.2.3


From e754eba685aac2a9b5538176fa2d254ad25f464d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 7 Jun 2017 12:40:03 +0100
Subject: rxrpc: Provide a cmsg to specify the amount of Tx data for a call

Provide a control message that can be specified on the first sendmsg() of a
client call or the first sendmsg() of a service response to indicate the
total length of the data to be transmitted for that call.

Currently, because the length of the payload of an encrypted DATA packet is
encrypted in front of the data, the packet cannot be encrypted until we
know how much data it will hold.

By specifying the length at the beginning of the transmit phase, each DATA
packet length can be set before we start loading data from userspace (where
several sendmsg() calls may contribute to a particular packet).

An error will be returned if too little or too much data is presented in
the Tx phase.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/networking/rxrpc.txt | 34 ++++++++++++++++++++++++
 fs/afs/rxrpc.c                     | 18 ++++++++++++-
 include/linux/rxrpc.h              |  1 +
 include/net/af_rxrpc.h             |  2 ++
 net/rxrpc/af_rxrpc.c               |  5 +++-
 net/rxrpc/ar-internal.h            |  3 ++-
 net/rxrpc/call_object.c            |  3 +++
 net/rxrpc/sendmsg.c                | 54 ++++++++++++++++++++++++++++++++++++--
 8 files changed, 115 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index bce8e10a2a8e..8c70ba5dee4d 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -327,6 +327,7 @@ calls, to invoke certain actions and to report certain conditions.  These are:
 	RXRPC_ACCEPT		s-- n/a		Accept new call
 	RXRPC_EXCLUSIVE_CALL	s-- n/a		Make an exclusive client call
 	RXRPC_UPGRADE_SERVICE	s-- n/a		Client call can be upgraded
+	RXRPC_TX_LENGTH		s-- data len	Total length of Tx data
 
 	(SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
 
@@ -406,6 +407,19 @@ calls, to invoke certain actions and to report certain conditions.  These are:
      future communication to that server and RXRPC_UPGRADE_SERVICE should no
      longer be set.
 
+ (*) RXRPC_TX_LENGTH
+
+     This is used to inform the kernel of the total amount of data that is
+     going to be transmitted by a call (whether in a client request or a
+     service response).  If given, it allows the kernel to encrypt from the
+     userspace buffer directly to the packet buffers, rather than copying into
+     the buffer and then encrypting in place.  This may only be given with the
+     first sendmsg() providing data for a call.  EMSGSIZE will be generated if
+     the amount of data actually given is different.
+
+     This takes a parameter of __s64 type that indicates how much will be
+     transmitted.  This may not be less than zero.
+
 The symbol RXRPC__SUPPORTED is defined as one more than the highest control
 message type supported.  At run time this can be queried by means of the
 RXRPC_SUPPORTED_CMSG socket option (see below).
@@ -577,6 +591,9 @@ A client would issue an operation by:
      MSG_MORE should be set in msghdr::msg_flags on all but the last part of
      the request.  Multiple requests may be made simultaneously.
 
+     An RXRPC_TX_LENGTH control message can also be specified on the first
+     sendmsg() call.
+
      If a call is intended to go to a destination other than the default
      specified through connect(), then msghdr::msg_name should be set on the
      first request message of that call.
@@ -764,6 +781,7 @@ The kernel interface functions are as follows:
 				struct sockaddr_rxrpc *srx,
 				struct key *key,
 				unsigned long user_call_ID,
+				s64 tx_total_len,
 				gfp_t gfp);
 
      This allocates the infrastructure to make a new RxRPC call and assigns
@@ -780,6 +798,11 @@ The kernel interface functions are as follows:
      control data buffer.  It is entirely feasible to use this to point to a
      kernel data structure.
 
+     tx_total_len is the amount of data the caller is intending to transmit
+     with this call (or -1 if unknown at this point).  Setting the data size
+     allows the kernel to encrypt directly to the packet buffers, thereby
+     saving a copy.  The value may not be less than -1.
+
      If this function is successful, an opaque reference to the RxRPC call is
      returned.  The caller now holds a reference on this and it must be
      properly ended.
@@ -931,6 +954,17 @@ The kernel interface functions are as follows:
 
      This is used to find the remote peer address of a call.
 
+ (*) Set the total transmit data size on a call.
+
+	void rxrpc_kernel_set_tx_length(struct socket *sock,
+					struct rxrpc_call *call,
+					s64 tx_total_len);
+
+     This sets the amount of data that the caller is intending to transmit on a
+     call.  It's intended to be used for setting the reply size as the request
+     size should be set when the call is begun.  tx_total_len may not be less
+     than zero.
+
 
 =======================
 CONFIGURABLE PARAMETERS
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d5990eb160bd..02781e78ffb6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -341,6 +341,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	struct msghdr msg;
 	struct kvec iov[1];
 	size_t offset;
+	s64 tx_total_len;
 	u32 abort_code;
 	int ret;
 
@@ -364,9 +365,20 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	srx.transport.sin.sin_port = call->port;
 	memcpy(&srx.transport.sin.sin_addr, addr, 4);
 
+	/* Work out the length we're going to transmit.  This is awkward for
+	 * calls such as FS.StoreData where there's an extra injection of data
+	 * after the initial fixed part.
+	 */
+	tx_total_len = call->request_size;
+	if (call->send_pages) {
+		tx_total_len += call->last_to - call->first_offset;
+		tx_total_len += (call->last - call->first) * PAGE_SIZE;
+	}
+
 	/* create a call */
 	rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
-					 (unsigned long) call, gfp,
+					 (unsigned long)call,
+					 tx_total_len, gfp,
 					 (async ?
 					  afs_wake_up_async_call :
 					  afs_wake_up_call_waiter));
@@ -738,6 +750,8 @@ void afs_send_empty_reply(struct afs_call *call)
 
 	_enter("");
 
+	rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0);
+
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
 	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
@@ -772,6 +786,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 
 	_enter("");
 
+	rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len);
+
 	iov[0].iov_base		= (void *) buf;
 	iov[0].iov_len		= len;
 	msg.msg_name		= NULL;
diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h
index bdd3175b9a48..7343f71783dc 100644
--- a/include/linux/rxrpc.h
+++ b/include/linux/rxrpc.h
@@ -57,6 +57,7 @@ enum rxrpc_cmsg_type {
 	RXRPC_ACCEPT		= 9,	/* s-: [Service] accept request */
 	RXRPC_EXCLUSIVE_CALL	= 10,	/* s-: Call should be on exclusive connection */
 	RXRPC_UPGRADE_SERVICE	= 11,	/* s-: Request service upgrade for client call */
+	RXRPC_TX_LENGTH		= 12,	/* s-: Total length of Tx data */
 	RXRPC__SUPPORTED
 };
 
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index b5f5187f488c..c172709787af 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -33,6 +33,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
 					   struct sockaddr_rxrpc *,
 					   struct key *,
 					   unsigned long,
+					   s64,
 					   gfp_t,
 					   rxrpc_notify_rx_t);
 int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
@@ -46,5 +47,6 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
 			   struct sockaddr_rxrpc *);
 int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
 			       rxrpc_user_attach_call_t, unsigned long, gfp_t);
+void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
 
 #endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 44a52b82bb5d..58ae0db52ea1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -262,6 +262,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
  * @srx: The address of the peer to contact
  * @key: The security context to use (defaults to socket setting)
  * @user_call_ID: The ID to use
+ * @tx_total_len: Total length of data to transmit during the call (or -1)
  * @gfp: The allocation constraints
  * @notify_rx: Where to send notifications instead of socket queue
  *
@@ -276,6 +277,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 					   struct sockaddr_rxrpc *srx,
 					   struct key *key,
 					   unsigned long user_call_ID,
+					   s64 tx_total_len,
 					   gfp_t gfp,
 					   rxrpc_notify_rx_t notify_rx)
 {
@@ -303,7 +305,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 	cp.security_level	= 0;
 	cp.exclusive		= false;
 	cp.service_id		= srx->srx_service;
-	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
+				     gfp);
 	/* The socket has been unlocked. */
 	if (!IS_ERR(call))
 		call->notify_rx = notify_rx;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e9b536cb0acf..adbf37946450 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -528,6 +528,7 @@ struct rxrpc_call {
 	struct rb_node		sock_node;	/* Node in rx->calls */
 	struct sk_buff		*tx_pending;	/* Tx socket buffer being filled */
 	wait_queue_head_t	waitq;		/* Wait queue for channel or Tx */
+	s64			tx_total_len;	/* Total length left to be transmitted (or -1) */
 	__be32			crypto_buf[2];	/* Temporary packet crypto buffer */
 	unsigned long		user_call_ID;	/* user-defined call ID */
 	unsigned long		flags;
@@ -683,7 +684,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t);
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
 					 struct sockaddr_rxrpc *,
-					 unsigned long, gfp_t);
+					 unsigned long, s64, gfp_t);
 void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
 			 struct sk_buff *);
 void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 692110808baa..423030fd93be 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -127,6 +127,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 	rwlock_init(&call->state_lock);
 	atomic_set(&call->usage, 1);
 	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	call->tx_total_len = -1;
 
 	memset(&call->sock_node, 0xed, sizeof(call->sock_node));
 
@@ -201,6 +202,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 					 struct rxrpc_conn_parameters *cp,
 					 struct sockaddr_rxrpc *srx,
 					 unsigned long user_call_ID,
+					 s64 tx_total_len,
 					 gfp_t gfp)
 	__releases(&rx->sk.sk_lock.slock)
 {
@@ -219,6 +221,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 		return call;
 	}
 
+	call->tx_total_len = tx_total_len;
 	trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
 			 here, (const void *)user_call_ID);
 
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index d939a5b1abc3..2e636a525a65 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -29,6 +29,7 @@ enum rxrpc_command {
 };
 
 struct rxrpc_send_params {
+	s64			tx_total_len;	/* Total Tx data length (if send data) */
 	unsigned long		user_call_ID;	/* User's call ID */
 	u32			abort_code;	/* Abort code to Tx (if abort) */
 	enum rxrpc_command	command : 8;	/* The command to implement */
@@ -207,6 +208,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
 	more = msg->msg_flags & MSG_MORE;
 
+	if (call->tx_total_len != -1) {
+		if (len > call->tx_total_len)
+			return -EMSGSIZE;
+		if (!more && len != call->tx_total_len)
+			return -EMSGSIZE;
+	}
+
 	skb = call->tx_pending;
 	call->tx_pending = NULL;
 	rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
@@ -299,6 +307,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 			sp->remain -= copy;
 			skb->mark += copy;
 			copied += copy;
+			if (call->tx_total_len != -1)
+				call->tx_total_len -= copy;
 		}
 
 		/* check for the far side aborting the call or a network error
@@ -436,6 +446,14 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
 				return -EINVAL;
 			break;
 
+		case RXRPC_TX_LENGTH:
+			if (p->tx_total_len != -1 || len != sizeof(__s64))
+				return -EINVAL;
+			p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
+			if (p->tx_total_len < 0)
+				return -EINVAL;
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -443,6 +461,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
 
 	if (!got_user_ID)
 		return -EINVAL;
+	if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
+		return -EINVAL;
 	_leave(" = 0");
 	return 0;
 }
@@ -481,7 +501,8 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 	cp.exclusive		= rx->exclusive | p->exclusive;
 	cp.upgrade		= p->upgrade;
 	cp.service_id		= srx->srx_service;
-	call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, GFP_KERNEL);
+	call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID,
+				     p->tx_total_len, GFP_KERNEL);
 	/* The socket is now unlocked */
 
 	_leave(" = %p\n", call);
@@ -501,6 +522,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	int ret;
 
 	struct rxrpc_send_params p = {
+		.tx_total_len	= -1,
 		.user_call_ID	= 0,
 		.abort_code	= 0,
 		.command	= RXRPC_CMD_SEND_DATA,
@@ -555,6 +577,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 			ret = -ERESTARTSYS;
 			goto error_put;
 		}
+
+		if (p.tx_total_len != -1) {
+			ret = -EINVAL;
+			if (call->tx_total_len != -1 ||
+			    call->tx_pending ||
+			    call->tx_top != 0)
+				goto error_put;
+			call->tx_total_len = p.tx_total_len;
+		}
 	}
 
 	state = READ_ONCE(call->state);
@@ -672,5 +703,24 @@ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
 	mutex_unlock(&call->user_mutex);
 	return aborted;
 }
-
 EXPORT_SYMBOL(rxrpc_kernel_abort_call);
+
+/**
+ * rxrpc_kernel_set_tx_length - Set the total Tx length on a call
+ * @sock: The socket the call is on
+ * @call: The call to be informed
+ * @tx_total_len: The amount of data to be transmitted for this call
+ *
+ * Allow a kernel service to set the total transmit length on a call.  This
+ * allows buffer-to-packet encrypt-and-copy to be performed.
+ *
+ * This function is primarily for use for setting the reply length since the
+ * request length can be set when beginning the call.
+ */
+void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call,
+				s64 tx_total_len)
+{
+	WARN_ON(call->tx_total_len != -1);
+	call->tx_total_len = tx_total_len;
+}
+EXPORT_SYMBOL(rxrpc_kernel_set_tx_length);
-- 
cgit v1.2.3


From cf124db566e6b036b8bcbe8decbed740bdfac8c6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 8 May 2017 12:52:56 -0400
Subject: net: Fix inconsistent teardown and release of private netdev state.

Network devices can allocate reasources and private memory using
netdev_ops->ndo_init().  However, the release of these resources
can occur in one of two different places.

Either netdev_ops->ndo_uninit() or netdev->destructor().

The decision of which operation frees the resources depends upon
whether it is necessary for all netdev refs to be released before it
is safe to perform the freeing.

netdev_ops->ndo_uninit() presumably can occur right after the
NETDEV_UNREGISTER notifier completes and the unicast and multicast
address lists are flushed.

netdev->destructor(), on the other hand, does not run until the
netdev references all go away.

Further complicating the situation is that netdev->destructor()
almost universally does also a free_netdev().

This creates a problem for the logic in register_netdevice().
Because all callers of register_netdevice() manage the freeing
of the netdev, and invoke free_netdev(dev) if register_netdevice()
fails.

If netdev_ops->ndo_init() succeeds, but something else fails inside
of register_netdevice(), it does call ndo_ops->ndo_uninit().  But
it is not able to invoke netdev->destructor().

This is because netdev->destructor() will do a free_netdev() and
then the caller of register_netdevice() will do the same.

However, this means that the resources that would normally be released
by netdev->destructor() will not be.

Over the years drivers have added local hacks to deal with this, by
invoking their destructor parts by hand when register_netdevice()
fails.

Many drivers do not try to deal with this, and instead we have leaks.

Let's close this hole by formalizing the distinction between what
private things need to be freed up by netdev->destructor() and whether
the driver needs unregister_netdevice() to perform the free_netdev().

netdev->priv_destructor() performs all actions to free up the private
resources that used to be freed by netdev->destructor(), except for
free_netdev().

netdev->needs_free_netdev is a boolean that indicates whether
free_netdev() should be done at the end of unregister_netdevice().

Now, register_netdevice() can sanely release all resources after
ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit()
and netdev->priv_destructor().

And at the end of unregister_netdevice(), we invoke
netdev->priv_destructor() and optionally call free_netdev().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                             | 6 +++---
 drivers/net/caif/caif_hsi.c                                 | 2 +-
 drivers/net/caif/caif_serial.c                              | 2 +-
 drivers/net/caif/caif_spi.c                                 | 2 +-
 drivers/net/caif/caif_virtio.c                              | 2 +-
 drivers/net/can/slcan.c                                     | 7 +++----
 drivers/net/can/vcan.c                                      | 2 +-
 drivers/net/can/vxcan.c                                     | 2 +-
 drivers/net/dummy.c                                         | 4 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c             | 2 +-
 drivers/net/geneve.c                                        | 2 +-
 drivers/net/gtp.c                                           | 2 +-
 drivers/net/hamradio/6pack.c                                | 2 +-
 drivers/net/hamradio/bpqether.c                             | 2 +-
 drivers/net/ifb.c                                           | 4 ++--
 drivers/net/ipvlan/ipvlan_main.c                            | 2 +-
 drivers/net/loopback.c                                      | 4 ++--
 drivers/net/macsec.c                                        | 4 ++--
 drivers/net/macvlan.c                                       | 2 +-
 drivers/net/nlmon.c                                         | 2 +-
 drivers/net/slip/slip.c                                     | 7 +++----
 drivers/net/team/team.c                                     | 4 ++--
 drivers/net/tun.c                                           | 4 ++--
 drivers/net/usb/cdc-phonet.c                                | 2 +-
 drivers/net/usb/qmi_wwan.c                                  | 2 +-
 drivers/net/veth.c                                          | 4 ++--
 drivers/net/vrf.c                                           | 2 +-
 drivers/net/vsockmon.c                                      | 2 +-
 drivers/net/vxlan.c                                         | 2 +-
 drivers/net/wan/dlci.c                                      | 2 +-
 drivers/net/wan/hdlc_fr.c                                   | 2 +-
 drivers/net/wan/lapbether.c                                 | 2 +-
 drivers/net/wireless/ath/ath6kl/main.c                      | 2 +-
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 1 -
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c     | 3 ++-
 drivers/net/wireless/intersil/hostap/hostap_main.c          | 2 +-
 drivers/net/wireless/mac80211_hwsim.c                       | 2 +-
 drivers/net/wireless/marvell/mwifiex/main.c                 | 2 +-
 drivers/staging/rtl8188eu/os_dep/mon.c                      | 2 +-
 drivers/usb/gadget/function/f_phonet.c                      | 2 +-
 include/linux/netdevice.h                                   | 7 ++++---
 net/8021q/vlan_dev.c                                        | 4 ++--
 net/batman-adv/soft-interface.c                             | 5 ++---
 net/bluetooth/6lowpan.c                                     | 2 +-
 net/bridge/br_device.c                                      | 2 +-
 net/caif/chnl_net.c                                         | 4 ++--
 net/core/dev.c                                              | 8 ++++++--
 net/hsr/hsr_device.c                                        | 4 ++--
 net/ieee802154/6lowpan/core.c                               | 2 +-
 net/ipv4/ip_tunnel.c                                        | 4 ++--
 net/ipv4/ipmr.c                                             | 2 +-
 net/ipv6/ip6_gre.c                                          | 9 +++++----
 net/ipv6/ip6_tunnel.c                                       | 8 ++++----
 net/ipv6/ip6_vti.c                                          | 8 ++++----
 net/ipv6/ip6mr.c                                            | 2 +-
 net/ipv6/sit.c                                              | 6 +++---
 net/irda/irlan/irlan_eth.c                                  | 2 +-
 net/l2tp/l2tp_eth.c                                         | 2 +-
 net/mac80211/iface.c                                        | 6 +++---
 net/mac802154/iface.c                                       | 7 +++----
 net/openvswitch/vport-internal_dev.c                        | 4 ++--
 net/phonet/pep-gprs.c                                       | 2 +-
 62 files changed, 105 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2359478b977f..8ab6bdbe1682 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4192,7 +4192,6 @@ static void bond_destructor(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
-	free_netdev(bond_dev);
 }
 
 void bond_setup(struct net_device *bond_dev)
@@ -4212,7 +4211,8 @@ void bond_setup(struct net_device *bond_dev)
 	bond_dev->netdev_ops = &bond_netdev_ops;
 	bond_dev->ethtool_ops = &bond_ethtool_ops;
 
-	bond_dev->destructor = bond_destructor;
+	bond_dev->needs_free_netdev = true;
+	bond_dev->priv_destructor = bond_destructor;
 
 	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
 
@@ -4736,7 +4736,7 @@ int bond_create(struct net *net, const char *name)
 
 	rtnl_unlock();
 	if (res < 0)
-		bond_destructor(bond_dev);
+		free_netdev(bond_dev);
 	return res;
 }
 
diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index ddabce759456..71a7c3b44fdd 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -1121,7 +1121,7 @@ static void cfhsi_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ;
 	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->netdev_ops = &cfhsi_netdevops;
 	for (i = 0; i < CFHSI_PRIO_LAST; ++i)
 		skb_queue_head_init(&cfhsi->qhead[i]);
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index c2dea4916e5d..76e1d3545105 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -428,7 +428,7 @@ static void caifdev_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CAIF_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	skb_queue_head_init(&serdev->head);
 	serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
 	serdev->common.use_frag = true;
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 3a529fbe539f..fc21afe852b9 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -712,7 +712,7 @@ static void cfspi_setup(struct net_device *dev)
 	dev->flags = IFF_NOARP | IFF_POINTOPOINT;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->mtu = SPI_MAX_PAYLOAD_SIZE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	skb_queue_head_init(&cfspi->qhead);
 	skb_queue_head_init(&cfspi->chead);
 	cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index 6122768c8644..1794ea0420b7 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -617,7 +617,7 @@ static void cfv_netdev_setup(struct net_device *netdev)
 	netdev->tx_queue_len = 100;
 	netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	netdev->mtu = CFV_DEF_MTU_SIZE;
-	netdev->destructor = free_netdev;
+	netdev->needs_free_netdev = true;
 }
 
 /* Create debugfs counters for the device */
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index eb7173713bbc..6a6e896e52fa 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -417,7 +417,7 @@ static int slc_open(struct net_device *dev)
 static void slc_free_netdev(struct net_device *dev)
 {
 	int i = dev->base_addr;
-	free_netdev(dev);
+
 	slcan_devs[i] = NULL;
 }
 
@@ -436,7 +436,8 @@ static const struct net_device_ops slc_netdev_ops = {
 static void slc_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &slc_netdev_ops;
-	dev->destructor		= slc_free_netdev;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= slc_free_netdev;
 
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
@@ -761,8 +762,6 @@ static void __exit slcan_exit(void)
 		if (sl->tty) {
 			printk(KERN_ERR "%s: tty discipline still running\n",
 			       dev->name);
-			/* Intentionally leak the control block. */
-			dev->destructor = NULL;
 		}
 
 		unregister_netdev(dev);
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index facca33d53e9..0eda1b308583 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -163,7 +163,7 @@ static void vcan_setup(struct net_device *dev)
 		dev->flags |= IFF_ECHO;
 
 	dev->netdev_ops		= &vcan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static struct rtnl_link_ops vcan_link_ops __read_mostly = {
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 7fbb24795681..30cf2368becf 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -156,7 +156,7 @@ static void vxcan_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 	dev->flags		= (IFF_NOARP|IFF_ECHO);
 	dev->netdev_ops		= &vxcan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /* forward declaration for rtnl_create_link() */
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 149244aac20a..9905b52fe293 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -328,7 +328,6 @@ static void dummy_free_netdev(struct net_device *dev)
 	struct dummy_priv *priv = netdev_priv(dev);
 
 	kfree(priv->vfinfo);
-	free_netdev(dev);
 }
 
 static void dummy_setup(struct net_device *dev)
@@ -338,7 +337,8 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &dummy_netdev_ops;
 	dev->ethtool_ops = &dummy_ethtool_ops;
-	dev->destructor = dummy_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = dummy_free_netdev;
 
 	/* Fill in device structure with ethernet-generic values. */
 	dev->flags |= IFF_NOARP;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 77ed2f628f9c..ea1bfcf1870a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4525,7 +4525,7 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
 	dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 }
 
 static int config_mgmt_dev(struct pci_dev *pdev)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6ebb0f559a42..199459bd6961 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1007,7 +1007,7 @@ static void geneve_setup(struct net_device *dev)
 
 	dev->netdev_ops = &geneve_netdev_ops;
 	dev->ethtool_ops = &geneve_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	SET_NETDEV_DEVTYPE(dev, &geneve_type);
 
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 7b652bb7ebe4..ca110cd2a4e4 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -611,7 +611,7 @@ static const struct net_device_ops gtp_netdev_ops = {
 static void gtp_link_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &gtp_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 
 	dev->hard_header_len = 0;
 	dev->addr_len = 0;
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 922bf440e9f1..021a8ec411ab 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -311,7 +311,7 @@ static void sp_setup(struct net_device *dev)
 {
 	/* Finish setting up the DEVICE info. */
 	dev->netdev_ops		= &sp_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->mtu		= SIXP_MTU;
 	dev->hard_header_len	= AX25_MAX_HEADER_LEN;
 	dev->header_ops 	= &ax25_header_ops;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index f62e7f325cf9..78a6414c5fd9 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -476,7 +476,7 @@ static const struct net_device_ops bpq_netdev_ops = {
 static void bpq_setup(struct net_device *dev)
 {
 	dev->netdev_ops	     = &bpq_netdev_ops;
-	dev->destructor	     = free_netdev;
+	dev->needs_free_netdev = true;
 
 	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
 	memcpy(dev->dev_addr,  &ax25_defaddr, AX25_ADDR_LEN);
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 312fce7302d3..144ea5ae8ab4 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -207,7 +207,6 @@ static void ifb_dev_free(struct net_device *dev)
 		__skb_queue_purge(&txp->tq);
 	}
 	kfree(dp->tx_private);
-	free_netdev(dev);
 }
 
 static void ifb_setup(struct net_device *dev)
@@ -230,7 +229,8 @@ static void ifb_setup(struct net_device *dev)
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netif_keep_dst(dev);
 	eth_hw_addr_random(dev);
-	dev->destructor = ifb_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ifb_dev_free;
 }
 
 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 618ed88fad0f..7c7680c8f0e3 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -632,7 +632,7 @@ void ipvlan_link_setup(struct net_device *dev)
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
 	dev->netdev_ops = &ipvlan_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->header_ops = &ipvlan_header_ops;
 	dev->ethtool_ops = &ipvlan_ethtool_ops;
 }
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 224f65cb576b..30612497643c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -159,7 +159,6 @@ static void loopback_dev_free(struct net_device *dev)
 {
 	dev_net(dev)->loopback_dev = NULL;
 	free_percpu(dev->lstats);
-	free_netdev(dev);
 }
 
 static const struct net_device_ops loopback_ops = {
@@ -196,7 +195,8 @@ static void loopback_setup(struct net_device *dev)
 	dev->ethtool_ops	= &loopback_ethtool_ops;
 	dev->header_ops		= &eth_header_ops;
 	dev->netdev_ops		= &loopback_ops;
-	dev->destructor		= loopback_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= loopback_dev_free;
 }
 
 /* Setup and register the loopback device. */
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index cdc347be68f2..79411675f0e6 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2996,7 +2996,6 @@ static void macsec_free_netdev(struct net_device *dev)
 	free_percpu(macsec->secy.tx_sc.stats);
 
 	dev_put(real_dev);
-	free_netdev(dev);
 }
 
 static void macsec_setup(struct net_device *dev)
@@ -3006,7 +3005,8 @@ static void macsec_setup(struct net_device *dev)
 	dev->max_mtu = ETH_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->netdev_ops = &macsec_netdev_ops;
-	dev->destructor = macsec_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = macsec_free_netdev;
 	SET_NETDEV_DEVTYPE(dev, &macsec_type);
 
 	eth_zero_addr(dev->broadcast);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 346ad2ff3998..67bf7ebae5c6 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1092,7 +1092,7 @@ void macvlan_common_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 	dev->priv_flags	       |= IFF_UNICAST_FLT;
 	dev->netdev_ops		= &macvlan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &macvlan_hard_header_ops;
 	dev->ethtool_ops	= &macvlan_ethtool_ops;
 }
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
index b91603835d26..c4b3362da4a2 100644
--- a/drivers/net/nlmon.c
+++ b/drivers/net/nlmon.c
@@ -113,7 +113,7 @@ static void nlmon_setup(struct net_device *dev)
 
 	dev->netdev_ops	= &nlmon_ops;
 	dev->ethtool_ops = &nlmon_ethtool_ops;
-	dev->destructor	= free_netdev;
+	dev->needs_free_netdev = true;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			NETIF_F_HIGHDMA | NETIF_F_LLTX;
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 1da31dc47f86..74b907206aa7 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -629,7 +629,7 @@ static void sl_uninit(struct net_device *dev)
 static void sl_free_netdev(struct net_device *dev)
 {
 	int i = dev->base_addr;
-	free_netdev(dev);
+
 	slip_devs[i] = NULL;
 }
 
@@ -651,7 +651,8 @@ static const struct net_device_ops sl_netdev_ops = {
 static void sl_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &sl_netdev_ops;
-	dev->destructor		= sl_free_netdev;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= sl_free_netdev;
 
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
@@ -1369,8 +1370,6 @@ static void __exit slip_exit(void)
 		if (sl->tty) {
 			printk(KERN_ERR "%s: tty discipline still running\n",
 			       dev->name);
-			/* Intentionally leak the control block. */
-			dev->destructor = NULL;
 		}
 
 		unregister_netdev(dev);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6c5d5ef46f75..fba8c136aa7c 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1643,7 +1643,6 @@ static void team_destructor(struct net_device *dev)
 	struct team *team = netdev_priv(dev);
 
 	free_percpu(team->pcpu_stats);
-	free_netdev(dev);
 }
 
 static int team_open(struct net_device *dev)
@@ -2079,7 +2078,8 @@ static void team_setup(struct net_device *dev)
 
 	dev->netdev_ops = &team_netdev_ops;
 	dev->ethtool_ops = &team_ethtool_ops;
-	dev->destructor	= team_destructor;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = team_destructor;
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->priv_flags |= IFF_TEAM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bbd707b9ef7a..9ee7d4275640 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1560,7 +1560,6 @@ static void tun_free_netdev(struct net_device *dev)
 	free_percpu(tun->pcpu_stats);
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
-	free_netdev(dev);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -1571,7 +1570,8 @@ static void tun_setup(struct net_device *dev)
 	tun->group = INVALID_GID;
 
 	dev->ethtool_ops = &tun_ethtool_ops;
-	dev->destructor = tun_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = tun_free_netdev;
 	/* We prefer our own queue length */
 	dev->tx_queue_len = TUN_READQ_SIZE;
 }
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index eb52de8205f0..c7a350bbaaa7 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -298,7 +298,7 @@ static void usbpn_setup(struct net_device *dev)
 	dev->addr_len		= 1;
 	dev->tx_queue_len	= 3;
 
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /*
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8f923a147fa9..949671ce4039 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -123,7 +123,7 @@ static void qmimux_setup(struct net_device *dev)
 	dev->addr_len        = 0;
 	dev->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 	dev->netdev_ops      = &qmimux_netdev_ops;
-	dev->destructor      = free_netdev;
+	dev->needs_free_netdev = true;
 }
 
 static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 38f0f03a29c8..0156fe8cac17 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -222,7 +222,6 @@ static int veth_dev_init(struct net_device *dev)
 static void veth_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->vstats);
-	free_netdev(dev);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -317,7 +316,8 @@ static void veth_setup(struct net_device *dev)
 			       NETIF_F_HW_VLAN_STAG_TX |
 			       NETIF_F_HW_VLAN_CTAG_RX |
 			       NETIF_F_HW_VLAN_STAG_RX);
-	dev->destructor = veth_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = veth_dev_free;
 	dev->max_mtu = ETH_MAX_MTU;
 
 	dev->hw_features = VETH_FEATURES;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index db882493875c..d38f11d833fe 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1348,7 +1348,7 @@ static void vrf_setup(struct net_device *dev)
 	dev->netdev_ops = &vrf_netdev_ops;
 	dev->l3mdev_ops = &vrf_l3mdev_ops;
 	dev->ethtool_ops = &vrf_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	/* Fill in device structure with ethernet-generic values. */
 	eth_hw_addr_random(dev);
diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c
index 7f0136f2dd9d..c28bdce14fd5 100644
--- a/drivers/net/vsockmon.c
+++ b/drivers/net/vsockmon.c
@@ -135,7 +135,7 @@ static void vsockmon_setup(struct net_device *dev)
 
 	dev->netdev_ops	= &vsockmon_ops;
 	dev->ethtool_ops = &vsockmon_ethtool_ops;
-	dev->destructor	= free_netdev;
+	dev->needs_free_netdev = true;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			NETIF_F_HIGHDMA | NETIF_F_LLTX;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a6b5052c1d36..5fa798a5c9a6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2611,7 +2611,7 @@ static void vxlan_setup(struct net_device *dev)
 	eth_hw_addr_random(dev);
 	ether_setup(dev);
 
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 
 	dev->features	|= NETIF_F_LLTX;
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 65ee2a6f248c..a0d76f70c428 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -475,7 +475,7 @@ static void dlci_setup(struct net_device *dev)
 	dev->flags		= 0;
 	dev->header_ops		= &dlci_header_ops;
 	dev->netdev_ops		= &dlci_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 
 	dlp->receive		= dlci_receive;
 
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index eb915281197e..78596e42a3f3 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1106,7 +1106,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 		return -EIO;
 	}
 
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	*get_dev_p(pvc, type) = dev;
 	if (!used) {
 		state(hdlc)->dce_changed = 1;
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 9df9ed62beff..63f749078a1f 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -306,7 +306,7 @@ static const struct net_device_ops lapbeth_netdev_ops = {
 static void lapbeth_setup(struct net_device *dev)
 {
 	dev->netdev_ops	     = &lapbeth_netdev_ops;
-	dev->destructor	     = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->type            = ARPHRD_X25;
 	dev->hard_header_len = 3;
 	dev->mtu             = 1000;
diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c
index 91ee542de3d7..b90c77ef792e 100644
--- a/drivers/net/wireless/ath/ath6kl/main.c
+++ b/drivers/net/wireless/ath/ath6kl/main.c
@@ -1287,7 +1287,7 @@ void init_netdev(struct net_device *dev)
 	struct ath6kl *ar = ath6kl_priv(dev);
 
 	dev->netdev_ops = &ath6kl_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->watchdog_timeo = ATH6KL_TX_TIMEOUT;
 
 	dev->needed_headroom = ETH_HLEN;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index cd1d6730eab7..617199c0e5a0 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -5225,7 +5225,6 @@ void brcmf_cfg80211_free_netdev(struct net_device *ndev)
 
 	if (vif)
 		brcmf_free_vif(vif);
-	free_netdev(ndev);
 }
 
 static bool brcmf_is_linkup(const struct brcmf_event_msg *e)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index a3d82368f1a9..511d190c6cca 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -624,7 +624,8 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx,
 		if (!ndev)
 			return ERR_PTR(-ENOMEM);
 
-		ndev->destructor = brcmf_cfg80211_free_netdev;
+		ndev->needs_free_netdev = true;
+		ndev->priv_destructor = brcmf_cfg80211_free_netdev;
 		ifp = netdev_priv(ndev);
 		ifp->ndev = ndev;
 		/* store mapping ifidx to bsscfgidx */
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index 544fc09dcb62..1372b20f931e 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -73,7 +73,7 @@ struct net_device * hostap_add_interface(struct local_info *local,
 	dev->mem_end = mdev->mem_end;
 
 	hostap_setup_dev(dev, local, type);
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	sprintf(dev->name, "%s%s", prefix, name);
 	if (!rtnl_locked)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 002b25cff5b6..c854a557998b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2861,7 +2861,7 @@ static const struct net_device_ops hwsim_netdev_ops = {
 static void hwsim_mon_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &hwsim_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	ether_setup(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->type = ARPHRD_IEEE80211_RADIOTAP;
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index dd87b9ff64c3..39b6b5e3f6e0 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1280,7 +1280,7 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv,
 			      struct net_device *dev)
 {
 	dev->netdev_ops = &mwifiex_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	/* Initialize private structure */
 	priv->current_key_index = 0;
 	priv->media_connected = false;
diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c
index cfe37eb026d6..859d0d6051cd 100644
--- a/drivers/staging/rtl8188eu/os_dep/mon.c
+++ b/drivers/staging/rtl8188eu/os_dep/mon.c
@@ -152,7 +152,7 @@ static const struct net_device_ops mon_netdev_ops = {
 static void mon_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &mon_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	ether_setup(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->type = ARPHRD_IEEE80211;
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index b4058f0000e4..6a1ce6a55158 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -281,7 +281,7 @@ static void pn_net_setup(struct net_device *dev)
 	dev->tx_queue_len	= 1;
 
 	dev->netdev_ops		= &pn_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &phonet_header_ops;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27decf4..ab7ca3fdc495 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1596,8 +1596,8 @@ enum netdev_priv_flags {
  *	@rtnl_link_state:	This enum represents the phases of creating
  *				a new link
  *
- *	@destructor:		Called from unregister,
- *				can be used to call free_netdev
+ *	@needs_free_netdev:	Should unregister perform free_netdev?
+ *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
  *
@@ -1858,7 +1858,8 @@ struct net_device {
 		RTNL_LINK_INITIALIZING,
 	} rtnl_link_state:16;
 
-	void (*destructor)(struct net_device *dev);
+	bool needs_free_netdev;
+	void (*priv_destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
 	struct netpoll_info __rcu	*npinfo;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 953b6728bd00..abc5f400fc71 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -813,7 +813,6 @@ static void vlan_dev_free(struct net_device *dev)
 
 	free_percpu(vlan->vlan_pcpu_stats);
 	vlan->vlan_pcpu_stats = NULL;
-	free_netdev(dev);
 }
 
 void vlan_setup(struct net_device *dev)
@@ -826,7 +825,8 @@ void vlan_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	dev->netdev_ops		= &vlan_netdev_ops;
-	dev->destructor		= vlan_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= vlan_dev_free;
 	dev->ethtool_ops	= &vlan_ethtool_ops;
 
 	dev->min_mtu		= 0;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b25789abf7b9..10f7edfb176e 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1034,8 +1034,6 @@ static void batadv_softif_free(struct net_device *dev)
 	 * netdev and its private data (bat_priv)
 	 */
 	rcu_barrier();
-
-	free_netdev(dev);
 }
 
 /**
@@ -1047,7 +1045,8 @@ static void batadv_softif_init_early(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &batadv_netdev_ops;
-	dev->destructor = batadv_softif_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = batadv_softif_free;
 	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
 	dev->priv_flags |= IFF_NO_QUEUE;
 
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 608959989f8e..ab3b654b05cc 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -598,7 +598,7 @@ static void netdev_setup(struct net_device *dev)
 
 	dev->netdev_ops		= &netdev_ops;
 	dev->header_ops		= &header_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static struct device_type bt_type = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 430b53e7d941..f0f3447e8aa4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -379,7 +379,7 @@ void br_dev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 1816fc9f1ee7..fe3c53efb949 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -392,14 +392,14 @@ static void chnl_net_destructor(struct net_device *dev)
 {
 	struct chnl_net *priv = netdev_priv(dev);
 	caif_free_client(&priv->chnl);
-	free_netdev(dev);
 }
 
 static void ipcaif_net_setup(struct net_device *dev)
 {
 	struct chnl_net *priv;
 	dev->netdev_ops = &netdev_ops;
-	dev->destructor = chnl_net_destructor;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = chnl_net_destructor;
 	dev->flags |= IFF_NOARP;
 	dev->flags |= IFF_POINTOPOINT;
 	dev->mtu = GPRS_PDP_MTU;
diff --git a/net/core/dev.c b/net/core/dev.c
index 84e1e86a4bce..4c15466305c3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7502,6 +7502,8 @@ out:
 err_uninit:
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
+	if (dev->priv_destructor)
+		dev->priv_destructor(dev);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -7709,8 +7711,10 @@ void netdev_run_todo(void)
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
-		if (dev->destructor)
-			dev->destructor(dev);
+		if (dev->priv_destructor)
+			dev->priv_destructor(dev);
+		if (dev->needs_free_netdev)
+			free_netdev(dev);
 
 		/* Report a network device has been unregistered */
 		rtnl_lock();
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c73160fb11e7..0a0a392dc2bd 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -378,7 +378,6 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
 	del_timer_sync(&hsr->announce_timer);
 
 	synchronize_rcu();
-	free_netdev(hsr_dev);
 }
 
 static const struct net_device_ops hsr_device_ops = {
@@ -404,7 +403,8 @@ void hsr_dev_setup(struct net_device *dev)
 	SET_NETDEV_DEVTYPE(dev, &hsr_type);
 	dev->priv_flags |= IFF_NO_QUEUE;
 
-	dev->destructor = hsr_dev_destroy;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = hsr_dev_destroy;
 
 	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
 			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index d7efbf0dad20..0a866f332290 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -107,7 +107,7 @@ static void lowpan_setup(struct net_device *ldev)
 
 	ldev->netdev_ops	= &lowpan_netdev_ops;
 	ldev->header_ops	= &lowpan_header_ops;
-	ldev->destructor	= free_netdev;
+	ldev->needs_free_netdev	= true;
 	ldev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index b878ecbc0608..b436d0775631 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -967,7 +967,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
 	gro_cells_destroy(&tunnel->gro_cells);
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -1155,7 +1154,8 @@ int ip_tunnel_init(struct net_device *dev)
 	struct iphdr *iph = &tunnel->parms.iph;
 	int err;
 
-	dev->destructor	= ip_tunnel_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip_tunnel_dev_free;
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 551de4d023a8..b4f9622ee9f5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->netdev_ops		= &reg_vif_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 0c5b4caa1949..64eea3962733 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -991,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev)
 
 	dst_cache_destroy(&t->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static void ip6gre_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &ip6gre_netdev_ops;
-	dev->destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6gre_dev_free;
 
 	dev->type = ARPHRD_IP6GRE;
 
@@ -1148,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net)
 	return 0;
 
 err_reg_dev:
-	ip6gre_dev_free(ign->fb_tunnel_dev);
+	free_netdev(ign->fb_tunnel_dev);
 err_alloc_dev:
 	return err;
 }
@@ -1300,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &ip6gre_tap_netdev_ops;
-	dev->destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6gre_dev_free;
 
 	dev->features |= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9b37f9747fc6..c3581973f5d7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev)
 	gro_cells_destroy(&t->gro_cells);
 	dst_cache_destroy(&t->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static int ip6_tnl_create2(struct net_device *dev)
@@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 	return t;
 
 failed_free:
-	ip6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return ERR_PTR(err);
 }
@@ -1777,7 +1776,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 static void ip6_tnl_dev_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &ip6_tnl_netdev_ops;
-	dev->destructor = ip6_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->flags |= IFF_NOARP;
@@ -2224,7 +2224,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	return 0;
 
 err_register:
-	ip6_dev_free(ip6n->fb_tnl_dev);
+	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
 	return err;
 }
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d67ef56454b2..837ea1eefe7f 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
 static void vti6_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static int vti6_tnl_create2(struct net_device *dev)
@@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
 	return t;
 
 failed_free:
-	vti6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return NULL;
 }
@@ -842,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = {
 static void vti6_dev_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &vti6_netdev_ops;
-	dev->destructor = vti6_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = vti6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
@@ -1100,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net)
 	return 0;
 
 err_register:
-	vti6_dev_free(ip6n->fb_tnl_dev);
+	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
 	return err;
 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 374997d26488..2ecb39b943b5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->netdev_ops		= &reg_vif_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 61e5902f0687..2378503577b0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 	return nt;
 
 failed_free:
-	ipip6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return NULL;
 }
@@ -1336,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev)
 
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 #define SIT_FEATURES (NETIF_F_SG	   | \
@@ -1351,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
 	dev->netdev_ops		= &ipip6_netdev_ops;
-	dev->destructor		= ipip6_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= ipip6_dev_free;
 
 	dev->type		= ARPHRD_SIT;
 	dev->hard_header_len	= LL_MAX_HEADER + t_hlen;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 74d09f91709e..3be852808a9d 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -65,7 +65,7 @@ static void irlan_eth_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops		= &irlan_eth_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->min_mtu		= 0;
 	dev->max_mtu		= ETH_MAX_MTU;
 
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8b21af7321b9..f7c54ece3733 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -141,7 +141,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
 	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
 	dev->features		|= NETIF_F_LLTX;
 	dev->netdev_ops		= &l2tp_eth_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8fae1a72e6a7..915d7e1b4545 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1213,7 +1213,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 static void ieee80211_if_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static void ieee80211_if_setup(struct net_device *dev)
@@ -1221,7 +1220,8 @@ static void ieee80211_if_setup(struct net_device *dev)
 	ether_setup(dev);
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops = &ieee80211_dataif_ops;
-	dev->destructor = ieee80211_if_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ieee80211_if_free;
 }
 
 static void ieee80211_if_setup_no_queue(struct net_device *dev)
@@ -1905,7 +1905,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = register_netdevice(ndev);
 		if (ret) {
-			ieee80211_if_free(ndev);
+			free_netdev(ndev);
 			return ret;
 		}
 	}
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 06019dba4b10..bd88a9b80773 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -526,8 +526,6 @@ static void mac802154_wpan_free(struct net_device *dev)
 	struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
 
 	mac802154_llsec_destroy(&sdata->sec);
-
-	free_netdev(dev);
 }
 
 static void ieee802154_if_setup(struct net_device *dev)
@@ -593,7 +591,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 					sdata->dev->dev_addr);
 
 		sdata->dev->header_ops = &mac802154_header_ops;
-		sdata->dev->destructor = mac802154_wpan_free;
+		sdata->dev->needs_free_netdev = true;
+		sdata->dev->priv_destructor = mac802154_wpan_free;
 		sdata->dev->netdev_ops = &mac802154_wpan_ops;
 		sdata->dev->ml_priv = &mac802154_mlme_wpan;
 		wpan_dev->promiscuous_mode = false;
@@ -608,7 +607,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 
 		break;
 	case NL802154_IFTYPE_MONITOR:
-		sdata->dev->destructor = free_netdev;
+		sdata->dev->needs_free_netdev = true;
 		sdata->dev->netdev_ops = &mac802154_monitor_ops;
 		wpan_dev->promiscuous_mode = true;
 		break;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 89193a634da4..04a3128adcf0 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -94,7 +94,6 @@ static void internal_dev_destructor(struct net_device *dev)
 	struct vport *vport = ovs_internal_dev_get_vport(dev);
 
 	ovs_vport_free(vport);
-	free_netdev(dev);
 }
 
 static void
@@ -156,7 +155,8 @@ static void do_setup(struct net_device *netdev)
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
 			      IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
-	netdev->destructor = internal_dev_destructor;
+	netdev->needs_free_netdev = true;
+	netdev->priv_destructor = internal_dev_destructor;
 	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->rtnl_link_ops = &internal_dev_link_ops;
 
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 21c28b51be94..2c9337946e30 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -236,7 +236,7 @@ static void gprs_setup(struct net_device *dev)
 	dev->tx_queue_len	= 10;
 
 	dev->netdev_ops		= &gprs_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /*
-- 
cgit v1.2.3


From bcbc2265f269cc57924371e3bce8c3220d0270c5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Jun 2017 09:02:20 +0200
Subject: acpi: always include uuid.h

Without this the build will fail for !CONFIG_ACPI builds on x86.

Fixes: 94116f81 ("ACPI: Switch to use generic guid_t in acpi_evaluate_dsm()")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ab19365c905f..cafdfb84ca28 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -26,6 +26,7 @@
 #include <linux/resource_ext.h>
 #include <linux/device.h>
 #include <linux/property.h>
+#include <linux/uuid.h>
 
 #ifndef _LINUX
 #define _LINUX
@@ -39,7 +40,6 @@
 #include <linux/dynamic_debug.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/uuid.h>
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
-- 
cgit v1.2.3


From 5dd0b16cdaff9b94da06074d5888b03235c0bf17 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 5 Jun 2017 07:40:25 -0700
Subject: mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP

This fixes CONFIG_SMP=n, CONFIG_DEBUG_TLBFLUSH=y without introducing
further #ifdef soup.  Caught by a Kbuild bot randconfig build.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code")
Link: http://lkml.kernel.org/r/76da9a3cc4415996f2ad2c905b93414add322021.1496673616.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/vm_event_item.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index d84ae90ccd5c..be3ab2d13adf 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -93,10 +93,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 #endif
 #ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
 		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
 		NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
-#endif /* CONFIG_SMP */
 		NR_TLB_LOCAL_FLUSH_ALL,
 		NR_TLB_LOCAL_FLUSH_ONE,
 #endif /* CONFIG_DEBUG_TLBFLUSH */
-- 
cgit v1.2.3


From 209a0cbda7a01d2ea32a8b631d35e873bee498e9 Mon Sep 17 00:00:00 2001
From: Luca Abeni <luca.abeni@santannapisa.it>
Date: Thu, 18 May 2017 22:13:29 +0200
Subject: sched/deadline: Improve the tracking of active utilization

This patch implements a more theoretically sound algorithm for
tracking active utilization: instead of decreasing it when a
task blocks, use a timer (the "inactive timer", named after the
"Inactive" task state of the GRUB algorithm) to decrease the
active utilization at the so called "0-lag time".

Tested-by: Claudio Scordino <claudio@evidence.eu.com>
Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Luca Abeni <luca.abeni@santannapisa.it>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/1495138417-6203-3-git-send-email-luca.abeni@santannapisa.it
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   |  17 +++
 kernel/sched/core.c     |   3 +
 kernel/sched/deadline.c | 269 +++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h    |   2 +
 4 files changed, 276 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1abaa3728bf7..f1ead2e88d3d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -445,16 +445,33 @@ struct sched_dl_entity {
 	 *
 	 * @dl_yielded tells if task gave up the CPU before consuming
 	 * all its available runtime during the last job.
+	 *
+	 * @dl_non_contending tells if the task is inactive while still
+	 * contributing to the active utilization. In other words, it
+	 * indicates if the inactive timer has been armed and its handler
+	 * has not been executed yet. This flag is useful to avoid race
+	 * conditions between the inactive timer handler and the wakeup
+	 * code.
 	 */
 	int				dl_throttled;
 	int				dl_boosted;
 	int				dl_yielded;
+	int				dl_non_contending;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer			dl_timer;
+
+	/*
+	 * Inactive timer, responsible for decreasing the active utilization
+	 * at the "0-lag time". When a -deadline task blocks, it contributes
+	 * to GRUB's active utilization until the "0-lag time", hence a
+	 * timer is needed to decrease the active utilization at the correct
+	 * time.
+	 */
+	struct hrtimer inactive_timer;
 };
 
 union rcu_special {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c3e50cada84d..968c655ec5d9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2153,6 +2153,7 @@ void __dl_clear_params(struct task_struct *p)
 
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
+	dl_se->dl_non_contending = 0;
 }
 
 /*
@@ -2184,6 +2185,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
+	init_dl_inactive_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
@@ -2506,6 +2508,7 @@ static int dl_overflow(struct task_struct *p, int policy,
 		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
 		__dl_clear(dl_b, p->dl.dl_bw);
 		__dl_add(dl_b, new_bw);
+		dl_change_utilization(p, new_bw);
 		err = 0;
 	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
 		__dl_clear(dl_b, p->dl.dl_bw);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b36ecc2b1b10..6480a929417c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -65,6 +65,161 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 		dl_rq->running_bw = 0;
 }
 
+void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+	if (task_on_rq_queued(p))
+		return;
+
+	if (!p->dl.dl_non_contending)
+		return;
+
+	sub_running_bw(p->dl.dl_bw, &task_rq(p)->dl);
+	p->dl.dl_non_contending = 0;
+	/*
+	 * If the timer handler is currently running and the
+	 * timer cannot be cancelled, inactive_task_timer()
+	 * will see that dl_not_contending is not set, and
+	 * will not touch the rq's active utilization,
+	 * so we are still safe.
+	 */
+	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+		put_task_struct(p);
+}
+
+/*
+ * The utilization of a task cannot be immediately removed from
+ * the rq active utilization (running_bw) when the task blocks.
+ * Instead, we have to wait for the so called "0-lag time".
+ *
+ * If a task blocks before the "0-lag time", a timer (the inactive
+ * timer) is armed, and running_bw is decreased when the timer
+ * fires.
+ *
+ * If the task wakes up again before the inactive timer fires,
+ * the timer is cancelled, whereas if the task wakes up after the
+ * inactive timer fired (and running_bw has been decreased) the
+ * task's utilization has to be added to running_bw again.
+ * A flag in the deadline scheduling entity (dl_non_contending)
+ * is used to avoid race conditions between the inactive timer handler
+ * and task wakeups.
+ *
+ * The following diagram shows how running_bw is updated. A task is
+ * "ACTIVE" when its utilization contributes to running_bw; an
+ * "ACTIVE contending" task is in the TASK_RUNNING state, while an
+ * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
+ * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
+ * time already passed, which does not contribute to running_bw anymore.
+ *                              +------------------+
+ *             wakeup           |    ACTIVE        |
+ *          +------------------>+   contending     |
+ *          | add_running_bw    |                  |
+ *          |                   +----+------+------+
+ *          |                        |      ^
+ *          |                dequeue |      |
+ * +--------+-------+                |      |
+ * |                |   t >= 0-lag   |      | wakeup
+ * |    INACTIVE    |<---------------+      |
+ * |                | sub_running_bw |      |
+ * +--------+-------+                |      |
+ *          ^                        |      |
+ *          |              t < 0-lag |      |
+ *          |                        |      |
+ *          |                        V      |
+ *          |                   +----+------+------+
+ *          | sub_running_bw    |    ACTIVE        |
+ *          +-------------------+                  |
+ *            inactive timer    |  non contending  |
+ *            fired             +------------------+
+ *
+ * The task_non_contending() function is invoked when a task
+ * blocks, and checks if the 0-lag time already passed or
+ * not (in the first case, it directly updates running_bw;
+ * in the second case, it arms the inactive timer).
+ *
+ * The task_contending() function is invoked when a task wakes
+ * up, and checks if the task is still in the "ACTIVE non contending"
+ * state or not (in the second case, it updates running_bw).
+ */
+static void task_non_contending(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+	struct hrtimer *timer = &dl_se->inactive_timer;
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq);
+	s64 zerolag_time;
+
+	/*
+	 * If this is a non-deadline task that has been boosted,
+	 * do nothing
+	 */
+	if (dl_se->dl_runtime == 0)
+		return;
+
+	WARN_ON(hrtimer_active(&dl_se->inactive_timer));
+	WARN_ON(dl_se->dl_non_contending);
+
+	zerolag_time = dl_se->deadline -
+		 div64_long((dl_se->runtime * dl_se->dl_period),
+			dl_se->dl_runtime);
+
+	/*
+	 * Using relative times instead of the absolute "0-lag time"
+	 * allows to simplify the code
+	 */
+	zerolag_time -= rq_clock(rq);
+
+	/*
+	 * If the "0-lag time" already passed, decrease the active
+	 * utilization now, instead of starting a timer
+	 */
+	if (zerolag_time < 0) {
+		if (dl_task(p))
+			sub_running_bw(dl_se->dl_bw, dl_rq);
+		if (!dl_task(p) || p->state == TASK_DEAD)
+			__dl_clear_params(p);
+
+		return;
+	}
+
+	dl_se->dl_non_contending = 1;
+	get_task_struct(p);
+	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
+}
+
+static void task_contending(struct sched_dl_entity *dl_se)
+{
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+	/*
+	 * If this is a non-deadline task that has been boosted,
+	 * do nothing
+	 */
+	if (dl_se->dl_runtime == 0)
+		return;
+
+	if (dl_se->dl_non_contending) {
+		dl_se->dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_not_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
+			put_task_struct(dl_task_of(dl_se));
+	} else {
+		/*
+		 * Since "dl_non_contending" is not set, the
+		 * task's utilization has already been removed from
+		 * active utilization (either when the task blocked,
+		 * when the "inactive timer" fired).
+		 * So, add it back.
+		 */
+		add_running_bw(dl_se->dl_bw, dl_rq);
+	}
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -617,10 +772,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * The task might have changed its scheduling policy to something
 	 * different than SCHED_DEADLINE (through switched_from_dl()).
 	 */
-	if (!dl_task(p)) {
-		__dl_clear_params(p);
+	if (!dl_task(p))
 		goto unlock;
-	}
 
 	/*
 	 * The task might have been boosted by someone else and might be in the
@@ -839,6 +992,49 @@ throttle:
 	}
 }
 
+static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+{
+	struct sched_dl_entity *dl_se = container_of(timer,
+						     struct sched_dl_entity,
+						     inactive_timer);
+	struct task_struct *p = dl_task_of(dl_se);
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+
+	if (!dl_task(p) || p->state == TASK_DEAD) {
+		if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
+			sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+			dl_se->dl_non_contending = 0;
+		}
+		__dl_clear_params(p);
+
+		goto unlock;
+	}
+	if (dl_se->dl_non_contending == 0)
+		goto unlock;
+
+	sched_clock_tick();
+	update_rq_clock(rq);
+
+	sub_running_bw(dl_se->dl_bw, &rq->dl);
+	dl_se->dl_non_contending = 0;
+unlock:
+	task_rq_unlock(rq, p, &rf);
+	put_task_struct(p);
+
+	return HRTIMER_NORESTART;
+}
+
+void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+{
+	struct hrtimer *timer = &dl_se->inactive_timer;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = inactive_task_timer;
+}
+
 #ifdef CONFIG_SMP
 
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -971,9 +1167,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
 	 * we want a replenishment of its runtime.
 	 */
 	if (flags & ENQUEUE_WAKEUP) {
-		struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
-
-		add_running_bw(dl_se->dl_bw, dl_rq);
+		task_contending(dl_se);
 		update_dl_entity(dl_se, pi_se);
 	} else if (flags & ENQUEUE_REPLENISH) {
 		replenish_dl_entity(dl_se, pi_se);
@@ -1042,7 +1236,9 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * add_running_bw().
 	 */
 	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
-		add_running_bw(p->dl.dl_bw, &rq->dl);
+		if (flags & ENQUEUE_WAKEUP)
+			task_contending(&p->dl);
+
 		return;
 	}
 
@@ -1067,7 +1263,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		sub_running_bw(p->dl.dl_bw, &rq->dl);
 
 	/*
-	 * This check allows to decrease the active utilization in two cases:
+	 * This check allows to start the inactive timer (or to immediately
+	 * decrease the active utilization, if needed) in two cases:
 	 * when the task blocks and when it is terminating
 	 * (p->state == TASK_DEAD). We can handle the two cases in the same
 	 * way, because from GRUB's point of view the same thing is happening
@@ -1075,7 +1272,7 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * or "inactive")
 	 */
 	if (flags & DEQUEUE_SLEEP)
-		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		task_non_contending(p);
 }
 
 /*
@@ -1153,6 +1350,35 @@ out:
 	return cpu;
 }
 
+static void migrate_task_rq_dl(struct task_struct *p)
+{
+	struct rq *rq;
+
+	if (!(p->state == TASK_WAKING) || !(p->dl.dl_non_contending))
+		return;
+
+	rq = task_rq(p);
+	/*
+	 * Since p->state == TASK_WAKING, set_task_cpu() has been called
+	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
+	 * rq->lock is not... So, lock it
+	 */
+	raw_spin_lock(&rq->lock);
+	sub_running_bw(p->dl.dl_bw, &rq->dl);
+	p->dl.dl_non_contending = 0;
+	/*
+	 * If the timer handler is currently running and the
+	 * timer cannot be cancelled, inactive_task_timer()
+	 * will see that dl_not_contending is not set, and
+	 * will not touch the rq's active utilization,
+	 * so we are still safe.
+	 */
+	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+		put_task_struct(p);
+
+	raw_spin_unlock(&rq->lock);
+}
+
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
@@ -1794,13 +2020,23 @@ void __init init_sched_dl_class(void)
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * Start the deadline timer; if we switch back to dl before this we'll
-	 * continue consuming our current CBS slice. If we stay outside of
-	 * SCHED_DEADLINE until the deadline passes, the timer will reset the
-	 * task.
+	 * task_non_contending() can start the "inactive timer" (if the 0-lag
+	 * time is in the future). If the task switches back to dl before
+	 * the "inactive timer" fires, it can continue to consume its current
+	 * runtime using its current deadline. If it stays outside of
+	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
+	 * will reset the task parameters.
 	 */
-	if (!start_dl_timer(p))
-		__dl_clear_params(p);
+	if (task_on_rq_queued(p) && p->dl.dl_runtime)
+		task_non_contending(p);
+
+	/*
+	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
+	 * at the 0-lag time, because the task could have been migrated
+	 * while SCHED_OTHER in the meanwhile.
+	 */
+	if (p->dl.dl_non_contending)
+		p->dl.dl_non_contending = 0;
 
 	/*
 	 * Since this might be the only -deadline task on the rq,
@@ -1819,6 +2055,8 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
+	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+		put_task_struct(p);
 
 	/* If p is not queued we will update its parameters at next wakeup. */
 	if (!task_on_rq_queued(p))
@@ -1893,6 +2131,7 @@ const struct sched_class dl_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_dl,
+	.migrate_task_rq	= migrate_task_rq_dl,
 	.set_cpus_allowed       = set_cpus_allowed_dl,
 	.rq_online              = rq_online_dl,
 	.rq_offline             = rq_offline_dl,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ee26867da339..c58f38905e0a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -244,6 +244,7 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
 	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
 }
 
+void dl_change_utilization(struct task_struct *p, u64 new_bw);
 extern void init_dl_bw(struct dl_bw *dl_b);
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -1493,6 +1494,7 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
 extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
 
 unsigned long to_ratio(u64 period, u64 runtime);
 
-- 
cgit v1.2.3


From 54d6d3039e2d84b6fbfbe59ec57d856371edf0a2 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Mon, 29 May 2017 16:24:02 +0200
Subject: sched/deadline: Fix dl_bw comment

The sched_dl_entity's dl_bw variable stores the utilization (dl_runtime / dl_period)
of a task, not its density (dl_runtime / dl_deadline), as the comment says.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Cc: Xunlei Pang <xpang@redhat.com>
Link: http://lkml.kernel.org/r/8d05f1ccfd02da1a11bda62494d98f5456c1469a.1495803804.git.bristot@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f1ead2e88d3d..3113c828483b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -421,7 +421,7 @@ struct sched_dl_entity {
 	u64				dl_runtime;	/* Maximum runtime for each instance	*/
 	u64				dl_deadline;	/* Relative deadline of each instance	*/
 	u64				dl_period;	/* Separation of two instances (period) */
-	u64				dl_bw;		/* dl_runtime / dl_deadline		*/
+	u64				dl_bw;		/* dl_runtime / dl_period		*/
 
 	/*
 	 * Actual scheduling parameters. Initialized with the values above,
-- 
cgit v1.2.3


From 3effcb4247e74a51f5d8b775a1ee4abf87cc089a Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Mon, 29 May 2017 16:24:03 +0200
Subject: sched/deadline: Use the revised wakeup rule for suspending
 constrained dl tasks

We have been facing some problems with self-suspending constrained
deadline tasks. The main reason is that the original CBS was not
designed for such sort of tasks.

One problem reported by Xunlei Pang takes place when a task
suspends, and then is awakened before the deadline, but so close
to the deadline that its remaining runtime can cause the task
to have an absolute density higher than allowed. In such situation,
the original CBS assumes that the task is facing an early activation,
and so it replenishes the task and set another deadline, one deadline
in the future. This rule works fine for implicit deadline tasks.
Moreover, it allows the system to adapt the period of a task in which
the external event source suffered from a clock drift.

However, this opens the window for bandwidth leakage for constrained
deadline tasks. For instance, a task with the following parameters:

  runtime   = 5 ms
  deadline  = 7 ms
  [density] = 5 / 7 = 0.71
  period    = 1000 ms

If the task runs for 1 ms, and then suspends for another 1ms,
it will be awakened with the following parameters:

  remaining runtime = 4
  laxity = 5

presenting a absolute density of 4 / 5 = 0.80.

In this case, the original CBS would assume the task had an early
wakeup. Then, CBS will reset the runtime, and the absolute deadline will
be postponed by one relative deadline, allowing the task to run.

The problem is that, if the task runs this pattern forever, it will keep
receiving bandwidth, being able to run 1ms every 2ms. Following this
behavior, the task would be able to run 500 ms in 1 sec. Thus running
more than the 5 ms / 1 sec the admission control allowed it to run.

Trying to address the self-suspending case, Luca Abeni, Giuseppe
Lipari, and Juri Lelli [1] revisited the CBS in order to deal with
self-suspending tasks. In the new approach, rather than
replenishing/postponing the absolute deadline, the revised wakeup rule
adjusts the remaining runtime, reducing it to fit into the allowed
density.

A revised version of the idea is:

At a given time t, the maximum absolute density of a task cannot be
higher than its relative density, that is:

  runtime / (deadline - t) <= dl_runtime / dl_deadline

Knowing the laxity of a task (deadline - t), it is possible to move
it to the other side of the equality, thus enabling to define max
remaining runtime a task can use within the absolute deadline, without
over-running the allowed density:

  runtime = (dl_runtime / dl_deadline) * (deadline - t)

For instance, in our previous example, the task could still run:

  runtime = ( 5 / 7 ) * 5
  runtime = 3.57 ms

Without causing damage for other deadline tasks. It is note worthy
that the laxity cannot be negative because that would cause a negative
runtime. Thus, this patch depends on the patch:

  df8eac8cafce ("sched/deadline: Throttle a constrained deadline task activated after the deadline")

Which throttles a constrained deadline task activated after the
deadline.

Finally, it is also possible to use the revised wakeup rule for
all other tasks, but that would require some more discussions
about pros and cons.

Reported-by: Xunlei Pang <xpang@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
[peterz: replaced dl_is_constrained with dl_is_implicit]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@ufsc.br>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/5c800ab3a74a168a84ee5f3f84d12a02e11383be.1495803804.git.bristot@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   |  1 +
 kernel/sched/core.c     |  2 +
 kernel/sched/deadline.c | 98 +++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 89 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3113c828483b..1f0f427e0292 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -422,6 +422,7 @@ struct sched_dl_entity {
 	u64				dl_deadline;	/* Relative deadline of each instance	*/
 	u64				dl_period;	/* Separation of two instances (period) */
 	u64				dl_bw;		/* dl_runtime / dl_period		*/
+	u64				dl_density;	/* dl_runtime / dl_deadline		*/
 
 	/*
 	 * Actual scheduling parameters. Initialized with the values above,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 799647927c4c..e5bd587e87f8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2150,6 +2150,7 @@ void __dl_clear_params(struct task_struct *p)
 	dl_se->dl_period = 0;
 	dl_se->flags = 0;
 	dl_se->dl_bw = 0;
+	dl_se->dl_density = 0;
 
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
@@ -4030,6 +4031,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
 	dl_se->flags = attr->sched_flags;
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
 }
 
 /*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 54302cf68bb9..e12f85975857 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -704,13 +704,84 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 }
 
 /*
- * When a -deadline entity is queued back on the runqueue, its runtime and
- * deadline might need updating.
+ * Revised wakeup rule [1]: For self-suspending tasks, rather then
+ * re-initializing task's runtime and deadline, the revised wakeup
+ * rule adjusts the task's runtime to avoid the task to overrun its
+ * density.
  *
- * The policy here is that we update the deadline of the entity only if:
- *  - the current deadline is in the past,
- *  - using the remaining runtime with the current deadline would make
- *    the entity exceed its bandwidth.
+ * Reasoning: a task may overrun the density if:
+ *    runtime / (deadline - t) > dl_runtime / dl_deadline
+ *
+ * Therefore, runtime can be adjusted to:
+ *     runtime = (dl_runtime / dl_deadline) * (deadline - t)
+ *
+ * In such way that runtime will be equal to the maximum density
+ * the task can use without breaking any rule.
+ *
+ * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
+ * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
+ */
+static void
+update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+	u64 laxity = dl_se->deadline - rq_clock(rq);
+
+	/*
+	 * If the task has deadline < period, and the deadline is in the past,
+	 * it should already be throttled before this check.
+	 *
+	 * See update_dl_entity() comments for further details.
+	 */
+	WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
+
+	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
+}
+
+/*
+ * Regarding the deadline, a task with implicit deadline has a relative
+ * deadline == relative period. A task with constrained deadline has a
+ * relative deadline <= relative period.
+ *
+ * We support constrained deadline tasks. However, there are some restrictions
+ * applied only for tasks which do not have an implicit deadline. See
+ * update_dl_entity() to know more about such restrictions.
+ *
+ * The dl_is_implicit() returns true if the task has an implicit deadline.
+ */
+static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_deadline == dl_se->dl_period;
+}
+
+/*
+ * When a deadline entity is placed in the runqueue, its runtime and deadline
+ * might need to be updated. This is done by a CBS wake up rule. There are two
+ * different rules: 1) the original CBS; and 2) the Revisited CBS.
+ *
+ * When the task is starting a new period, the Original CBS is used. In this
+ * case, the runtime is replenished and a new absolute deadline is set.
+ *
+ * When a task is queued before the begin of the next period, using the
+ * remaining runtime and deadline could make the entity to overflow, see
+ * dl_entity_overflow() to find more about runtime overflow. When such case
+ * is detected, the runtime and deadline need to be updated.
+ *
+ * If the task has an implicit deadline, i.e., deadline == period, the Original
+ * CBS is applied. the runtime is replenished and a new absolute deadline is
+ * set, as in the previous cases.
+ *
+ * However, the Original CBS does not work properly for tasks with
+ * deadline < period, which are said to have a constrained deadline. By
+ * applying the Original CBS, a constrained deadline task would be able to run
+ * runtime/deadline in a period. With deadline < period, the task would
+ * overrun the runtime/period allowed bandwidth, breaking the admission test.
+ *
+ * In order to prevent this misbehave, the Revisited CBS is used for
+ * constrained deadline tasks when a runtime overflow is detected. In the
+ * Revisited CBS, rather than replenishing & setting a new absolute deadline,
+ * the remaining runtime of the task is reduced to avoid runtime overflow.
+ * Please refer to the comments update_dl_revised_wakeup() function to find
+ * more about the Revised CBS rule.
  */
 static void update_dl_entity(struct sched_dl_entity *dl_se,
 			     struct sched_dl_entity *pi_se)
@@ -720,6 +791,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+
+		if (unlikely(!dl_is_implicit(dl_se) &&
+			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+			     !dl_se->dl_boosted)){
+			update_dl_revised_wakeup(dl_se, rq);
+			return;
+		}
+
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
@@ -1274,11 +1353,6 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 	__dequeue_dl_entity(dl_se);
 }
 
-static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
-{
-	return dl_se->dl_deadline < dl_se->dl_period;
-}
-
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -1310,7 +1384,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * If that is the case, the task will be throttled and
 	 * the replenishment timer will be set to the next period.
 	 */
-	if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+	if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
 		dl_check_constrained_dl(&p->dl);
 
 	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
-- 
cgit v1.2.3


From f5694788ad8da5da41b501f3d6d2ae22379c4ef9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 19 Sep 2016 12:15:37 +0200
Subject: rt_mutex: Add lockdep annotations

Now that (PI) futexes have their own private RT-mutex interface and
implementation we can easily add lockdep annotations to the existing
RT-mutex interface.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rtmutex.h        | 25 +++++++++++++++++++++----
 kernel/locking/rtmutex-debug.c |  6 +++++-
 kernel/locking/rtmutex-debug.h |  2 +-
 kernel/locking/rtmutex.c       | 36 +++++++++++++++++++++++++++++-------
 kernel/locking/rtmutex.h       |  2 +-
 lib/Kconfig.debug              |  3 +++
 6 files changed, 60 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 1abba5ce2a2f..44fd002f7cd5 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -37,6 +37,9 @@ struct rt_mutex {
 	int			line;
 	void			*magic;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
 struct rt_mutex_waiter;
@@ -58,19 +61,33 @@ struct hrtimer_sleeper;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
 	, .name = #mutexname, .file = __FILE__, .line = __LINE__
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __func__)
+
+# define rt_mutex_init(mutex) \
+do { \
+	static struct lock_class_key __key; \
+	__rt_mutex_init(mutex, __func__, &__key); \
+} while (0)
+
  extern void rt_mutex_debug_task_free(struct task_struct *tsk);
 #else
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL)
+# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL, NULL)
 # define rt_mutex_debug_task_free(t)			do { } while (0)
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
+	, .dep_map = { .name = #mutexname }
+#else
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
+#endif
+
 #define __RT_MUTEX_INITIALIZER(mutexname) \
 	{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
 	, .waiters = RB_ROOT \
 	, .owner = NULL \
-	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
+	__DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+	__DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
 
 #define DEFINE_RT_MUTEX(mutexname) \
 	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
@@ -86,7 +103,7 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock)
 	return lock->owner != NULL;
 }
 
-extern void __rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void rt_mutex_destroy(struct rt_mutex *lock);
 
 extern void rt_mutex_lock(struct rt_mutex *lock);
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index 58e366ad36f4..ac35e648b0e5 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -166,12 +166,16 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 	memset(waiter, 0x22, sizeof(*waiter));
 }
 
-void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
+void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key)
 {
 	/*
 	 * Make sure we are not reinitializing a held lock:
 	 */
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lock->name = name;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
 }
 
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index b585af9a1b50..5078c6ddf4a5 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -11,7 +11,7 @@
 
 extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
 extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
-extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void debug_rt_mutex_lock(struct rt_mutex *lock);
 extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
 extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 28cd09e635ed..43123533e9b1 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1481,6 +1481,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
 	might_sleep();
 
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
@@ -1496,9 +1497,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
  */
 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
 {
+	int ret;
+
 	might_sleep();
 
-	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
@@ -1526,11 +1534,18 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
 int
 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
 {
+	int ret;
+
 	might_sleep();
 
-	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
 				       RT_MUTEX_MIN_CHAINWALK,
 				       rt_mutex_slowlock);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
 
@@ -1547,10 +1562,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  */
 int __sched rt_mutex_trylock(struct rt_mutex *lock)
 {
+	int ret;
+
 	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
 		return 0;
 
-	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	if (ret)
+		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
 
@@ -1561,6 +1582,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
  */
 void __sched rt_mutex_unlock(struct rt_mutex *lock)
 {
+	mutex_release(&lock->dep_map, 1, _RET_IP_);
 	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
@@ -1620,7 +1642,6 @@ void rt_mutex_destroy(struct rt_mutex *lock)
 	lock->magic = NULL;
 #endif
 }
-
 EXPORT_SYMBOL_GPL(rt_mutex_destroy);
 
 /**
@@ -1632,14 +1653,15 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  *
  * Initializing of a locked rt lock is not allowed
  */
-void __rt_mutex_init(struct rt_mutex *lock, const char *name)
+void __rt_mutex_init(struct rt_mutex *lock, const char *name,
+		     struct lock_class_key *key)
 {
 	lock->owner = NULL;
 	raw_spin_lock_init(&lock->wait_lock);
 	lock->waiters = RB_ROOT;
 	lock->waiters_leftmost = NULL;
 
-	debug_rt_mutex_init(lock, name);
+	debug_rt_mutex_init(lock, name, key);
 }
 EXPORT_SYMBOL_GPL(__rt_mutex_init);
 
@@ -1660,7 +1682,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 				struct task_struct *proxy_owner)
 {
-	__rt_mutex_init(lock, NULL);
+	__rt_mutex_init(lock, NULL, NULL);
 	debug_rt_mutex_proxy_lock(lock, proxy_owner);
 	rt_mutex_set_owner(lock, proxy_owner);
 }
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index 6607802efa8b..5c253caffe91 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -17,7 +17,7 @@
 #define debug_rt_mutex_proxy_lock(l,p)			do { } while (0)
 #define debug_rt_mutex_proxy_unlock(l)			do { } while (0)
 #define debug_rt_mutex_unlock(l)			do { } while (0)
-#define debug_rt_mutex_init(m, n)			do { } while (0)
+#define debug_rt_mutex_init(m, n, k)			do { } while (0)
 #define debug_rt_mutex_deadlock(d, a ,l)		do { } while (0)
 #define debug_rt_mutex_print_deadlock(w)		do { } while (0)
 #define debug_rt_mutex_reset_waiter(w)			do { } while (0)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4587ebe52c7..ca615129aec5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1052,6 +1052,7 @@ config DEBUG_LOCK_ALLOC
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select LOCKDEP
 	help
 	 This feature will check whether any held lock (spinlock, rwlock,
@@ -1067,6 +1068,7 @@ config PROVE_LOCKING
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	select TRACE_IRQFLAGS
 	default n
@@ -1121,6 +1123,7 @@ config LOCK_STAT
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	default n
 	help
-- 
cgit v1.2.3


From 71399aa5d68bb3ed8c4caf8bfd71faae39555876 Mon Sep 17 00:00:00 2001
From: Benson Leung <bleung@chromium.org>
Date: Mon, 8 May 2017 15:02:48 -0700
Subject: power: supply: Add Apple Brick ID power supply type

Apple currently supports three very common USB chargers:
https://www.apple.com/power-adapters/

These chargers implement a proprietary Apple method for advertising
1A, 2.1A, and 2.4A at 5V called "Brick ID".
In addition, 3rd parties implement the same charging method in many
charging accessories that work with iOS devices.

Devices that have charger detection chips such as the Pericom PI3USB9281,
eg. Google Chromebook Pixel 2015, are capable of detecting
these chargers, so let's add a type to facilicate passing that info
up to userspace.

This adds a separate power supply type for Apple's proprietary
"Brick ID" charging method.

Signed-off-by: Benson Leung <bleung@chromium.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/power_supply_sysfs.c |  2 +-
 include/linux/power_supply.h              | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index bcde8d13476a..07b484f995c1 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -46,7 +46,7 @@ static ssize_t power_supply_show_property(struct device *dev,
 	static char *type_text[] = {
 		"Unknown", "Battery", "UPS", "Mains", "USB",
 		"USB_DCP", "USB_CDP", "USB_ACA", "USB_C",
-		"USB_PD", "USB_PD_DRP"
+		"USB_PD", "USB_PD_DRP", "BrickID"
 	};
 	static char *status_text[] = {
 		"Unknown", "Charging", "Discharging", "Not charging", "Full"
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 3965503315ef..4bd34051995e 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -159,13 +159,14 @@ enum power_supply_type {
 	POWER_SUPPLY_TYPE_BATTERY,
 	POWER_SUPPLY_TYPE_UPS,
 	POWER_SUPPLY_TYPE_MAINS,
-	POWER_SUPPLY_TYPE_USB,		/* Standard Downstream Port */
-	POWER_SUPPLY_TYPE_USB_DCP,	/* Dedicated Charging Port */
-	POWER_SUPPLY_TYPE_USB_CDP,	/* Charging Downstream Port */
-	POWER_SUPPLY_TYPE_USB_ACA,	/* Accessory Charger Adapters */
-	POWER_SUPPLY_TYPE_USB_TYPE_C,	/* Type C Port */
-	POWER_SUPPLY_TYPE_USB_PD,	/* Power Delivery Port */
-	POWER_SUPPLY_TYPE_USB_PD_DRP,	/* PD Dual Role Port */
+	POWER_SUPPLY_TYPE_USB,			/* Standard Downstream Port */
+	POWER_SUPPLY_TYPE_USB_DCP,		/* Dedicated Charging Port */
+	POWER_SUPPLY_TYPE_USB_CDP,		/* Charging Downstream Port */
+	POWER_SUPPLY_TYPE_USB_ACA,		/* Accessory Charger Adapters */
+	POWER_SUPPLY_TYPE_USB_TYPE_C,		/* Type C Port */
+	POWER_SUPPLY_TYPE_USB_PD,		/* Power Delivery Port */
+	POWER_SUPPLY_TYPE_USB_PD_DRP,		/* PD Dual Role Port */
+	POWER_SUPPLY_TYPE_APPLE_BRICK_ID,	/* Apple Charging Method */
 };
 
 enum power_supply_notifier_events {
-- 
cgit v1.2.3


From 0c90e9c6b5549825e410b6589ad6c4478f81ebad Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Sun, 12 Mar 2017 11:35:23 +0200
Subject: net/mlx5: Update flow table commands layout

Update struct mlx5_ifc_create(modify)_flow_table_bits according to
the last device specification.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 32 ++++++++++-------
 include/linux/mlx5/mlx5_ifc.h                    | 46 +++++++++++-------------
 2 files changed, 40 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index abb44a268563..e750f07793b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -78,28 +78,33 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
 
 	MLX5_SET(create_flow_table_in, in, table_type, type);
-	MLX5_SET(create_flow_table_in, in, level, level);
-	MLX5_SET(create_flow_table_in, in, log_size, log_size);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.level, level);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size);
 	if (vport) {
 		MLX5_SET(create_flow_table_in, in, vport_number, vport);
 		MLX5_SET(create_flow_table_in, in, other_vport, 1);
 	}
 
-	MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap);
-	MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
+		 en_encap_decap);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
+		 en_encap_decap);
 
 	switch (op_mod) {
 	case FS_FT_OP_MOD_NORMAL:
 		if (next_ft) {
-			MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
-			MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
+			MLX5_SET(create_flow_table_in, in,
+				 flow_table_context.table_miss_action, 1);
+			MLX5_SET(create_flow_table_in, in,
+				 flow_table_context.table_miss_id, next_ft->id);
 		}
 		break;
 
 	case FS_FT_OP_MOD_LAG_DEMUX:
 		MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
 		if (next_ft)
-			MLX5_SET(create_flow_table_in, in, lag_master_next_table_id,
+			MLX5_SET(create_flow_table_in, in,
+				 flow_table_context.lag_master_next_table_id,
 				 next_ft->id);
 		break;
 	}
@@ -146,10 +151,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
 			 MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
 		if (next_ft) {
 			MLX5_SET(modify_flow_table_in, in,
-				 lag_master_next_table_id, next_ft->id);
+				 flow_table_context.lag_master_next_table_id, next_ft->id);
 		} else {
 			MLX5_SET(modify_flow_table_in, in,
-				 lag_master_next_table_id, 0);
+				 flow_table_context.lag_master_next_table_id, 0);
 		}
 	} else {
 		if (ft->vport) {
@@ -160,11 +165,14 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
 		MLX5_SET(modify_flow_table_in, in, modify_field_select,
 			 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
 		if (next_ft) {
-			MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
-			MLX5_SET(modify_flow_table_in, in, table_miss_id,
+			MLX5_SET(modify_flow_table_in, in,
+				 flow_table_context.table_miss_action, 1);
+			MLX5_SET(modify_flow_table_in, in,
+				 flow_table_context.table_miss_id,
 				 next_ft->id);
 		} else {
-			MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+			MLX5_SET(modify_flow_table_in, in,
+				 flow_table_context.table_miss_action, 0);
 		}
 	}
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 56e96f6a0a45..ec308657af3b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -6627,6 +6627,24 @@ struct mlx5_ifc_create_flow_table_out_bits {
 	u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_flow_table_context_bits {
+	u8         encap_en[0x1];
+	u8         decap_en[0x1];
+	u8         reserved_at_2[0x2];
+	u8         table_miss_action[0x4];
+	u8         level[0x8];
+	u8         reserved_at_10[0x8];
+	u8         log_size[0x8];
+
+	u8         reserved_at_20[0x8];
+	u8         table_miss_id[0x18];
+
+	u8         reserved_at_40[0x8];
+	u8         lag_master_next_table_id[0x18];
+
+	u8         reserved_at_60[0xe0];
+};
+
 struct mlx5_ifc_create_flow_table_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
@@ -6645,21 +6663,7 @@ struct mlx5_ifc_create_flow_table_in_bits {
 
 	u8         reserved_at_a0[0x20];
 
-	u8         encap_en[0x1];
-	u8         decap_en[0x1];
-	u8         reserved_at_c2[0x2];
-	u8         table_miss_mode[0x4];
-	u8         level[0x8];
-	u8         reserved_at_d0[0x8];
-	u8         log_size[0x8];
-
-	u8         reserved_at_e0[0x8];
-	u8         table_miss_id[0x18];
-
-	u8         reserved_at_100[0x8];
-	u8         lag_master_next_table_id[0x18];
-
-	u8         reserved_at_120[0x80];
+	struct mlx5_ifc_flow_table_context_bits flow_table_context;
 };
 
 struct mlx5_ifc_create_flow_group_out_bits {
@@ -8277,17 +8281,7 @@ struct mlx5_ifc_modify_flow_table_in_bits {
 	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_at_c0[0x4];
-	u8         table_miss_mode[0x4];
-	u8         reserved_at_c8[0x18];
-
-	u8         reserved_at_e0[0x8];
-	u8         table_miss_id[0x18];
-
-	u8         reserved_at_100[0x8];
-	u8         lag_master_next_table_id[0x18];
-
-	u8         reserved_at_120[0x80];
+	struct mlx5_ifc_flow_table_context_bits flow_table_context;
 };
 
 struct mlx5_ifc_ets_tcn_config_reg_bits {
-- 
cgit v1.2.3


From 5b4793f817452e478442684e6bba85bddb5a9345 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 13 Feb 2017 14:00:59 +0200
Subject: net/mlx5e: Add support for reading connector type from PTYS

Read port connector type from the firmware instead of caching it in the
driver metadata.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 22 ++++++++++++++++++++--
 include/linux/mlx5/mlx5_ifc.h                      |  7 +++++--
 include/linux/mlx5/port.h                          | 13 +++++++++++++
 3 files changed, 38 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index e9e33fd68279..f03c2d088d0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -809,8 +809,23 @@ static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
 		ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
 }
 
-static u8 get_connector_port(u32 eth_proto)
+static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = {
+		[MLX5E_PORT_UNKNOWN]            = PORT_OTHER,
+		[MLX5E_PORT_NONE]               = PORT_NONE,
+		[MLX5E_PORT_TP]                 = PORT_TP,
+		[MLX5E_PORT_AUI]                = PORT_AUI,
+		[MLX5E_PORT_BNC]                = PORT_BNC,
+		[MLX5E_PORT_MII]                = PORT_MII,
+		[MLX5E_PORT_FIBRE]              = PORT_FIBRE,
+		[MLX5E_PORT_DA]                 = PORT_DA,
+		[MLX5E_PORT_OTHER]              = PORT_OTHER,
+	};
+
+static u8 get_connector_port(u32 eth_proto, u8 connector_type)
 {
+	if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+		return ptys2connector_type[connector_type];
+
 	if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
 			 | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)
 			 | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4)
@@ -856,6 +871,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	u32 eth_proto_oper;
 	u8 an_disable_admin;
 	u8 an_status;
+	u8 connector_type;
 	int err;
 
 	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
@@ -871,6 +887,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	eth_proto_lp     = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
 	an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
 	an_status        = MLX5_GET(ptys_reg, out, an_status);
+	connector_type   = MLX5_GET(ptys_reg, out, connector_type);
 
 	mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
 
@@ -883,7 +900,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 
 	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
 
-	link_ksettings->base.port = get_connector_port(eth_proto_oper);
+	link_ksettings->base.port = get_connector_port(eth_proto_oper,
+						       connector_type);
 	get_lp_advertising(eth_proto_lp, link_ksettings);
 
 	if (an_status == MLX5_AN_COMPLETE)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ec308657af3b..32b044e953d2 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7295,7 +7295,8 @@ struct mlx5_ifc_ptys_reg_bits {
 	u8         ib_link_width_oper[0x10];
 	u8         ib_proto_oper[0x10];
 
-	u8         reserved_at_160[0x20];
+	u8         reserved_at_160[0x1c];
+	u8         connector_type[0x4];
 
 	u8         eth_proto_lp_advertise[0x20];
 
@@ -7698,8 +7699,10 @@ struct mlx5_ifc_peir_reg_bits {
 };
 
 struct mlx5_ifc_pcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7e];
+	u8         reserved_at_0[0x7c];
 
+	u8         ptys_connector_type[0x1];
+	u8         reserved_at_7d[0x1];
 	u8         ppcnt_discard_group[0x1];
 	u8         ppcnt_statistical_group[0x1];
 };
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index e527732fb31b..c57d4b7de3a8 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -92,6 +92,19 @@ enum mlx5e_link_mode {
 	MLX5E_LINK_MODES_NUMBER,
 };
 
+enum mlx5e_connector_type {
+	MLX5E_PORT_UNKNOWN	= 0,
+	MLX5E_PORT_NONE			= 1,
+	MLX5E_PORT_TP			= 2,
+	MLX5E_PORT_AUI			= 3,
+	MLX5E_PORT_BNC			= 4,
+	MLX5E_PORT_MII			= 5,
+	MLX5E_PORT_FIBRE		= 6,
+	MLX5E_PORT_DA			= 7,
+	MLX5E_PORT_OTHER		= 8,
+	MLX5E_CONNECTOR_TYPE_NUMBER,
+};
+
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 
 #define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
-- 
cgit v1.2.3


From aaac082dac0a8ac6b00509c7ae2fa8280f966652 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 6 Jun 2017 23:59:34 -0700
Subject: HID: serialize hid_hw_open and hid_hw_close

The HID transport drivers either re-implement exactly the same logic
(usbhid, i2c-hid) or forget to implement it (usbhid) which causes issues
when the same device is accessed via multiple interfaces (for example input
device through evdev and also hidraw). Let's muve the locking logic into
HID core to make sure the serialized behavior is always enforced.

Also let's uninline and move hid_hw_start() and hid_hw_stop() into hid-core
as hid_hw_start() is somewhat large and do not believe we get any benefit
from these two being inline.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hid.h    | 72 +++++-----------------------------------
 2 files changed, 98 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 04cee65531d7..f93dd6f48a79 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1750,6 +1750,94 @@ void hid_disconnect(struct hid_device *hdev)
 }
 EXPORT_SYMBOL_GPL(hid_disconnect);
 
+/**
+ * hid_hw_start - start underlying HW
+ * @hdev: hid device
+ * @connect_mask: which outputs to connect, see HID_CONNECT_*
+ *
+ * Call this in probe function *after* hid_parse. This will setup HW
+ * buffers and start the device (if not defeirred to device open).
+ * hid_hw_stop must be called if this was successful.
+ */
+int hid_hw_start(struct hid_device *hdev, unsigned int connect_mask)
+{
+	int error;
+
+	error = hdev->ll_driver->start(hdev);
+	if (error)
+		return error;
+
+	if (connect_mask) {
+		error = hid_connect(hdev, connect_mask);
+		if (error) {
+			hdev->ll_driver->stop(hdev);
+			return error;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hid_hw_start);
+
+/**
+ * hid_hw_stop - stop underlying HW
+ * @hdev: hid device
+ *
+ * This is usually called from remove function or from probe when something
+ * failed and hid_hw_start was called already.
+ */
+void hid_hw_stop(struct hid_device *hdev)
+{
+	hid_disconnect(hdev);
+	hdev->ll_driver->stop(hdev);
+}
+EXPORT_SYMBOL_GPL(hid_hw_stop);
+
+/**
+ * hid_hw_open - signal underlying HW to start delivering events
+ * @hdev: hid device
+ *
+ * Tell underlying HW to start delivering events from the device.
+ * This function should be called sometime after successful call
+ * to hid_hiw_start().
+ */
+int hid_hw_open(struct hid_device *hdev)
+{
+	int ret;
+
+	ret = mutex_lock_killable(&hdev->ll_open_lock);
+	if (ret)
+		return ret;
+
+	if (!hdev->ll_open_count++) {
+		ret = hdev->ll_driver->open(hdev);
+		if (ret)
+			hdev->ll_open_count--;
+	}
+
+	mutex_unlock(&hdev->ll_open_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hid_hw_open);
+
+/**
+ * hid_hw_close - signal underlaying HW to stop delivering events
+ *
+ * @hdev: hid device
+ *
+ * This function indicates that we are not interested in the events
+ * from this device anymore. Delivery of events may or may not stop,
+ * depending on the number of users still outstanding.
+ */
+void hid_hw_close(struct hid_device *hdev)
+{
+	mutex_lock(&hdev->ll_open_lock);
+	if (!--hdev->ll_open_count)
+		hdev->ll_driver->close(hdev);
+	mutex_unlock(&hdev->ll_open_lock);
+}
+EXPORT_SYMBOL_GPL(hid_hw_close);
+
 /*
  * A list of devices for which there is a specialized driver on HID bus.
  *
@@ -2747,6 +2835,7 @@ struct hid_device *hid_allocate_device(void)
 	spin_lock_init(&hdev->debug_list_lock);
 	sema_init(&hdev->driver_lock, 1);
 	sema_init(&hdev->driver_input_lock, 1);
+	mutex_init(&hdev->ll_open_lock);
 
 	return hdev;
 }
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 5be325d890d9..5501eb64dbc4 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/input.h>
 #include <linux/semaphore.h>
+#include <linux/mutex.h>
 #include <linux/power_supply.h>
 #include <uapi/linux/hid.h>
 
@@ -520,7 +521,10 @@ struct hid_device {							/* device report descriptor */
 	struct semaphore driver_input_lock;				/* protects the current driver */
 	struct device dev;						/* device */
 	struct hid_driver *driver;
+
 	struct hid_ll_driver *ll_driver;
+	struct mutex ll_open_lock;
+	unsigned int ll_open_count;
 
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 	/*
@@ -937,69 +941,11 @@ static inline int __must_check hid_parse(struct hid_device *hdev)
 	return hid_open_report(hdev);
 }
 
-/**
- * hid_hw_start - start underlaying HW
- *
- * @hdev: hid device
- * @connect_mask: which outputs to connect, see HID_CONNECT_*
- *
- * Call this in probe function *after* hid_parse. This will setup HW buffers
- * and start the device (if not deffered to device open). hid_hw_stop must be
- * called if this was successful.
- */
-static inline int __must_check hid_hw_start(struct hid_device *hdev,
-		unsigned int connect_mask)
-{
-	int ret = hdev->ll_driver->start(hdev);
-	if (ret || !connect_mask)
-		return ret;
-	ret = hid_connect(hdev, connect_mask);
-	if (ret)
-		hdev->ll_driver->stop(hdev);
-	return ret;
-}
-
-/**
- * hid_hw_stop - stop underlaying HW
- *
- * @hdev: hid device
- *
- * This is usually called from remove function or from probe when something
- * failed and hid_hw_start was called already.
- */
-static inline void hid_hw_stop(struct hid_device *hdev)
-{
-	hid_disconnect(hdev);
-	hdev->ll_driver->stop(hdev);
-}
-
-/**
- * hid_hw_open - signal underlaying HW to start delivering events
- *
- * @hdev: hid device
- *
- * Tell underlying HW to start delivering events from the device.
- * This function should be called sometime after successful call
- * to hid_hiw_start().
- */
-static inline int __must_check hid_hw_open(struct hid_device *hdev)
-{
-	return hdev->ll_driver->open(hdev);
-}
-
-/**
- * hid_hw_close - signal underlaying HW to stop delivering events
- *
- * @hdev: hid device
- *
- * This function indicates that we are not interested in the events
- * from this device anymore. Delivery of events may or may not stop,
- * depending on the number of users still outstanding.
- */
-static inline void hid_hw_close(struct hid_device *hdev)
-{
-	hdev->ll_driver->close(hdev);
-}
+int __must_check hid_hw_start(struct hid_device *hdev,
+			      unsigned int connect_mask);
+void hid_hw_stop(struct hid_device *hdev);
+int __must_check hid_hw_open(struct hid_device *hdev);
+void hid_hw_close(struct hid_device *hdev);
 
 /**
  * hid_hw_power - requests underlying HW to go into given power mode
-- 
cgit v1.2.3


From 283a21da1239d8db7fdf6d9077feed73a6efffa2 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 6 Jun 2017 23:59:38 -0700
Subject: HID: remove no longer used hid->open field

Now that all users have migrated to use hid->ll_open_count, we can remove
hid->open field.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 5501eb64dbc4..72e8ac667771 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -548,7 +548,6 @@ struct hid_device {							/* device report descriptor */
 	void *hiddev;							/* The hiddev structure */
 	void *hidraw;
 
-	int open;							/* is the device open by anyone? */
 	char name[128];							/* Device name */
 	char phys[64];							/* Device physical location */
 	char uniq[64];							/* Device unique identifier (serial #) */
-- 
cgit v1.2.3


From a5fcf8a6c968ed8e312ff0b2a55d4c62d821eabb Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Tue, 6 Jun 2017 17:00:16 +0200
Subject: net: propagate tc filter chain index down the ndo_setup_tc call

We need to push the chain index down to the drivers, so they have the
information to which chain the rule belongs. For now, no driver supports
multichain offload, so only chain 0 is supported. This is needed to
prevent chain squashes during offload for now. Later this will be used
to implement multichain offload.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c            |  3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c     |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h     |  4 ++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.c           |  4 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c     |  7 +++++--
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c      |  4 ++--
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c     |  4 ++--
 drivers/net/ethernet/intel/i40e/i40e_main.c         |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c       |  7 +++++--
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c      |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c   |  6 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c    |  7 ++++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      |  6 +++++-
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c |  7 +++++--
 drivers/net/ethernet/sfc/efx.h                      |  4 ++--
 drivers/net/ethernet/sfc/falcon/efx.h               |  4 ++--
 drivers/net/ethernet/sfc/falcon/tx.c                |  4 ++--
 drivers/net/ethernet/sfc/tx.c                       |  4 ++--
 drivers/net/ethernet/ti/netcp_core.c                |  4 ++--
 include/linux/netdevice.h                           |  4 ++--
 net/dsa/slave.c                                     | 11 ++++++-----
 net/sched/cls_bpf.c                                 |  1 +
 net/sched/cls_flower.c                              | 10 ++++++----
 net/sched/cls_matchall.c                            |  9 +++++----
 net/sched/cls_u32.c                                 | 12 ++++++++----
 net/sched/sch_mqprio.c                              |  5 +++--
 26 files changed, 88 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 5a2ad9c5faab..a934bd5d0507 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1846,7 +1846,8 @@ static void xgbe_poll_controller(struct net_device *netdev)
 }
 #endif /* End CONFIG_NET_POLL_CONTROLLER */
 
-static int xgbe_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+			 __be16 proto,
 			 struct tc_to_netdev *tc_to_netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 5f49334dcad5..ef734675885e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4273,8 +4273,8 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
 	return 0;
 }
 
-int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-		     struct tc_to_netdev *tc)
+int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+		     __be16 proto, struct tc_to_netdev *tc)
 {
 	if (tc->type != TC_SETUP_MQPRIO)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 243cb9748d35..c26688d2f326 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -486,8 +486,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
 /* setup_tc callback */
 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
-int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-		     struct tc_to_netdev *tc);
+int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+		     __be16 proto, struct tc_to_netdev *tc);
 
 int bnx2x_get_vf_config(struct net_device *dev, int vf,
 			struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c1cd72a5eccf..11e8a866a312 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7103,8 +7103,8 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 	return 0;
 }
 
-static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-			 struct tc_to_netdev *ntc)
+static int bnxt_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+			 __be16 proto, struct tc_to_netdev *ntc)
 {
 	if (ntc->type != TC_SETUP_MQPRIO)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 91685bf21878..ff8bcf56bf3f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2823,12 +2823,15 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	return err;
 }
 
-static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-			 struct tc_to_netdev *tc)
+static int cxgb_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+			 __be16 proto, struct tc_to_netdev *tc)
 {
 	struct port_info *pi = netdev2pinfo(dev);
 	struct adapter *adap = netdev2adap(dev);
 
+	if (chain_index)
+		return -EOPNOTSUPP;
+
 	if (!(adap->flags & FULL_INIT_DONE)) {
 		dev_err(adap->pdev_dev,
 			"Failed to setup tc on port %d. Link Down?\n",
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 9a520e4f0df9..a5501af6db99 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -342,8 +342,8 @@ static void dpaa_get_stats64(struct net_device *net_dev,
 	}
 }
 
-static int dpaa_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
-			 struct tc_to_netdev *tc)
+static int dpaa_setup_tc(struct net_device *net_dev, u32 handle,
+			 u32 chain_index, __be16 proto, struct tc_to_netdev *tc)
 {
 	struct dpaa_priv *priv = netdev_priv(net_dev);
 	u8 num_tc;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 24f2f6f86f5a..5e37387c7082 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1265,8 +1265,8 @@ err_queueing_scheme:
 	return err;
 }
 
-static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-			    struct tc_to_netdev *tc)
+static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+			    __be16 proto, struct tc_to_netdev *tc)
 {
 	if (tc->type != TC_SETUP_MQPRIO)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5fef27ebfa52..abab7fb7a3fc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -5509,7 +5509,8 @@ exit:
 	return ret;
 }
 
-static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+static int __i40e_setup_tc(struct net_device *netdev, u32 handle,
+			   u32 chain_index, __be16 proto,
 			   struct tc_to_netdev *tc)
 {
 	if (tc->type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 54463f03b3db..812319ab77db 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9200,11 +9200,14 @@ free_jump:
 	return err;
 }
 
-static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-			    struct tc_to_netdev *tc)
+static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+			    __be16 proto, struct tc_to_netdev *tc)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 
+	if (chain_index)
+		return -EOPNOTSUPP;
+
 	if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
 	    tc->type == TC_SETUP_CLSU32) {
 		switch (tc->cls_u32->command) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 82436742ad75..c1de75fc399a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -86,7 +86,8 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 	return 0;
 }
 
-static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
+			      u32 chain_index, __be16 proto,
 			      struct tc_to_netdev *tc)
 {
 	if (tc->type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cdff04b2aea1..5afec0f4a658 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2991,13 +2991,17 @@ out:
 }
 
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
-			      __be16 proto, struct tc_to_netdev *tc)
+			      u32 chain_index, __be16 proto,
+			      struct tc_to_netdev *tc)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
 	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
 		goto mqprio;
 
+	if (chain_index)
+		return -EOPNOTSUPP;
+
 	switch (tc->type) {
 	case TC_SETUP_CLSFLOWER:
 		switch (tc->cls_flower->command) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 79462c0368a0..70c2b8d020bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -652,7 +652,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 }
 
 static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
-				  __be16 proto, struct tc_to_netdev *tc)
+				  u32 chain_index, __be16 proto,
+				  struct tc_to_netdev *tc)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
@@ -664,9 +665,13 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
 		struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw);
 
 		return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle,
+							    chain_index,
 							    proto, tc);
 	}
 
+	if (chain_index)
+		return -EOPNOTSUPP;
+
 	switch (tc->type) {
 	case TC_SETUP_CLSFLOWER:
 		switch (tc->cls_flower->command) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f60e2ba515d0..a2316d038810 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1699,11 +1699,15 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle,
-			     __be16 proto, struct tc_to_netdev *tc)
+			     u32 chain_index, __be16 proto,
+			     struct tc_to_netdev *tc)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
 
+	if (chain_index)
+		return -EOPNOTSUPP;
+
 	switch (tc->type) {
 	case TC_SETUP_MATCHALL:
 		switch (tc->cls_mall->command) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 4f0df63de626..49d1756d6a8e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2994,11 +2994,14 @@ static void nfp_net_stat64(struct net_device *netdev,
 }
 
 static int
-nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
-		 struct tc_to_netdev *tc)
+nfp_net_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+		 __be16 proto, struct tc_to_netdev *tc)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
+	if (chain_index)
+		return -EOPNOTSUPP;
+
 	return nfp_app_setup_tc(nn->app, netdev, handle, proto, tc);
 }
 
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index a0c52e328102..fcea9371ab7f 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -32,8 +32,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 				struct net_device *net_dev);
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
-int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
-		 struct tc_to_netdev *tc);
+int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
+		 __be16 proto, struct tc_to_netdev *tc);
 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
 extern unsigned int efx_piobuf_size;
 extern bool efx_separate_tx_channels;
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
index c89456fa148c..e5a7a40cc8b6 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.h
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -32,8 +32,8 @@ netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
 				struct net_device *net_dev);
 netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb);
 void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index);
-int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
-		 struct tc_to_netdev *tc);
+int ef4_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
+		 __be16 proto, struct tc_to_netdev *tc);
 unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx);
 extern bool ef4_separate_tx_channels;
 
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
index f6daf09b8627..f1520a404ac6 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -425,8 +425,8 @@ void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue)
 				     efx->n_tx_channels : 0));
 }
 
-int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
-		 struct tc_to_netdev *ntc)
+int ef4_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
+		 __be16 proto, struct tc_to_netdev *ntc)
 {
 	struct ef4_nic *efx = netdev_priv(net_dev);
 	struct ef4_channel *channel;
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 3bdf87f31087..02d41eb4a8e9 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -653,8 +653,8 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
 				     efx->n_tx_channels : 0));
 }
 
-int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
-		 struct tc_to_netdev *ntc)
+int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
+		 __be16 proto, struct tc_to_netdev *ntc)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct efx_channel *channel;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index e6222e535019..9d52c3a78621 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1877,8 +1877,8 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
-static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-			  struct tc_to_netdev *tc)
+static int netcp_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
+			  __be16 proto, struct tc_to_netdev *tc)
 {
 	u8 num_tc;
 	int i;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c50c9218e31e..524c7776ce96 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -972,7 +972,7 @@ struct xfrmdev_ops {
  *      with PF and querying it may introduce a theoretical security risk.
  * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
- * int (*ndo_setup_tc)(struct net_device *dev, u32 handle,
+ * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index,
  *		       __be16 protocol, struct tc_to_netdev *tc);
  *	Called to setup any 'tc' scheduler, classifier or action on @dev.
  *	This is always called from the stack with the rtnl lock held and netif
@@ -1222,7 +1222,7 @@ struct net_device_ops {
 						   struct net_device *dev,
 						   int vf, bool setting);
 	int			(*ndo_setup_tc)(struct net_device *dev,
-						u32 handle,
+						u32 handle, u32 chain_index,
 						__be16 protocol,
 						struct tc_to_netdev *tc);
 #if IS_ENABLED(CONFIG_FCOE)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1cfdb31a2f44..5f3caee725ee 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -836,10 +836,13 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
 }
 
 static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
-			      __be16 protocol, struct tc_to_netdev *tc)
+			      u32 chain_index, __be16 protocol,
+			      struct tc_to_netdev *tc)
 {
 	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
-	int ret = -EOPNOTSUPP;
+
+	if (chain_index)
+		return -EOPNOTSUPP;
 
 	switch (tc->type) {
 	case TC_SETUP_MATCHALL:
@@ -853,10 +856,8 @@ static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
 			return 0;
 		}
 	default:
-		break;
+		return -EOPNOTSUPP;
 	}
-
-	return ret;
 }
 
 void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a9c56ad4533a..be0cfdf48976 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -162,6 +162,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 	bpf_offload.gen_flags = prog->gen_flags;
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+					    tp->chain->index,
 					    tp->protocol, &offload);
 
 	if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 33feaee197cf..7832eb93379b 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -239,7 +239,8 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 	tc->type = TC_SETUP_CLSFLOWER;
 	tc->cls_flower = &offload;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
+				      tp->protocol, tc);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -275,8 +276,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	tc->type = TC_SETUP_CLSFLOWER;
 	tc->cls_flower = &offload;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
-					    tc);
+	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+					    tp->chain->index, tp->protocol, tc);
 	if (!err)
 		f->flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -302,7 +303,8 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 	tc->type = TC_SETUP_CLSFLOWER;
 	tc->cls_flower = &offload;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+				      tp->chain->index, tp->protocol, tc);
 }
 
 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 51859b8edd7e..9dc26c32cf32 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -64,8 +64,9 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 	offload.cls_mall->exts = &head->exts;
 	offload.cls_mall->cookie = cookie;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
-					    &offload);
+	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+					    tp->chain->index,
+					    tp->protocol, &offload);
 	if (!err)
 		head->flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -86,8 +87,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
 	offload.cls_mall->exts = NULL;
 	offload.cls_mall->cookie = cookie;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
-					     &offload);
+	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
+				      tp->protocol, &offload);
 }
 
 static void mall_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d20e72a095d5..2d01195153e6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -441,7 +441,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 		offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
 		offload.cls_u32->knode.handle = handle;
 		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					      tp->protocol, &offload);
+					      tp->chain->index, tp->protocol,
+					      &offload);
 	}
 }
 
@@ -465,7 +466,8 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 	offload.cls_u32->hnode.prio = h->prio;
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->protocol, &offload);
+					    tp->chain->index, tp->protocol,
+					    &offload);
 	if (tc_skip_sw(flags))
 		return err;
 
@@ -488,7 +490,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 		offload.cls_u32->hnode.prio = h->prio;
 
 		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					      tp->protocol, &offload);
+					      tp->chain->index, tp->protocol,
+					      &offload);
 	}
 }
 
@@ -522,7 +525,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 		offload.cls_u32->knode.link_handle = n->ht_down->handle;
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->protocol, &offload);
+					    tp->chain->index, tp->protocol,
+					    &offload);
 
 	if (!err)
 		n->flags |= TCA_CLS_FLAGS_IN_HW;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 0a4cf27ea54b..e0c02725cd48 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -43,7 +43,7 @@ static void mqprio_destroy(struct Qdisc *sch)
 		struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
 					   { .mqprio = &offload } };
 
-		dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
+		dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc);
 	} else {
 		netdev_set_num_tc(dev, 0);
 	}
@@ -152,7 +152,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 		struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
 					   { .mqprio = &offload } };
 
-		err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
+		err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle,
+						    0, 0, &tc);
 		if (err)
 			return err;
 
-- 
cgit v1.2.3


From c08b1f45d7d193b3e6dcbbf30d403cb49b667b8c Mon Sep 17 00:00:00 2001
From: Liam Breck <kernel@networkimprov.net>
Date: Wed, 7 Jun 2017 11:37:51 -0700
Subject: power: supply: core: Add power_supply_battery_info and API

power_supply_get_battery_info() reads battery data from devicetree.
struct power_supply_battery_info provides battery data to drivers.
Its fields correspond to elements in enum power_supply_property.
Drivers may surface battery data in sysfs via corresponding
POWER_SUPPLY_PROP_* fields.

Signed-off-by: Matt Ranostay <matt@ranostay.consulting>
Signed-off-by: Liam Breck <kernel@networkimprov.net>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 Documentation/power/power_supply_class.txt | 12 +++++++
 drivers/power/supply/power_supply_core.c   | 57 ++++++++++++++++++++++++++++++
 include/linux/power_supply.h               | 22 ++++++++++++
 3 files changed, 91 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 0c72588bd967..01f007591070 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -174,6 +174,18 @@ issued by external power supply will notify supplicants via
 external_power_changed callback.
 
 
+Devicetree battery characteristics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Drivers should call power_supply_get_battery_info() to obtain battery
+characteristics from a devicetree battery node, defined in
+Documentation/devicetree/bindings/power/supply/battery.txt. This is
+implemented in drivers/power/supply/bq27xxx_battery.c.
+
+Properties in struct power_supply_battery_info and their counterparts in the
+battery node have names corresponding to elements in enum power_supply_property,
+for naming consistency between sysfs attributes and battery node properties.
+
+
 QA
 ~~
 Q: Where is POWER_SUPPLY_PROP_XYZ attribute?
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 0c09144193a6..a4303ed66144 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -17,6 +17,7 @@
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/power_supply.h>
 #include <linux/thermal.h>
 #include "power_supply.h"
@@ -519,6 +520,62 @@ struct power_supply *devm_power_supply_get_by_phandle(struct device *dev,
 EXPORT_SYMBOL_GPL(devm_power_supply_get_by_phandle);
 #endif /* CONFIG_OF */
 
+int power_supply_get_battery_info(struct power_supply *psy,
+				  struct power_supply_battery_info *info)
+{
+	struct device_node *battery_np;
+	const char *value;
+	int err;
+
+	info->energy_full_design_uwh         = -EINVAL;
+	info->charge_full_design_uah         = -EINVAL;
+	info->voltage_min_design_uv          = -EINVAL;
+	info->precharge_current_ua           = -EINVAL;
+	info->charge_term_current_ua         = -EINVAL;
+	info->constant_charge_current_max_ua = -EINVAL;
+	info->constant_charge_voltage_max_uv = -EINVAL;
+
+	if (!psy->of_node) {
+		dev_warn(&psy->dev, "%s currently only supports devicetree\n",
+			 __func__);
+		return -ENXIO;
+	}
+
+	battery_np = of_parse_phandle(psy->of_node, "monitored-battery", 0);
+	if (!battery_np)
+		return -ENODEV;
+
+	err = of_property_read_string(battery_np, "compatible", &value);
+	if (err)
+		return err;
+
+	if (strcmp("simple-battery", value))
+		return -ENODEV;
+
+	/* The property and field names below must correspond to elements
+	 * in enum power_supply_property. For reasoning, see
+	 * Documentation/power/power_supply_class.txt.
+	 */
+
+	of_property_read_u32(battery_np, "energy-full-design-microwatt-hours",
+			     &info->energy_full_design_uwh);
+	of_property_read_u32(battery_np, "charge-full-design-microamp-hours",
+			     &info->charge_full_design_uah);
+	of_property_read_u32(battery_np, "voltage-min-design-microvolt",
+			     &info->voltage_min_design_uv);
+	of_property_read_u32(battery_np, "precharge-current-microamp",
+			     &info->precharge_current_ua);
+	of_property_read_u32(battery_np, "charge-term-current-microamp",
+			     &info->charge_term_current_ua);
+	of_property_read_u32(battery_np, "constant_charge_current_max_microamp",
+			     &info->constant_charge_current_max_ua);
+	of_property_read_u32(battery_np, "constant_charge_voltage_max_microvolt",
+			     &info->constant_charge_voltage_max_uv);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(power_supply_get_battery_info);
+
 int power_supply_get_property(struct power_supply *psy,
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 4bd34051995e..34345d716286 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -289,6 +289,25 @@ struct power_supply_info {
 	int use_for_apm;
 };
 
+/*
+ * This is the recommended struct to manage static battery parameters,
+ * populated by power_supply_get_battery_info(). Most platform drivers should
+ * use these for consistency.
+ * Its field names must correspond to elements in enum power_supply_property.
+ * The default field value is -EINVAL.
+ * Power supply class itself doesn't use this.
+ */
+
+struct power_supply_battery_info {
+	int energy_full_design_uwh;	    /* microWatt-hours */
+	int charge_full_design_uah;	    /* microAmp-hours */
+	int voltage_min_design_uv;	    /* microVolts */
+	int precharge_current_ua;	    /* microAmps */
+	int charge_term_current_ua;	    /* microAmps */
+	int constant_charge_current_max_ua; /* microAmps */
+	int constant_charge_voltage_max_uv; /* microVolts */
+};
+
 extern struct atomic_notifier_head power_supply_notifier;
 extern int power_supply_reg_notifier(struct notifier_block *nb);
 extern void power_supply_unreg_notifier(struct notifier_block *nb);
@@ -307,6 +326,9 @@ static inline struct power_supply *
 devm_power_supply_get_by_phandle(struct device *dev, const char *property)
 { return NULL; }
 #endif /* CONFIG_OF */
+
+extern int power_supply_get_battery_info(struct power_supply *psy,
+					 struct power_supply_battery_info *info);
 extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
 extern int power_supply_set_battery_charged(struct power_supply *psy);
-- 
cgit v1.2.3


From 413de34ab93edc80ef710c54ceb0987b8496aef3 Mon Sep 17 00:00:00 2001
From: Liam Breck <kernel@networkimprov.net>
Date: Wed, 7 Jun 2017 11:37:52 -0700
Subject: power: supply: core: Add power_supply_prop_precharge

Battery chargers use POWER_SUPPLY_PROP_PRECHARGE_CURRENT
Clarify related item POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT

Signed-off-by: Liam Breck <kernel@networkimprov.net>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 Documentation/power/power_supply_class.txt | 19 ++++++++++++-------
 drivers/power/supply/power_supply_sysfs.c  |  1 +
 include/linux/power_supply.h               |  3 +++
 3 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 01f007591070..300d37896e51 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -115,28 +115,33 @@ of charge when battery became full/empty". It also could mean "value of
 charge when battery considered full/empty at given conditions (temperature,
 age)". I.e. these attributes represents real thresholds, not design values.
 
+ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
+
 CHARGE_COUNTER - the current charge counter (in µAh).  This could easily
 be negative; there is no empty or full value.  It is only useful for
 relative, time-based measurements.
 
+PRECHARGE_CURRENT - the maximum charge current during precharge phase
+of charge cycle (typically 20% of battery capacity).
+CHARGE_TERM_CURRENT - Charge termination current. The charge cycle
+terminates when battery voltage is above recharge threshold, and charge
+current is below this setting (typically 10% of battery capacity).
+
 CONSTANT_CHARGE_CURRENT - constant charge current programmed by charger.
 CONSTANT_CHARGE_CURRENT_MAX - maximum charge current supported by the
 power supply object.
-INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates
-the current drawn from a charging source.
-CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge
-condition.
-
-CALIBRATE - battery or coulomb counter calibration status
 
 CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
 CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
 power supply object.
 
+INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates
+the current drawn from a charging source.
+
 CHARGE_CONTROL_LIMIT - current charge control limit setting
 CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
 
-ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
+CALIBRATE - battery or coulomb counter calibration status
 
 CAPACITY - capacity in percents.
 CAPACITY_ALERT_MIN - minimum capacity alert value in percents.
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index b32c14183773..5204f115970f 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -242,6 +242,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(time_to_full_avg),
 	POWER_SUPPLY_ATTR(type),
 	POWER_SUPPLY_ATTR(scope),
+	POWER_SUPPLY_ATTR(precharge_current),
 	POWER_SUPPLY_ATTR(charge_term_current),
 	POWER_SUPPLY_ATTR(calibrate),
 	/* Properties of type `const char *' */
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 34345d716286..de89066b72b1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -146,6 +146,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
 	POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */
 	POWER_SUPPLY_PROP_SCOPE,
+	POWER_SUPPLY_PROP_PRECHARGE_CURRENT,
 	POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT,
 	POWER_SUPPLY_PROP_CALIBRATE,
 	/* Properties of type `const char *' */
@@ -382,6 +383,8 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp)
 	case POWER_SUPPLY_PROP_CHARGE_NOW:
 	case POWER_SUPPLY_PROP_CHARGE_AVG:
 	case POWER_SUPPLY_PROP_CHARGE_COUNTER:
+	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
+	case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
 	case POWER_SUPPLY_PROP_CURRENT_MAX:
-- 
cgit v1.2.3


From 14073f6614f62dc7862c83575b042424599cc867 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <matt@ranostay.consulting>
Date: Wed, 7 Jun 2017 11:37:54 -0700
Subject: power: supply: bq27xxx: Add bulk transfer bus methods

Declare bus.write/read_bulk/write_bulk().
Add I2C write/read_bulk/write_bulk() to implement the above.
Add bq27xxx_write/read_block/write_block() helpers to call the above.

Signed-off-by: Matt Ranostay <matt@ranostay.consulting>
Signed-off-by: Liam Breck <kernel@networkimprov.net>
Acked-by: "Andrew F. Davis" <afd@ti.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/bq27xxx_battery.c     | 67 +++++++++++++++++++++++-
 drivers/power/supply/bq27xxx_battery_i2c.c | 82 +++++++++++++++++++++++++++++-
 include/linux/power/bq27xxx_battery.h      |  3 ++
 3 files changed, 149 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 398801a21b86..a11dfad76ed4 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -794,11 +794,74 @@ MODULE_PARM_DESC(poll_interval,
 static inline int bq27xxx_read(struct bq27xxx_device_info *di, int reg_index,
 			       bool single)
 {
-	/* Reports EINVAL for invalid/missing registers */
+	int ret;
+
 	if (!di || di->regs[reg_index] == INVALID_REG_ADDR)
 		return -EINVAL;
 
-	return di->bus.read(di, di->regs[reg_index], single);
+	ret = di->bus.read(di, di->regs[reg_index], single);
+	if (ret < 0)
+		dev_dbg(di->dev, "failed to read register 0x%02x (index %d)\n",
+			di->regs[reg_index], reg_index);
+
+	return ret;
+}
+
+static inline int bq27xxx_write(struct bq27xxx_device_info *di, int reg_index,
+				u16 value, bool single)
+{
+	int ret;
+
+	if (!di || di->regs[reg_index] == INVALID_REG_ADDR)
+		return -EINVAL;
+
+	if (!di->bus.write)
+		return -EPERM;
+
+	ret = di->bus.write(di, di->regs[reg_index], value, single);
+	if (ret < 0)
+		dev_dbg(di->dev, "failed to write register 0x%02x (index %d)\n",
+			di->regs[reg_index], reg_index);
+
+	return ret;
+}
+
+static inline int bq27xxx_read_block(struct bq27xxx_device_info *di, int reg_index,
+				     u8 *data, int len)
+{
+	int ret;
+
+	if (!di || di->regs[reg_index] == INVALID_REG_ADDR)
+		return -EINVAL;
+
+	if (!di->bus.read_bulk)
+		return -EPERM;
+
+	ret = di->bus.read_bulk(di, di->regs[reg_index], data, len);
+	if (ret < 0)
+		dev_dbg(di->dev, "failed to read_bulk register 0x%02x (index %d)\n",
+			di->regs[reg_index], reg_index);
+
+	return ret;
+}
+
+static inline int bq27xxx_write_block(struct bq27xxx_device_info *di, int reg_index,
+				      u8 *data, int len)
+{
+	int ret;
+
+	if (!di || di->regs[reg_index] == INVALID_REG_ADDR)
+		return -EINVAL;
+
+	if (!di->bus.write_bulk)
+		return -EPERM;
+
+	ret = di->bus.write_bulk(di, di->regs[reg_index], data, len);
+	if (ret < 0)
+		dev_dbg(di->dev, "failed to write_bulk register 0x%02x (index %d)\n",
+			di->regs[reg_index], reg_index);
+
+	return ret;
 }
 
 /*
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index c68fbc3fe50a..a5972214f074 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -38,7 +38,7 @@ static int bq27xxx_battery_i2c_read(struct bq27xxx_device_info *di, u8 reg,
 {
 	struct i2c_client *client = to_i2c_client(di->dev);
 	struct i2c_msg msg[2];
-	unsigned char data[2];
+	u8 data[2];
 	int ret;
 
 	if (!client->adapter)
@@ -68,6 +68,82 @@ static int bq27xxx_battery_i2c_read(struct bq27xxx_device_info *di, u8 reg,
 	return ret;
 }
 
+static int bq27xxx_battery_i2c_write(struct bq27xxx_device_info *di, u8 reg,
+				     int value, bool single)
+{
+	struct i2c_client *client = to_i2c_client(di->dev);
+	struct i2c_msg msg;
+	u8 data[4];
+	int ret;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	data[0] = reg;
+	if (single) {
+		data[1] = (u8) value;
+		msg.len = 2;
+	} else {
+		put_unaligned_le16(value, &data[1]);
+		msg.len = 3;
+	}
+
+	msg.buf = data;
+	msg.addr = client->addr;
+	msg.flags = 0;
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret < 0)
+		return ret;
+	if (ret != 1)
+		return -EINVAL;
+	return 0;
+}
+
+static int bq27xxx_battery_i2c_bulk_read(struct bq27xxx_device_info *di, u8 reg,
+					 u8 *data, int len)
+{
+	struct i2c_client *client = to_i2c_client(di->dev);
+	int ret;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	ret = i2c_smbus_read_i2c_block_data(client, reg, len, data);
+	if (ret < 0)
+		return ret;
+	if (ret != len)
+		return -EINVAL;
+	return 0;
+}
+
+static int bq27xxx_battery_i2c_bulk_write(struct bq27xxx_device_info *di,
+					  u8 reg, u8 *data, int len)
+{
+	struct i2c_client *client = to_i2c_client(di->dev);
+	struct i2c_msg msg;
+	u8 buf[33];
+	int ret;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	buf[0] = reg;
+	memcpy(&buf[1], data, len);
+
+	msg.buf = buf;
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = len + 1;
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret < 0)
+		return ret;
+	if (ret != 1)
+		return -EINVAL;
+	return 0;
+}
+
 static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
 				     const struct i2c_device_id *id)
 {
@@ -95,7 +171,11 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
 	di->dev = &client->dev;
 	di->chip = id->driver_data;
 	di->name = name;
+
 	di->bus.read = bq27xxx_battery_i2c_read;
+	di->bus.write = bq27xxx_battery_i2c_write;
+	di->bus.read_bulk = bq27xxx_battery_i2c_bulk_read;
+	di->bus.write_bulk = bq27xxx_battery_i2c_bulk_write;
 
 	ret = bq27xxx_battery_setup(di);
 	if (ret)
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index b312bcef53da..c3369fa605f9 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -40,6 +40,9 @@ struct bq27xxx_platform_data {
 struct bq27xxx_device_info;
 struct bq27xxx_access_methods {
 	int (*read)(struct bq27xxx_device_info *di, u8 reg, bool single);
+	int (*write)(struct bq27xxx_device_info *di, u8 reg, int value, bool single);
+	int (*read_bulk)(struct bq27xxx_device_info *di, u8 reg, u8 *data, int len);
+	int (*write_bulk)(struct bq27xxx_device_info *di, u8 reg, u8 *data, int len);
 };
 
 struct bq27xxx_reg_cache {
-- 
cgit v1.2.3


From 8397ed36b7c585f8d3e06c431f4137309124f78f Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Jun 2017 12:26:23 -0600
Subject: net: ipv6: Release route when device is unregistering

Roopa reported attempts to delete a bond device that is referenced in a
multipath route is hanging:

$ ifdown bond2    # ifupdown2 command that deletes virtual devices
unregister_netdevice: waiting for bond2 to become free. Usage count = 2

Steps to reproduce:
    echo 1 > /proc/sys/net/ipv6/conf/all/ignore_routes_with_linkdown
    ip link add dev bond12 type bond
    ip link add dev bond13 type bond
    ip addr add 2001:db8:2::0/64 dev bond12
    ip addr add 2001:db8:3::0/64 dev bond13
    ip route add 2001:db8:33::0/64 nexthop via 2001:db8:2::2 nexthop via 2001:db8:3::2
    ip link del dev bond12
    ip link del dev bond13

The root cause is the recent change to keep routes on a linkdown. Update
the check to detect when the device is unregistering and release the
route for that case.

Fixes: a1a22c12060e4 ("net: ipv6: Keep nexthop of multipath route on admin down")
Reported-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++++
 net/ipv6/route.c          | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ab7ca3fdc495..846193dfb0ac 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4262,6 +4262,11 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
+static inline bool netdev_unregistering(const struct net_device *dev)
+{
+	return dev->reg_state == NETREG_UNREGISTERING;
+}
+
 static inline const char *netdev_reg_state(const struct net_device *dev)
 {
 	switch (dev->reg_state) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc61b0b5e64e..7cebd954d5bb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2804,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
 	if ((rt->dst.dev == dev || !dev) &&
 	    rt != adn->net->ipv6.ip6_null_entry &&
 	    (rt->rt6i_nsiblings == 0 ||
+	     (dev && netdev_unregistering(dev)) ||
 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
 		return -1;
 
-- 
cgit v1.2.3


From 1123a6041654e8f889014659593bad4168e542c2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 31 May 2017 14:03:11 +0200
Subject: srcu: Allow use of Classic SRCU from both process and interrupt
 context

Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting
down a guest running iperf on a VFIO assigned device.  This happens
because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt
context, while a worker thread does the same inside kvm_set_irq().  If the
interrupt happens while the worker thread is executing __srcu_read_lock(),
updates to the Classic SRCU ->lock_count[] field or the Tree SRCU
->srcu_lock_count[] field can be lost.

The docs say you are not supposed to call srcu_read_lock() and
srcu_read_unlock() from irq context, but KVM interrupt injection happens
from (host) interrupt context and it would be nice if SRCU supported the
use case.  KVM is using SRCU here not really for the "sleepable" part,
but rather due to its IPI-free fast detection of grace periods.  It is
therefore not desirable to switch back to RCU, which would effectively
revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING",
2014-01-16).

However, the docs are overly conservative.  You can have an SRCU instance
only has users in irq context, and you can mix process and irq context
as long as process context users disable interrupts.  In addition,
__srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and
Classic SRCU.  For those two implementations, only srcu_read_lock()
is unsafe.

When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(),
in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via
this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments.
Therefore it kept __this_cpu_inc(), with preempt_disable/enable in
the caller.  Tree SRCU however only does one increment, so on most
architectures it is more efficient for __srcu_read_lock() to use
this_cpu_inc(), and any performance differences appear to be down in
the noise.

Cc: stable@vger.kernel.org
Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING")
Reported-by: Linu Cherian <linuc.decode@gmail.com>
Suggested-by: Linu Cherian <linuc.decode@gmail.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 2 --
 kernel/rcu/srcu.c    | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 167ad8831aaf..4c1d5f7e62c4 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -172,9 +172,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
 	int retval;
 
-	preempt_disable();
 	retval = __srcu_read_lock(sp);
-	preempt_enable();
 	rcu_lock_acquire(&(sp)->dep_map);
 	return retval;
 }
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 584d8a983883..dea03614263f 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
+ * srcu_struct.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
@@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 	int idx;
 
 	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
+	this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
-- 
cgit v1.2.3


From 6016ffc3874d3a1ddf41518481da54b4714717af Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 19 Apr 2017 16:20:07 -0700
Subject: atomics: Add header comment so spin_unlock_wait()

There is material describing the ordering guarantees provided by
spin_unlock_wait(), but it is not necessarily easy to find.  This commit
therefore adds a docbook header comment to this function informally
describing its semantics.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/spinlock.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 59248dcc6ef3..d9510e8522d4 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -369,6 +369,26 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
+/**
+ * spin_unlock_wait - Interpose between successive critical sections
+ * @lock: the spinlock whose critical sections are to be interposed.
+ *
+ * Semantically this is equivalent to a spin_lock() immediately
+ * followed by a spin_unlock().  However, most architectures have
+ * more efficient implementations in which the spin_unlock_wait()
+ * cannot block concurrent lock acquisition, and in some cases
+ * where spin_unlock_wait() does not write to the lock variable.
+ * Nevertheless, spin_unlock_wait() can have high overhead, so if
+ * you feel the need to use it, please check to see if there is
+ * a better way to get your job done.
+ *
+ * The ordering guarantees provided by spin_unlock_wait() are:
+ *
+ * 1.  All accesses preceding the spin_unlock_wait() happen before
+ *     any accesses in later critical sections for this same lock.
+ * 2.  All accesses following the spin_unlock_wait() happen after
+ *     any accesses in earlier critical sections for this same lock.
+ */
 static __always_inline void spin_unlock_wait(spinlock_t *lock)
 {
 	raw_spin_unlock_wait(&lock->rlock);
-- 
cgit v1.2.3


From 3ddf20c953520203c42dbed1f091ed52080e1cd2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 21 Apr 2017 13:33:20 -0700
Subject: srcu: Shrink Tiny SRCU a bit more

This commit rearranges Tiny SRCU's srcu_struct structure, substitutes
u8 for bool, and shrinks counters down to short.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcutiny.h | 8 ++++----
 kernel/rcu/rcutorture.c  | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 42311ee0334f..b8859179b001 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -27,15 +27,15 @@
 #include <linux/swait.h>
 
 struct srcu_struct {
-	int srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	short srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	short srcu_idx;			/* Current reader array element. */
+	u8 srcu_gp_running;		/* GP workqueue running? */
+	u8 srcu_gp_waiting;		/* GP waiting for readers? */
 	struct swait_queue_head srcu_wq;
 					/* Last srcu_read_unlock() wakes GP. */
 	unsigned long srcu_gp_seq;	/* GP seq # for callback tagging. */
 	struct rcu_segcblist srcu_cblist;
 					/* Pending SRCU callbacks. */
-	int srcu_idx;			/* Current reader array element. */
-	bool srcu_gp_running;		/* GP workqueue running? */
-	bool srcu_gp_waiting;		/* GP waiting for readers? */
 	struct work_struct srcu_work;	/* For driving grace periods. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index ae6e574d4cf5..a58592b73f19 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -609,7 +609,7 @@ static void srcu_torture_stats(void)
 	pr_cont("\n");
 #elif defined(CONFIG_TINY_SRCU)
 	idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
-	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n",
+	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
 		 torture_type, TORTURE_FLAG, idx,
 		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
 		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
-- 
cgit v1.2.3


From 59d80fd8351b7b9a5dc7bbfa8bc4ca19f6ff3dad Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 28 Apr 2017 10:20:28 -0700
Subject: rcu: Print out rcupdate.c non-default boot-time settings

This commit adds a rcupdate_announce_bootup_oddness() function to
print out non-default values of significant kernel boot parameter
settings to aid in debugging.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  1 +
 kernel/rcu/tree_plugin.h |  1 +
 kernel/rcu/update.c      | 42 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e1e5d002fdb9..393e461d3ea8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -76,6 +76,7 @@ bool rcu_gp_is_normal(void);     /* Internal RCU use. */
 bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
 void rcu_expedite_gp(void);
 void rcu_unexpedite_gp(void);
+void rcupdate_announce_bootup_oddness(void);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 enum rcutorture_type {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3b432fa4c45b..eb5ebdce25ff 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -92,6 +92,7 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
 	if (IS_ENABLED(CONFIG_RCU_BOOST))
 		pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
+	rcupdate_announce_bootup_oddness();
 }
 
 #ifdef CONFIG_PREEMPT_RCU
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 273e869ca21d..82a5aa10dbc5 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -560,7 +560,8 @@ static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 DEFINE_SRCU(tasks_rcu_exit_srcu);
 
 /* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
-static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
+static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
 
 static void rcu_spawn_tasks_kthread(void);
@@ -851,6 +852,23 @@ static void rcu_spawn_tasks_kthread(void)
 
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
+#ifndef CONFIG_TINY_RCU
+
+/*
+ * Print any non-default Tasks RCU settings.
+ */
+static void __init rcu_tasks_bootup_oddness(void)
+{
+#ifdef CONFIG_TASKS_RCU
+	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
+		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+	else
+		pr_info("\tTasks RCU enabled.\n");
+#endif /* #ifdef CONFIG_TASKS_RCU */
+}
+
+#endif /* #ifndef CONFIG_TINY_RCU */
+
 #ifdef CONFIG_PROVE_RCU
 
 /*
@@ -935,3 +953,25 @@ late_initcall(rcu_verify_early_boot_tests);
 #else
 void rcu_early_boot_tests(void) {}
 #endif /* CONFIG_PROVE_RCU */
+
+#ifndef CONFIG_TINY_RCU
+
+/*
+ * Print any significant non-default boot-time settings.
+ */
+void __init rcupdate_announce_bootup_oddness(void)
+{
+	if (rcu_normal)
+		pr_info("\tNo expedited grace period (rcu_normal).\n");
+	else if (rcu_normal_after_boot)
+		pr_info("\tNo expedited grace period (rcu_normal_after_boot).\n");
+	else if (rcu_expedited)
+		pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
+	if (rcu_cpu_stall_suppress)
+		pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
+	if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
+		pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
+	rcu_tasks_bootup_oddness();
+}
+
+#endif /* #ifndef CONFIG_TINY_RCU */
-- 
cgit v1.2.3


From 07f6e64bf2ab98cad0d9c595659209858e7bff83 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 28 Apr 2017 13:53:04 -0700
Subject: srcu: Make SRCU be once again optional

Commit d160a727c40e ("srcu: Make SRCU be built by default") in response
to build errors, which were caused by code that included srcu.h
despite !SRCU.  However, srcutiny.o is almost 2K of code, which is not
insignificant for those attempting to run the Linux kernel on IoT devices.
This commit therefore makes SRCU be once again optional, and adjusts
srcu.h to allow error-free inclusion in !SRCU kernel builds.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
---
 include/linux/srcu.h | 7 ++++++-
 init/Kconfig         | 1 -
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4c1d5f7e62c4..ea356d800675 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -62,8 +62,13 @@ int init_srcu_struct(struct srcu_struct *sp);
 #include <linux/srcutree.h>
 #elif defined(CONFIG_CLASSIC_SRCU)
 #include <linux/srcuclassic.h>
-#else
+#elif defined(CONFIG_SRCU)
 #error "Unknown SRCU implementation specified to kernel configuration"
+#else
+
+/* Dummy definition for things like notifiers.  Actual use gets link error. */
+struct srcu_struct { };
+
 #endif
 
 /**
diff --git a/init/Kconfig b/init/Kconfig
index 1d3475fc9496..d928a3724af9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -521,7 +521,6 @@ config RCU_EXPERT
 
 config SRCU
 	bool
-	default y
 	help
 	  This option selects the sleepable version of RCU. This version
 	  permits arbitrary sleeping or blocking within RCU read-side critical
-- 
cgit v1.2.3


From d4efe6c5ad91f9a1f2f1d66b7fbfc87e320b2abc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 28 Apr 2017 14:16:16 -0700
Subject: srcu: Shrink Tiny SRCU a bit

In Tiny SRCU, __srcu_read_lock() is a trivial function, outweighed by
its EXPORT_SYMBOL_GPL(), and on many architectures, its call sequence.
This commit therefore moves it to srcutiny.h so that it can be inlined.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcutiny.h | 15 +++++++++++++++
 kernel/rcu/srcutiny.c    | 16 ----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index b8859179b001..b6edd9c8fdce 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -63,6 +63,21 @@ void srcu_drive_gp(struct work_struct *wp);
 
 void synchronize_srcu(struct srcu_struct *sp);
 
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Can be invoked from irq/bh handlers, but the matching
+ * __srcu_read_unlock() must be in the same handler instance.  Returns an
+ * index that must be passed to the matching srcu_read_unlock().
+ */
+static inline int __srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->srcu_idx);
+	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
+	return idx;
+}
+
 static inline void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
 	synchronize_srcu(sp);
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 32798eb14853..988543721d5d 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -95,22 +95,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
-/*
- * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Can be invoked from irq/bh handlers, but the matching
- * __srcu_read_unlock() must be in the same handler instance.  Returns an
- * index that must be passed to the matching srcu_read_unlock().
- */
-int __srcu_read_lock(struct srcu_struct *sp)
-{
-	int idx;
-
-	idx = READ_ONCE(sp->srcu_idx);
-	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
-	return idx;
-}
-EXPORT_SYMBOL_GPL(__srcu_read_lock);
-
 /*
  * Removes the count for the old reader from the appropriate element of
  * the srcu_struct.
-- 
cgit v1.2.3


From 0670c9b3588f163cfcfcd8ea532f321ec004e6ad Mon Sep 17 00:00:00 2001
From: Liam Breck <kernel@networkimprov.net>
Date: Wed, 7 Jun 2017 11:37:55 -0700
Subject: power: supply: bq27xxx: Add chip data memory read/write support

Add these to enable read/write of chip data memory RAM/NVM/flash:
  bq27xxx_battery_seal()
  bq27xxx_battery_unseal()
  bq27xxx_battery_set_cfgupdate()
  bq27xxx_battery_soft_reset()
  bq27xxx_battery_read_dm_block()
  bq27xxx_battery_write_dm_block()
  bq27xxx_battery_checksum_dm_block()

Signed-off-by: Matt Ranostay <matt@ranostay.consulting>
Signed-off-by: Liam Breck <kernel@networkimprov.net>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/bq27xxx_battery.c | 265 +++++++++++++++++++++++++++++++++
 include/linux/power/bq27xxx_battery.h  |   1 +
 2 files changed, 266 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index a11dfad76ed4..6a4ac14cb15c 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2008 Eurotech S.p.A. <info@eurotech.it>
  * Copyright (C) 2010-2011 Lars-Peter Clausen <lars@metafoo.de>
  * Copyright (C) 2011 Pali Rohár <pali.rohar@gmail.com>
+ * Copyright (C) 2017 Liam Breck <kernel@networkimprov.net>
  *
  * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc.
  *
@@ -65,6 +66,7 @@
 #define BQ27XXX_FLAG_DSC	BIT(0)
 #define BQ27XXX_FLAG_SOCF	BIT(1) /* State-of-Charge threshold final */
 #define BQ27XXX_FLAG_SOC1	BIT(2) /* State-of-Charge threshold 1 */
+#define BQ27XXX_FLAG_CFGUP	BIT(4)
 #define BQ27XXX_FLAG_FC		BIT(9)
 #define BQ27XXX_FLAG_OTD	BIT(14)
 #define BQ27XXX_FLAG_OTC	BIT(15)
@@ -78,6 +80,12 @@
 #define BQ27000_FLAG_FC		BIT(5)
 #define BQ27000_FLAG_CHGS	BIT(7) /* Charge state flag */
 
+/* control register params */
+#define BQ27XXX_SEALED			0x20
+#define BQ27XXX_SET_CFGUPDATE		0x13
+#define BQ27XXX_SOFT_RESET		0x42
+#define BQ27XXX_RESET			0x41
+
 #define BQ27XXX_RS			(20) /* Resistor sense mOhm */
 #define BQ27XXX_POWER_CONSTANT		(29200) /* 29.2 µV^2 * 1000 */
 #define BQ27XXX_CURRENT_CONSTANT	(3570) /* 3.57 µV * 1000 */
@@ -108,9 +116,21 @@ enum bq27xxx_reg_index {
 	BQ27XXX_REG_SOC,	/* State-of-Charge */
 	BQ27XXX_REG_DCAP,	/* Design Capacity */
 	BQ27XXX_REG_AP,		/* Average Power */
+	BQ27XXX_DM_CTRL,	/* Block Data Control */
+	BQ27XXX_DM_CLASS,	/* Data Class */
+	BQ27XXX_DM_BLOCK,	/* Data Block */
+	BQ27XXX_DM_DATA,	/* Block Data */
+	BQ27XXX_DM_CKSUM,	/* Block Data Checksum */
 	BQ27XXX_REG_MAX,	/* sentinel */
 };
 
+#define BQ27XXX_DM_REG_ROWS \
+	[BQ27XXX_DM_CTRL] = 0x61,  \
+	[BQ27XXX_DM_CLASS] = 0x3e, \
+	[BQ27XXX_DM_BLOCK] = 0x3f, \
+	[BQ27XXX_DM_DATA] = 0x40,  \
+	[BQ27XXX_DM_CKSUM] = 0x60
+
 /* Register mappings */
 static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 	[BQ27000] = {
@@ -131,6 +151,11 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x0b,
 		[BQ27XXX_REG_DCAP] = 0x76,
 		[BQ27XXX_REG_AP] = 0x24,
+		[BQ27XXX_DM_CTRL] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_CLASS] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_BLOCK] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_DATA] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_CKSUM] = INVALID_REG_ADDR,
 	},
 	[BQ27010] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -150,6 +175,11 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x0b,
 		[BQ27XXX_REG_DCAP] = 0x76,
 		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_CTRL] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_CLASS] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_BLOCK] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_DATA] = INVALID_REG_ADDR,
+		[BQ27XXX_DM_CKSUM] = INVALID_REG_ADDR,
 	},
 	[BQ2750X] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -169,6 +199,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ2751X] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -188,6 +219,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x20,
 		[BQ27XXX_REG_DCAP] = 0x2e,
 		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27500] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -207,6 +239,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27510G1] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -226,6 +259,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27510G2] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -245,6 +279,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27510G3] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -264,6 +299,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x20,
 		[BQ27XXX_REG_DCAP] = 0x2e,
 		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27520G1] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -283,6 +319,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27520G2] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -302,6 +339,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27520G3] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -321,6 +359,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27520G4] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -340,6 +379,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x20,
 		[BQ27XXX_REG_DCAP] = INVALID_REG_ADDR,
 		[BQ27XXX_REG_AP] = INVALID_REG_ADDR,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27530] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -359,6 +399,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = INVALID_REG_ADDR,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27541] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -378,6 +419,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27545] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -397,6 +439,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x2c,
 		[BQ27XXX_REG_DCAP] = INVALID_REG_ADDR,
 		[BQ27XXX_REG_AP] = 0x24,
+		BQ27XXX_DM_REG_ROWS,
 	},
 	[BQ27421] = {
 		[BQ27XXX_REG_CTRL] = 0x00,
@@ -416,6 +459,7 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = {
 		[BQ27XXX_REG_SOC] = 0x1c,
 		[BQ27XXX_REG_DCAP] = 0x3c,
 		[BQ27XXX_REG_AP] = 0x18,
+		BQ27XXX_DM_REG_ROWS,
 	},
 };
 
@@ -757,6 +801,28 @@ static struct {
 static DEFINE_MUTEX(bq27xxx_list_lock);
 static LIST_HEAD(bq27xxx_battery_devices);
 
+#define BQ27XXX_MSLEEP(i) usleep_range((i)*1000, (i)*1000+500)
+
+#define BQ27XXX_DM_SZ	32
+
+/**
+ * struct bq27xxx_dm_buf - chip data memory buffer
+ * @class: data memory subclass_id
+ * @block: data memory block number
+ * @data: data from/for the block
+ * @has_data: true if data has been filled by read
+ * @dirty: true if data has changed since last read/write
+ *
+ * Encapsulates info required to manage chip data memory blocks.
+ */
+struct bq27xxx_dm_buf {
+	u8 class;
+	u8 block;
+	u8 data[BQ27XXX_DM_SZ];
+	bool has_data, dirty;
+};
+
+
 static int poll_interval_param_set(const char *val, const struct kernel_param *kp)
 {
 	struct bq27xxx_device_info *di;
@@ -864,6 +930,205 @@ static inline int bq27xxx_write_block(struct bq27xxx_device_info *di, int reg_in
 	return ret;
 }
 
+static int bq27xxx_battery_seal(struct bq27xxx_device_info *di)
+{
+	int ret;
+
+	ret = bq27xxx_write(di, BQ27XXX_REG_CTRL, BQ27XXX_SEALED, false);
+	if (ret < 0) {
+		dev_err(di->dev, "bus error on seal: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int bq27xxx_battery_unseal(struct bq27xxx_device_info *di)
+{
+	int ret;
+
+	if (di->unseal_key == 0) {
+		dev_err(di->dev, "unseal failed due to missing key\n");
+		return -EINVAL;
+	}
+
+	ret = bq27xxx_write(di, BQ27XXX_REG_CTRL, (u16)(di->unseal_key >> 16), false);
+	if (ret < 0)
+		goto out;
+
+	ret = bq27xxx_write(di, BQ27XXX_REG_CTRL, (u16)di->unseal_key, false);
+	if (ret < 0)
+		goto out;
+
+	return 0;
+
+out:
+	dev_err(di->dev, "bus error on unseal: %d\n", ret);
+	return ret;
+}
+
+static u8 bq27xxx_battery_checksum_dm_block(struct bq27xxx_dm_buf *buf)
+{
+	u16 sum = 0;
+	int i;
+
+	for (i = 0; i < BQ27XXX_DM_SZ; i++)
+		sum += buf->data[i];
+	sum &= 0xff;
+
+	return 0xff - sum;
+}
+
+static int bq27xxx_battery_read_dm_block(struct bq27xxx_device_info *di,
+					 struct bq27xxx_dm_buf *buf)
+{
+	int ret;
+
+	buf->has_data = false;
+
+	ret = bq27xxx_write(di, BQ27XXX_DM_CLASS, buf->class, true);
+	if (ret < 0)
+		goto out;
+
+	ret = bq27xxx_write(di, BQ27XXX_DM_BLOCK, buf->block, true);
+	if (ret < 0)
+		goto out;
+
+	BQ27XXX_MSLEEP(1);
+
+	ret = bq27xxx_read_block(di, BQ27XXX_DM_DATA, buf->data, BQ27XXX_DM_SZ);
+	if (ret < 0)
+		goto out;
+
+	ret = bq27xxx_read(di, BQ27XXX_DM_CKSUM, true);
+	if (ret < 0)
+		goto out;
+
+	if ((u8)ret != bq27xxx_battery_checksum_dm_block(buf)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	buf->has_data = true;
+	buf->dirty = false;
+
+	return 0;
+
+out:
+	dev_err(di->dev, "bus error reading chip memory: %d\n", ret);
+	return ret;
+}
+
+static int bq27xxx_battery_cfgupdate_priv(struct bq27xxx_device_info *di, bool active)
+{
+	const int limit = 100;
+	u16 cmd = active ? BQ27XXX_SET_CFGUPDATE : BQ27XXX_SOFT_RESET;
+	int ret, try = limit;
+
+	ret = bq27xxx_write(di, BQ27XXX_REG_CTRL, cmd, false);
+	if (ret < 0)
+		return ret;
+
+	do {
+		BQ27XXX_MSLEEP(25);
+		ret = bq27xxx_read(di, BQ27XXX_REG_FLAGS, false);
+		if (ret < 0)
+			return ret;
+	} while (!!(ret & BQ27XXX_FLAG_CFGUP) != active && --try);
+
+	if (!try) {
+		dev_err(di->dev, "timed out waiting for cfgupdate flag %d\n", active);
+		return -EINVAL;
+	}
+
+	if (limit - try > 3)
+		dev_warn(di->dev, "cfgupdate %d, retries %d\n", active, limit - try);
+
+	return 0;
+}
+
+static inline int bq27xxx_battery_set_cfgupdate(struct bq27xxx_device_info *di)
+{
+	int ret = bq27xxx_battery_cfgupdate_priv(di, true);
+	if (ret < 0 && ret != -EINVAL)
+		dev_err(di->dev, "bus error on set_cfgupdate: %d\n", ret);
+
+	return ret;
+}
+
+static inline int bq27xxx_battery_soft_reset(struct bq27xxx_device_info *di)
+{
+	int ret = bq27xxx_battery_cfgupdate_priv(di, false);
+	if (ret < 0 && ret != -EINVAL)
+		dev_err(di->dev, "bus error on soft_reset: %d\n", ret);
+
+	return ret;
+}
+
+static int bq27xxx_battery_write_dm_block(struct bq27xxx_device_info *di,
+					  struct bq27xxx_dm_buf *buf)
+{
+	bool cfgup = di->chip == BQ27421; /* assume related chips need cfgupdate */
+	int ret;
+
+	if (!buf->dirty)
+		return 0;
+
+	if (cfgup) {
+		ret = bq27xxx_battery_set_cfgupdate(di);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = bq27xxx_write(di, BQ27XXX_DM_CTRL, 0, true);
+	if (ret < 0)
+		goto out;
+
+	ret = bq27xxx_write(di, BQ27XXX_DM_CLASS, buf->class, true);
+	if (ret < 0)
+		goto out;
+
+	ret = bq27xxx_write(di, BQ27XXX_DM_BLOCK, buf->block, true);
+	if (ret < 0)
+		goto out;
+
+	BQ27XXX_MSLEEP(1);
+
+	ret = bq27xxx_write_block(di, BQ27XXX_DM_DATA, buf->data, BQ27XXX_DM_SZ);
+	if (ret < 0)
+		goto out;
+
+	ret = bq27xxx_write(di, BQ27XXX_DM_CKSUM,
+			    bq27xxx_battery_checksum_dm_block(buf), true);
+	if (ret < 0)
+		goto out;
+
+	/* DO NOT read BQ27XXX_DM_CKSUM here to verify it! That may cause NVM
+	 * corruption on the '425 chip (and perhaps others), which can damage
+	 * the chip.
+	 */
+
+	if (cfgup) {
+		BQ27XXX_MSLEEP(1);
+		ret = bq27xxx_battery_soft_reset(di);
+		if (ret < 0)
+			return ret;
+	} else {
+		BQ27XXX_MSLEEP(100); /* flash DM updates in <100ms */
+	}
+
+	buf->dirty = false;
+
+	return 0;
+
+out:
+	if (cfgup)
+		bq27xxx_battery_soft_reset(di);
+
+	dev_err(di->dev, "bus error writing chip memory: %d\n", ret);
+	return ret;
+}
+
 /*
  * Return the battery State-of-Charge
  * Or < 0 if something fails.
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index c3369fa605f9..b1defb86e9b6 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -64,6 +64,7 @@ struct bq27xxx_device_info {
 	int id;
 	enum bq27xxx_chip chip;
 	const char *name;
+	u32 unseal_key;
 	struct bq27xxx_access_methods bus;
 	struct bq27xxx_reg_cache cache;
 	int charge_design_full;
-- 
cgit v1.2.3


From ccce440956c79343ab3aa1269a4cf57f9cce030f Mon Sep 17 00:00:00 2001
From: Liam Breck <kernel@networkimprov.net>
Date: Wed, 7 Jun 2017 11:37:56 -0700
Subject: power: supply: bq27xxx: Add power_supply_battery_info support

Previously there was no way to configure these chips in the event that the
defaults didn't match the battery in question.

For chips with RAM data memory (and also those with flash/NVM data memory
if CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is defined and the user has not
set module param dt_monitored_battery_updates_nvm=0) we now call
power_supply_get_battery_info(), check its values, and write battery
properties to chip data memory if there is a dm_regs table for the chip.

Signed-off-by: Matt Ranostay <matt@ranostay.consulting>
Signed-off-by: Liam Breck <kernel@networkimprov.net>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/Kconfig           |  11 ++
 drivers/power/supply/bq27xxx_battery.c | 204 ++++++++++++++++++++++++++++++++-
 include/linux/power/bq27xxx_battery.h  |   2 +
 3 files changed, 216 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index f04c44c6c3f3..969f5005669c 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -198,6 +198,17 @@ config BATTERY_BQ27XXX_I2C
 	  Say Y here to enable support for batteries with BQ27xxx chips
 	  connected over an I2C bus.
 
+config BATTERY_BQ27XXX_DT_UPDATES_NVM
+	bool "BQ27xxx support for update of NVM/flash data memory"
+	depends on BATTERY_BQ27XXX_I2C
+	help
+	  Say Y here to enable devicetree monitored-battery config to update
+	  NVM/flash data memory. Only enable this option for devices with a
+	  fuel gauge mounted on the circuit board, and a battery that cannot
+	  easily be replaced with one of a different type. Not for
+	  general-purpose kernels, as this can cause misconfiguration of a
+	  smart battery with embedded NVM/flash.
+
 config BATTERY_DA9030
 	tristate "DA9030 battery driver"
 	depends on PMIC_DA903X
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 6a4ac14cb15c..ed44439d0112 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -805,6 +805,13 @@ static LIST_HEAD(bq27xxx_battery_devices);
 
 #define BQ27XXX_DM_SZ	32
 
+struct bq27xxx_dm_reg {
+	u8 subclass_id;
+	u8 offset;
+	u8 bytes;
+	u16 min, max;
+};
+
 /**
  * struct bq27xxx_dm_buf - chip data memory buffer
  * @class: data memory subclass_id
@@ -822,6 +829,44 @@ struct bq27xxx_dm_buf {
 	bool has_data, dirty;
 };
 
+#define BQ27XXX_DM_BUF(di, i) { \
+	.class = (di)->dm_regs[i].subclass_id, \
+	.block = (di)->dm_regs[i].offset / BQ27XXX_DM_SZ, \
+}
+
+static inline u16 *bq27xxx_dm_reg_ptr(struct bq27xxx_dm_buf *buf,
+				      struct bq27xxx_dm_reg *reg)
+{
+	if (buf->class == reg->subclass_id &&
+	    buf->block == reg->offset / BQ27XXX_DM_SZ)
+		return (u16 *) (buf->data + reg->offset % BQ27XXX_DM_SZ);
+
+	return NULL;
+}
+
+enum bq27xxx_dm_reg_id {
+	BQ27XXX_DM_DESIGN_CAPACITY = 0,
+	BQ27XXX_DM_DESIGN_ENERGY,
+	BQ27XXX_DM_TERMINATE_VOLTAGE,
+};
+
+static const char * const bq27xxx_dm_reg_name[] = {
+	[BQ27XXX_DM_DESIGN_CAPACITY] = "design-capacity",
+	[BQ27XXX_DM_DESIGN_ENERGY] = "design-energy",
+	[BQ27XXX_DM_TERMINATE_VOLTAGE] = "terminate-voltage",
+};
+
+
+static bool bq27xxx_dt_to_nvm = true;
+module_param_named(dt_monitored_battery_updates_nvm, bq27xxx_dt_to_nvm, bool, 0444);
+MODULE_PARM_DESC(dt_monitored_battery_updates_nvm,
+	"Devicetree monitored-battery config updates data memory on NVM/flash chips.\n"
+	"Users must set this =0 when installing a different type of battery!\n"
+	"Default is =1."
+#ifndef CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM
+	"\nSetting this affects future kernel updates, not the current configuration."
+#endif
+);
 
 static int poll_interval_param_set(const char *val, const struct kernel_param *kp)
 {
@@ -1019,6 +1064,55 @@ out:
 	return ret;
 }
 
+static void bq27xxx_battery_update_dm_block(struct bq27xxx_device_info *di,
+					    struct bq27xxx_dm_buf *buf,
+					    enum bq27xxx_dm_reg_id reg_id,
+					    unsigned int val)
+{
+	struct bq27xxx_dm_reg *reg = &di->dm_regs[reg_id];
+	const char *str = bq27xxx_dm_reg_name[reg_id];
+	u16 *prev = bq27xxx_dm_reg_ptr(buf, reg);
+
+	if (prev == NULL) {
+		dev_warn(di->dev, "buffer does not match %s dm spec\n", str);
+		return;
+	}
+
+	if (reg->bytes != 2) {
+		dev_warn(di->dev, "%s dm spec has unsupported byte size\n", str);
+		return;
+	}
+
+	if (!buf->has_data)
+		return;
+
+	if (be16_to_cpup(prev) == val) {
+		dev_info(di->dev, "%s has %u\n", str, val);
+		return;
+	}
+
+#ifdef CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM
+	if (!di->ram_chip && !bq27xxx_dt_to_nvm) {
+#else
+	if (!di->ram_chip) {
+#endif
+		/* devicetree and NVM differ; defer to NVM */
+		dev_warn(di->dev, "%s has %u; update to %u disallowed "
+#ifdef CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM
+			 "by dt_monitored_battery_updates_nvm=0"
+#else
+			 "for flash/NVM data memory"
+#endif
+			 "\n", str, be16_to_cpup(prev), val);
+		return;
+	}
+
+	dev_info(di->dev, "update %s to %u\n", str, val);
+
+	*prev = cpu_to_be16(val);
+	buf->dirty = true;
+}
+
 static int bq27xxx_battery_cfgupdate_priv(struct bq27xxx_device_info *di, bool active)
 {
 	const int limit = 100;
@@ -1129,6 +1223,103 @@ out:
 	return ret;
 }
 
+static void bq27xxx_battery_set_config(struct bq27xxx_device_info *di,
+				       struct power_supply_battery_info *info)
+{
+	struct bq27xxx_dm_buf bd = BQ27XXX_DM_BUF(di, BQ27XXX_DM_DESIGN_CAPACITY);
+	struct bq27xxx_dm_buf bt = BQ27XXX_DM_BUF(di, BQ27XXX_DM_TERMINATE_VOLTAGE);
+	bool updated;
+
+	if (bq27xxx_battery_unseal(di) < 0)
+		return;
+
+	if (info->charge_full_design_uah != -EINVAL &&
+	    info->energy_full_design_uwh != -EINVAL) {
+		bq27xxx_battery_read_dm_block(di, &bd);
+		/* assume design energy & capacity are in same block */
+		bq27xxx_battery_update_dm_block(di, &bd,
+					BQ27XXX_DM_DESIGN_CAPACITY,
+					info->charge_full_design_uah / 1000);
+		bq27xxx_battery_update_dm_block(di, &bd,
+					BQ27XXX_DM_DESIGN_ENERGY,
+					info->energy_full_design_uwh / 1000);
+	}
+
+	if (info->voltage_min_design_uv != -EINVAL) {
+		bool same = bd.class == bt.class && bd.block == bt.block;
+		if (!same)
+			bq27xxx_battery_read_dm_block(di, &bt);
+		bq27xxx_battery_update_dm_block(di, same ? &bd : &bt,
+					BQ27XXX_DM_TERMINATE_VOLTAGE,
+					info->voltage_min_design_uv / 1000);
+	}
+
+	updated = bd.dirty || bt.dirty;
+
+	bq27xxx_battery_write_dm_block(di, &bd);
+	bq27xxx_battery_write_dm_block(di, &bt);
+
+	bq27xxx_battery_seal(di);
+
+	if (updated && di->chip != BQ27421) { /* not a cfgupdate chip, so reset */
+		bq27xxx_write(di, BQ27XXX_REG_CTRL, BQ27XXX_RESET, false);
+		BQ27XXX_MSLEEP(300); /* reset time is not documented */
+	}
+	/* assume bq27xxx_battery_update() is called hereafter */
+}
+
+static void bq27xxx_battery_settings(struct bq27xxx_device_info *di)
+{
+	struct power_supply_battery_info info = {};
+	unsigned int min, max;
+
+	if (power_supply_get_battery_info(di->bat, &info) < 0)
+		return;
+
+	if (!di->dm_regs) {
+		dev_warn(di->dev, "data memory update not supported for chip\n");
+		return;
+	}
+
+	if (info.energy_full_design_uwh != info.charge_full_design_uah) {
+		if (info.energy_full_design_uwh == -EINVAL)
+			dev_warn(di->dev, "missing battery:energy-full-design-microwatt-hours\n");
+		else if (info.charge_full_design_uah == -EINVAL)
+			dev_warn(di->dev, "missing battery:charge-full-design-microamp-hours\n");
+	}
+
+	/* assume min == 0 */
+	max = di->dm_regs[BQ27XXX_DM_DESIGN_ENERGY].max;
+	if (info.energy_full_design_uwh > max * 1000) {
+		dev_err(di->dev, "invalid battery:energy-full-design-microwatt-hours %d\n",
+			info.energy_full_design_uwh);
+		info.energy_full_design_uwh = -EINVAL;
+	}
+
+	/* assume min == 0 */
+	max = di->dm_regs[BQ27XXX_DM_DESIGN_CAPACITY].max;
+	if (info.charge_full_design_uah > max * 1000) {
+		dev_err(di->dev, "invalid battery:charge-full-design-microamp-hours %d\n",
+			info.charge_full_design_uah);
+		info.charge_full_design_uah = -EINVAL;
+	}
+
+	min = di->dm_regs[BQ27XXX_DM_TERMINATE_VOLTAGE].min;
+	max = di->dm_regs[BQ27XXX_DM_TERMINATE_VOLTAGE].max;
+	if ((info.voltage_min_design_uv < min * 1000 ||
+	     info.voltage_min_design_uv > max * 1000) &&
+	     info.voltage_min_design_uv != -EINVAL) {
+		dev_err(di->dev, "invalid battery:voltage-min-design-microvolt %d\n",
+			info.voltage_min_design_uv);
+		info.voltage_min_design_uv = -EINVAL;
+	}
+
+	if ((info.energy_full_design_uwh != -EINVAL &&
+	     info.charge_full_design_uah != -EINVAL) ||
+	     info.voltage_min_design_uv  != -EINVAL)
+		bq27xxx_battery_set_config(di, &info);
+}
+
 /*
  * Return the battery State-of-Charge
  * Or < 0 if something fails.
@@ -1646,6 +1837,13 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
 		ret = bq27xxx_simple_value(di->charge_design_full, val);
 		break;
+	/*
+	 * TODO: Implement these to make registers set from
+	 * power_supply_battery_info visible in sysfs.
+	 */
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		return -EINVAL;
 	case POWER_SUPPLY_PROP_CYCLE_COUNT:
 		ret = bq27xxx_simple_value(di->cache.cycle_count, val);
 		break;
@@ -1679,7 +1877,10 @@ static void bq27xxx_external_power_changed(struct power_supply *psy)
 int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
 {
 	struct power_supply_desc *psy_desc;
-	struct power_supply_config psy_cfg = { .drv_data = di, };
+	struct power_supply_config psy_cfg = {
+		.of_node = di->dev->of_node,
+		.drv_data = di,
+	};
 
 	INIT_DELAYED_WORK(&di->work, bq27xxx_battery_poll);
 	mutex_init(&di->lock);
@@ -1704,6 +1905,7 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
 
 	dev_info(di->dev, "support ver. %s enabled\n", DRIVER_VERSION);
 
+	bq27xxx_battery_settings(di);
 	bq27xxx_battery_update(di);
 
 	mutex_lock(&bq27xxx_list_lock);
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index b1defb86e9b6..11e11685dd1d 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -63,7 +63,9 @@ struct bq27xxx_device_info {
 	struct device *dev;
 	int id;
 	enum bq27xxx_chip chip;
+	bool ram_chip;
 	const char *name;
+	struct bq27xxx_dm_reg *dm_regs;
 	u32 unseal_key;
 	struct bq27xxx_access_methods bus;
 	struct bq27xxx_reg_cache cache;
-- 
cgit v1.2.3


From f1bea8793d939e594afb3407c26d9cec8792d42f Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 26 May 2017 23:51:29 -0700
Subject: power: reset: reboot-mode: Make include file global

Move the reboot-mode.h include file into include/linux to allow drivers
outside drivers/power/reset to implement reboot-mode.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/reset/reboot-mode.c        |  2 +-
 drivers/power/reset/reboot-mode.h        | 18 ------------------
 drivers/power/reset/syscon-reboot-mode.c |  2 +-
 include/linux/reboot-mode.h              | 18 ++++++++++++++++++
 4 files changed, 20 insertions(+), 20 deletions(-)
 delete mode 100644 drivers/power/reset/reboot-mode.h
 create mode 100644 include/linux/reboot-mode.h

(limited to 'include/linux')

diff --git a/drivers/power/reset/reboot-mode.c b/drivers/power/reset/reboot-mode.c
index fb512183ace3..8f975ca0a8c4 100644
--- a/drivers/power/reset/reboot-mode.c
+++ b/drivers/power/reset/reboot-mode.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/reboot.h>
-#include "reboot-mode.h"
+#include <linux/reboot-mode.h>
 
 #define PREFIX "mode-"
 
diff --git a/drivers/power/reset/reboot-mode.h b/drivers/power/reset/reboot-mode.h
deleted file mode 100644
index 75f7fe5c881f..000000000000
--- a/drivers/power/reset/reboot-mode.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef __REBOOT_MODE_H__
-#define __REBOOT_MODE_H__
-
-struct reboot_mode_driver {
-	struct device *dev;
-	struct list_head head;
-	int (*write)(struct reboot_mode_driver *reboot, unsigned int magic);
-	struct notifier_block reboot_notifier;
-};
-
-int reboot_mode_register(struct reboot_mode_driver *reboot);
-int reboot_mode_unregister(struct reboot_mode_driver *reboot);
-int devm_reboot_mode_register(struct device *dev,
-			      struct reboot_mode_driver *reboot);
-void devm_reboot_mode_unregister(struct device *dev,
-				 struct reboot_mode_driver *reboot);
-
-#endif
diff --git a/drivers/power/reset/syscon-reboot-mode.c b/drivers/power/reset/syscon-reboot-mode.c
index c8c371b285b1..563a97d7f73e 100644
--- a/drivers/power/reset/syscon-reboot-mode.c
+++ b/drivers/power/reset/syscon-reboot-mode.c
@@ -15,7 +15,7 @@
 #include <linux/reboot.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
-#include "reboot-mode.h"
+#include <linux/reboot-mode.h>
 
 struct syscon_reboot_mode {
 	struct regmap *map;
diff --git a/include/linux/reboot-mode.h b/include/linux/reboot-mode.h
new file mode 100644
index 000000000000..75f7fe5c881f
--- /dev/null
+++ b/include/linux/reboot-mode.h
@@ -0,0 +1,18 @@
+#ifndef __REBOOT_MODE_H__
+#define __REBOOT_MODE_H__
+
+struct reboot_mode_driver {
+	struct device *dev;
+	struct list_head head;
+	int (*write)(struct reboot_mode_driver *reboot, unsigned int magic);
+	struct notifier_block reboot_notifier;
+};
+
+int reboot_mode_register(struct reboot_mode_driver *reboot);
+int reboot_mode_unregister(struct reboot_mode_driver *reboot);
+int devm_reboot_mode_register(struct device *dev,
+			      struct reboot_mode_driver *reboot);
+void devm_reboot_mode_unregister(struct device *dev,
+				 struct reboot_mode_driver *reboot);
+
+#endif
-- 
cgit v1.2.3


From aaaad0bfac019bb7701f92ebc1b31b4f85e47b55 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 2 May 2017 09:39:09 -0700
Subject: rcu: Flag need for rcu_node_tree.h and rcu_segcblist.h visibility

The rcu_node_tree.h and rcu_segcblist.h header files in the include/linux
directory might appear at first sight to be internal to the RCU
implementation.  However, the definitions in these files are needed to
determine the size of TREE SRCU's srcu_struct structure, so they must
be externally visible, which is why they live in include/linux.

This commit adds comments to this effect to those files.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcu_node_tree.h | 4 ++++
 include/linux/rcu_segcblist.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
index 4b766b61e1a0..426cee67f0e2 100644
--- a/include/linux/rcu_node_tree.h
+++ b/include/linux/rcu_node_tree.h
@@ -7,6 +7,10 @@
  * unlimited scalability while maintaining a constant level of contention
  * on the root node.
  *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index ba4d2621d9ca..c3ad00e63556 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -1,6 +1,10 @@
 /*
  * RCU segmented callback lists
  *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
-- 
cgit v1.2.3


From a68a2bb28bbf7a6dd4672a25bd87fd1b5db4fa7d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 08:34:57 -0700
Subject: rcu: Move docbook comments out of rcupdate.h

The include/linux/rcupdate.h file is included by more than 200
files, so shrinking it should provide some build-time benefits.
This commit therefore moves several docbook comments from rcupdate.h to
kernel/rcu/update.c, kernel/rcu/tree.c, and kernel/rcu/tree_plugin.h, thus
reducing the number of times that the compiler has to scan these comments.
This likely provides only a small benefit, but every little bit helps.

This commit also fixes a malformed bulleted list noted by the 0day
Test Robot.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 117 ++---------------------------------------------
 kernel/rcu/tree.c        |  42 +++++++++++++++--
 kernel/rcu/tree_plugin.h |  33 ++++++++++++-
 kernel/rcu/update.c      |  20 ++++++--
 4 files changed, 89 insertions(+), 123 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 393e461d3ea8..7a206f039fc2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -140,115 +140,14 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
 /* Exported common interfaces */
 
 #ifdef CONFIG_PREEMPT_RCU
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- */
-void call_rcu(struct rcu_head *head,
-	      rcu_callback_t func);
-
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
 #else /* #ifdef CONFIG_PREEMPT_RCU */
-
-/* In classic RCU, call_rcu() is just call_rcu_sched(). */
 #define	call_rcu	call_rcu_sched
-
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-/**
- * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
- *  OR
- *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *  These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_bh(struct rcu_head *head,
-		 rcu_callback_t func);
-
-/**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_sched() assumes
- * that the read-side critical sections end on enabling of preemption
- * or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock_sched() and  rcu_read_unlock_sched(),
- *  OR
- *  anything that disables preemption.
- *  These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_sched(struct rcu_head *head,
-		    rcu_callback_t func);
-
+void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
-
-/**
- * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_tasks() assumes
- * that the read-side critical sections end at a voluntary context
- * switch (not a preemption!), entry into idle, or transition to usermode
- * execution.  As such, there are no read-side primitives analogous to
- * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
- * to determine that all tasks have passed through a safe state, not so
- * much for data-strcuture synchronization.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void rcu_barrier_tasks(void);
@@ -474,18 +373,8 @@ extern struct lockdep_map rcu_bh_lock_map;
 extern struct lockdep_map rcu_sched_lock_map;
 extern struct lockdep_map rcu_callback_map;
 int debug_lockdep_rcu_enabled(void);
-
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
-
-/**
- * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
- *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
- * RCU-sched read-side critical section.  In absence of
- * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
- * critical section unless it can prove otherwise.
- */
 int rcu_read_lock_sched_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 121c1436a7f3..5ebc830297c1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3223,8 +3223,24 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	local_irq_restore(flags);
 }
 
-/*
- * Queue an RCU-sched callback for invocation after a grace period.
+/**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_sched() assumes
+ * that the read-side critical sections end on enabling of preemption
+ * or on voluntary preemption.
+ * RCU read-side critical sections are delimited by :
+ *  - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ *  - anything that disables preemption.
+ *
+ *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
@@ -3232,8 +3248,26 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/*
- * Queue an RCU callback for invocation after a quicker grace period.
+/**
+ * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_bh() assumes
+ * that the read-side critical sections end on completion of a softirq
+ * handler. This means that read-side critical sections in process
+ * context must not be interrupted by softirqs. This interface is to be
+ * used when most of the read-side critical sections are in softirq context.
+ * RCU read-side critical sections are delimited by :
+ *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
+ *  OR
+ *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 573fbe9640a0..116cf8339826 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -675,8 +675,37 @@ static void rcu_preempt_do_callbacks(void)
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
-/*
- * Queue a preemptible-RCU callback for invocation after a grace period.
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
  */
 void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 123a9c4b5055..84dec2c8ad1b 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -576,9 +576,23 @@ module_param(rcu_task_stall_timeout, int, 0644);
 static void rcu_spawn_tasks_kthread(void);
 static struct task_struct *rcu_tasks_kthread_ptr;
 
-/*
- * Post an RCU-tasks callback.  First call must be from process context
- * after the scheduler if fully operational.
+/**
+ * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks() assumes
+ * that the read-side critical sections end at a voluntary context
+ * switch (not a preemption!), entry into idle, or transition to usermode
+ * execution.  As such, there are no read-side primitives analogous to
+ * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
+ * to determine that all tasks have passed through a safe state, not so
+ * much for data-strcuture synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 {
-- 
cgit v1.2.3


From 3caec62fbb313946b9be53720bbf2280bb19ec28 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 09:27:15 -0700
Subject: rcu: Move rcu_expedited and rcu_normal externs from rcupdate.h

The rcu_expedited and rcu_normal variables are used only by sysctl
and kernel/rcu/update.c, so it does not make sense to their extern
declarations in rcupdate.h.  This commit therefore moves these
extern declarations to update.c.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 7 -------
 kernel/rcu/update.c      | 2 ++
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7a206f039fc2..6e7e930c1610 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -46,13 +46,6 @@
 #include <linux/ktime.h>
 #include <linux/irqflags.h>
 
-#include <asm/barrier.h>
-
-#ifndef CONFIG_TINY_RCU
-extern int rcu_expedited; /* for sysctl */
-extern int rcu_normal;    /* also for sysctl */
-#endif /* #ifndef CONFIG_TINY_RCU */
-
 #ifdef CONFIG_TINY_RCU
 /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
 static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 84dec2c8ad1b..00e77c470017 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,7 +62,9 @@
 #define MODULE_PARAM_PREFIX "rcupdate."
 
 #ifndef CONFIG_TINY_RCU
+extern int rcu_expedited; /* from sysctl */
 module_param(rcu_expedited, int, 0);
+extern int rcu_normal; /* from sysctl */
 module_param(rcu_normal, int, 0);
 static int rcu_normal_after_boot;
 module_param(rcu_normal_after_boot, int, 0);
-- 
cgit v1.2.3


From 25c36329a30c8cac090effe1fbae9bb916fa95fe Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 09:51:55 -0700
Subject: rcu: Move expediting-related access/control out of rcupdate.h

The rcu_gp_is_normal(), rcu_gp_is_expedited(), rcu_expedite_gp(), and
rcu_unexpedite_gp() functions are intended only for use within the
RCU implementation itself -- the sysfs access is what should be used
outside of RCU.  This commit therefore moves the declarations for
these functions to kernel/rcu/rcu.h, and also includes this file into
kernel/rcu/rcutorture.c and kernel/rcu/rcuperf.c.  This also has the
beneficial effect of shrinking rcupdate.c a bit.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 26 --------------------------
 kernel/rcu/rcu.h         | 26 ++++++++++++++++++++++++++
 kernel/rcu/rcuperf.c     |  2 ++
 kernel/rcu/rcutorture.c  |  2 ++
 4 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6e7e930c1610..049c62c59f1b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -46,32 +46,6 @@
 #include <linux/ktime.h>
 #include <linux/irqflags.h>
 
-#ifdef CONFIG_TINY_RCU
-/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
-static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
-{
-	return true;
-}
-static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
-{
-	return false;
-}
-
-static inline void rcu_expedite_gp(void)
-{
-}
-
-static inline void rcu_unexpedite_gp(void)
-{
-}
-#else /* #ifdef CONFIG_TINY_RCU */
-bool rcu_gp_is_normal(void);     /* Internal RCU use. */
-bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
-void rcu_expedite_gp(void);
-void rcu_unexpedite_gp(void);
-void rcupdate_announce_bootup_oddness(void);
-#endif /* #else #ifdef CONFIG_TINY_RCU */
-
 enum rcutorture_type {
 	RCU_FLAVOR,
 	RCU_BH_FLAVOR,
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 73e16ec4054b..ceb78110db1b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -293,4 +293,30 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
 
 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
 
+#ifdef CONFIG_TINY_RCU
+/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
+{
+	return true;
+}
+static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
+{
+	return false;
+}
+
+static inline void rcu_expedite_gp(void)
+{
+}
+
+static inline void rcu_unexpedite_gp(void)
+{
+}
+#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_normal(void);     /* Internal RCU use. */
+bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+void rcupdate_announce_bootup_oddness(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index d80f11d9f8bd..3cc18110b612 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -48,6 +48,8 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 
+#include "rcu.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index a58592b73f19..03cdf79e73d4 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -52,6 +52,8 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 
+#include "rcu.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
 
-- 
cgit v1.2.3


From cad7b3897279c869de61dc88133037b941f84233 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 10:22:57 -0700
Subject: rcu: Move torture-related definitions from rcupdate.h to rcu.h

The include/linux/rcupdate.h file contains a number of definitions that
are used only to communicate between rcutorture, rcuperf, and the RCU code
itself.  There is no point in having these definitions exposed globally
throughout the kernel, so this commit moves them to kernel/rcu/rcu.h.
This change has the added benefit of shrinking rcupdate.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h    | 52 ---------------------------
 include/linux/rcutiny.h     |  5 +++
 include/linux/rcutree.h     |  1 +
 include/linux/srcuclassic.h | 14 --------
 include/linux/srcutiny.h    | 12 -------
 include/linux/srcutree.h    |  4 ---
 kernel/rcu/rcu.h            | 85 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 91 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 049c62c59f1b..7557499d8e70 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -46,58 +46,6 @@
 #include <linux/ktime.h>
 #include <linux/irqflags.h>
 
-enum rcutorture_type {
-	RCU_FLAVOR,
-	RCU_BH_FLAVOR,
-	RCU_SCHED_FLAVOR,
-	RCU_TASKS_FLAVOR,
-	SRCU_FLAVOR,
-	INVALID_RCU_FLAVOR
-};
-
-#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
-void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
-			    unsigned long *gpnum, unsigned long *completed);
-void rcutorture_record_test_transition(void);
-void rcutorture_record_progress(unsigned long vernum);
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-bool rcu_irq_enter_disabled(void);
-#else
-static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
-					  int *flags,
-					  unsigned long *gpnum,
-					  unsigned long *completed)
-{
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
-}
-static inline void rcutorture_record_test_transition(void)
-{
-}
-static inline void rcutorture_record_progress(unsigned long vernum)
-{
-}
-static inline bool rcu_irq_enter_disabled(void)
-{
-	return false;
-}
-#ifdef CONFIG_RCU_TRACE
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
-#endif
-#endif
-
 #define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
 #define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 74d9c3a1feee..ade360e0d58c 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -202,6 +202,11 @@ static inline void rcu_irq_enter(void)
 {
 }
 
+static inline bool rcu_irq_enter_disabled(void)
+{
+	return false;
+}
+
 static inline void rcu_irq_exit_irqson(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0bacb6b2af69..28af91a19573 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -101,6 +101,7 @@ void rcu_irq_enter(void);
 void rcu_irq_exit(void);
 void rcu_irq_enter_irqson(void);
 void rcu_irq_exit_irqson(void);
+bool rcu_irq_enter_disabled(void);
 
 void exit_rcu(void);
 
diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
index 5753f7322262..41cf99930f34 100644
--- a/include/linux/srcuclassic.h
+++ b/include/linux/srcuclassic.h
@@ -98,18 +98,4 @@ void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
 unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
-static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
-					   struct srcu_struct *sp, int *flags,
-					   unsigned long *gpnum,
-					   unsigned long *completed)
-{
-	if (test_type != SRCU_FLAVOR)
-		return;
-	*flags = 0;
-	*completed = sp->completed;
-	*gpnum = *completed;
-	if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head)
-		(*gpnum)++;
-}
-
 #endif
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index b6edd9c8fdce..85bddce6a7a6 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -93,16 +93,4 @@ static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
 	return 0;
 }
 
-static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
-					   struct srcu_struct *sp, int *flags,
-					   unsigned long *gpnum,
-					   unsigned long *completed)
-{
-	if (test_type != SRCU_FLAVOR)
-		return;
-	*flags = 0;
-	*completed = sp->srcu_gp_seq;
-	*gpnum = *completed;
-}
-
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 32e86d85fd11..f4adfed17b51 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -143,8 +143,4 @@ void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
 unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
-void srcutorture_get_gp_data(enum rcutorture_type test_type,
-			     struct srcu_struct *sp, int *flags,
-			     unsigned long *gpnum, unsigned long *completed);
-
 #endif
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index ceb78110db1b..f190fc1c8215 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -319,4 +319,89 @@ void rcu_unexpedite_gp(void);
 void rcupdate_announce_bootup_oddness(void);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
+enum rcutorture_type {
+	RCU_FLAVOR,
+	RCU_BH_FLAVOR,
+	RCU_SCHED_FLAVOR,
+	RCU_TASKS_FLAVOR,
+	SRCU_FLAVOR,
+	INVALID_RCU_FLAVOR
+};
+
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
+void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+			    unsigned long *gpnum, unsigned long *completed);
+void rcutorture_record_test_transition(void);
+void rcutorture_record_progress(unsigned long vernum);
+void do_trace_rcu_torture_read(const char *rcutorturename,
+			       struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old,
+			       unsigned long c);
+#else
+static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
+					  int *flags,
+					  unsigned long *gpnum,
+					  unsigned long *completed)
+{
+	*flags = 0;
+	*gpnum = 0;
+	*completed = 0;
+}
+static inline void rcutorture_record_test_transition(void)
+{
+}
+static inline void rcutorture_record_progress(unsigned long vernum)
+{
+}
+#ifdef CONFIG_RCU_TRACE
+void do_trace_rcu_torture_read(const char *rcutorturename,
+			       struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old,
+			       unsigned long c);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
+#endif
+#endif
+
+#ifdef CONFIG_TINY_SRCU
+
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->srcu_gp_seq;
+	*gpnum = *completed;
+}
+
+#elif defined(CONFIG_TREE_SRCU)
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed);
+
+#elif defined(CONFIG_CLASSIC_SRCU)
+
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->completed;
+	*gpnum = *completed;
+	if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head)
+		(*gpnum)++;
+}
+
+#endif
+
 #endif /* __LINUX_RCU_H */
-- 
cgit v1.2.3


From c4cbf9f736f5bd0a53a5ea401d86376c86bf905e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 10:36:36 -0700
Subject: rcu: Remove UINT_CMP_GE() and UINT_CMP_LT()

The UINT_CMP_GE() and UINT_CMP_LT() macros are not used, so this
commit removes them.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7557499d8e70..fa3f921e5874 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -46,8 +46,6 @@
 #include <linux/ktime.h>
 #include <linux/irqflags.h>
 
-#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
-#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
 #define ulong2long(a)		(*(long *)(&(a)))
-- 
cgit v1.2.3


From d0df7a349133e10184d478ae1189e79e5c53615d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 10:52:10 -0700
Subject: rcu: Move rcupdate.h to new empty-function style

This commit saves a few lines in include/linux/rcupdate.h by moving
to single-line definitions for empty functions, instead of the old
style where the two curly braces each get their own line.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 39 +++++++++------------------------------
 1 file changed, 9 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fa3f921e5874..415633076cb1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -124,12 +124,8 @@ static inline void rcu_end_inkernel_boot(void) { }
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
 #else /* #ifdef CONFIG_RCU_STALL_COMMON */
-static inline void rcu_sysrq_start(void)
-{
-}
-static inline void rcu_sysrq_end(void)
-{
-}
+static inline void rcu_sysrq_start(void) { }
+static inline void rcu_sysrq_end(void) { }
 #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -143,9 +139,7 @@ static inline void rcu_user_exit(void) { }
 #ifdef CONFIG_RCU_NOCB_CPU
 void rcu_init_nohz(void);
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
-static inline void rcu_init_nohz(void)
-{
-}
+static inline void rcu_init_nohz(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 /**
@@ -243,21 +237,10 @@ void destroy_rcu_head(struct rcu_head *head);
 void init_rcu_head_on_stack(struct rcu_head *head);
 void destroy_rcu_head_on_stack(struct rcu_head *head);
 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-static inline void init_rcu_head(struct rcu_head *head)
-{
-}
-
-static inline void destroy_rcu_head(struct rcu_head *head)
-{
-}
-
-static inline void init_rcu_head_on_stack(struct rcu_head *head)
-{
-}
-
-static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
-{
-}
+static inline void init_rcu_head(struct rcu_head *head) { }
+static inline void destroy_rcu_head(struct rcu_head *head) { }
+static inline void init_rcu_head_on_stack(struct rcu_head *head) { }
+static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
@@ -334,9 +317,7 @@ static inline void rcu_preempt_sleep_check(void)
 			 "Illegal context switch in RCU read-side critical section");
 }
 #else /* #ifdef CONFIG_PROVE_RCU */
-static inline void rcu_preempt_sleep_check(void)
-{
-}
+static inline void rcu_preempt_sleep_check(void) { }
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
 #define rcu_sleep_check()						\
@@ -915,9 +896,7 @@ static inline bool rcu_sys_is_idle(void)
 	return false;
 }
 
-static inline void rcu_sysidle_force_exit(void)
-{
-}
+static inline void rcu_sysidle_force_exit(void) { }
 
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 
-- 
cgit v1.2.3


From 791875d16e2f6e2e5b90328ccac643f512ac76c4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 11:06:05 -0700
Subject: rcu: Eliminate the unused __rcu_is_watching() function

The __rcu_is_watching() function is currently not used, aside from
to implement the rcu_is_watching() function.  This commit therefore
eliminates __rcu_is_watching(), which has the beneficial side-effect
of shrinking include/linux/rcupdate.h a bit.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  4 ----
 include/linux/rcutiny.h  | 11 -----------
 kernel/rcu/tiny.c        | 13 -------------
 kernel/rcu/tree.c        | 19 ++++---------------
 4 files changed, 4 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 415633076cb1..b4edfe0966c6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -204,10 +204,6 @@ do { \
 		rcu_note_voluntary_context_switch(current); \
 } while (0)
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
-bool __rcu_is_watching(void);
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
-
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ade360e0d58c..5ed6934152a6 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -232,22 +232,11 @@ static inline void rcu_scheduler_starting(void)
 }
 #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
-
-static inline bool rcu_is_watching(void)
-{
-	return __rcu_is_watching();
-}
-
-#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
 static inline bool rcu_is_watching(void)
 {
 	return true;
 }
 
-#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
 static inline void rcu_request_urgent_qs_task(struct task_struct *t)
 {
 }
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index e5385731e391..2306cab2195d 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -59,19 +59,6 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL(rcu_barrier_sched);
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
-
-/*
- * Test whether RCU thinks that the current CPU is idle.
- */
-bool notrace __rcu_is_watching(void)
-{
-	return true;
-}
-EXPORT_SYMBOL(__rcu_is_watching);
-
-#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
 /*
  * Helper function for rcu_sched_qs() and rcu_bh_qs().
  * Also irqs are disabled to avoid confusion due to interrupt handlers
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5ebc830297c1..61a97164abcc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1138,23 +1138,12 @@ void rcu_nmi_exit(void)
 	rcu_dynticks_eqs_enter();
 }
 
-/**
- * __rcu_is_watching - are RCU read-side critical sections safe?
- *
- * Return true if RCU is watching the running CPU, which means that
- * this CPU can safely enter RCU read-side critical sections.  Unlike
- * rcu_is_watching(), the caller of __rcu_is_watching() must have at
- * least disabled preemption.
- */
-bool notrace __rcu_is_watching(void)
-{
-	return !rcu_dynticks_curr_cpu_in_eqs();
-}
-
 /**
  * rcu_is_watching - see if RCU thinks that the current CPU is idle
  *
- * If the current CPU is in its idle loop and is neither in an interrupt
+ * Return true if RCU is watching the running CPU, which means that this
+ * CPU can safely enter RCU read-side critical sections.  In other words,
+ * if the current CPU is in its idle loop and is neither in an interrupt
  * or NMI handler, return true.
  */
 bool notrace rcu_is_watching(void)
@@ -1162,7 +1151,7 @@ bool notrace rcu_is_watching(void)
 	bool ret;
 
 	preempt_disable_notrace();
-	ret = __rcu_is_watching();
+	ret = !rcu_dynticks_curr_cpu_in_eqs();
 	preempt_enable_notrace();
 	return ret;
 }
-- 
cgit v1.2.3


From 82118249d0ca4078d56d5e43172ada1567fdf946 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 11:13:24 -0700
Subject: rcu: Move the RCU_SCHEDULER_ definitions from rcupdate.h

The RCU_SCHEDULER_INACTIVE, RCU_SCHEDULER_INIT, and RCU_SCHEDULER_RUNNING
definitions are used only within RCU, so this commit moves them from
include/linux/rcupdate.h to kernel/rcu/rcu.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 4 ----
 kernel/rcu/rcu.h         | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b4edfe0966c6..9206a28a2d44 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -217,10 +217,6 @@ do { \
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
 
-#define RCU_SCHEDULER_INACTIVE	0
-#define RCU_SCHEDULER_INIT	1
-#define RCU_SCHEDULER_RUNNING	2
-
 /*
  * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
  * initialization and destruction of rcu_head on the stack. rcu_head structures
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f190fc1c8215..17fee2a667d9 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -319,6 +319,10 @@ void rcu_unexpedite_gp(void);
 void rcupdate_announce_bootup_oddness(void);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
+#define RCU_SCHEDULER_INACTIVE	0
+#define RCU_SCHEDULER_INIT	1
+#define RCU_SCHEDULER_RUNNING	2
+
 enum rcutorture_type {
 	RCU_FLAVOR,
 	RCU_BH_FLAVOR,
-- 
cgit v1.2.3


From 752de307b0ee47308bfc299de3a3ad623c16b4d8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 11:18:01 -0700
Subject: rcu: Remove linux/debugobjects.h from rcupdate.h

The include/linux/rcupdate.h file does not actually need anything from
linux/debugobjects.h, so this commit removes the inclusion.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9206a28a2d44..f105f0834bbe 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -40,7 +40,6 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 #include <linux/lockdep.h>
-#include <linux/debugobjects.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/ktime.h>
-- 
cgit v1.2.3


From 3d54f7983f3e6ac9f444fa20970b1abc8f089b79 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 12:25:50 -0700
Subject: rcu: Move rcu_is_nocb_cpu() from rcupdate.h to rcu.h

The rcu_is_nocb_cpu() function is used only internally to RCU.  This
commit therefore moves its declaration from include/linux/rcupdate.h
to kernel/rcu/rcu.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 9 ---------
 kernel/rcu/rcu.h         | 8 ++++++++
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f105f0834bbe..003427425e27 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -867,15 +867,6 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 }
 #endif /* #ifdef CONFIG_TINY_RCU */
 
-#if defined(CONFIG_RCU_NOCB_CPU_ALL)
-static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
-#elif defined(CONFIG_RCU_NOCB_CPU)
-bool rcu_is_nocb_cpu(int cpu);
-#else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
-#endif
-
-
 /* Only for use by adaptive-ticks code. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 bool rcu_sys_is_idle(void);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 17fee2a667d9..2f344662c568 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -408,4 +408,12 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
 
 #endif
 
+#if defined(CONFIG_RCU_NOCB_CPU_ALL)
+static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
+#elif defined(CONFIG_RCU_NOCB_CPU)
+bool rcu_is_nocb_cpu(int cpu);
+#else
+static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
+#endif
+
 #endif /* __LINUX_RCU_H */
-- 
cgit v1.2.3


From b8989b76052eedc99b09322efd6f68816f191a1a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 12:28:59 -0700
Subject: rcu: Move rcu_ftrace_dump() from rcupdate.h to rcu.h

The rcu_ftrace_dump() function is used only internally to RCU.  This
commit therefore moves its declaration from include/linux/rcupdate.h
to kernel/rcu/rcu.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 12 ------------
 kernel/rcu/rcu.h         | 12 ++++++++++++
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 003427425e27..ad5e6934dcf3 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -883,18 +883,6 @@ static inline void rcu_sysidle_force_exit(void) { }
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 
 
-/*
- * Dump the ftrace buffer, but only one time per callsite per boot.
- */
-#define rcu_ftrace_dump(oops_dump_mode) \
-do { \
-	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
-	\
-	if (!atomic_read(&___rfd_beenhere) && \
-	    !atomic_xchg(&___rfd_beenhere, 1)) \
-		ftrace_dump(oops_dump_mode); \
-} while (0)
-
 /*
  * Place this after a lock-acquisition primitive to guarantee that
  * an UNLOCK+LOCK pair acts as a full barrier.  This guarantee applies
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 2f344662c568..cdbaa441bdac 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -212,6 +212,18 @@ int rcu_jiffies_till_stall_check(void);
  */
 #define TPS(x)  tracepoint_string(x)
 
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+	\
+	if (!atomic_read(&___rfd_beenhere) && \
+	    !atomic_xchg(&___rfd_beenhere, 1)) \
+		ftrace_dump(oops_dump_mode); \
+} while (0)
+
 void rcu_early_boot_tests(void);
 void rcu_test_sync_prims(void);
 
-- 
cgit v1.2.3


From 17a8c187310ccc5f5b65a7d8faf96fdc66c5fe3d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 12:32:55 -0700
Subject: rcu: move rcupdate.h to the new true/false-function style

This commit saves a few lines in include/linux/rcupdate.h by moving
to single-line definitions for functions that just return either true
or false, instead of the old style where the two curly braces each get
their own line.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ad5e6934dcf3..564096e6e141 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -237,10 +237,7 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { }
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
-static inline bool rcu_lockdep_current_cpu_online(void)
-{
-	return true;
-}
+static inline bool rcu_lockdep_current_cpu_online(void) { return true; }
 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -872,14 +869,8 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 bool rcu_sys_is_idle(void);
 void rcu_sysidle_force_exit(void);
 #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static inline bool rcu_sys_is_idle(void)
-{
-	return false;
-}
-
+static inline bool rcu_sys_is_idle(void) { return false; }
 static inline void rcu_sysidle_force_exit(void) { }
-
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 
 
-- 
cgit v1.2.3


From e3c8d51e1a58c73a557eb38a9a6afb4f704a3379 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 13:37:16 -0700
Subject: rcu: Move torture-related functions out of rcutiny.h and rcutree.h

The various functions similar to rcu_batches_started(), the
function show_rcu_gp_kthreads(), the various functions similar to
rcu_force_quiescent_state(), and the variables rcutorture_testseq and
rcutorture_vernum are used only within RCU.  There is therefore no point
in exporting them to the kernel at large from include/linux/rcutiny.h
and include/linux/rcutree.h.  This commit therefore moves all of these
to kernel/rcu/rcu.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 80 ---------------------------------------
 include/linux/rcutree.h | 16 --------
 kernel/rcu/rcu.h        | 99 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 5ed6934152a6..0d9270913686 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -106,86 +106,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 {
 }
 
-/*
- * Return the number of grace periods started.
- */
-static inline unsigned long rcu_batches_started(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods started.
- */
-static inline unsigned long rcu_batches_started_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods started.
- */
-static inline unsigned long rcu_batches_started_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of grace periods completed.
- */
-static inline unsigned long rcu_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of expedited grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of expedited sched grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed_sched(void)
-{
-	return 0;
-}
-
-static inline void rcu_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_bh_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_sched_force_quiescent_state(void)
-{
-}
-
-static inline void show_rcu_gp_kthreads(void)
-{
-}
-
 static inline void rcu_cpu_stall_reset(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 28af91a19573..43113323ca09 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -79,22 +79,6 @@ void cond_synchronize_rcu(unsigned long oldstate);
 unsigned long get_state_synchronize_sched(void);
 void cond_synchronize_sched(unsigned long oldstate);
 
-extern unsigned long rcutorture_testseq;
-extern unsigned long rcutorture_vernum;
-unsigned long rcu_batches_started(void);
-unsigned long rcu_batches_started_bh(void);
-unsigned long rcu_batches_started_sched(void);
-unsigned long rcu_batches_completed(void);
-unsigned long rcu_batches_completed_bh(void);
-unsigned long rcu_batches_completed_sched(void);
-unsigned long rcu_exp_batches_completed(void);
-unsigned long rcu_exp_batches_completed_sched(void);
-void show_rcu_gp_kthreads(void);
-
-void rcu_force_quiescent_state(void);
-void rcu_bh_force_quiescent_state(void);
-void rcu_sched_force_quiescent_state(void);
-
 void rcu_idle_enter(void);
 void rcu_idle_exit(void);
 void rcu_irq_enter(void);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index cdbaa441bdac..d849b371b32b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -420,6 +420,105 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
 
 #endif
 
+#ifdef CONFIG_TINY_RCU
+
+/*
+ * Return the number of grace periods started.
+ */
+static inline unsigned long rcu_batches_started(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods started.
+ */
+static inline unsigned long rcu_batches_started_bh(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of sched grace periods started.
+ */
+static inline unsigned long rcu_batches_started_sched(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed_bh(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of sched grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed_sched(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return 0;
+}
+
+static inline void rcu_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_bh_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_sched_force_quiescent_state(void)
+{
+}
+
+static inline void show_rcu_gp_kthreads(void)
+{
+}
+
+#else /* #ifdef CONFIG_TINY_RCU */
+extern unsigned long rcutorture_testseq;
+extern unsigned long rcutorture_vernum;
+unsigned long rcu_batches_started(void);
+unsigned long rcu_batches_started_bh(void);
+unsigned long rcu_batches_started_sched(void);
+unsigned long rcu_batches_completed(void);
+unsigned long rcu_batches_completed_bh(void);
+unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
+void show_rcu_gp_kthreads(void);
+void rcu_force_quiescent_state(void);
+void rcu_bh_force_quiescent_state(void);
+void rcu_sched_force_quiescent_state(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
 #if defined(CONFIG_RCU_NOCB_CPU_ALL)
 static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
 #elif defined(CONFIG_RCU_NOCB_CPU)
-- 
cgit v1.2.3


From fe21a27e8ca0937a5ac298de1f4b46382e9c5c88 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 13:45:51 -0700
Subject: rcu: Move rcu_request_urgent_qs_task() out of rcutiny.h and rcutree.h

The rcu_request_urgent_qs_task() function is used only within RCU,
so there is no point in exporting it to the rest of the kernel from
nclude/linux/rcutiny.h and include/linux/rcutree.h.  This commit therefore
moves this function to kernel/rcu/rcu.h.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 4 ----
 include/linux/rcutree.h | 3 ---
 kernel/rcu/rcu.h        | 6 ++++++
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 0d9270913686..f5067941bc27 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -157,10 +157,6 @@ static inline bool rcu_is_watching(void)
 	return true;
 }
 
-static inline void rcu_request_urgent_qs_task(struct task_struct *t)
-{
-}
-
 static inline void rcu_all_qs(void)
 {
 	barrier(); /* Avoid RCU read-side critical sections leaking across. */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 43113323ca09..d6aa89d15d47 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -91,10 +91,7 @@ void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
-
 bool rcu_is_watching(void);
-void rcu_request_urgent_qs_task(struct task_struct *t);
-
 void rcu_all_qs(void);
 
 /* RCUtree hotplug events */
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index d849b371b32b..5b76a5baff2e 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -335,6 +335,12 @@ void rcupdate_announce_bootup_oddness(void);
 #define RCU_SCHEDULER_INIT	1
 #define RCU_SCHEDULER_RUNNING	2
 
+#ifdef CONFIG_TINY_RCU
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
+#else /* #ifdef CONFIG_TINY_RCU */
+void rcu_request_urgent_qs_task(struct task_struct *t);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
 enum rcutorture_type {
 	RCU_FLAVOR,
 	RCU_BH_FLAVOR,
-- 
cgit v1.2.3


From 71c40fd0b5ceb300c6cb8753835d9d94a8bfc56f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 13:51:42 -0700
Subject: rcu: Move rcutiny.h to new empty/true/false-function style

This commit saves a few lines in include/linux/rcutiny.h by moving
to single-line definitions for empty functions, instead of the old
style where the two curly braces each get their own line.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 71 +++++++++++--------------------------------------
 1 file changed, 16 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index f5067941bc27..2bfe48bc0e3b 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -33,10 +33,8 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }
 
-static inline bool rcu_eqs_special_set(int cpu)
-{
-	return false;  /* Never flag non-existent other CPUs! */
-}
+/* Never flag non-existent other CPUs! */
+static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
 static inline unsigned long get_state_synchronize_rcu(void)
 {
@@ -102,65 +100,28 @@ static inline void kfree_call_rcu(struct rcu_head *head,
  * Take advantage of the fact that there is only one CPU, which
  * allows us to ignore virtualization-based context switches.
  */
-static inline void rcu_virt_note_context_switch(int cpu)
-{
-}
-
-static inline void rcu_cpu_stall_reset(void)
-{
-}
-
-static inline void rcu_idle_enter(void)
-{
-}
-
-static inline void rcu_idle_exit(void)
-{
-}
-
-static inline void rcu_irq_enter(void)
-{
-}
-
-static inline bool rcu_irq_enter_disabled(void)
-{
-	return false;
-}
-
-static inline void rcu_irq_exit_irqson(void)
-{
-}
-
-static inline void rcu_irq_enter_irqson(void)
-{
-}
-
-static inline void rcu_irq_exit(void)
-{
-}
-
-static inline void exit_rcu(void)
-{
-}
+static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_cpu_stall_reset(void) { }
+static inline void rcu_idle_enter(void) { }
+static inline void rcu_idle_exit(void) { }
+static inline void rcu_irq_enter(void) { }
+static inline bool rcu_irq_enter_disabled(void) { return false; }
+static inline void rcu_irq_exit_irqson(void) { }
+static inline void rcu_irq_enter_irqson(void) { }
+static inline void rcu_irq_exit(void) { }
+static inline void exit_rcu(void) { }
 
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 extern int rcu_scheduler_active __read_mostly;
 void rcu_scheduler_starting(void);
 #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
-static inline void rcu_scheduler_starting(void)
-{
-}
+static inline void rcu_scheduler_starting(void) { }
 #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 
-static inline bool rcu_is_watching(void)
-{
-	return true;
-}
+static inline bool rcu_is_watching(void) { return true; }
 
-static inline void rcu_all_qs(void)
-{
-	barrier(); /* Avoid RCU read-side critical sections leaking across. */
-}
+/* Avoid RCU read-side critical sections leaking across. */
+static inline void rcu_all_qs(void) { barrier(); }
 
 /* RCUtree hotplug events */
 #define rcutree_prepare_cpu      NULL
-- 
cgit v1.2.3


From 5a0465e17a18c467b712a816985b7b8dd8d10c16 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 4 May 2017 11:31:04 -0700
Subject: srcu: Shrink srcu.h by moving docbook and private function

The call_srcu() docbook entry is currently in include/linux/srcu.h,
which causes needless processing for each include point.  This commit
therefore moves this entry to kernel/rcu/srcutree.c, which the compiler
reads only once.  In addition, the srcu_batches_completed() function is
used only within RCU and its torture-test suites.  This commit therefore
also moves this function's declaration from include/linux/srcutiny.h,
include/linux/srcutree.h, and include/linux/srcuclassic.h to
kernel/rcu/rcu.h.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h        | 20 --------------------
 include/linux/srcuclassic.h |  1 -
 include/linux/srcutiny.h    |  5 -----
 include/linux/srcutree.h    |  1 -
 kernel/rcu/rcu.h            |  6 ++++++
 kernel/rcu/srcutree.c       | 17 +++++++++++++++++
 6 files changed, 23 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index ea356d800675..5f509018e6b5 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -65,32 +65,12 @@ int init_srcu_struct(struct srcu_struct *sp);
 #elif defined(CONFIG_SRCU)
 #error "Unknown SRCU implementation specified to kernel configuration"
 #else
-
 /* Dummy definition for things like notifiers.  Actual use gets link error. */
 struct srcu_struct { };
-
 #endif
 
-/**
- * call_srcu() - Queue a callback for invocation after an SRCU grace period
- * @sp: srcu_struct in queue the callback
- * @head: structure to be used for queueing the SRCU callback.
- * @func: function to be invoked after the SRCU grace period
- *
- * The callback function will be invoked some time after a full SRCU
- * grace period elapses, in other words after all pre-existing SRCU
- * read-side critical sections have completed.  However, the callback
- * function might well execute concurrently with other SRCU read-side
- * critical sections that started after call_srcu() was invoked.  SRCU
- * read-side critical sections are delimited by srcu_read_lock() and
- * srcu_read_unlock(), and may be nested.
- *
- * The callback will be invoked from process context, but must nevertheless
- * be fast and must not block.
- */
 void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 		void (*func)(struct rcu_head *head));
-
 void cleanup_srcu_struct(struct srcu_struct *sp);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
index 41cf99930f34..67db4a36ef0d 100644
--- a/include/linux/srcuclassic.h
+++ b/include/linux/srcuclassic.h
@@ -96,6 +96,5 @@ void process_srcu(struct work_struct *work);
 
 void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
 #endif
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 85bddce6a7a6..4c53e698c6e4 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -88,9 +88,4 @@ static inline void srcu_barrier(struct srcu_struct *sp)
 	synchronize_srcu(sp);
 }
 
-static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
-{
-	return 0;
-}
-
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index f4adfed17b51..24e949bda12a 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -141,6 +141,5 @@ void process_srcu(struct work_struct *work);
 
 void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
 #endif
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 5b76a5baff2e..74d9fc205313 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -492,6 +492,11 @@ static inline unsigned long rcu_exp_batches_completed_sched(void)
 	return 0;
 }
 
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return 0;
+}
+
 static inline void rcu_force_quiescent_state(void)
 {
 }
@@ -519,6 +524,7 @@ unsigned long rcu_batches_completed_bh(void);
 unsigned long rcu_batches_completed_sched(void);
 unsigned long rcu_exp_batches_completed(void);
 unsigned long rcu_exp_batches_completed_sched(void);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
 void show_rcu_gp_kthreads(void);
 void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index cc06dbfc9692..66a998f9c5a7 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -854,6 +854,23 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 		srcu_funnel_exp_start(sp, sdp->mynode, s);
 }
 
+/**
+ * call_srcu() - Queue a callback for invocation after an SRCU grace period
+ * @sp: srcu_struct in queue the callback
+ * @head: structure to be used for queueing the SRCU callback.
+ * @func: function to be invoked after the SRCU grace period
+ *
+ * The callback function will be invoked some time after a full SRCU
+ * grace period elapses, in other words after all pre-existing SRCU
+ * read-side critical sections have completed.  However, the callback
+ * function might well execute concurrently with other SRCU read-side
+ * critical sections that started after call_srcu() was invoked.  SRCU
+ * read-side critical sections are delimited by srcu_read_lock() and
+ * srcu_read_unlock(), and may be nested.
+ *
+ * The callback will be invoked from process context, but must nevertheless
+ * be fast and must not block.
+ */
 void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	       rcu_callback_t func)
 {
-- 
cgit v1.2.3


From 2464dd940e23bad227c387a40eec99f7aa02ed96 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 4 May 2017 14:29:16 -0700
Subject: srcu: Apply trivial callback lists to shrink Tiny SRCU

The rcu_segcblist structure provides quite a bit of functionality, and
Tiny SRCU needs almost none of it.  So this commit replaces Tiny SRCU's
uses of rcu_segcblist with a simple singly linked list with tail pointer.
This change significantly reduces Tiny SRCU's memory footprint, more
than making up for the growth caused by the creation of rcu_segcblist.c

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcutiny.h |  7 +++--
 init/Kconfig             |  2 +-
 kernel/rcu/rcu.h         |  2 +-
 kernel/rcu/srcutiny.c    | 70 ++++++++++++++++++++++--------------------------
 4 files changed, 37 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4c53e698c6e4..cfbfc540cafc 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -33,9 +33,8 @@ struct srcu_struct {
 	u8 srcu_gp_waiting;		/* GP waiting for readers? */
 	struct swait_queue_head srcu_wq;
 					/* Last srcu_read_unlock() wakes GP. */
-	unsigned long srcu_gp_seq;	/* GP seq # for callback tagging. */
-	struct rcu_segcblist srcu_cblist;
-					/* Pending SRCU callbacks. */
+	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
+	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -47,7 +46,7 @@ void srcu_drive_gp(struct work_struct *wp);
 #define __SRCU_STRUCT_INIT(name)					\
 {									\
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
-	.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),	\
+	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
diff --git a/init/Kconfig b/init/Kconfig
index d928a3724af9..a2cfde19e8b8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -573,7 +573,7 @@ config RCU_STALL_COMMON
 	  making these warnings mandatory for the tree variants.
 
 config RCU_NEED_SEGCBLIST
-	def_bool ( TREE_RCU || PREEMPT_RCU || TINY_SRCU || TREE_SRCU )
+	def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU )
 
 config CONTEXT_TRACKING
        bool
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 74d9fc205313..6a1e85bd2eac 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -398,7 +398,7 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
 	if (test_type != SRCU_FLAVOR)
 		return;
 	*flags = 0;
-	*completed = sp->srcu_gp_seq;
+	*completed = sp->srcu_idx;
 	*gpnum = *completed;
 }
 
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 988543721d5d..1a1c1047d2ed 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -38,8 +38,8 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->srcu_lock_nesting[0] = 0;
 	sp->srcu_lock_nesting[1] = 0;
 	init_swait_queue_head(&sp->srcu_wq);
-	sp->srcu_gp_seq = 0;
-	rcu_segcblist_init(&sp->srcu_cblist);
+	sp->srcu_cb_head = NULL;
+	sp->srcu_cb_tail = &sp->srcu_cb_head;
 	sp->srcu_gp_running = false;
 	sp->srcu_gp_waiting = false;
 	sp->srcu_idx = 0;
@@ -88,10 +88,10 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 {
 	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
 	flush_work(&sp->srcu_work);
-	WARN_ON(rcu_seq_state(sp->srcu_gp_seq));
 	WARN_ON(sp->srcu_gp_running);
 	WARN_ON(sp->srcu_gp_waiting);
-	WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist));
+	WARN_ON(sp->srcu_cb_head);
+	WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
@@ -117,52 +117,44 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 void srcu_drive_gp(struct work_struct *wp)
 {
 	int idx;
-	struct rcu_cblist ready_cbs;
-	struct srcu_struct *sp;
+	struct rcu_head *lh;
 	struct rcu_head *rhp;
+	struct srcu_struct *sp;
 
 	sp = container_of(wp, struct srcu_struct, srcu_work);
-	if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist))
+	if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head))
 		return; /* Already running or nothing to do. */
 
-	/* Tag recently arrived callbacks and wait for readers. */
+	/* Remove recently arrived callbacks and wait for readers. */
 	WRITE_ONCE(sp->srcu_gp_running, true);
-	rcu_segcblist_accelerate(&sp->srcu_cblist,
-				 rcu_seq_snap(&sp->srcu_gp_seq));
-	rcu_seq_start(&sp->srcu_gp_seq);
+	local_irq_disable();
+	lh = sp->srcu_cb_head;
+	sp->srcu_cb_head = NULL;
+	sp->srcu_cb_tail = &sp->srcu_cb_head;
+	local_irq_enable();
 	idx = sp->srcu_idx;
 	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
 	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
 	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
 	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
-	rcu_seq_end(&sp->srcu_gp_seq);
-
-	/* Update callback list based on GP, and invoke ready callbacks. */
-	rcu_segcblist_advance(&sp->srcu_cblist,
-			      rcu_seq_current(&sp->srcu_gp_seq));
-	if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
-		rcu_cblist_init(&ready_cbs);
-		local_irq_disable();
-		rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
-		local_irq_enable();
-		rhp = rcu_cblist_dequeue(&ready_cbs);
-		for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
-			local_bh_disable();
-			rhp->func(rhp);
-			local_bh_enable();
-		}
-		local_irq_disable();
-		rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
-		local_irq_enable();
+
+	/* Invoke the callbacks we removed above. */
+	while (lh) {
+		rhp = lh;
+		lh = lh->next;
+		local_bh_disable();
+		rhp->func(rhp);
+		local_bh_enable();
 	}
-	WRITE_ONCE(sp->srcu_gp_running, false);
 
 	/*
-	 * If more callbacks, reschedule ourselves.  This can race with
-	 * a call_srcu() at interrupt level, but the ->srcu_gp_running
-	 * checks will straighten that out.
+	 * Enable rescheduling, and if there are more callbacks,
+	 * reschedule ourselves.  This can race with a call_srcu()
+	 * at interrupt level, but the ->srcu_gp_running checks will
+	 * straighten that out.
 	 */
-	if (!rcu_segcblist_empty(&sp->srcu_cblist))
+	WRITE_ONCE(sp->srcu_gp_running, false);
+	if (READ_ONCE(sp->srcu_cb_head))
 		schedule_work(&sp->srcu_work);
 }
 EXPORT_SYMBOL_GPL(srcu_drive_gp);
@@ -171,14 +163,16 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp);
  * Enqueue an SRCU callback on the specified srcu_struct structure,
  * initiating grace-period processing if it is not already running.
  */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	       rcu_callback_t func)
 {
 	unsigned long flags;
 
-	head->func = func;
+	rhp->func = func;
+	rhp->next = NULL;
 	local_irq_save(flags);
-	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
+	*sp->srcu_cb_tail = rhp;
+	sp->srcu_cb_tail = &rhp->next;
 	local_irq_restore(flags);
 	if (!READ_ONCE(sp->srcu_gp_running))
 		schedule_work(&sp->srcu_work);
-- 
cgit v1.2.3


From 0cb5133ab573d1c471cfcfa632b0260a5aad5303 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 31 May 2017 09:26:07 -0700
Subject: bcm47xx: Fix build regression

Commit 0bc2d534708b ("rcu: Refactor #includes from include/linux/rcupdate.h")
caused a build regression in an MTD partition driver:

In file included from drivers/mtd/bcm47xxpart.c:12:0:
include/linux/bcm47xx_nvram.h: In function 'bcm47xx_nvram_init_from_mem':
include/linux/bcm47xx_nvram.h:27:10: error: 'ENOTSUPP' undeclared (first use in this function)

The rcupdate.h file has no particular need for linux/errno.h, so this
commit includes linux/errno.h into bcm47xx_nvram.h.

Fixes: 0bc2d534708b ("rcu: Refactor #includes from include/linux/rcupdate.h")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/bcm47xx_nvram.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index 2793652fbf66..a414a2b53e41 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -8,6 +8,7 @@
 #ifndef __BCM47XX_NVRAM_H
 #define __BCM47XX_NVRAM_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
-- 
cgit v1.2.3


From 5f192ab027a5d865be24c817005d42eb96314dc2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 May 2017 15:24:25 -0700
Subject: rcu: Refactor #includes from include/linux/rcupdate.h

The list of #includes from include/linux/rcupdate.h has grown quite
a bit, so it is time to trim it.  This commit moves the #include
of include/linux/ktime.h to include/linux/rcutiny.h, along with the
Tiny-RCU-only function that was the only thing needing ktimem.h.  It then
reconstructs the files included into include/linux/ktime.h based on what
is actually needed, with significant help from the 0day Test Robot.

This single change reduces the .i file footprint from rcupdate.h from
9018 lines to 7101 lines.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 22 ++++++----------------
 include/linux/rcutiny.h  |  8 +++++++-
 2 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 564096e6e141..ee40d7eba741 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -34,16 +34,14 @@
 #define __LINUX_RCUPDATE_H
 
 #include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/spinlock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/seqlock.h>
-#include <linux/lockdep.h>
-#include <linux/bug.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
+#include <linux/atomic.h>
 #include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
+#include <asm/processor.h>
+#include <linux/cpumask.h>
 
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
@@ -856,14 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-#ifdef CONFIG_TINY_RCU
-static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
-{
-	*nextevt = KTIME_MAX;
-	return 0;
-}
-#endif /* #ifdef CONFIG_TINY_RCU */
-
 /* Only for use by adaptive-ticks code. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 bool rcu_sys_is_idle(void);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 2bfe48bc0e3b..c869785f16bd 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -25,7 +25,7 @@
 #ifndef __LINUX_TINY_H
 #define __LINUX_TINY_H
 
-#include <linux/cache.h>
+#include <linux/ktime.h>
 
 struct rcu_dynticks;
 static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
@@ -96,6 +96,12 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 		rcu_note_voluntary_context_switch_lite(current); \
 	} while (0)
 
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+{
+	*nextevt = KTIME_MAX;
+	return 0;
+}
+
 /*
  * Take advantage of the fact that there is only one CPU, which
  * allows us to ignore virtualization-based context switches.
-- 
cgit v1.2.3


From a3883df3935e10caa8297719d85fa8eaff7cabbd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 9 May 2017 15:00:14 -0700
Subject: srcu: Use rnp->lock wrappers to replace explicit memory barriers

This commit uses TREE RCU's rnp->lock wrappers to replace a few explicit
memory barriers.  This change also has the advantage of making SRCU's
memory-ordering properties be implemented in roughly the same way as they
are in Tree RCU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcutree.h |  8 ++---
 kernel/rcu/srcutree.c    | 91 +++++++++++++++++++++++-------------------------
 2 files changed, 47 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 24e949bda12a..42973f787e7e 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -40,7 +40,7 @@ struct srcu_data {
 	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
 
 	/* Update-side state. */
-	spinlock_t lock ____cacheline_internodealigned_in_smp;
+	raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
 	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
@@ -58,7 +58,7 @@ struct srcu_data {
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
 struct srcu_node {
-	spinlock_t lock;
+	raw_spinlock_t __private lock;
 	unsigned long srcu_have_cbs[4];		/* GP seq for children */
 						/*  having CBs, but only */
 						/*  is > ->srcu_gq_seq. */
@@ -78,7 +78,7 @@ struct srcu_struct {
 	struct srcu_node *level[RCU_NUM_LVLS + 1];
 						/* First node at each level. */
 	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
-	spinlock_t gp_lock;			/* protect ->srcu_cblist */
+	raw_spinlock_t __private lock;		/* Protect counters */
 	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
 	unsigned int srcu_idx;			/* Current rdr array element. */
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
@@ -109,7 +109,7 @@ void process_srcu(struct work_struct *work);
 #define __SRCU_STRUCT_INIT(name)					\
 	{								\
 		.sda = &name##_srcu_data,				\
-		.gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock),		\
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
 		.srcu_gp_seq_needed = 0 - 1,				\
 		__SRCU_DEP_MAP_INIT(name)				\
 	}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 66a998f9c5a7..d0ca524bf042 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -76,7 +76,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 
 	/* Each pass through this loop initializes one srcu_node structure. */
 	rcu_for_each_node_breadth_first(sp, snp) {
-		spin_lock_init(&snp->lock);
+		raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
 		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
 			     ARRAY_SIZE(snp->srcu_data_have_cbs));
 		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -110,7 +110,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 	snp_first = sp->level[level];
 	for_each_possible_cpu(cpu) {
 		sdp = per_cpu_ptr(sp->sda, cpu);
-		spin_lock_init(&sdp->lock);
+		raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
@@ -169,7 +169,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
 	/* Don't re-initialize a lock while it is held. */
 	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
 	lockdep_init_map(&sp->dep_map, name, key, 0);
-	spin_lock_init(&sp->gp_lock);
+	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
 	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(__init_srcu_struct);
@@ -186,7 +186,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
  */
 int init_srcu_struct(struct srcu_struct *sp)
 {
-	spin_lock_init(&sp->gp_lock);
+	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
 	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(init_srcu_struct);
@@ -197,7 +197,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
  * First-use initialization of statically allocated srcu_struct
  * structure.  Wiring up the combining tree is more than can be
  * done with compile-time initialization, so this check is added
- * to each update-side SRCU primitive.  Use ->gp_lock, which -is-
+ * to each update-side SRCU primitive.  Use sp->lock, which -is-
  * compile-time initialized, to resolve races involving multiple
  * CPUs trying to garner first-use privileges.
  */
@@ -209,13 +209,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
-	spin_lock_irqsave(&sp->gp_lock, flags);
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
 	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
-		spin_unlock_irqrestore(&sp->gp_lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 		return;
 	}
 	init_srcu_struct_fields(sp, true);
-	spin_unlock_irqrestore(&sp->gp_lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -411,8 +411,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
 	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
 	int state;
 
-	RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock),
-			 "Invoked srcu_gp_start() without ->gp_lock!");
+	lockdep_assert_held(&sp->lock);
 	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
@@ -513,7 +512,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	mutex_lock(&sp->srcu_cb_mutex);
 
 	/* End the current grace period. */
-	spin_lock_irq(&sp->gp_lock);
+	raw_spin_lock_irq_rcu_node(sp);
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
 	cbdelay = srcu_get_delay(sp);
@@ -522,7 +521,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
 		sp->srcu_gp_seq_needed_exp = gpseq;
-	spin_unlock_irq(&sp->gp_lock);
+	raw_spin_unlock_irq_rcu_node(sp);
 	mutex_unlock(&sp->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
 
@@ -530,7 +529,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
 	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
 	rcu_for_each_node_breadth_first(sp, snp) {
-		spin_lock_irq(&snp->lock);
+		raw_spin_lock_irq_rcu_node(snp);
 		cbs = false;
 		if (snp >= sp->level[rcu_num_lvls - 1])
 			cbs = snp->srcu_have_cbs[idx] == gpseq;
@@ -540,21 +539,19 @@ static void srcu_gp_end(struct srcu_struct *sp)
 			snp->srcu_gp_seq_needed_exp = gpseq;
 		mask = snp->srcu_data_have_cbs[idx];
 		snp->srcu_data_have_cbs[idx] = 0;
-		spin_unlock_irq(&snp->lock);
-		if (cbs) {
-			smp_mb(); /* GP end before CB invocation. */
+		raw_spin_unlock_irq_rcu_node(snp);
+		if (cbs)
 			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
-		}
 
 		/* Occasionally prevent srcu_data counter wrap. */
 		if (!(gpseq & counter_wrap_check))
 			for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
 				sdp = per_cpu_ptr(sp->sda, cpu);
-				spin_lock_irqsave(&sdp->lock, flags);
+				raw_spin_lock_irqsave_rcu_node(sdp, flags);
 				if (ULONG_CMP_GE(gpseq,
 						 sdp->srcu_gp_seq_needed + 100))
 					sdp->srcu_gp_seq_needed = gpseq;
-				spin_unlock_irqrestore(&sdp->lock, flags);
+				raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
 			}
 	}
 
@@ -562,17 +559,17 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	mutex_unlock(&sp->srcu_cb_mutex);
 
 	/* Start a new grace period if needed. */
-	spin_lock_irq(&sp->gp_lock);
+	raw_spin_lock_irq_rcu_node(sp);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (!rcu_seq_state(gpseq) &&
 	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
 		srcu_gp_start(sp);
-		spin_unlock_irq(&sp->gp_lock);
+		raw_spin_unlock_irq_rcu_node(sp);
 		/* Throttle expedited grace periods: Should be rare! */
 		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
 				    ? 0 : SRCU_INTERVAL);
 	} else {
-		spin_unlock_irq(&sp->gp_lock);
+		raw_spin_unlock_irq_rcu_node(sp);
 	}
 }
 
@@ -592,18 +589,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
 		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
 		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
 			return;
-		spin_lock_irqsave(&snp->lock, flags);
+		raw_spin_lock_irqsave_rcu_node(snp, flags);
 		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
-			spin_unlock_irqrestore(&snp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 			return;
 		}
 		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-		spin_unlock_irqrestore(&snp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 	}
-	spin_lock_irqsave(&sp->gp_lock, flags);
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
 	if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
 		sp->srcu_gp_seq_needed_exp = s;
-	spin_unlock_irqrestore(&sp->gp_lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -625,14 +622,13 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 	for (; snp != NULL; snp = snp->srcu_parent) {
 		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
 			return; /* GP already done and CBs recorded. */
-		spin_lock_irqsave(&snp->lock, flags);
+		raw_spin_lock_irqsave_rcu_node(snp, flags);
 		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
 			snp_seq = snp->srcu_have_cbs[idx];
 			if (snp == sdp->mynode && snp_seq == s)
 				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-			spin_unlock_irqrestore(&snp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 			if (snp == sdp->mynode && snp_seq != s) {
-				smp_mb(); /* CBs after GP! */
 				srcu_schedule_cbs_sdp(sdp, do_norm
 							   ? SRCU_INTERVAL
 							   : 0);
@@ -647,11 +643,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
 		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
 			snp->srcu_gp_seq_needed_exp = s;
-		spin_unlock_irqrestore(&snp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 	}
 
 	/* Top of tree, must ensure the grace period will be started. */
-	spin_lock_irqsave(&sp->gp_lock, flags);
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
 		/*
 		 * Record need for grace period s.  Pair with load
@@ -670,7 +666,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 		queue_delayed_work(system_power_efficient_wq, &sp->work,
 				   srcu_get_delay(sp));
 	}
-	spin_unlock_irqrestore(&sp->gp_lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -833,7 +829,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	rhp->func = func;
 	local_irq_save(flags);
 	sdp = this_cpu_ptr(sp->sda);
-	spin_lock(&sdp->lock);
+	raw_spin_lock_rcu_node(sdp);
 	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
@@ -847,7 +843,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 		sdp->srcu_gp_seq_needed_exp = s;
 		needexp = true;
 	}
-	spin_unlock_irqrestore(&sdp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
 	if (needgp)
 		srcu_funnel_gp_start(sp, sdp, s, do_norm);
 	else if (needexp)
@@ -1018,7 +1014,7 @@ void srcu_barrier(struct srcu_struct *sp)
 	 */
 	for_each_possible_cpu(cpu) {
 		sdp = per_cpu_ptr(sp->sda, cpu);
-		spin_lock_irq(&sdp->lock);
+		raw_spin_lock_irq_rcu_node(sdp);
 		atomic_inc(&sp->srcu_barrier_cpu_cnt);
 		sdp->srcu_barrier_head.func = srcu_barrier_cb;
 		debug_rcu_head_queue(&sdp->srcu_barrier_head);
@@ -1027,7 +1023,7 @@ void srcu_barrier(struct srcu_struct *sp)
 			debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
 			atomic_dec(&sp->srcu_barrier_cpu_cnt);
 		}
-		spin_unlock_irq(&sdp->lock);
+		raw_spin_unlock_irq_rcu_node(sdp);
 	}
 
 	/* Remove the initial count, at which point reaching zero can happen. */
@@ -1076,17 +1072,17 @@ static void srcu_advance_state(struct srcu_struct *sp)
 	 */
 	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
 	if (idx == SRCU_STATE_IDLE) {
-		spin_lock_irq(&sp->gp_lock);
+		raw_spin_lock_irq_rcu_node(sp);
 		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
 			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
-			spin_unlock_irq(&sp->gp_lock);
+			raw_spin_unlock_irq_rcu_node(sp);
 			mutex_unlock(&sp->srcu_gp_mutex);
 			return;
 		}
 		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 		if (idx == SRCU_STATE_IDLE)
 			srcu_gp_start(sp);
-		spin_unlock_irq(&sp->gp_lock);
+		raw_spin_unlock_irq_rcu_node(sp);
 		if (idx != SRCU_STATE_IDLE) {
 			mutex_unlock(&sp->srcu_gp_mutex);
 			return; /* Someone else started the grace period. */
@@ -1135,20 +1131,19 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	sdp = container_of(work, struct srcu_data, work.work);
 	sp = sdp->sp;
 	rcu_cblist_init(&ready_cbs);
-	spin_lock_irq(&sdp->lock);
-	smp_mb(); /* Old grace periods before callback invocation! */
+	raw_spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
 	if (sdp->srcu_cblist_invoking ||
 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
-		spin_unlock_irq(&sdp->lock);
+		raw_spin_unlock_irq_rcu_node(sdp);
 		return;  /* Someone else on the job or nothing to do. */
 	}
 
 	/* We are on the job!  Extract and invoke ready callbacks. */
 	sdp->srcu_cblist_invoking = true;
 	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
-	spin_unlock_irq(&sdp->lock);
+	raw_spin_unlock_irq_rcu_node(sdp);
 	rhp = rcu_cblist_dequeue(&ready_cbs);
 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
 		debug_rcu_head_unqueue(rhp);
@@ -1161,13 +1156,13 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	 * Update counts, accelerate new callbacks, and if needed,
 	 * schedule another round of callback invocation.
 	 */
-	spin_lock_irq(&sdp->lock);
+	raw_spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
 				       rcu_seq_snap(&sp->srcu_gp_seq));
 	sdp->srcu_cblist_invoking = false;
 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
-	spin_unlock_irq(&sdp->lock);
+	raw_spin_unlock_irq_rcu_node(sdp);
 	if (more)
 		srcu_schedule_cbs_sdp(sdp, 0);
 }
@@ -1180,7 +1175,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
 	bool pushgp = true;
 
-	spin_lock_irq(&sp->gp_lock);
+	raw_spin_lock_irq_rcu_node(sp);
 	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
 		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
 			/* All requests fulfilled, time to go idle. */
@@ -1190,7 +1185,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 		/* Outstanding request and no GP.  Start one. */
 		srcu_gp_start(sp);
 	}
-	spin_unlock_irq(&sp->gp_lock);
+	raw_spin_unlock_irq_rcu_node(sp);
 
 	if (pushgp)
 		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
-- 
cgit v1.2.3


From fe5ac724d81a3c7803e60c2232718f212f3f38d4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 11 May 2017 11:26:22 -0700
Subject: rcu: Remove nohz_full full-system-idle state machine

The NO_HZ_FULL_SYSIDLE full-system-idle capability was added in 2013
by commit 0edd1b1784cb ("nohz_full: Add full-system-idle state machine"),
but has not been used.  This commit therefore removes it.

If it turns out to be needed later, this commit can always be reverted.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../RCU/Design/Requirements/Requirements.html      |   6 +-
 include/linux/rcupdate.h                           |   9 -
 kernel/rcu/tree.c                                  |  41 +-
 kernel/rcu/tree.h                                  |  16 -
 kernel/rcu/tree_plugin.h                           | 429 ---------------------
 kernel/time/Kconfig                                |  50 ---
 .../selftests/rcutorture/configs/rcu/TREE07        |   1 -
 .../testing/selftests/rcutorture/doc/TINY_RCU.txt  |   1 -
 .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt  |   7 +-
 9 files changed, 9 insertions(+), 551 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index cb614f2a69c2..8c94fc1d1c84 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2520,11 +2520,7 @@ It is similarly socially unacceptable to interrupt an
 <tt>nohz_full</tt> CPU running in userspace.
 RCU must therefore track <tt>nohz_full</tt> userspace
 execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
+RCU must therefore be able to sample state at two points in
 time, and be able to determine whether or not some other CPU spent
 any time idle and/or executing in userspace.
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ee40d7eba741..7f24a5e673f5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -854,15 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-/* Only for use by adaptive-ticks code. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-bool rcu_sys_is_idle(void);
-void rcu_sysidle_force_exit(void);
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-static inline bool rcu_sys_is_idle(void) { return false; }
-static inline void rcu_sysidle_force_exit(void) { }
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
 
 /*
  * Place this after a lock-acquisition primitive to guarantee that
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 187ac3f41526..51d4c3acf32d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -270,10 +270,6 @@ void rcu_bh_qs(void)
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-	.dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
 /*
@@ -546,10 +542,7 @@ module_param(jiffies_till_sched_qs, ulong, 0644);
 
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp,
-			 int (*f)(struct rcu_data *rsp, bool *isidle,
-				  unsigned long *maxj),
-			 bool *isidle, unsigned long *maxj);
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
 
@@ -854,7 +847,6 @@ void rcu_idle_enter(void)
 
 	local_irq_save(flags);
 	rcu_eqs_enter(false);
-	rcu_sysidle_enter(0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -904,7 +896,6 @@ void rcu_irq_exit(void)
 		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
 		rdtp->dynticks_nesting--;
 	}
-	rcu_sysidle_enter(1);
 }
 
 /*
@@ -986,7 +977,6 @@ void rcu_idle_exit(void)
 
 	local_irq_save(flags);
 	rcu_eqs_exit(false);
-	rcu_sysidle_exit(0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -1038,7 +1028,6 @@ void rcu_irq_enter(void)
 		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
 	else
 		rcu_eqs_exit_common(oldval, true);
-	rcu_sysidle_exit(1);
 }
 
 /*
@@ -1217,11 +1206,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  * credit them with an implicit quiescent state.  Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
  */
-static int dyntick_save_progress_counter(struct rcu_data *rdp,
-					 bool *isidle, unsigned long *maxj)
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
 	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
-	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
@@ -1238,8 +1225,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
  * idle state since the last call to dyntick_save_progress_counter()
  * for this same CPU, or by virtue of having been offline.
  */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
-				    bool *isidle, unsigned long *maxj)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
 	unsigned long jtsq;
 	bool *rnhqp;
@@ -2105,25 +2091,16 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
  */
 static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
 {
-	bool isidle = false;
-	unsigned long maxj;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	WRITE_ONCE(rsp->gp_activity, jiffies);
 	rsp->n_force_qs++;
 	if (first_time) {
 		/* Collect dyntick-idle snapshots. */
-		if (is_sysidle_rcu_state(rsp)) {
-			isidle = true;
-			maxj = jiffies - ULONG_MAX / 4;
-		}
-		force_qs_rnp(rsp, dyntick_save_progress_counter,
-			     &isidle, &maxj);
-		rcu_sysidle_report_gp(rsp, isidle, maxj);
+		force_qs_rnp(rsp, dyntick_save_progress_counter);
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
-		isidle = true;
-		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
+		force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
 	}
 	/* Clear flag to prevent immediate re-entry. */
 	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
@@ -2895,10 +2872,7 @@ void rcu_check_callbacks(int user)
  *
  * The caller must have suppressed start of new grace periods.
  */
-static void force_qs_rnp(struct rcu_state *rsp,
-			 int (*f)(struct rcu_data *rsp, bool *isidle,
-				  unsigned long *maxj),
-			 bool *isidle, unsigned long *maxj)
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
 {
 	int cpu;
 	unsigned long flags;
@@ -2937,7 +2911,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		for_each_leaf_node_possible_cpu(rnp, cpu) {
 			unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
 			if ((rnp->qsmask & bit) != 0) {
-				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+				if (f(per_cpu_ptr(rsp->rda, cpu)))
 					mask |= bit;
 			}
 		}
@@ -3793,7 +3767,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	    !init_nocb_callback_list(rdp))
 		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	rcu_dynticks_eqs_online();
 	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index baa0bac8da2a..2c112bb11aa8 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -45,14 +45,6 @@ struct rcu_dynticks {
 	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	long long dynticks_idle_nesting;
-				    /* irq/process nesting level from idle. */
-	atomic_t dynticks_idle;	    /* Even value for idle, else odd. */
-				    /*  "Idle" excludes userspace execution. */
-	unsigned long dynticks_idle_jiffies;
-				    /* End of last non-NMI non-idle period. */
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	bool all_lazy;		    /* Are all CPU's CBs lazy? */
 	unsigned long nonlazy_posted;
@@ -529,15 +521,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
-static void rcu_sysidle_enter(int irq);
-static void rcu_sysidle_exit(int irq);
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj);
-static bool is_sysidle_rcu_state(struct rcu_state *rsp);
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj);
 static void rcu_bind_gp_kthread(void);
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
 static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
 static void rcu_dynticks_task_enter(void);
 static void rcu_dynticks_task_exit(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0553d9fed7d7..f524d967f7b6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2563,429 +2563,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
 }
 
-
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-
-static int full_sysidle_state;		/* Current system-idle state. */
-#define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
-#define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
-#define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
-#define RCU_SYSIDLE_FULL	3	/* All CPUs idle, ready for sysidle. */
-#define RCU_SYSIDLE_FULL_NOTED	4	/* Actually entered sysidle state. */
-
-/*
- * Invoked to note exit from irq or task transition to idle.  Note that
- * usermode execution does -not- count as idle here!  After all, we want
- * to detect full-system idle states, not RCU quiescent states and grace
- * periods.  The caller must have disabled interrupts.
- */
-static void rcu_sysidle_enter(int irq)
-{
-	unsigned long j;
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_enter() invoked with irqs enabled!!!");
-
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/* Adjust nesting, check for fully idle. */
-	if (irq) {
-		rdtp->dynticks_idle_nesting--;
-		WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-		if (rdtp->dynticks_idle_nesting != 0)
-			return;  /* Still not fully idle. */
-	} else {
-		if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
-		    DYNTICK_TASK_NEST_VALUE) {
-			rdtp->dynticks_idle_nesting = 0;
-		} else {
-			rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
-			WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-			return;  /* Still not fully idle. */
-		}
-	}
-
-	/* Record start of fully idle period. */
-	j = jiffies;
-	WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
-	smp_mb__before_atomic();
-	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic();
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
-}
-
-/*
- * Unconditionally force exit from full system-idle state.  This is
- * invoked when a normal CPU exits idle, but must be called separately
- * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
- * is that the timekeeping CPU is permitted to take scheduling-clock
- * interrupts while the system is in system-idle state, and of course
- * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
- * interrupt from any other type of interrupt.
- */
-void rcu_sysidle_force_exit(void)
-{
-	int oldstate = READ_ONCE(full_sysidle_state);
-	int newoldstate;
-
-	/*
-	 * Each pass through the following loop attempts to exit full
-	 * system-idle state.  If contention proves to be a problem,
-	 * a trylock-based contention tree could be used here.
-	 */
-	while (oldstate > RCU_SYSIDLE_SHORT) {
-		newoldstate = cmpxchg(&full_sysidle_state,
-				      oldstate, RCU_SYSIDLE_NOT);
-		if (oldstate == newoldstate &&
-		    oldstate == RCU_SYSIDLE_FULL_NOTED) {
-			rcu_kick_nohz_cpu(tick_do_timer_cpu);
-			return; /* We cleared it, done! */
-		}
-		oldstate = newoldstate;
-	}
-	smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
-}
-
-/*
- * Invoked to note entry to irq or task transition from idle.  Note that
- * usermode execution does -not- count as idle here!  The caller must
- * have disabled interrupts.
- */
-static void rcu_sysidle_exit(int irq)
-{
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_exit() invoked with irqs enabled!!!");
-
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/* Adjust nesting, check for already non-idle. */
-	if (irq) {
-		rdtp->dynticks_idle_nesting++;
-		WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-		if (rdtp->dynticks_idle_nesting != 1)
-			return; /* Already non-idle. */
-	} else {
-		/*
-		 * Allow for irq misnesting.  Yes, it really is possible
-		 * to enter an irq handler then never leave it, and maybe
-		 * also vice versa.  Handle both possibilities.
-		 */
-		if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
-			rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
-			WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-			return; /* Already non-idle. */
-		} else {
-			rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
-		}
-	}
-
-	/* Record end of idle period. */
-	smp_mb__before_atomic();
-	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic();
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
-
-	/*
-	 * If we are the timekeeping CPU, we are permitted to be non-idle
-	 * during a system-idle state.  This must be the case, because
-	 * the timekeeping CPU has to take scheduling-clock interrupts
-	 * during the time that the system is transitioning to full
-	 * system-idle state.  This means that the timekeeping CPU must
-	 * invoke rcu_sysidle_force_exit() directly if it does anything
-	 * more than take a scheduling-clock interrupt.
-	 */
-	if (smp_processor_id() == tick_do_timer_cpu)
-		return;
-
-	/* Update system-idle state: We are clearly no longer fully idle! */
-	rcu_sysidle_force_exit();
-}
-
-/*
- * Check to see if the current CPU is idle.  Note that usermode execution
- * does not count as idle.  The caller must have disabled interrupts,
- * and must be running on tick_do_timer_cpu.
- */
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj)
-{
-	int cur;
-	unsigned long j;
-	struct rcu_dynticks *rdtp = rdp->dynticks;
-
-	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_check_cpu() invoked with irqs enabled!!!");
-
-	/* If there are no nohz_full= CPUs, don't check system-wide idleness. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/*
-	 * If some other CPU has already reported non-idle, if this is
-	 * not the flavor of RCU that tracks sysidle state, or if this
-	 * is an offline or the timekeeping CPU, nothing to do.
-	 */
-	if (!*isidle || rdp->rsp != rcu_state_p ||
-	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
-		return;
-	/* Verify affinity of current kthread. */
-	WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
-
-	/* Pick up current idle and NMI-nesting counter and check. */
-	cur = atomic_read(&rdtp->dynticks_idle);
-	if (cur & 0x1) {
-		*isidle = false; /* We are not idle! */
-		return;
-	}
-	smp_mb(); /* Read counters before timestamps. */
-
-	/* Pick up timestamps. */
-	j = READ_ONCE(rdtp->dynticks_idle_jiffies);
-	/* If this CPU entered idle more recently, update maxj timestamp. */
-	if (ULONG_CMP_LT(*maxj, j))
-		*maxj = j;
-}
-
-/*
- * Is this the flavor of RCU that is handling full-system idle?
- */
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-	return rsp == rcu_state_p;
-}
-
-/*
- * Return a delay in jiffies based on the number of CPUs, rcu_node
- * leaf fanout, and jiffies tick rate.  The idea is to allow larger
- * systems more time to transition to full-idle state in order to
- * avoid the cache thrashing that otherwise occur on the state variable.
- * Really small systems (less than a couple of tens of CPUs) should
- * instead use a single global atomically incremented counter, and later
- * versions of this will automatically reconfigure themselves accordingly.
- */
-static unsigned long rcu_sysidle_delay(void)
-{
-	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-		return 0;
-	return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
-}
-
-/*
- * Advance the full-system-idle state.  This is invoked when all of
- * the non-timekeeping CPUs are idle.
- */
-static void rcu_sysidle(unsigned long j)
-{
-	/* Check the current state. */
-	switch (READ_ONCE(full_sysidle_state)) {
-	case RCU_SYSIDLE_NOT:
-
-		/* First time all are idle, so note a short idle period. */
-		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
-		break;
-
-	case RCU_SYSIDLE_SHORT:
-
-		/*
-		 * Idle for a bit, time to advance to next state?
-		 * cmpxchg failure means race with non-idle, let them win.
-		 */
-		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-			(void)cmpxchg(&full_sysidle_state,
-				      RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
-		break;
-
-	case RCU_SYSIDLE_LONG:
-
-		/*
-		 * Do an additional check pass before advancing to full.
-		 * cmpxchg failure means race with non-idle, let them win.
-		 */
-		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-			(void)cmpxchg(&full_sysidle_state,
-				      RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
-		break;
-
-	default:
-		break;
-	}
-}
-
-/*
- * Found a non-idle non-timekeeping CPU, so kick the system-idle state
- * back to the beginning.
- */
-static void rcu_sysidle_cancel(void)
-{
-	smp_mb();
-	if (full_sysidle_state > RCU_SYSIDLE_SHORT)
-		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
-}
-
-/*
- * Update the sysidle state based on the results of a force-quiescent-state
- * scan of the CPUs' dyntick-idle state.
- */
-static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
-			       unsigned long maxj, bool gpkt)
-{
-	if (rsp != rcu_state_p)
-		return;  /* Wrong flavor, ignore. */
-	if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-		return;  /* Running state machine from timekeeping CPU. */
-	if (isidle)
-		rcu_sysidle(maxj);    /* More idle! */
-	else
-		rcu_sysidle_cancel(); /* Idle is over. */
-}
-
-/*
- * Wrapper for rcu_sysidle_report() when called from the grace-period
- * kthread's context.
- */
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj)
-{
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	rcu_sysidle_report(rsp, isidle, maxj, true);
-}
-
-/* Callback and function for forcing an RCU grace period. */
-struct rcu_sysidle_head {
-	struct rcu_head rh;
-	int inuse;
-};
-
-static void rcu_sysidle_cb(struct rcu_head *rhp)
-{
-	struct rcu_sysidle_head *rshp;
-
-	/*
-	 * The following memory barrier is needed to replace the
-	 * memory barriers that would normally be in the memory
-	 * allocator.
-	 */
-	smp_mb();  /* grace period precedes setting inuse. */
-
-	rshp = container_of(rhp, struct rcu_sysidle_head, rh);
-	WRITE_ONCE(rshp->inuse, 0);
-}
-
-/*
- * Check to see if the system is fully idle, other than the timekeeping CPU.
- * The caller must have disabled interrupts.  This is not intended to be
- * called unless tick_nohz_full_enabled().
- */
-bool rcu_sys_is_idle(void)
-{
-	static struct rcu_sysidle_head rsh;
-	int rss = READ_ONCE(full_sysidle_state);
-
-	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sys_is_idle() invoked with irqs enabled!!!");
-
-	if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
-		return false;
-
-	/* Handle small-system case by doing a full scan of CPUs. */
-	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
-		int oldrss = rss - 1;
-
-		/*
-		 * One pass to advance to each state up to _FULL.
-		 * Give up if any pass fails to advance the state.
-		 */
-		while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
-			int cpu;
-			bool isidle = true;
-			unsigned long maxj = jiffies - ULONG_MAX / 4;
-			struct rcu_data *rdp;
-
-			/* Scan all the CPUs looking for nonidle CPUs. */
-			for_each_possible_cpu(cpu) {
-				rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
-				rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
-				if (!isidle)
-					break;
-			}
-			rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
-			oldrss = rss;
-			rss = READ_ONCE(full_sysidle_state);
-		}
-	}
-
-	/* If this is the first observation of an idle period, record it. */
-	if (rss == RCU_SYSIDLE_FULL) {
-		rss = cmpxchg(&full_sysidle_state,
-			      RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
-		return rss == RCU_SYSIDLE_FULL;
-	}
-
-	smp_mb(); /* ensure rss load happens before later caller actions. */
-
-	/* If already fully idle, tell the caller (in case of races). */
-	if (rss == RCU_SYSIDLE_FULL_NOTED)
-		return true;
-
-	/*
-	 * If we aren't there yet, and a grace period is not in flight,
-	 * initiate a grace period.  Either way, tell the caller that
-	 * we are not there yet.  We use an xchg() rather than an assignment
-	 * to make up for the memory barriers that would otherwise be
-	 * provided by the memory allocator.
-	 */
-	if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
-	    !rcu_gp_in_progress(rcu_state_p) &&
-	    !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
-		call_rcu(&rsh.rh, rcu_sysidle_cb);
-	return false;
-}
-
-/*
- * Initialize dynticks sysidle state for CPUs coming online.
- */
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-	rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
-}
-
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static void rcu_sysidle_enter(int irq)
-{
-}
-
-static void rcu_sysidle_exit(int irq)
-{
-}
-
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj)
-{
-}
-
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-	return false;
-}
-
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj)
-{
-}
-
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
 /*
  * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
  * grace-period kthread will do force_quiescent_state() processing?
@@ -3016,13 +2593,7 @@ static void rcu_bind_gp_kthread(void)
 
 	if (!tick_nohz_full_enabled())
 		return;
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	cpu = tick_do_timer_cpu;
-	if (cpu >= 0 && cpu < nr_cpu_ids)
-		set_cpus_allowed_ptr(current, cpumask_of(cpu));
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 	housekeeping_affine(current);
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
 
 /* Record the current task on dyntick-idle entry. */
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 4008d9f95dd7..ac09bc29eb08 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -126,56 +126,6 @@ config NO_HZ_FULL_ALL
 	 Note the boot CPU will still be kept outside the range to
 	 handle the timekeeping duty.
 
-config NO_HZ_FULL_SYSIDLE
-	bool "Detect full-system idle state for full dynticks system"
-	depends on NO_HZ_FULL
-	default n
-	help
-	 At least one CPU must keep the scheduling-clock tick running for
-	 timekeeping purposes whenever there is a non-idle CPU, where
-	 "non-idle" also includes dynticks CPUs as long as they are
-	 running non-idle tasks.  Because the underlying adaptive-tick
-	 support cannot distinguish between all CPUs being idle and
-	 all CPUs each running a single task in dynticks mode, the
-	 underlying support simply ensures that there is always a CPU
-	 handling the scheduling-clock tick, whether or not all CPUs
-	 are idle.  This Kconfig option enables scalable detection of
-	 the all-CPUs-idle state, thus allowing the scheduling-clock
-	 tick to be disabled when all CPUs are idle.  Note that scalable
-	 detection of the all-CPUs-idle state means that larger systems
-	 will be slower to declare the all-CPUs-idle state.
-
-	 Say Y if you would like to help debug all-CPUs-idle detection.
-
-	 Say N if you are unsure.
-
-config NO_HZ_FULL_SYSIDLE_SMALL
-	int "Number of CPUs above which large-system approach is used"
-	depends on NO_HZ_FULL_SYSIDLE
-	range 1 NR_CPUS
-	default 8
-	help
-	 The full-system idle detection mechanism takes a lazy approach
-	 on large systems, as is required to attain decent scalability.
-	 However, on smaller systems, scalability is not anywhere near as
-	 large a concern as is energy efficiency.  The sysidle subsystem
-	 therefore uses a fast but non-scalable algorithm for small
-	 systems and a lazier but scalable algorithm for large systems.
-	 This Kconfig parameter defines the number of CPUs in the largest
-	 system that will be considered to be "small".
-
-	 The default value will be fine in most cases.	Battery-powered
-	 systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
-	 numbers of CPUs, and (3) are suffering from battery-lifetime
-	 problems due to long sysidle latencies might wish to experiment
-	 with larger values for this Kconfig parameter.  On the other
-	 hand, they might be even better served by disabling NO_HZ_FULL
-	 entirely, given that NO_HZ_FULL is intended for HPC and
-	 real-time workloads that at present do not tend to be run on
-	 battery-powered systems.
-
-	 Take the default if you are unsure.
-
 config NO_HZ
 	bool "Old Idle dynticks config"
 	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index b9ddd3beeb9a..0f4759f4232e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_NO_HZ_FULL_ALL=n
-CONFIG_NO_HZ_FULL_SYSIDLE=y
 CONFIG_RCU_FAST_NO_HZ=n
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 24396ae8355b..a75b16991a92 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -18,7 +18,6 @@ CONFIG_PROVE_RCU
 
 	In common code tested by TREE_RCU test cases.
 
-CONFIG_NO_HZ_FULL_SYSIDLE
 CONFIG_RCU_NOCB_CPU
 
 	Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index b5ea8489969a..519e06d34d0b 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -9,8 +9,7 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
 CONFIG_HOTPLUG_CPU -- Do half.  (Every second.)
 CONFIG_HZ_PERIODIC -- Do one.
 CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
-CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
-CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
+CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
 CONFIG_PREEMPT -- Do half.  (First three and #8.)
 CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
 CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
@@ -48,10 +47,6 @@ CONFIG_64BIT
 
 	Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
 
-CONFIG_NO_HZ_FULL_SYSIDLE_SMALL
-
-	Defer until Frederic uses this.
-
 CONFIG_PREEMPT_COUNT
 CONFIG_PREEMPT_RCU
 
-- 
cgit v1.2.3


From d2b1654f91f9e928011fbea7138854ee2044f470 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 11 May 2017 12:01:50 -0700
Subject: rcu: Remove #ifdef moving rcu_end_inkernel_boot from rcupdate.h

This commit removes a #ifdef and saves a few lines of code by moving
the rcu_end_inkernel_boot() function from include/linux/rcupdate.h to
include/linux/rcutiny.h (for TINY_RCU) and to include/linux/rcutree.h
(for TREE_RCU).

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 6 ------
 include/linux/rcutiny.h  | 2 +-
 include/linux/rcutree.h  | 1 +
 3 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7f24a5e673f5..f816fc72b51e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -111,12 +111,6 @@ void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcu_cpu_starting(unsigned int cpu);
 
-#ifndef CONFIG_TINY_RCU
-void rcu_end_inkernel_boot(void);
-#else /* #ifndef CONFIG_TINY_RCU */
-static inline void rcu_end_inkernel_boot(void) { }
-#endif /* #ifndef CONFIG_TINY_RCU */
-
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index c869785f16bd..5becbbccb998 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -123,7 +123,7 @@ void rcu_scheduler_starting(void);
 #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 static inline void rcu_scheduler_starting(void) { }
 #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
-
+static inline void rcu_end_inkernel_boot(void) { }
 static inline bool rcu_is_watching(void) { return true; }
 
 /* Avoid RCU read-side critical sections leaking across. */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d6aa89d15d47..37d6fd3b7ff8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -91,6 +91,7 @@ void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
+void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
 void rcu_all_qs(void);
 
-- 
cgit v1.2.3


From 41a2901e7d220875752a8c870e0b53288a578c20 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 12 May 2017 15:56:35 -0700
Subject: rcu: Remove SPARSE_RCU_POINTER Kconfig option

The sparse-based checking for non-RCU accesses to RCU-protected pointers
has been around for a very long time, and it is now the only type of
sparse-based checking that is optional.  This commit therefore makes
it unconditional.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
---
 Documentation/RCU/Design/Requirements/Requirements.html   |  9 +++------
 Documentation/RCU/checklist.txt                           |  8 ++++----
 Documentation/dev-tools/sparse.rst                        |  6 ------
 include/linux/compiler.h                                  |  4 ----
 lib/Kconfig.debug                                         | 15 ---------------
 lib/Makefile                                              |  3 ---
 .../testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt |  4 ----
 7 files changed, 7 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 8c94fc1d1c84..0e6550a8c926 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -559,9 +559,7 @@ The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
 	For <tt>remove_gp_synchronous()</tt>, as long as all modifications
 	to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
 	the above optimizations are harmless.
-	However,
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-	<tt>sparse</tt> will complain if you
+	However, <tt>sparse</tt> will complain if you
 	define <tt>gp</tt> with <tt>__rcu</tt> and then
 	access it without using
 	either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
@@ -1978,9 +1976,8 @@ guard against mishaps and misuse:
 	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
 	substituting a simple assignment.
 	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which running sparse
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
-	about simple-assignment accesses to that pointer.
+	tagged with <tt>__rcu</tt>, after which sparse
+	will complain about simple-assignment accesses to that pointer.
 	Arnd Bergmann made me aware of this requirement, and also
 	supplied the needed
 	<a href="https://lwn.net/Articles/376011/">patch series</a>.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 877947130ebe..6beda556faf3 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -413,11 +413,11 @@ over a rather long period of time, but improvements are always welcome!
 	read-side critical sections.  It is the responsibility of the
 	RCU update-side primitives to deal with this.
 
-17.	Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
-	__rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to
-	validate your RCU code.  These can help find problems as follows:
+17.	Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+	__rcu sparse checks to validate your RCU code.	These can help
+	find problems as follows:
 
-	CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+	CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
 		structures are carried out under the proper RCU
 		read-side critical section, while holding the right
 		combination of locks, or whatever other conditions
diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index ffdcc97f6f5a..78aa00a604a0 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -103,9 +103,3 @@ have already built it.
 
 The optional make variable CF can be used to pass arguments to sparse.  The
 build system passes -Wbitwise to sparse automatically.
-
-Checking RCU annotations
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-RCU annotations are not checked by default.  To enable RCU annotation
-checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f8110051188f..707242fdbb89 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,11 +17,7 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
-#ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else /* CONFIG_SPARSE_RCU_POINTER */
-# define __rcu
-#endif /* CONFIG_SPARSE_RCU_POINTER */
 # define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 762deab304fe..498d5dd63bf4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1306,21 +1306,6 @@ menu "RCU Debugging"
 config PROVE_RCU
 	def_bool PROVE_LOCKING
 
-config SPARSE_RCU_POINTER
-	bool "RCU debugging: sparse-based checks for pointer usage"
-	default n
-	help
-	 This feature enables the __rcu sparse annotation for
-	 RCU-protected pointers.  This annotation will cause sparse
-	 to flag any non-RCU used of annotated pointers.  This can be
-	 helpful when debugging RCU usage.  Please note that this feature
-	 is not intended to enforce code cleanliness; it is instead merely
-	 a debugging aid.
-
-	 Say Y to make sparse flag questionable use of RCU-protected pointers
-
-	 Say N if you are unsure.
-
 config TORTURE_TEST
 	tristate
 	default n
diff --git a/lib/Makefile b/lib/Makefile
index 0166fbc0fa81..07fbe6a75692 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,9 +25,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 earlycpio.o seq_buf.o siphash.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
-CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER
-CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER
-
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index b778a28f1386..c5c29fb7438c 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -74,10 +74,6 @@ CONFIG_TINY_RCU
 
 	These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
 
-CONFIG_SPARSE_RCU_POINTER
-
-	Makes sense only for sparse runs, not for kernel builds.
-
 CONFIG_SRCU
 CONFIG_TASKS_RCU
 
-- 
cgit v1.2.3


From bd8cc5a062f41e334596edbe823e2fa0adddd1b7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 15 May 2017 14:57:01 -0700
Subject: srcu: Remove Classic SRCU

Classic SRCU was only ever intended to be a fallback in case of issues
with Tree/Tiny SRCU, and the latter two are doing quite well in testing.
This commit therefore removes Classic SRCU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h                               |   2 -
 include/linux/srcuclassic.h                        | 100 ---
 init/Kconfig                                       |  21 +-
 kernel/rcu/Makefile                                |   1 -
 kernel/rcu/rcu.h                                   |  16 -
 kernel/rcu/rcutorture.c                            |  17 -
 kernel/rcu/srcu.c                                  | 668 ---------------------
 .../selftests/rcutorture/configs/rcu/CFLIST        |   1 -
 .../selftests/rcutorture/configs/rcu/SRCU-C        |  11 -
 .../rcutorture/configs/rcuperf/SRCUCLASSIC         |  16 -
 10 files changed, 2 insertions(+), 851 deletions(-)
 delete mode 100644 include/linux/srcuclassic.h
 delete mode 100644 kernel/rcu/srcu.c
 delete mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-C
 delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 5f509018e6b5..39af9bc0f653 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -60,8 +60,6 @@ int init_srcu_struct(struct srcu_struct *sp);
 #include <linux/srcutiny.h>
 #elif defined(CONFIG_TREE_SRCU)
 #include <linux/srcutree.h>
-#elif defined(CONFIG_CLASSIC_SRCU)
-#include <linux/srcuclassic.h>
 #elif defined(CONFIG_SRCU)
 #error "Unknown SRCU implementation specified to kernel configuration"
 #else
diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
deleted file mode 100644
index 67db4a36ef0d..000000000000
--- a/include/linux/srcuclassic.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Sleepable Read-Copy Update mechanism for mutual exclusion,
- *	classic v4.11 variant.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright (C) IBM Corporation, 2017
- *
- * Author: Paul McKenney <paulmck@us.ibm.com>
- */
-
-#ifndef _LINUX_SRCU_CLASSIC_H
-#define _LINUX_SRCU_CLASSIC_H
-
-struct srcu_array {
-	unsigned long lock_count[2];
-	unsigned long unlock_count[2];
-};
-
-struct rcu_batch {
-	struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
-struct srcu_struct {
-	unsigned long completed;
-	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
-	bool running;
-	/* callbacks just queued */
-	struct rcu_batch batch_queue;
-	/* callbacks try to do the first check_zero */
-	struct rcu_batch batch_check0;
-	/* callbacks done with the first check_zero and the flip */
-	struct rcu_batch batch_check1;
-	struct rcu_batch batch_done;
-	struct delayed_work work;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-};
-
-void process_srcu(struct work_struct *work);
-
-#define __SRCU_STRUCT_INIT(name)					\
-	{								\
-		.completed = -300,					\
-		.per_cpu_ref = &name##_srcu_array,			\
-		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.running = false,					\
-		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
-		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
-		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
-		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
-		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
-
-/*
- * Define and initialize a srcu struct at build time.
- * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
- *
- * Note that although DEFINE_STATIC_SRCU() hides the name from other
- * files, the per-CPU variable rules nevertheless require that the
- * chosen name be globally unique.  These rules also prohibit use of
- * DEFINE_STATIC_SRCU() within a function.  If these rules are too
- * restrictive, declare the srcu_struct manually.  For example, in
- * each file:
- *
- *	static struct srcu_struct my_srcu;
- *
- * Then, before the first use of each my_srcu, manually initialize it:
- *
- *	init_srcu_struct(&my_srcu);
- *
- * See include/linux/percpu-defs.h for the rules on per-CPU variables.
- */
-#define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
-	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
-#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
-#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
-
-void synchronize_srcu_expedited(struct srcu_struct *sp);
-void srcu_barrier(struct srcu_struct *sp);
-
-#endif
diff --git a/init/Kconfig b/init/Kconfig
index 6f257d51f582..2aa14ff40e88 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -526,32 +526,15 @@ config SRCU
 	  permits arbitrary sleeping or blocking within RCU read-side critical
 	  sections.
 
-config CLASSIC_SRCU
-	bool "Use v4.11 classic SRCU implementation"
-	default n
-	depends on RCU_EXPERT && SRCU
-	help
-	  This option selects the traditional well-tested classic SRCU
-	  implementation from v4.11, as might be desired for enterprise
-	  Linux distributions.  Without this option, the shiny new
-	  Tiny SRCU and Tree SRCU implementations are used instead.
-	  At some point, it is hoped that Tiny SRCU and Tree SRCU
-	  will accumulate enough test time and confidence to allow
-	  Classic SRCU to be dropped entirely.
-
-	  Say Y if you need a rock-solid SRCU.
-
-	  Say N if you would like help test Tree SRCU.
-
 config TINY_SRCU
 	bool
-	default y if SRCU && TINY_RCU && !CLASSIC_SRCU
+	default y if SRCU && TINY_RCU
 	help
 	  This option selects the single-CPU non-preemptible version of SRCU.
 
 config TREE_SRCU
 	bool
-	default y if SRCU && !TINY_RCU && !CLASSIC_SRCU
+	default y if SRCU && !TINY_RCU
 	help
 	  This option selects the full-fledged version of SRCU.
 
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 23803c7d5180..3945337c8ce4 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,7 +3,6 @@
 KCOV_INSTRUMENT := n
 
 obj-y += update.o sync.o
-obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
 obj-$(CONFIG_TREE_SRCU) += srcutree.o
 obj-$(CONFIG_TINY_SRCU) += srcutiny.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 27f871c88e0a..d06c42deee0b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -457,22 +457,6 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 			     struct srcu_struct *sp, int *flags,
 			     unsigned long *gpnum, unsigned long *completed);
 
-#elif defined(CONFIG_CLASSIC_SRCU)
-
-static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
-					   struct srcu_struct *sp, int *flags,
-					   unsigned long *gpnum,
-					   unsigned long *completed)
-{
-	if (test_type != SRCU_FLAVOR)
-		return;
-	*flags = 0;
-	*completed = sp->completed;
-	*gpnum = *completed;
-	if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check1.head)
-		(*gpnum)++;
-}
-
 #endif
 
 #ifdef CONFIG_TINY_RCU
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 03cdf79e73d4..b8f7f8ce8575 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -564,31 +564,19 @@ static void srcu_torture_stats(void)
 	int __maybe_unused cpu;
 	int idx;
 
-#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU)
 #ifdef CONFIG_TREE_SRCU
 	idx = srcu_ctlp->srcu_idx & 0x1;
-#else /* #ifdef CONFIG_TREE_SRCU */
-	idx = srcu_ctlp->completed & 0x1;
-#endif /* #else #ifdef CONFIG_TREE_SRCU */
 	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
 		 torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
 		unsigned long l0, l1;
 		unsigned long u0, u1;
 		long c0, c1;
-#ifdef CONFIG_TREE_SRCU
 		struct srcu_data *counts;
 
 		counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
 		u0 = counts->srcu_unlock_count[!idx];
 		u1 = counts->srcu_unlock_count[idx];
-#else /* #ifdef CONFIG_TREE_SRCU */
-		struct srcu_array *counts;
-
-		counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
-		u0 = counts->unlock_count[!idx];
-		u1 = counts->unlock_count[idx];
-#endif /* #else #ifdef CONFIG_TREE_SRCU */
 
 		/*
 		 * Make sure that a lock is always counted if the corresponding
@@ -596,13 +584,8 @@ static void srcu_torture_stats(void)
 		 */
 		smp_rmb();
 
-#ifdef CONFIG_TREE_SRCU
 		l0 = counts->srcu_lock_count[!idx];
 		l1 = counts->srcu_lock_count[idx];
-#else /* #ifdef CONFIG_TREE_SRCU */
-		l0 = counts->lock_count[!idx];
-		l1 = counts->lock_count[idx];
-#endif /* #else #ifdef CONFIG_TREE_SRCU */
 
 		c0 = l0 - u0;
 		c1 = l1 - u1;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
deleted file mode 100644
index 4e3f558409a0..000000000000
--- a/kernel/rcu/srcu.c
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Sleepable Read-Copy Update mechanism for mutual exclusion.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright (C) IBM Corporation, 2006
- * Copyright (C) Fujitsu, 2012
- *
- * Author: Paul McKenney <paulmck@us.ibm.com>
- *	   Lai Jiangshan <laijs@cn.fujitsu.com>
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *		Documentation/RCU/ *.txt
- *
- */
-
-#include <linux/export.h>
-#include <linux/mutex.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
-#include <linux/rcupdate_wait.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/srcu.h>
-
-#include "rcu.h"
-
-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
-	b->head = NULL;
-	b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
-	*b->tail = head;
-	b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
-	return b->tail == &b->head;
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
-	struct rcu_head *head;
-
-	if (rcu_batch_empty(b))
-		return NULL;
-
-	head = b->head;
-	b->head = head->next;
-	if (b->tail == &head->next)
-		rcu_batch_init(b);
-
-	return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
-	if (!rcu_batch_empty(from)) {
-		*to->tail = from->head;
-		to->tail = from->tail;
-		rcu_batch_init(from);
-	}
-}
-
-static int init_srcu_struct_fields(struct srcu_struct *sp)
-{
-	sp->completed = 0;
-	spin_lock_init(&sp->queue_lock);
-	sp->running = false;
-	rcu_batch_init(&sp->batch_queue);
-	rcu_batch_init(&sp->batch_check0);
-	rcu_batch_init(&sp->batch_check1);
-	rcu_batch_init(&sp->batch_done);
-	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
-	return sp->per_cpu_ref ? 0 : -ENOMEM;
-}
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-int __init_srcu_struct(struct srcu_struct *sp, const char *name,
-		       struct lock_class_key *key)
-{
-	/* Don't re-initialize a lock while it is held. */
-	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
-	lockdep_init_map(&sp->dep_map, name, key, 0);
-	return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(__init_srcu_struct);
-
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/**
- * init_srcu_struct - initialize a sleep-RCU structure
- * @sp: structure to initialize.
- *
- * Must invoke this on a given srcu_struct before passing that srcu_struct
- * to any other function.  Each srcu_struct represents a separate domain
- * of SRCU protection.
- */
-int init_srcu_struct(struct srcu_struct *sp)
-{
-	return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(init_srcu_struct);
-
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/*
- * Returns approximate total of the readers' ->lock_count[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->lock_count[idx]);
-	}
-	return sum;
-}
-
-/*
- * Returns approximate total of the readers' ->unlock_count[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->unlock_count[idx]);
-	}
-	return sum;
-}
-
-/*
- * Return true if the number of pre-existing readers is determined to
- * be zero.
- */
-static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
-{
-	unsigned long unlocks;
-
-	unlocks = srcu_readers_unlock_idx(sp, idx);
-
-	/*
-	 * Make sure that a lock is always counted if the corresponding unlock
-	 * is counted. Needs to be a smp_mb() as the read side may contain a
-	 * read from a variable that is written to before the synchronize_srcu()
-	 * in the write side. In this case smp_mb()s A and B act like the store
-	 * buffering pattern.
-	 *
-	 * This smp_mb() also pairs with smp_mb() C to prevent accesses after the
-	 * synchronize_srcu() from being executed before the grace period ends.
-	 */
-	smp_mb(); /* A */
-
-	/*
-	 * If the locks are the same as the unlocks, then there must have
-	 * been no readers on this index at some time in between. This does not
-	 * mean that there are no more readers, as one could have read the
-	 * current index but not have incremented the lock counter yet.
-	 *
-	 * Possible bug: There is no guarantee that there haven't been ULONG_MAX
-	 * increments of ->lock_count[] since the unlocks were counted, meaning
-	 * that this could return true even if there are still active readers.
-	 * Since there are no memory barriers around srcu_flip(), the CPU is not
-	 * required to increment ->completed before running
-	 * srcu_readers_unlock_idx(), which means that there could be an
-	 * arbitrarily large number of critical sections that execute after
-	 * srcu_readers_unlock_idx() but use the old value of ->completed.
-	 */
-	return srcu_readers_lock_idx(sp, idx) == unlocks;
-}
-
-/**
- * srcu_readers_active - returns true if there are readers. and false
- *                       otherwise
- * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
- *
- * Note that this is not an atomic primitive, and can therefore suffer
- * severe errors when invoked on an active srcu_struct.  That said, it
- * can be useful as an error check at cleanup time.
- */
-static bool srcu_readers_active(struct srcu_struct *sp)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->lock_count[0]);
-		sum += READ_ONCE(cpuc->lock_count[1]);
-		sum -= READ_ONCE(cpuc->unlock_count[0]);
-		sum -= READ_ONCE(cpuc->unlock_count[1]);
-	}
-	return sum;
-}
-
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this only after you are finished using a given srcu_struct
- * that was initialized via init_srcu_struct().  This code does some
- * probabalistic checking, spotting late uses of srcu_read_lock(),
- * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
- * If any such late uses are detected, the per-CPU memory associated with
- * the srcu_struct is simply leaked and WARN_ON() is invoked.  If the
- * caller frees the srcu_struct itself, a use-after-free crash will likely
- * ensue, but at least there will be a warning printed.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
-{
-	if (WARN_ON(srcu_readers_active(sp)))
-		return; /* Leakage unless caller handles error. */
-	free_percpu(sp->per_cpu_ref);
-	sp->per_cpu_ref = NULL;
-}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-
-/*
- * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.
- * Returns an index that must be passed to the matching srcu_read_unlock().
- */
-int __srcu_read_lock(struct srcu_struct *sp)
-{
-	int idx;
-
-	idx = READ_ONCE(sp->completed) & 0x1;
-	this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
-	smp_mb(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
-}
-EXPORT_SYMBOL_GPL(__srcu_read_lock);
-
-/*
- * Removes the count for the old reader from the appropriate per-CPU
- * element of the srcu_struct.  Note that this may well be a different
- * CPU than that which was incremented by the corresponding srcu_read_lock().
- */
-void __srcu_read_unlock(struct srcu_struct *sp, int idx)
-{
-	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
-}
-EXPORT_SYMBOL_GPL(__srcu_read_unlock);
-
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited().  We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after 10 microseconds,
- * we repeatedly block for 1-millisecond time periods.  This approach
- * has done well in testing, so there is no need for a config parameter.
- */
-#define SRCU_RETRY_CHECK_DELAY		5
-#define SYNCHRONIZE_SRCU_TRYCOUNT	2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT	12
-
-/*
- * @@@ Wait until all pre-existing readers complete.  Such readers
- * will have used the index specified by "idx".
- * the caller should ensures the ->completed is not changed while checking
- * and idx = (->completed & 1) ^ 1
- */
-static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
-{
-	for (;;) {
-		if (srcu_readers_active_idx_check(sp, idx))
-			return true;
-		if (--trycount <= 0)
-			return false;
-		udelay(SRCU_RETRY_CHECK_DELAY);
-	}
-}
-
-/*
- * Increment the ->completed counter so that future SRCU readers will
- * use the other rank of the ->(un)lock_count[] arrays.  This allows
- * us to wait for pre-existing readers in a starvation-free manner.
- */
-static void srcu_flip(struct srcu_struct *sp)
-{
-	WRITE_ONCE(sp->completed, sp->completed + 1);
-
-	/*
-	 * Ensure that if the updater misses an __srcu_read_unlock()
-	 * increment, that task's next __srcu_read_lock() will see the
-	 * above counter update.  Note that both this memory barrier
-	 * and the one in srcu_readers_active_idx_check() provide the
-	 * guarantee for __srcu_read_lock().
-	 */
-	smp_mb(); /* D */  /* Pairs with C. */
-}
-
-/*
- * Enqueue an SRCU callback on the specified srcu_struct structure,
- * initiating grace-period processing if it is not already running.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing SRCU read-side critical section.  On systems with
- * more than one CPU, this means that when "func()" is invoked, each CPU
- * is guaranteed to have executed a full memory barrier since the end of
- * its last corresponding SRCU read-side critical section whose beginning
- * preceded the call to call_rcu().  It also means that each CPU executing
- * an SRCU read-side critical section that continues beyond the start of
- * "func()" must have executed a memory barrier after the call_rcu()
- * but before the beginning of that SRCU read-side critical section.
- * Note that these guarantees include CPUs that are offline, idle, or
- * executing in user mode, as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting SRCU callback function "func()", then both CPU A and CPU
- * B are guaranteed to execute a full memory barrier during the time
- * interval between the call to call_rcu() and the invocation of "func()".
- * This guarantee applies even if CPU A and CPU B are the same CPU (but
- * again only if the system has more than one CPU).
- *
- * Of course, these guarantees apply only for invocations of call_srcu(),
- * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
- * srcu_struct structure.
- */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
-	       rcu_callback_t func)
-{
-	unsigned long flags;
-
-	head->next = NULL;
-	head->func = func;
-	spin_lock_irqsave(&sp->queue_lock, flags);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	rcu_batch_queue(&sp->batch_queue, head);
-	if (!sp->running) {
-		sp->running = true;
-		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
-	}
-	spin_unlock_irqrestore(&sp->queue_lock, flags);
-}
-EXPORT_SYMBOL_GPL(call_srcu);
-
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
-static void srcu_reschedule(struct srcu_struct *sp);
-
-/*
- * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
- */
-static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
-{
-	struct rcu_synchronize rcu;
-	struct rcu_head *head = &rcu.head;
-	bool done = false;
-
-	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
-			 lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
-
-	might_sleep();
-	init_completion(&rcu.completion);
-
-	head->next = NULL;
-	head->func = wakeme_after_rcu;
-	spin_lock_irq(&sp->queue_lock);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (!sp->running) {
-		/* steal the processing owner */
-		sp->running = true;
-		rcu_batch_queue(&sp->batch_check0, head);
-		spin_unlock_irq(&sp->queue_lock);
-
-		srcu_advance_batches(sp, trycount);
-		if (!rcu_batch_empty(&sp->batch_done)) {
-			BUG_ON(sp->batch_done.head != head);
-			rcu_batch_dequeue(&sp->batch_done);
-			done = true;
-		}
-		/* give the processing owner to work_struct */
-		srcu_reschedule(sp);
-	} else {
-		rcu_batch_queue(&sp->batch_queue, head);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
-	if (!done) {
-		wait_for_completion(&rcu.completion);
-		smp_mb(); /* Caller's later accesses after GP. */
-	}
-
-}
-
-/**
- * synchronize_srcu - wait for prior SRCU read-side critical-section completion
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for the count to drain to zero of both indexes. To avoid the
- * possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->completed & 1) ^ 1) to drain to zero at first,
- * and then flip the completed and wait for the count of the other index.
- *
- * Can block; must be called from process context.
- *
- * Note that it is illegal to call synchronize_srcu() from the corresponding
- * SRCU read-side critical section; doing so will result in deadlock.
- * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section,
- * as long as the resulting graph of srcu_structs is acyclic.
- *
- * There are memory-ordering constraints implied by synchronize_srcu().
- * On systems with more than one CPU, when synchronize_srcu() returns,
- * each CPU is guaranteed to have executed a full memory barrier since
- * the end of its last corresponding SRCU-sched read-side critical section
- * whose beginning preceded the call to synchronize_srcu().  In addition,
- * each CPU having an SRCU read-side critical section that extends beyond
- * the return from synchronize_srcu() is guaranteed to have executed a
- * full memory barrier after the beginning of synchronize_srcu() and before
- * the beginning of that SRCU read-side critical section.  Note that these
- * guarantees include CPUs that are offline, idle, or executing in user mode,
- * as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_srcu(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
- * are the same CPU, but again only if the system has more than one CPU.
- *
- * Of course, these memory-ordering guarantees apply only when
- * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
- * passed the same srcu_struct structure.
- */
-void synchronize_srcu(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
-			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
-			   : SYNCHRONIZE_SRCU_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu);
-
-/**
- * synchronize_srcu_expedited - Brute-force SRCU grace period
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for an SRCU grace period to elapse, but be more aggressive about
- * spinning rather than blocking when waiting.
- *
- * Note that synchronize_srcu_expedited() has the same deadlock and
- * memory-ordering properties as does synchronize_srcu().
- */
-void synchronize_srcu_expedited(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
-
-/**
- * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
- * @sp: srcu_struct on which to wait for in-flight callbacks.
- */
-void srcu_barrier(struct srcu_struct *sp)
-{
-	synchronize_srcu(sp);
-}
-EXPORT_SYMBOL_GPL(srcu_barrier);
-
-/**
- * srcu_batches_completed - return batches completed.
- * @sp: srcu_struct on which to report batch completion.
- *
- * Report the number of batches, correlated with, but not necessarily
- * precisely the same as, the number of grace periods that have elapsed.
- */
-unsigned long srcu_batches_completed(struct srcu_struct *sp)
-{
-	return sp->completed;
-}
-EXPORT_SYMBOL_GPL(srcu_batches_completed);
-
-#define SRCU_CALLBACK_BATCH	10
-#define SRCU_INTERVAL		1
-
-/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
-	if (!rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-}
-
-/*
- * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
- * ->batch_check1 and then to ->batch_done as readers drain.
- */
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
-{
-	int idx = 1 ^ (sp->completed & 1);
-
-	/*
-	 * Because readers might be delayed for an extended period after
-	 * fetching ->completed for their index, at any point in time there
-	 * might well be readers using both idx=0 and idx=1.  We therefore
-	 * need to wait for readers to clear from both index values before
-	 * invoking a callback.
-	 */
-
-	if (rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_check1))
-		return; /* no callbacks need to be advanced */
-
-	if (!try_check_zero(sp, idx, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have already done with their
-	 * first zero check and flip back when they were enqueued on
-	 * ->batch_check0 in a previous invocation of srcu_advance_batches().
-	 * (Presumably try_check_zero() returned false during that
-	 * invocation, leaving the callbacks stranded on ->batch_check1.)
-	 * They are therefore ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
-	if (rcu_batch_empty(&sp->batch_check0))
-		return; /* no callbacks need to be advanced */
-	srcu_flip(sp);
-
-	/*
-	 * The callbacks in ->batch_check0 just finished their
-	 * first check zero and flip, so move them to ->batch_check1
-	 * for future checking on the other idx.
-	 */
-	rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
-	/*
-	 * SRCU read-side critical sections are normally short, so check
-	 * at least twice in quick succession after a flip.
-	 */
-	trycount = trycount < 2 ? 2 : trycount;
-	if (!try_check_zero(sp, idx^1, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have now waited for all
-	 * pre-existing readers using both idx values.  They are therefore
-	 * ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-}
-
-/*
- * Invoke a limited number of SRCU callbacks that have passed through
- * their grace period.  If there are more to do, SRCU will reschedule
- * the workqueue.  Note that needed memory barriers have been executed
- * in this task's context by srcu_readers_active_idx_check().
- */
-static void srcu_invoke_callbacks(struct srcu_struct *sp)
-{
-	int i;
-	struct rcu_head *head;
-
-	for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
-		head = rcu_batch_dequeue(&sp->batch_done);
-		if (!head)
-			break;
-		local_bh_disable();
-		head->func(head);
-		local_bh_enable();
-	}
-}
-
-/*
- * Finished one round of SRCU grace period.  Start another if there are
- * more SRCU callbacks queued, otherwise put SRCU into not-running state.
- */
-static void srcu_reschedule(struct srcu_struct *sp)
-{
-	bool pending = true;
-
-	if (rcu_batch_empty(&sp->batch_done) &&
-	    rcu_batch_empty(&sp->batch_check1) &&
-	    rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		if (rcu_batch_empty(&sp->batch_done) &&
-		    rcu_batch_empty(&sp->batch_check1) &&
-		    rcu_batch_empty(&sp->batch_check0) &&
-		    rcu_batch_empty(&sp->batch_queue)) {
-			sp->running = false;
-			pending = false;
-		}
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
-	if (pending)
-		queue_delayed_work(system_power_efficient_wq,
-				   &sp->work, SRCU_INTERVAL);
-}
-
-/*
- * This is the work-queue function that handles SRCU grace periods.
- */
-void process_srcu(struct work_struct *work)
-{
-	struct srcu_struct *sp;
-
-	sp = container_of(work, struct srcu_struct, work.work);
-
-	srcu_collect_new(sp);
-	srcu_advance_batches(sp, 1);
-	srcu_invoke_callbacks(sp);
-	srcu_reschedule(sp);
-}
-EXPORT_SYMBOL_GPL(process_srcu);
-
-static int __init srcu_bootup_announce(void)
-{
-	pr_info("Classic SRCU implementation.\n");
-	return 0;
-}
-early_initcall(srcu_bootup_announce);
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index 0c1da784b8cb..6a0b9f69faad 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -7,7 +7,6 @@ TREE06
 TREE07
 TREE08
 TREE09
-SRCU-C
 SRCU-N
 SRCU-P
 SRCU-t
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C
deleted file mode 100644
index d4e19c087c21..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C
+++ /dev/null
@@ -1,11 +0,0 @@
-CONFIG_RCU_TRACE=n
-CONFIG_SMP=y
-CONFIG_NR_CPUS=3
-CONFIG_HOTPLUG_CPU=y
-CONFIG_RCU_EXPERT=y
-CONFIG_CLASSIC_SRCU=y
-CONFIG_PREEMPT_NONE=n
-CONFIG_PREEMPT_VOLUNTARY=n
-CONFIG_PREEMPT=y
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC b/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC
deleted file mode 100644
index a1395af60ef4..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC
+++ /dev/null
@@ -1,16 +0,0 @@
-CONFIG_SMP=y
-CONFIG_HZ_PERIODIC=n
-CONFIG_NO_HZ_IDLE=y
-CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
-CONFIG_RCU_NOCB_CPU=n
-CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_PROVE_LOCKING=n
-CONFIG_RCU_BOOST=n
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
-CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TRACE=y
-CONFIG_CLASSIC_SRCU=y
-- 
cgit v1.2.3


From 0620fddb56dfaf0e1034eeb69d79c73b361debbf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 8 Jun 2017 14:49:26 +0100
Subject: KEYS: sanitize key structs before freeing

While a 'struct key' itself normally does not contain sensitive
information, Documentation/security/keys.txt actually encourages this:

     "Having a payload is not required; and the payload can, in fact,
     just be a value stored in the struct key itself."

In case someone has taken this advice, or will take this advice in the
future, zero the key structure before freeing it.  We might as well, and
as a bonus this could make it a bit more difficult for an adversary to
determine which keys have recently been in use.

This is safe because the key_jar cache does not use a constructor.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/key.h | 1 -
 security/keys/gc.c  | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 0c9b93b0d1f7..78e25aabedaf 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -173,7 +173,6 @@ struct key {
 #ifdef KEY_DEBUGGING
 	unsigned		magic;
 #define KEY_DEBUG_MAGIC		0x18273645u
-#define KEY_DEBUG_MAGIC_X	0xf8e9dacbu
 #endif
 
 	unsigned long		flags;		/* status flags (change with bitops) */
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 595becc6d0d2..87cb260e4890 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -158,9 +158,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
 
 		kfree(key->description);
 
-#ifdef KEY_DEBUGGING
-		key->magic = KEY_DEBUG_MAGIC_X;
-#endif
+		memzero_explicit(key, sizeof(*key));
 		kmem_cache_free(key_jar, key);
 	}
 }
-- 
cgit v1.2.3


From ecbaa83ee84cdf592c2891ca4c205b23d6b79a6f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 8 Jun 2017 10:12:42 +0200
Subject: driver core: remove class_attrs from struct class

This field is no longer used or needed (use class_groups instead), so it
can be removed along with the driver core functionality that created and
removed these files.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/class.c   | 33 ---------------------------------
 include/linux/device.h |  2 --
 2 files changed, 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index a2b2896693d6..52eb8e644acd 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -119,36 +119,6 @@ static void class_put(struct class *cls)
 		kset_put(&cls->p->subsys);
 }
 
-static int add_class_attrs(struct class *cls)
-{
-	int i;
-	int error = 0;
-
-	if (cls->class_attrs) {
-		for (i = 0; cls->class_attrs[i].attr.name; i++) {
-			error = class_create_file(cls, &cls->class_attrs[i]);
-			if (error)
-				goto error;
-		}
-	}
-done:
-	return error;
-error:
-	while (--i >= 0)
-		class_remove_file(cls, &cls->class_attrs[i]);
-	goto done;
-}
-
-static void remove_class_attrs(struct class *cls)
-{
-	int i;
-
-	if (cls->class_attrs) {
-		for (i = 0; cls->class_attrs[i].attr.name; i++)
-			class_remove_file(cls, &cls->class_attrs[i]);
-	}
-}
-
 static void klist_class_dev_get(struct klist_node *n)
 {
 	struct device *dev = container_of(n, struct device, knode_class);
@@ -217,8 +187,6 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 	}
 	error = class_add_groups(class_get(cls), cls->class_groups);
 	class_put(cls);
-	error = add_class_attrs(class_get(cls));
-	class_put(cls);
 	return error;
 }
 EXPORT_SYMBOL_GPL(__class_register);
@@ -226,7 +194,6 @@ EXPORT_SYMBOL_GPL(__class_register);
 void class_unregister(struct class *cls)
 {
 	pr_debug("device class '%s': unregistering\n", cls->name);
-	remove_class_attrs(cls);
 	class_remove_groups(cls, cls->class_groups);
 	kset_unregister(&cls->p->subsys);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 9a902ae33932..5b725b943cf2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -365,7 +365,6 @@ int subsys_virtual_register(struct bus_type *subsys,
  * struct class - device classes
  * @name:	Name of the class.
  * @owner:	The module owner.
- * @class_attrs: Default attributes of this class.
  * @class_groups: Default attributes of this class.
  * @dev_groups:	Default attributes of the devices that belong to the class.
  * @dev_kobj:	The kobject that represents this class and links it into the hierarchy.
@@ -394,7 +393,6 @@ struct class {
 	const char		*name;
 	struct module		*owner;
 
-	struct class_attribute		*class_attrs;
 	const struct attribute_group	**class_groups;
 	const struct attribute_group	**dev_groups;
 	struct kobject			*dev_kobj;
-- 
cgit v1.2.3


From 9f4ac349bd60ef463450a00aa5e19c67f5ad12e2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 6 Jun 2017 14:17:02 +0200
Subject: sh: superhyway: use dev_groups and not dev_attrs for bus_type

The dev_attrs field has long been "depreciated" and is finally being
removed, so move the driver to use the "correct" dev_groups field
instead for struct bus_type.

Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <linux-sh@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/sh/superhyway/superhyway-sysfs.c | 29 +++++++++++++++++++----------
 drivers/sh/superhyway/superhyway.c       |  2 +-
 include/linux/superhyway.h               |  2 +-
 3 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/superhyway/superhyway-sysfs.c b/drivers/sh/superhyway/superhyway-sysfs.c
index 55434330867b..774f31b564f8 100644
--- a/drivers/sh/superhyway/superhyway-sysfs.c
+++ b/drivers/sh/superhyway/superhyway-sysfs.c
@@ -19,7 +19,8 @@ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, ch
 {									\
 	struct superhyway_device *s = to_superhyway_device(dev);	\
 	return sprintf(buf, fmt, s->field);				\
-}
+}									\
+static DEVICE_ATTR_RO(name);
 
 /* VCR flags */
 superhyway_ro_attr(perr_flags, "0x%02x\n", vcr.perr_flags);
@@ -32,14 +33,22 @@ superhyway_ro_attr(top_mb, "0x%02x\n", vcr.top_mb);
 /* Misc */
 superhyway_ro_attr(resource, "0x%08lx\n", resource[0].start);
 
-struct device_attribute superhyway_dev_attrs[] = {
-	__ATTR_RO(perr_flags),
-	__ATTR_RO(merr_flags),
-	__ATTR_RO(mod_vers),
-	__ATTR_RO(mod_id),
-	__ATTR_RO(bot_mb),
-	__ATTR_RO(top_mb),
-	__ATTR_RO(resource),
-	__ATTR_NULL,
+static struct attribute *superhyway_dev_attrs[] = {
+	&dev_attr_perr_flags.attr,
+	&dev_attr_merr_flags.attr,
+	&dev_attr_mod_vers.attr,
+	&dev_attr_mod_id.attr,
+	&dev_attr_bot_mb.attr,
+	&dev_attr_top_mb.attr,
+	&dev_attr_resource.attr,
+	NULL,
 };
 
+static const struct attribute_group superhyway_dev_group = {
+	.attrs = superhyway_dev_attrs,
+};
+
+const struct attribute_group *superhyway_dev_groups[] = {
+	&superhyway_dev_group,
+	NULL,
+};
diff --git a/drivers/sh/superhyway/superhyway.c b/drivers/sh/superhyway/superhyway.c
index bb1fb7712134..348836b90605 100644
--- a/drivers/sh/superhyway/superhyway.c
+++ b/drivers/sh/superhyway/superhyway.c
@@ -209,7 +209,7 @@ struct bus_type superhyway_bus_type = {
 	.name		= "superhyway",
 	.match		= superhyway_bus_match,
 #ifdef CONFIG_SYSFS
-	.dev_attrs	= superhyway_dev_attrs,
+	.dev_groups	= superhyway_dev_groups,
 #endif
 	.probe		= superhyway_device_probe,
 	.remove		= superhyway_device_remove,
diff --git a/include/linux/superhyway.h b/include/linux/superhyway.h
index 17ea468fa362..8d3376775813 100644
--- a/include/linux/superhyway.h
+++ b/include/linux/superhyway.h
@@ -101,7 +101,7 @@ int superhyway_add_device(unsigned long base, struct superhyway_device *, struct
 int superhyway_add_devices(struct superhyway_bus *bus, struct superhyway_device **devices, int nr_devices);
 
 /* drivers/sh/superhyway/superhyway-sysfs.c */
-extern struct device_attribute superhyway_dev_attrs[];
+extern const struct attribute_group *superhyway_dev_groups[];
 
 #endif /* __LINUX_SUPERHYWAY_H */
 
-- 
cgit v1.2.3


From f42fe520e449bda213f035478ddc9cf99e9082ac Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 5 Jun 2017 14:15:06 -0600
Subject: coresight: use const for device_node structures

Almost low level functions from open firmware have used const to
qualify device_node structures, so add const for device_node
parameters in of_coresight related functions.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/of_coresight.c | 7 ++++---
 include/linux/coresight.h                  | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 09142e99e915..2749853b9010 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -52,7 +52,7 @@ of_coresight_get_endpoint_device(struct device_node *endpoint)
 			       endpoint, of_dev_node_match);
 }
 
-static void of_coresight_get_ports(struct device_node *node,
+static void of_coresight_get_ports(const struct device_node *node,
 				   int *nr_inport, int *nr_outport)
 {
 	struct device_node *ep = NULL;
@@ -101,8 +101,9 @@ static int of_coresight_alloc_memory(struct device *dev,
 	return 0;
 }
 
-struct coresight_platform_data *of_get_coresight_platform_data(
-				struct device *dev, struct device_node *node)
+struct coresight_platform_data *
+of_get_coresight_platform_data(struct device *dev,
+			       const struct device_node *node)
 {
 	int i = 0, ret = 0, cpu;
 	struct coresight_platform_data *pdata;
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 035c16c9a505..bf0aa50880be 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -263,11 +263,12 @@ static inline int coresight_timeout(void __iomem *addr, u32 offset,
 #endif
 
 #ifdef CONFIG_OF
-extern struct coresight_platform_data *of_get_coresight_platform_data(
-				struct device *dev, struct device_node *node);
+extern struct coresight_platform_data *
+of_get_coresight_platform_data(struct device *dev,
+			       const struct device_node *node);
 #else
 static inline struct coresight_platform_data *of_get_coresight_platform_data(
-	struct device *dev, struct device_node *node) { return NULL; }
+	struct device *dev, const struct device_node *node) { return NULL; }
 #endif
 
 #ifdef CONFIG_PID_NS
-- 
cgit v1.2.3


From c56cdd7a5c836db7834256f09692112afee9eb3f Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 5 Jun 2017 14:15:15 -0600
Subject: coresight: refactor with function of_coresight_get_cpu

This is refactor to add function of_coresight_get_cpu(), so it's used to
retrieve CPU id for coresight component. Finally can use it as a common
function for multiple places.

Suggested-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/of_coresight.c | 43 +++++++++++++++++++-----------
 include/linux/coresight.h                  |  3 +++
 2 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 225b2dd5970c..a18794128bf8 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -101,16 +101,40 @@ static int of_coresight_alloc_memory(struct device *dev,
 	return 0;
 }
 
+int of_coresight_get_cpu(const struct device_node *node)
+{
+	int cpu;
+	bool found;
+	struct device_node *dn, *np;
+
+	dn = of_parse_phandle(node, "cpu", 0);
+
+	/* Affinity defaults to CPU0 */
+	if (!dn)
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		np = of_cpu_device_node_get(cpu);
+		found = (dn == np);
+		of_node_put(np);
+		if (found)
+			break;
+	}
+	of_node_put(dn);
+
+	/* Affinity to CPU0 if no cpu nodes are found */
+	return found ? cpu : 0;
+}
+EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
+
 struct coresight_platform_data *
 of_get_coresight_platform_data(struct device *dev,
 			       const struct device_node *node)
 {
-	int i = 0, ret = 0, cpu;
+	int i = 0, ret = 0;
 	struct coresight_platform_data *pdata;
 	struct of_endpoint endpoint, rendpoint;
 	struct device *rdev;
-	bool found;
-	struct device_node *dn, *np;
 	struct device_node *ep = NULL;
 	struct device_node *rparent = NULL;
 	struct device_node *rport = NULL;
@@ -177,18 +201,7 @@ of_get_coresight_platform_data(struct device *dev,
 		} while (ep);
 	}
 
-	dn = of_parse_phandle(node, "cpu", 0);
-	for_each_possible_cpu(cpu) {
-		np = of_cpu_device_node_get(cpu);
-		found = (dn == np);
-		of_node_put(np);
-		if (found)
-			break;
-	}
-	of_node_put(dn);
-
-	/* Affinity to CPU0 if no cpu nodes are found */
-	pdata->cpu = found ? cpu : 0;
+	pdata->cpu = of_coresight_get_cpu(node);
 
 	return pdata;
 }
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index bf0aa50880be..d950dad5056a 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -263,10 +263,13 @@ static inline int coresight_timeout(void __iomem *addr, u32 offset,
 #endif
 
 #ifdef CONFIG_OF
+extern int of_coresight_get_cpu(const struct device_node *node);
 extern struct coresight_platform_data *
 of_get_coresight_platform_data(struct device *dev,
 			       const struct device_node *node);
 #else
+static inline int of_coresight_get_cpu(const struct device_node *node)
+{ return 0; }
 static inline struct coresight_platform_data *of_get_coresight_platform_data(
 	struct device *dev, const struct device_node *node) { return NULL; }
 #endif
-- 
cgit v1.2.3


From 0cbaa44841db3d06d2a21ae4ab679882033f3dbe Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 6 Jun 2017 16:08:39 -0500
Subject: lib: Add crc4 module

Add a little helper for crc4 calculations. This works 4-bits-at-a-time,
using a simple table approach.

We will need this in the FSI core code, as well as any master
implementations that need to calculate CRCs in software.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Chris Bostic <cbostic@linux.vnet.ibm.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/crc4.h |  8 ++++++++
 lib/Kconfig          |  8 ++++++++
 lib/Makefile         |  1 +
 lib/crc4.c           | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+)
 create mode 100644 include/linux/crc4.h
 create mode 100644 lib/crc4.c

(limited to 'include/linux')

diff --git a/include/linux/crc4.h b/include/linux/crc4.h
new file mode 100644
index 000000000000..8f739f1d794f
--- /dev/null
+++ b/include/linux/crc4.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_CRC4_H
+#define _LINUX_CRC4_H
+
+#include <linux/types.h>
+
+extern uint8_t crc4(uint8_t c, uint64_t x, int bits);
+
+#endif /* _LINUX_CRC4_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 0c8b78a9ae2e..d2fd2623721e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -158,6 +158,14 @@ config CRC32_BIT
 
 endchoice
 
+config CRC4
+	tristate "CRC4 functions"
+	help
+	  This option is provided for the case where no in-kernel-tree
+	  modules require CRC4 functions, but a module built outside
+	  the kernel tree does. Such modules that use library CRC4
+	  functions require M here.
+
 config CRC7
 	tristate "CRC7 functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 0166fbc0fa81..90cf124e5feb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
 obj-$(CONFIG_CRC_ITU_T)	+= crc-itu-t.o
 obj-$(CONFIG_CRC32)	+= crc32.o
 obj-$(CONFIG_CRC32_SELFTEST)	+= crc32test.o
+obj-$(CONFIG_CRC4)	+= crc4.o
 obj-$(CONFIG_CRC7)	+= crc7.o
 obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_CRC8)	+= crc8.o
diff --git a/lib/crc4.c b/lib/crc4.c
new file mode 100644
index 000000000000..cf6db46661be
--- /dev/null
+++ b/lib/crc4.c
@@ -0,0 +1,46 @@
+/*
+ * crc4.c - simple crc-4 calculations.
+ *
+ * This source code is licensed under the GNU General Public License, Version
+ * 2. See the file COPYING for more details.
+ */
+
+#include <linux/crc4.h>
+#include <linux/module.h>
+
+static const uint8_t crc4_tab[] = {
+	0x0, 0x7, 0xe, 0x9, 0xb, 0xc, 0x5, 0x2,
+	0x1, 0x6, 0xf, 0x8, 0xa, 0xd, 0x4, 0x3,
+};
+
+/**
+ * crc4 - calculate the 4-bit crc of a value.
+ * @crc:  starting crc4
+ * @x:    value to checksum
+ * @bits: number of bits in @x to checksum
+ *
+ * Returns the crc4 value of @x, using polynomial 0b10111.
+ *
+ * The @x value is treated as left-aligned, and bits above @bits are ignored
+ * in the crc calculations.
+ */
+uint8_t crc4(uint8_t c, uint64_t x, int bits)
+{
+	int i;
+
+	/* mask off anything above the top bit */
+	x &= (1ull << bits) - 1;
+
+	/* Align to 4-bits */
+	bits = (bits + 3) & ~0x3;
+
+	/* Calculate crc4 over four-bit nibbles, starting at the MSbit */
+	for (i = bits - 4; i >= 0; i -= 4)
+		c = crc4_tab[c ^ ((x >> i) & 0xf)];
+
+	return c;
+}
+EXPORT_SYMBOL_GPL(crc4);
+
+MODULE_DESCRIPTION("CRC4 calculations");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From f7ade2a603cfd205a6d7afb9d96ac7975f666dd6 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 6 Jun 2017 16:08:44 -0500
Subject: drivers/fsi: scan slaves & register devices

Now that we have fsi_slave devices, scan each for endpoints, and
register them on the fsi bus.

Includes contributions from Christopher Bostic
<cbostic@linux.vnet.ibm.com>.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Christopher Bostic <cbostic@linux.vnet.ibm.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fsi/fsi-core.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/fsi.h    |   4 ++
 2 files changed, 131 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index 04795f2e591e..eac0bc4f71e9 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -19,9 +19,23 @@
 #include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/bitops.h>
 
 #include "fsi-master.h"
 
+#define FSI_SLAVE_CONF_NEXT_MASK	GENMASK(31, 31)
+#define FSI_SLAVE_CONF_SLOTS_MASK	GENMASK(23, 16)
+#define FSI_SLAVE_CONF_SLOTS_SHIFT	16
+#define FSI_SLAVE_CONF_VERSION_MASK	GENMASK(15, 12)
+#define FSI_SLAVE_CONF_VERSION_SHIFT	12
+#define FSI_SLAVE_CONF_TYPE_MASK	GENMASK(11, 4)
+#define FSI_SLAVE_CONF_TYPE_SHIFT	4
+#define FSI_SLAVE_CONF_CRC_SHIFT	4
+#define FSI_SLAVE_CONF_CRC_MASK		GENMASK(3, 0)
+#define FSI_SLAVE_CONF_DATA_BITS	28
+
+static const int engine_page_size = 0x400;
+
 #define FSI_SLAVE_BASE			0x800
 
 /*
@@ -62,6 +76,30 @@ static int fsi_master_read(struct fsi_master *master, int link,
 static int fsi_master_write(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, const void *val, size_t size);
 
+/* FSI endpoint-device support */
+
+static void fsi_device_release(struct device *_device)
+{
+	struct fsi_device *device = to_fsi_dev(_device);
+
+	kfree(device);
+}
+
+static struct fsi_device *fsi_create_device(struct fsi_slave *slave)
+{
+	struct fsi_device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	dev->dev.parent = &slave->dev;
+	dev->dev.bus = &fsi_bus_type;
+	dev->dev.release = fsi_device_release;
+
+	return dev;
+}
+
 /* FSI slave support */
 static int fsi_slave_calc_addr(struct fsi_slave *slave, uint32_t *addrp,
 		uint8_t *idp)
@@ -115,6 +153,91 @@ static int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
 			addr, val, size);
 }
 
+static int fsi_slave_scan(struct fsi_slave *slave)
+{
+	uint32_t engine_addr;
+	uint32_t conf;
+	int rc, i;
+
+	/*
+	 * scan engines
+	 *
+	 * We keep the peek mode and slave engines for the core; so start
+	 * at the third slot in the configuration table. We also need to
+	 * skip the chip ID entry at the start of the address space.
+	 */
+	engine_addr = engine_page_size * 3;
+	for (i = 2; i < engine_page_size / sizeof(uint32_t); i++) {
+		uint8_t slots, version, type, crc;
+		struct fsi_device *dev;
+
+		rc = fsi_slave_read(slave, (i + 1) * sizeof(conf),
+				&conf, sizeof(conf));
+		if (rc) {
+			dev_warn(&slave->dev,
+				"error reading slave registers\n");
+			return -1;
+		}
+		conf = be32_to_cpu(conf);
+
+		crc = crc4(0, conf, 32);
+		if (crc) {
+			dev_warn(&slave->dev,
+				"crc error in slave register at 0x%04x\n",
+				i);
+			return -1;
+		}
+
+		slots = (conf & FSI_SLAVE_CONF_SLOTS_MASK)
+			>> FSI_SLAVE_CONF_SLOTS_SHIFT;
+		version = (conf & FSI_SLAVE_CONF_VERSION_MASK)
+			>> FSI_SLAVE_CONF_VERSION_SHIFT;
+		type = (conf & FSI_SLAVE_CONF_TYPE_MASK)
+			>> FSI_SLAVE_CONF_TYPE_SHIFT;
+
+		/*
+		 * Unused address areas are marked by a zero type value; this
+		 * skips the defined address areas
+		 */
+		if (type != 0 && slots != 0) {
+
+			/* create device */
+			dev = fsi_create_device(slave);
+			if (!dev)
+				return -ENOMEM;
+
+			dev->slave = slave;
+			dev->engine_type = type;
+			dev->version = version;
+			dev->unit = i;
+			dev->addr = engine_addr;
+			dev->size = slots * engine_page_size;
+
+			dev_dbg(&slave->dev,
+			"engine[%i]: type %x, version %x, addr %x size %x\n",
+					dev->unit, dev->engine_type, version,
+					dev->addr, dev->size);
+
+			dev_set_name(&dev->dev, "%02x:%02x:%02x:%02x",
+					slave->master->idx, slave->link,
+					slave->id, i - 2);
+
+			rc = device_register(&dev->dev);
+			if (rc) {
+				dev_warn(&slave->dev, "add failed: %d\n", rc);
+				put_device(&dev->dev);
+			}
+		}
+
+		engine_addr += slots * engine_page_size;
+
+		if (!(conf & FSI_SLAVE_CONF_NEXT_MASK))
+			break;
+	}
+
+	return 0;
+}
+
 /* Encode slave local bus echo delay */
 static inline uint32_t fsi_smode_echodly(int x)
 {
@@ -230,7 +353,10 @@ static int fsi_slave_init(struct fsi_master *master, int link, uint8_t id)
 		return rc;
 	}
 
-	/* todo: perform engine scan */
+	rc = fsi_slave_scan(slave);
+	if (rc)
+		dev_dbg(&master->dev, "failed during slave scan with: %d\n",
+				rc);
 
 	return rc;
 }
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 273cbf6400ea..efa55ba6cb39 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -21,6 +21,10 @@ struct fsi_device {
 	struct device		dev;
 	u8			engine_type;
 	u8			version;
+	u8			unit;
+	struct fsi_slave	*slave;
+	uint32_t		addr;
+	uint32_t		size;
 };
 
 struct fsi_device_id {
-- 
cgit v1.2.3


From 4efe37f4c4efcb73562e4634cb6c262b08ab6451 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 6 Jun 2017 16:08:45 -0500
Subject: drivers/fsi: Add device read/write/peek API

This change introduces the fsi device API: simple read, write and peek
accessors for the devices' address spaces.

Includes contributions from Christopher Bostic
<cbostic@linux.vnet.ibm.com> and Edward A. James <eajames@us.ibm.com>.

Signed-off-by: Edward A. James <eajames@us.ibm.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Christopher Bostic <cbostic@linux.vnet.ibm.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fsi/fsi-core.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/fsi.h    |  7 ++++++-
 2 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index eac0bc4f71e9..d7a6e762527f 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -34,6 +34,8 @@
 #define FSI_SLAVE_CONF_CRC_MASK		GENMASK(3, 0)
 #define FSI_SLAVE_CONF_DATA_BITS	28
 
+#define FSI_PEEK_BASE			0x410
+
 static const int engine_page_size = 0x400;
 
 #define FSI_SLAVE_BASE			0x800
@@ -75,8 +77,54 @@ static int fsi_master_read(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, void *val, size_t size);
 static int fsi_master_write(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, const void *val, size_t size);
+static int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
+		void *val, size_t size);
+static int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
+		const void *val, size_t size);
+
+/*
+ * fsi_device_read() / fsi_device_write() / fsi_device_peek()
+ *
+ * FSI endpoint-device support
+ *
+ * Read / write / peek accessors for a client
+ *
+ * Parameters:
+ * dev:  Structure passed to FSI client device drivers on probe().
+ * addr: FSI address of given device.  Client should pass in its base address
+ *       plus desired offset to access its register space.
+ * val:  For read/peek this is the value read at the specified address. For
+ *       write this is value to write to the specified address.
+ *       The data in val must be FSI bus endian (big endian).
+ * size: Size in bytes of the operation.  Sizes supported are 1, 2 and 4 bytes.
+ *       Addresses must be aligned on size boundaries or an error will result.
+ */
+int fsi_device_read(struct fsi_device *dev, uint32_t addr, void *val,
+		size_t size)
+{
+	if (addr > dev->size || size > dev->size || addr > dev->size - size)
+		return -EINVAL;
 
-/* FSI endpoint-device support */
+	return fsi_slave_read(dev->slave, dev->addr + addr, val, size);
+}
+EXPORT_SYMBOL_GPL(fsi_device_read);
+
+int fsi_device_write(struct fsi_device *dev, uint32_t addr, const void *val,
+		size_t size)
+{
+	if (addr > dev->size || size > dev->size || addr > dev->size - size)
+		return -EINVAL;
+
+	return fsi_slave_write(dev->slave, dev->addr + addr, val, size);
+}
+EXPORT_SYMBOL_GPL(fsi_device_write);
+
+int fsi_device_peek(struct fsi_device *dev, void *val)
+{
+	uint32_t addr = FSI_PEEK_BASE + ((dev->unit - 2) * sizeof(uint32_t));
+
+	return fsi_slave_read(dev->slave, addr, val, sizeof(uint32_t));
+}
 
 static void fsi_device_release(struct device *_device)
 {
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index efa55ba6cb39..66bce4851ff6 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -27,6 +27,12 @@ struct fsi_device {
 	uint32_t		size;
 };
 
+extern int fsi_device_read(struct fsi_device *dev, uint32_t addr,
+		void *val, size_t size);
+extern int fsi_device_write(struct fsi_device *dev, uint32_t addr,
+		const void *val, size_t size);
+extern int fsi_device_peek(struct fsi_device *dev, void *val);
+
 struct fsi_device_id {
 	u8	engine_type;
 	u8	version;
@@ -40,7 +46,6 @@ struct fsi_device_id {
 #define FSI_DEVICE_VERSIONED(t, v) \
 	.engine_type = (t), .version = (v),
 
-
 struct fsi_driver {
 	struct device_driver		drv;
 	const struct fsi_device_id	*id_table;
-- 
cgit v1.2.3


From 356d8009a5a4569f17a3508b50a347bdf4d5b337 Mon Sep 17 00:00:00 2001
From: Christopher Bostic <cbostic@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 16:08:48 -0500
Subject: drivers/fsi: Add client driver register utilities

Add driver_register and driver_unregister wrappers for FSI.

Signed-off-by: Christopher Bostic <cbostic@linux.vnet.ibm.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fsi/fsi-core.c | 17 +++++++++++++++++
 include/linux/fsi.h    | 12 ++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index fcb0c818524f..e9fbd9feeb3e 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -586,6 +586,23 @@ static int fsi_bus_match(struct device *dev, struct device_driver *drv)
 	return 0;
 }
 
+int fsi_driver_register(struct fsi_driver *fsi_drv)
+{
+	if (!fsi_drv)
+		return -EINVAL;
+	if (!fsi_drv->id_table)
+		return -EINVAL;
+
+	return driver_register(&fsi_drv->drv);
+}
+EXPORT_SYMBOL_GPL(fsi_driver_register);
+
+void fsi_driver_unregister(struct fsi_driver *fsi_drv)
+{
+	driver_unregister(&fsi_drv->drv);
+}
+EXPORT_SYMBOL_GPL(fsi_driver_unregister);
+
 struct bus_type fsi_bus_type = {
 	.name		= "fsi",
 	.match		= fsi_bus_match,
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 66bce4851ff6..34f1e9aea725 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -54,6 +54,18 @@ struct fsi_driver {
 #define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev)
 #define to_fsi_drv(drvp) container_of(drvp, struct fsi_driver, drv)
 
+extern int fsi_driver_register(struct fsi_driver *fsi_drv);
+extern void fsi_driver_unregister(struct fsi_driver *fsi_drv);
+
+/* module_fsi_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_fsi_driver(__fsi_driver) \
+		module_driver(__fsi_driver, fsi_driver_register, \
+				fsi_driver_unregister)
+
 extern struct bus_type fsi_bus_type;
 
 #endif /* LINUX_FSI_H */
-- 
cgit v1.2.3


From da36cadf89a75a730302a4df114cb930b1becc39 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 6 Jun 2017 16:08:50 -0500
Subject: drivers/fsi: expose direct-access slave API

Allow drivers to access the slave address ranges.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Christopher Bostic <cbostic@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fsi/fsi-core.c | 30 ++++++++++++++++++++++++------
 include/linux/fsi.h    | 12 ++++++++++++
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index 626cc0672552..36813651ac0a 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -78,10 +78,6 @@ static int fsi_master_read(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, void *val, size_t size);
 static int fsi_master_write(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, const void *val, size_t size);
-static int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
-		void *val, size_t size);
-static int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
-		const void *val, size_t size);
 
 /*
  * fsi_device_read() / fsi_device_write() / fsi_device_peek()
@@ -174,7 +170,7 @@ static int fsi_slave_calc_addr(struct fsi_slave *slave, uint32_t *addrp,
 	return 0;
 }
 
-static int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
+int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
 			void *val, size_t size)
 {
 	uint8_t id = slave->id;
@@ -187,8 +183,9 @@ static int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
 	return fsi_master_read(slave->master, slave->link, id,
 			addr, val, size);
 }
+EXPORT_SYMBOL_GPL(fsi_slave_read);
 
-static int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
+int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
 			const void *val, size_t size)
 {
 	uint8_t id = slave->id;
@@ -201,6 +198,27 @@ static int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
 	return fsi_master_write(slave->master, slave->link, id,
 			addr, val, size);
 }
+EXPORT_SYMBOL_GPL(fsi_slave_write);
+
+extern int fsi_slave_claim_range(struct fsi_slave *slave,
+		uint32_t addr, uint32_t size)
+{
+	if (addr + size < addr)
+		return -EINVAL;
+
+	if (addr + size > slave->size)
+		return -EINVAL;
+
+	/* todo: check for overlapping claims */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsi_slave_claim_range);
+
+extern void fsi_slave_release_range(struct fsi_slave *slave,
+		uint32_t addr, uint32_t size)
+{
+}
+EXPORT_SYMBOL_GPL(fsi_slave_release_range);
 
 static int fsi_slave_scan(struct fsi_slave *slave)
 {
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 34f1e9aea725..141fd38d061f 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -66,6 +66,18 @@ extern void fsi_driver_unregister(struct fsi_driver *fsi_drv);
 		module_driver(__fsi_driver, fsi_driver_register, \
 				fsi_driver_unregister)
 
+/* direct slave API */
+extern int fsi_slave_claim_range(struct fsi_slave *slave,
+		uint32_t addr, uint32_t size);
+extern void fsi_slave_release_range(struct fsi_slave *slave,
+		uint32_t addr, uint32_t size);
+extern int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
+		void *val, size_t size);
+extern int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
+		const void *val, size_t size);
+
+
+
 extern struct bus_type fsi_bus_type;
 
 #endif /* LINUX_FSI_H */
-- 
cgit v1.2.3


From de0d6dbdbdb23ddb85f10d54a516e794f9a873e0 Mon Sep 17 00:00:00 2001
From: "Andrew F. Davis" <afd@ti.com>
Date: Mon, 5 Jun 2017 08:52:08 -0500
Subject: w1: Add subsystem kernel public interface

Like other subsystems we should be able to define slave devices outside
of the w1 directory. To do this we move public facing interface
definitions to include/linux/w1.h and rename the internal definition
file to w1_internal.h.

As w1_family.h and w1_int.h contained almost entirely public
driver interface definitions we simply removed these files and
moved the remaining definitions into w1_internal.h.

With this we can now start to move slave devices out of w1/slaves and
into the subsystem based on the function they implement, again like
other drivers.

Signed-off-by: Andrew F. Davis <afd@ti.com>
Reviewed-by: Sebastian Reichel <sre@kernel.org>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS                           |   1 +
 drivers/power/supply/ds2760_battery.c |   2 +-
 drivers/power/supply/ds2780_battery.c |   2 +-
 drivers/power/supply/ds2781_battery.c |   2 +-
 drivers/w1/masters/ds1wm.c            |   3 +-
 drivers/w1/masters/ds2482.c           |   3 +-
 drivers/w1/masters/ds2490.c           |   3 +-
 drivers/w1/masters/matrox_w1.c        |   3 +-
 drivers/w1/masters/mxc_w1.c           |   3 +-
 drivers/w1/masters/omap_hdq.c         |   3 +-
 drivers/w1/masters/w1-gpio.c          |   3 +-
 drivers/w1/slaves/w1_bq27000.c        |   6 +-
 drivers/w1/slaves/w1_ds2405.c         |   5 +-
 drivers/w1/slaves/w1_ds2406.c         |   6 +-
 drivers/w1/slaves/w1_ds2408.c         |   6 +-
 drivers/w1/slaves/w1_ds2413.c         |   6 +-
 drivers/w1/slaves/w1_ds2423.c         |   6 +-
 drivers/w1/slaves/w1_ds2431.c         |   6 +-
 drivers/w1/slaves/w1_ds2433.c         |   6 +-
 drivers/w1/slaves/w1_ds2438.c         |   5 +-
 drivers/w1/slaves/w1_ds2760.c         |   7 +-
 drivers/w1/slaves/w1_ds2780.c         |   7 +-
 drivers/w1/slaves/w1_ds2781.c         |   7 +-
 drivers/w1/slaves/w1_ds28e04.c        |   6 +-
 drivers/w1/slaves/w1_smem.c           |   7 +-
 drivers/w1/slaves/w1_therm.c          |  10 +-
 drivers/w1/w1.c                       |   6 +-
 drivers/w1/w1.h                       | 336 ----------------------------------
 drivers/w1/w1_family.c                |   3 +-
 drivers/w1/w1_family.h                |  98 ----------
 drivers/w1/w1_int.c                   |   3 +-
 drivers/w1/w1_int.h                   |  27 ---
 drivers/w1/w1_internal.h              |  87 +++++++++
 drivers/w1/w1_io.c                    |   2 +-
 drivers/w1/w1_netlink.c               |   2 +-
 drivers/w1/w1_netlink.h               |   2 +-
 include/linux/w1.h                    | 320 ++++++++++++++++++++++++++++++++
 37 files changed, 479 insertions(+), 531 deletions(-)
 delete mode 100644 drivers/w1/w1.h
 delete mode 100644 drivers/w1/w1_family.h
 delete mode 100644 drivers/w1/w1_int.h
 create mode 100644 drivers/w1/w1_internal.h
 create mode 100644 include/linux/w1.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 2dc114b9e7f7..2461e78b173f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13790,6 +13790,7 @@ M:	Evgeniy Polyakov <zbr@ioremap.net>
 S:	Maintained
 F:	Documentation/w1/
 F:	drivers/w1/
+F:	include/linux/w1.h
 
 W83791D HARDWARE MONITORING DRIVER
 M:	Marc Hulsman <m.hulsman@tudelft.nl>
diff --git a/drivers/power/supply/ds2760_battery.c b/drivers/power/supply/ds2760_battery.c
index 17225689e3f6..ae180dc929c9 100644
--- a/drivers/power/supply/ds2760_battery.c
+++ b/drivers/power/supply/ds2760_battery.c
@@ -28,7 +28,7 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 
-#include "../../w1/w1.h"
+#include <linux/w1.h>
 #include "../../w1/slaves/w1_ds2760.h"
 
 struct ds2760_device_info {
diff --git a/drivers/power/supply/ds2780_battery.c b/drivers/power/supply/ds2780_battery.c
index 1b3b6fa89c28..8edd4aa5f475 100644
--- a/drivers/power/supply/ds2780_battery.c
+++ b/drivers/power/supply/ds2780_battery.c
@@ -21,7 +21,7 @@
 #include <linux/power_supply.h>
 #include <linux/idr.h>
 
-#include "../../w1/w1.h"
+#include <linux/w1.h>
 #include "../../w1/slaves/w1_ds2780.h"
 
 /* Current unit measurement in uA for a 1 milli-ohm sense resistor */
diff --git a/drivers/power/supply/ds2781_battery.c b/drivers/power/supply/ds2781_battery.c
index cc0149131f89..4400402f9ec5 100644
--- a/drivers/power/supply/ds2781_battery.c
+++ b/drivers/power/supply/ds2781_battery.c
@@ -19,7 +19,7 @@
 #include <linux/power_supply.h>
 #include <linux/idr.h>
 
-#include "../../w1/w1.h"
+#include <linux/w1.h>
 #include "../../w1/slaves/w1_ds2781.h"
 
 /* Current unit measurement in uA for a 1 milli-ohm sense resistor */
diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c
index e0b8a4bc73df..fd2e9da27c4b 100644
--- a/drivers/w1/masters/ds1wm.c
+++ b/drivers/w1/masters/ds1wm.c
@@ -25,8 +25,7 @@
 
 #include <asm/io.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
+#include <linux/w1.h>
 
 
 #define DS1WM_CMD	0x00	/* R/W 4 bits command */
diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c
index e8730ea3e3aa..d49681cd29af 100644
--- a/drivers/w1/masters/ds2482.c
+++ b/drivers/w1/masters/ds2482.c
@@ -20,8 +20,7 @@
 #include <linux/delay.h>
 #include <asm/delay.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
+#include <linux/w1.h>
 
 /**
  * Allow the active pullup to be disabled, default is enabled.
diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c
index d748e34d4ce5..46ccb2fc4f60 100644
--- a/drivers/w1/masters/ds2490.c
+++ b/drivers/w1/masters/ds2490.c
@@ -25,8 +25,7 @@
 #include <linux/usb.h>
 #include <linux/slab.h>
 
-#include "../w1_int.h"
-#include "../w1.h"
+#include <linux/w1.h>
 
 /* USB Standard */
 /* USB Control request vendor type */
diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c
index f20d03ecfd1d..d83d7c99d81d 100644
--- a/drivers/w1/masters/matrox_w1.c
+++ b/drivers/w1/masters/matrox_w1.c
@@ -34,8 +34,7 @@
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
+#include <linux/w1.h>
 
 /*
  * Matrox G400 DDC registers.
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index a4621757a47f..74f2e6e6202a 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -19,8 +19,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
+#include <linux/w1.h>
 
 /*
  * MXC W1 Register offsets
diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c
index 3302cbd2344a..3612542b6044 100644
--- a/drivers/w1/masters/omap_hdq.c
+++ b/drivers/w1/masters/omap_hdq.c
@@ -19,8 +19,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
+#include <linux/w1.h>
 
 #define	MOD_NAME	"OMAP_HDQ:"
 
diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c
index a373ae69d9f6..a90728ceec5a 100644
--- a/drivers/w1/masters/w1-gpio.c
+++ b/drivers/w1/masters/w1-gpio.c
@@ -20,8 +20,7 @@
 #include <linux/of.h>
 #include <linux/delay.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
+#include <linux/w1.h>
 
 static u8 w1_gpio_set_pullup(void *data, int delay)
 {
diff --git a/drivers/w1/slaves/w1_bq27000.c b/drivers/w1/slaves/w1_bq27000.c
index d480900a28ab..8046ac45381a 100644
--- a/drivers/w1/slaves/w1_bq27000.c
+++ b/drivers/w1/slaves/w1_bq27000.c
@@ -17,9 +17,9 @@
 #include <linux/mutex.h>
 #include <linux/power/bq27xxx_battery.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_BQ27000	0x01
 
 #define HDQ_CMD_READ	(0)
 #define HDQ_CMD_WRITE	(1<<7)
diff --git a/drivers/w1/slaves/w1_ds2405.c b/drivers/w1/slaves/w1_ds2405.c
index d5d54876cb64..42a1e81060ce 100644
--- a/drivers/w1/slaves/w1_ds2405.c
+++ b/drivers/w1/slaves/w1_ds2405.c
@@ -24,8 +24,9 @@
 #include <linux/string.h>
 #include <linux/types.h>
 
-#include "../w1.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_DS2405	0x05
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Maciej S. Szmigiero <mail@maciej.szmigiero.name>");
diff --git a/drivers/w1/slaves/w1_ds2406.c b/drivers/w1/slaves/w1_ds2406.c
index 709bd5e02eed..fac266366ca3 100644
--- a/drivers/w1/slaves/w1_ds2406.c
+++ b/drivers/w1/slaves/w1_ds2406.c
@@ -17,9 +17,9 @@
 #include <linux/slab.h>
 #include <linux/crc16.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_DS2406	0x12
 
 #define W1_F12_FUNC_READ_STATUS		   0xAA
 #define W1_F12_FUNC_WRITE_STATUS	   0x55
diff --git a/drivers/w1/slaves/w1_ds2408.c b/drivers/w1/slaves/w1_ds2408.c
index e01d2b3997bc..b535d5ec35b6 100644
--- a/drivers/w1/slaves/w1_ds2408.c
+++ b/drivers/w1/slaves/w1_ds2408.c
@@ -15,9 +15,9 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_DS2408	0x29
 
 #define W1_F29_RETRIES		3
 
diff --git a/drivers/w1/slaves/w1_ds2413.c b/drivers/w1/slaves/w1_ds2413.c
index 9cc6f0bc2e95..492e3d010321 100644
--- a/drivers/w1/slaves/w1_ds2413.c
+++ b/drivers/w1/slaves/w1_ds2413.c
@@ -16,9 +16,9 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_DS2413	0x3A
 
 #define W1_F3A_RETRIES                     3
 #define W1_F3A_FUNC_PIO_ACCESS_READ        0xF5
diff --git a/drivers/w1/slaves/w1_ds2423.c b/drivers/w1/slaves/w1_ds2423.c
index 306240fe496c..050407c53b16 100644
--- a/drivers/w1/slaves/w1_ds2423.c
+++ b/drivers/w1/slaves/w1_ds2423.c
@@ -30,9 +30,9 @@
 #include <linux/delay.h>
 #include <linux/crc16.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_COUNTER_DS2423	0x1D
 
 #define CRC16_VALID	0xb001
 #define CRC16_INIT	0
diff --git a/drivers/w1/slaves/w1_ds2431.c b/drivers/w1/slaves/w1_ds2431.c
index 20182d4a4b19..5adecd3face1 100644
--- a/drivers/w1/slaves/w1_ds2431.c
+++ b/drivers/w1/slaves/w1_ds2431.c
@@ -16,9 +16,9 @@
 #include <linux/types.h>
 #include <linux/delay.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_EEPROM_DS2431	0x2D
 
 #define W1_F2D_EEPROM_SIZE		128
 #define W1_F2D_PAGE_COUNT		4
diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c
index 86c30aceea3e..75ad70cfe8e8 100644
--- a/drivers/w1/slaves/w1_ds2433.c
+++ b/drivers/w1/slaves/w1_ds2433.c
@@ -22,9 +22,9 @@
 
 #endif
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_EEPROM_DS2433	0x23
 
 #define W1_EEPROM_SIZE		512
 #define W1_PAGE_COUNT		16
diff --git a/drivers/w1/slaves/w1_ds2438.c b/drivers/w1/slaves/w1_ds2438.c
index 5ededb4965e1..6487fb772a20 100644
--- a/drivers/w1/slaves/w1_ds2438.c
+++ b/drivers/w1/slaves/w1_ds2438.c
@@ -13,8 +13,9 @@
 #include <linux/types.h>
 #include <linux/delay.h>
 
-#include "../w1.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_DS2438		0x26
 
 #define W1_DS2438_RETRIES		3
 
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index f35d1e2ef9bb..26168abfb8b8 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -18,11 +18,12 @@
 #include <linux/idr.h>
 #include <linux/gfp.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
 #include "w1_ds2760.h"
 
+#define W1_FAMILY_DS2760	0x30
+
 static int w1_ds2760_io(struct device *dev, char *buf, int addr, size_t count,
 			int io)
 {
diff --git a/drivers/w1/slaves/w1_ds2780.c b/drivers/w1/slaves/w1_ds2780.c
index 292972212107..a60528131154 100644
--- a/drivers/w1/slaves/w1_ds2780.c
+++ b/drivers/w1/slaves/w1_ds2780.c
@@ -21,11 +21,12 @@
 #include <linux/mutex.h>
 #include <linux/idr.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
 #include "w1_ds2780.h"
 
+#define W1_FAMILY_DS2780	0x32
+
 static int w1_ds2780_do_io(struct device *dev, char *buf, int addr,
 			size_t count, int io)
 {
diff --git a/drivers/w1/slaves/w1_ds2781.c b/drivers/w1/slaves/w1_ds2781.c
index a5d75067b230..645be6e0b24a 100644
--- a/drivers/w1/slaves/w1_ds2781.c
+++ b/drivers/w1/slaves/w1_ds2781.c
@@ -18,11 +18,12 @@
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
 #include "w1_ds2781.h"
 
+#define W1_FAMILY_DS2781	0x3D
+
 static int w1_ds2781_do_io(struct device *dev, char *buf, int addr,
 			size_t count, int io)
 {
diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c
index c62858c05e8f..ec234b846eb3 100644
--- a/drivers/w1/slaves/w1_ds28e04.c
+++ b/drivers/w1/slaves/w1_ds28e04.c
@@ -20,9 +20,9 @@
 #define CRC16_INIT		0
 #define CRC16_VALID		0xb001
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_DS28E04	0x1C
 
 /* Allow the strong pullup to be disabled, but default to enabled.
  * If it was disabled a parasite powered device might not get the required
diff --git a/drivers/w1/slaves/w1_smem.c b/drivers/w1/slaves/w1_smem.c
index 99b03bfb9941..e556b0caff71 100644
--- a/drivers/w1/slaves/w1_smem.c
+++ b/drivers/w1/slaves/w1_smem.c
@@ -27,9 +27,10 @@
 #include <linux/device.h>
 #include <linux/types.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_FAMILY_SMEM_01	0x01
+#define W1_FAMILY_SMEM_81	0x81
 
 static struct w1_family w1_smem_family_01 = {
 	.fid = W1_FAMILY_SMEM_01,
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index ccaf29994a42..cb3fc3c6b0d1 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -30,9 +30,13 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 
-#include "../w1.h"
-#include "../w1_int.h"
-#include "../w1_family.h"
+#include <linux/w1.h>
+
+#define W1_THERM_DS18S20	0x10
+#define W1_THERM_DS1822		0x22
+#define W1_THERM_DS18B20	0x28
+#define W1_THERM_DS1825		0x3B
+#define W1_THERM_DS28EA00	0x42
 
 /* Allow the strong pullup to be disabled, but default to enabled.
  * If it was disabled a parasite powered device might not get the require
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 8172dee5e23c..95ea7e6b1d99 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -28,11 +28,11 @@
 
 #include <linux/atomic.h>
 
-#include "w1.h"
-#include "w1_int.h"
-#include "w1_family.h"
+#include "w1_internal.h"
 #include "w1_netlink.h"
 
+#define W1_FAMILY_DEFAULT	0
+
 static int w1_timeout = 10;
 module_param_named(timeout, w1_timeout, int, 0);
 MODULE_PARM_DESC(timeout, "time in seconds between automatic slave searches");
diff --git a/drivers/w1/w1.h b/drivers/w1/w1.h
deleted file mode 100644
index 758a7a6322e9..000000000000
--- a/drivers/w1/w1.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __W1_H
-#define __W1_H
-
-/**
- * struct w1_reg_num - broken out slave device id
- *
- * @family: identifies the type of device
- * @id: along with family is the unique device id
- * @crc: checksum of the other bytes
- */
-struct w1_reg_num
-{
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64	family:8,
-		id:48,
-		crc:8;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-	__u64	crc:8,
-		id:48,
-		family:8;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-};
-
-#ifdef __KERNEL__
-
-#include <linux/completion.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
-
-#include "w1_family.h"
-
-#define W1_MAXNAMELEN		32
-
-#define W1_SEARCH		0xF0
-#define W1_ALARM_SEARCH		0xEC
-#define W1_CONVERT_TEMP		0x44
-#define W1_SKIP_ROM		0xCC
-#define W1_COPY_SCRATCHPAD	0x48
-#define W1_WRITE_SCRATCHPAD	0x4E
-#define W1_READ_SCRATCHPAD	0xBE
-#define W1_READ_ROM		0x33
-#define W1_READ_PSUPPLY		0xB4
-#define W1_MATCH_ROM		0x55
-#define W1_RESUME_CMD		0xA5
-
-#define W1_SLAVE_ACTIVE		0
-#define W1_SLAVE_DETACH		1
-
-/**
- * struct w1_slave - holds a single slave device on the bus
- *
- * @owner: Points to the one wire "wire" kernel module.
- * @name: Device id is ascii.
- * @w1_slave_entry: data for the linked list
- * @reg_num: the slave id in binary
- * @refcnt: reference count, delete when 0
- * @flags: bit flags for W1_SLAVE_ACTIVE W1_SLAVE_DETACH
- * @ttl: decrement per search this slave isn't found, deatch at 0
- * @master: bus which this slave is on
- * @family: module for device family type
- * @family_data: pointer for use by the family module
- * @dev: kernel device identifier
- *
- */
-struct w1_slave
-{
-	struct module		*owner;
-	unsigned char		name[W1_MAXNAMELEN];
-	struct list_head	w1_slave_entry;
-	struct w1_reg_num	reg_num;
-	atomic_t		refcnt;
-	int			ttl;
-	unsigned long		flags;
-
-	struct w1_master	*master;
-	struct w1_family	*family;
-	void			*family_data;
-	struct device		dev;
-};
-
-typedef void (*w1_slave_found_callback)(struct w1_master *, u64);
-
-
-/**
- * struct w1_bus_master - operations available on a bus master
- *
- * @data: the first parameter in all the functions below
- *
- * @read_bit: Sample the line level @return the level read (0 or 1)
- *
- * @write_bit: Sets the line level
- *
- * @touch_bit: the lowest-level function for devices that really support the
- * 1-wire protocol.
- * touch_bit(0) = write-0 cycle
- * touch_bit(1) = write-1 / read cycle
- * @return the bit read (0 or 1)
- *
- * @read_byte: Reads a bytes. Same as 8 touch_bit(1) calls.
- * @return the byte read
- *
- * @write_byte: Writes a byte. Same as 8 touch_bit(x) calls.
- *
- * @read_block: Same as a series of read_byte() calls
- * @return the number of bytes read
- *
- * @write_block: Same as a series of write_byte() calls
- *
- * @triplet: Combines two reads and a smart write for ROM searches
- * @return bit0=Id bit1=comp_id bit2=dir_taken
- *
- * @reset_bus: long write-0 with a read for the presence pulse detection
- * @return -1=Error, 0=Device present, 1=No device present
- *
- * @set_pullup: Put out a strong pull-up pulse of the specified duration.
- * @return -1=Error, 0=completed
- *
- * @search: Really nice hardware can handles the different types of ROM search
- * w1_master* is passed to the slave found callback.
- * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH
- *
- * Note: read_bit and write_bit are very low level functions and should only
- * be used with hardware that doesn't really support 1-wire operations,
- * like a parallel/serial port.
- * Either define read_bit and write_bit OR define, at minimum, touch_bit and
- * reset_bus.
- *
- */
-struct w1_bus_master
-{
-	void		*data;
-
-	u8		(*read_bit)(void *);
-
-	void		(*write_bit)(void *, u8);
-
-	u8		(*touch_bit)(void *, u8);
-
-	u8		(*read_byte)(void *);
-
-	void		(*write_byte)(void *, u8);
-
-	u8		(*read_block)(void *, u8 *, int);
-
-	void		(*write_block)(void *, const u8 *, int);
-
-	u8		(*triplet)(void *, u8);
-
-	u8		(*reset_bus)(void *);
-
-	u8		(*set_pullup)(void *, int);
-
-	void		(*search)(void *, struct w1_master *,
-		u8, w1_slave_found_callback);
-};
-
-/**
- * enum w1_master_flags - bitfields used in w1_master.flags
- * @W1_ABORT_SEARCH: abort searching early on shutdown
- * @W1_WARN_MAX_COUNT: limit warning when the maximum count is reached
- */
-enum w1_master_flags {
-	W1_ABORT_SEARCH = 0,
-	W1_WARN_MAX_COUNT = 1,
-};
-
-/**
- * struct w1_master - one per bus master
- * @w1_master_entry:	master linked list
- * @owner:		module owner
- * @name:		dynamically allocate bus name
- * @list_mutex:		protect slist and async_list
- * @slist:		linked list of slaves
- * @async_list:		linked list of netlink commands to execute
- * @max_slave_count:	maximum number of slaves to search for at a time
- * @slave_count:	current number of slaves known
- * @attempts:		number of searches ran
- * @slave_ttl:		number of searches before a slave is timed out
- * @initialized:	prevent init/removal race conditions
- * @id:			w1 bus number
- * @search_count:	number of automatic searches to run, -1 unlimited
- * @search_id:		allows continuing a search
- * @refcnt:		reference count
- * @priv:		private data storage
- * @enable_pullup:	allows a strong pullup
- * @pullup_duration:	time for the next strong pullup
- * @flags:		one of w1_master_flags
- * @thread:		thread for bus search and netlink commands
- * @mutex:		protect most of w1_master
- * @bus_mutex:		pretect concurrent bus access
- * @driver:		sysfs driver
- * @dev:		sysfs device
- * @bus_master:		io operations available
- * @seq:		sequence number used for netlink broadcasts
- */
-struct w1_master
-{
-	struct list_head	w1_master_entry;
-	struct module		*owner;
-	unsigned char		name[W1_MAXNAMELEN];
-	/* list_mutex protects just slist and async_list so slaves can be
-	 * searched for and async commands added while the master has
-	 * w1_master.mutex locked and is operating on the bus.
-	 * lock order w1_mlock, w1_master.mutex, w1_master.list_mutex
-	 */
-	struct mutex		list_mutex;
-	struct list_head	slist;
-	struct list_head	async_list;
-	int			max_slave_count, slave_count;
-	unsigned long		attempts;
-	int			slave_ttl;
-	int			initialized;
-	u32			id;
-	int			search_count;
-	/* id to start searching on, to continue a search or 0 to restart */
-	u64			search_id;
-
-	atomic_t		refcnt;
-
-	void			*priv;
-
-	/** 5V strong pullup enabled flag, 1 enabled, zero disabled. */
-	int			enable_pullup;
-	/** 5V strong pullup duration in milliseconds, zero disabled. */
-	int			pullup_duration;
-
-	long			flags;
-
-	struct task_struct	*thread;
-	struct mutex		mutex;
-	struct mutex		bus_mutex;
-
-	struct device_driver	*driver;
-	struct device		dev;
-
-	struct w1_bus_master	*bus_master;
-
-	u32			seq;
-};
-
-/**
- * struct w1_async_cmd - execute callback from the w1_process kthread
- * @async_entry: link entry
- * @cb: callback function, must list_del and destroy this list before
- * returning
- *
- * When inserted into the w1_master async_list, w1_process will execute
- * the callback.  Embed this into the structure with the command details.
- */
-struct w1_async_cmd {
-	struct list_head	async_entry;
-	void (*cb)(struct w1_master *dev, struct w1_async_cmd *async_cmd);
-};
-
-int w1_create_master_attributes(struct w1_master *);
-void w1_destroy_master_attributes(struct w1_master *master);
-void w1_search(struct w1_master *dev, u8 search_type, w1_slave_found_callback cb);
-void w1_search_devices(struct w1_master *dev, u8 search_type, w1_slave_found_callback cb);
-/* call w1_unref_slave to release the reference counts w1_search_slave added */
-struct w1_slave *w1_search_slave(struct w1_reg_num *id);
-/* decrements the reference on sl->master and sl, and cleans up if zero
- * returns the reference count after it has been decremented */
-int w1_unref_slave(struct w1_slave *sl);
-void w1_slave_found(struct w1_master *dev, u64 rn);
-void w1_search_process_cb(struct w1_master *dev, u8 search_type,
-	w1_slave_found_callback cb);
-struct w1_slave *w1_slave_search_device(struct w1_master *dev,
-	struct w1_reg_num *rn);
-struct w1_master *w1_search_master_id(u32 id);
-
-/* Disconnect and reconnect devices in the given family.  Used for finding
- * unclaimed devices after a family has been registered or releasing devices
- * after a family has been unregistered.  Set attach to 1 when a new family
- * has just been registered, to 0 when it has been unregistered.
- */
-void w1_reconnect_slaves(struct w1_family *f, int attach);
-int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn);
-/* 0 success, otherwise EBUSY */
-int w1_slave_detach(struct w1_slave *sl);
-
-u8 w1_triplet(struct w1_master *dev, int bdir);
-void w1_write_8(struct w1_master *, u8);
-u8 w1_read_8(struct w1_master *);
-int w1_reset_bus(struct w1_master *);
-u8 w1_calc_crc8(u8 *, int);
-void w1_write_block(struct w1_master *, const u8 *, int);
-void w1_touch_block(struct w1_master *, u8 *, int);
-u8 w1_read_block(struct w1_master *, u8 *, int);
-int w1_reset_select_slave(struct w1_slave *sl);
-int w1_reset_resume_command(struct w1_master *);
-void w1_next_pullup(struct w1_master *, int);
-
-static inline struct w1_slave* dev_to_w1_slave(struct device *dev)
-{
-	return container_of(dev, struct w1_slave, dev);
-}
-
-static inline struct w1_slave* kobj_to_w1_slave(struct kobject *kobj)
-{
-	return dev_to_w1_slave(container_of(kobj, struct device, kobj));
-}
-
-static inline struct w1_master* dev_to_w1_master(struct device *dev)
-{
-	return container_of(dev, struct w1_master, dev);
-}
-
-extern struct device_driver w1_master_driver;
-extern struct device w1_master_device;
-extern int w1_max_slave_count;
-extern int w1_max_slave_ttl;
-extern struct list_head w1_masters;
-extern struct mutex w1_mlock;
-
-extern int w1_process_callbacks(struct w1_master *dev);
-extern int w1_process(void *);
-
-#endif /* __KERNEL__ */
-
-#endif /* __W1_H */
diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c
index 9759cdaf22f4..f14ab0b340b5 100644
--- a/drivers/w1/w1_family.c
+++ b/drivers/w1/w1_family.c
@@ -18,8 +18,7 @@
 #include <linux/delay.h>
 #include <linux/export.h>
 
-#include "w1_family.h"
-#include "w1.h"
+#include "w1_internal.h"
 
 DEFINE_SPINLOCK(w1_flock);
 static LIST_HEAD(w1_families);
diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h
deleted file mode 100644
index 869a3ff87d29..000000000000
--- a/drivers/w1/w1_family.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __W1_FAMILY_H
-#define __W1_FAMILY_H
-
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/atomic.h>
-
-#define W1_FAMILY_DEFAULT	0
-#define W1_FAMILY_BQ27000	0x01
-#define W1_FAMILY_SMEM_01	0x01
-#define W1_FAMILY_SMEM_81	0x81
-#define W1_FAMILY_DS2405	0x05
-#define W1_THERM_DS18S20 	0x10
-#define W1_FAMILY_DS28E04	0x1C
-#define W1_COUNTER_DS2423	0x1D
-#define W1_THERM_DS1822  	0x22
-#define W1_EEPROM_DS2433  	0x23
-#define W1_FAMILY_DS2438	0x26
-#define W1_THERM_DS18B20 	0x28
-#define W1_FAMILY_DS2408	0x29
-#define W1_EEPROM_DS2431	0x2D
-#define W1_FAMILY_DS2760	0x30
-#define W1_FAMILY_DS2780	0x32
-#define W1_FAMILY_DS2413	0x3A
-#define W1_FAMILY_DS2406	0x12
-#define W1_THERM_DS1825		0x3B
-#define W1_FAMILY_DS2781	0x3D
-#define W1_THERM_DS28EA00	0x42
-
-#define MAXNAMELEN		32
-
-struct w1_slave;
-
-/**
- * struct w1_family_ops - operations for a family type
- * @add_slave: add_slave
- * @remove_slave: remove_slave
- * @groups: sysfs group
- */
-struct w1_family_ops
-{
-	int  (* add_slave)(struct w1_slave *);
-	void (* remove_slave)(struct w1_slave *);
-	const struct attribute_group **groups;
-};
-
-/**
- * struct w1_family - reference counted family structure.
- * @family_entry:	family linked list
- * @fid:		8 bit family identifier
- * @fops:		operations for this family
- * @refcnt:		reference counter
- */
-struct w1_family
-{
-	struct list_head	family_entry;
-	u8			fid;
-
-	struct w1_family_ops	*fops;
-
-	atomic_t		refcnt;
-};
-
-extern spinlock_t w1_flock;
-
-void w1_family_put(struct w1_family *);
-void __w1_family_get(struct w1_family *);
-struct w1_family * w1_family_registered(u8);
-void w1_unregister_family(struct w1_family *);
-int w1_register_family(struct w1_family *);
-
-/**
- * module_w1_driver() - Helper macro for registering a 1-Wire families
- * @__w1_family: w1_family struct
- *
- * Helper macro for 1-Wire families which do not do anything special in module
- * init/exit. This eliminates a lot of boilerplate. Each module may only
- * use this macro once, and calling it replaces module_init() and module_exit()
- */
-#define module_w1_family(__w1_family) \
-	module_driver(__w1_family, w1_register_family, \
-			w1_unregister_family)
-
-#endif /* __W1_FAMILY_H */
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
index 4439d10709bb..1c776178f598 100644
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -21,9 +21,8 @@
 #include <linux/export.h>
 #include <linux/moduleparam.h>
 
-#include "w1.h"
+#include "w1_internal.h"
 #include "w1_netlink.h"
-#include "w1_int.h"
 
 static int w1_search_count = -1; /* Default is continual scan */
 module_param_named(search_count, w1_search_count, int, 0);
diff --git a/drivers/w1/w1_int.h b/drivers/w1/w1_int.h
deleted file mode 100644
index 371989159216..000000000000
--- a/drivers/w1/w1_int.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __W1_INT_H
-#define __W1_INT_H
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-
-#include "w1.h"
-
-int w1_add_master_device(struct w1_bus_master *);
-void w1_remove_master_device(struct w1_bus_master *);
-void __w1_remove_master_device(struct w1_master *);
-
-#endif /* __W1_INT_H */
diff --git a/drivers/w1/w1_internal.h b/drivers/w1/w1_internal.h
new file mode 100644
index 000000000000..1623e2fdccc7
--- /dev/null
+++ b/drivers/w1/w1_internal.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __W1_H
+#define __W1_H
+
+#include <linux/w1.h>
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+
+#define W1_SLAVE_ACTIVE		0
+#define W1_SLAVE_DETACH		1
+
+/**
+ * struct w1_async_cmd - execute callback from the w1_process kthread
+ * @async_entry: link entry
+ * @cb: callback function, must list_del and destroy this list before
+ * returning
+ *
+ * When inserted into the w1_master async_list, w1_process will execute
+ * the callback.  Embed this into the structure with the command details.
+ */
+struct w1_async_cmd {
+	struct list_head	async_entry;
+	void (*cb)(struct w1_master *dev, struct w1_async_cmd *async_cmd);
+};
+
+int w1_create_master_attributes(struct w1_master *master);
+void w1_destroy_master_attributes(struct w1_master *master);
+void w1_search(struct w1_master *dev, u8 search_type,
+	       w1_slave_found_callback cb);
+void w1_search_devices(struct w1_master *dev, u8 search_type,
+		       w1_slave_found_callback cb);
+/* call w1_unref_slave to release the reference counts w1_search_slave added */
+struct w1_slave *w1_search_slave(struct w1_reg_num *id);
+/*
+ * decrements the reference on sl->master and sl, and cleans up if zero
+ * returns the reference count after it has been decremented
+ */
+int w1_unref_slave(struct w1_slave *sl);
+void w1_slave_found(struct w1_master *dev, u64 rn);
+void w1_search_process_cb(struct w1_master *dev, u8 search_type,
+			  w1_slave_found_callback cb);
+struct w1_slave *w1_slave_search_device(struct w1_master *dev,
+					struct w1_reg_num *rn);
+struct w1_master *w1_search_master_id(u32 id);
+
+/* Disconnect and reconnect devices in the given family.  Used for finding
+ * unclaimed devices after a family has been registered or releasing devices
+ * after a family has been unregistered.  Set attach to 1 when a new family
+ * has just been registered, to 0 when it has been unregistered.
+ */
+void w1_reconnect_slaves(struct w1_family *f, int attach);
+int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn);
+/* 0 success, otherwise EBUSY */
+int w1_slave_detach(struct w1_slave *sl);
+
+void __w1_remove_master_device(struct w1_master *dev);
+
+void w1_family_put(struct w1_family *f);
+void __w1_family_get(struct w1_family *f);
+struct w1_family *w1_family_registered(u8 fid);
+
+extern struct device_driver w1_master_driver;
+extern struct device w1_master_device;
+extern int w1_max_slave_count;
+extern int w1_max_slave_ttl;
+extern struct list_head w1_masters;
+extern struct mutex w1_mlock;
+extern spinlock_t w1_flock;
+
+int w1_process_callbacks(struct w1_master *dev);
+int w1_process(void *data);
+
+#endif /* __W1_H */
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 1134e6b1eb02..d191e1f80579 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -18,7 +18,7 @@
 #include <linux/moduleparam.h>
 #include <linux/module.h>
 
-#include "w1.h"
+#include "w1_internal.h"
 
 static int w1_delay_parm = 1;
 module_param_named(delay_coef, w1_delay_parm, int, 0);
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 027950f997d1..f2f099caeb77 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -17,7 +17,7 @@
 #include <linux/netlink.h>
 #include <linux/connector.h>
 
-#include "w1.h"
+#include "w1_internal.h"
 #include "w1_netlink.h"
 
 #if defined(CONFIG_W1_CON) && (defined(CONFIG_CONNECTOR) || (defined(CONFIG_CONNECTOR_MODULE) && defined(CONFIG_W1_MODULE)))
diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h
index b389e5ff5fa5..a36661cd1f05 100644
--- a/drivers/w1/w1_netlink.h
+++ b/drivers/w1/w1_netlink.h
@@ -18,7 +18,7 @@
 #include <asm/types.h>
 #include <linux/connector.h>
 
-#include "w1.h"
+#include "w1_internal.h"
 
 /**
  * enum w1_cn_msg_flags - bitfield flags for struct cn_msg.flags
diff --git a/include/linux/w1.h b/include/linux/w1.h
new file mode 100644
index 000000000000..90cbe7e65059
--- /dev/null
+++ b/include/linux/w1.h
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_W1_H
+#define __LINUX_W1_H
+
+#include <linux/device.h>
+
+/**
+ * struct w1_reg_num - broken out slave device id
+ *
+ * @family: identifies the type of device
+ * @id: along with family is the unique device id
+ * @crc: checksum of the other bytes
+ */
+struct w1_reg_num {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u64	family:8,
+		id:48,
+		crc:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u64	crc:8,
+		id:48,
+		family:8;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+#ifdef __KERNEL__
+
+#define W1_MAXNAMELEN		32
+
+#define W1_SEARCH		0xF0
+#define W1_ALARM_SEARCH		0xEC
+#define W1_CONVERT_TEMP		0x44
+#define W1_SKIP_ROM		0xCC
+#define W1_COPY_SCRATCHPAD	0x48
+#define W1_WRITE_SCRATCHPAD	0x4E
+#define W1_READ_SCRATCHPAD	0xBE
+#define W1_READ_ROM		0x33
+#define W1_READ_PSUPPLY		0xB4
+#define W1_MATCH_ROM		0x55
+#define W1_RESUME_CMD		0xA5
+
+/**
+ * struct w1_slave - holds a single slave device on the bus
+ *
+ * @owner: Points to the one wire "wire" kernel module.
+ * @name: Device id is ascii.
+ * @w1_slave_entry: data for the linked list
+ * @reg_num: the slave id in binary
+ * @refcnt: reference count, delete when 0
+ * @flags: bit flags for W1_SLAVE_ACTIVE W1_SLAVE_DETACH
+ * @ttl: decrement per search this slave isn't found, deatch at 0
+ * @master: bus which this slave is on
+ * @family: module for device family type
+ * @family_data: pointer for use by the family module
+ * @dev: kernel device identifier
+ *
+ */
+struct w1_slave {
+	struct module		*owner;
+	unsigned char		name[W1_MAXNAMELEN];
+	struct list_head	w1_slave_entry;
+	struct w1_reg_num	reg_num;
+	atomic_t		refcnt;
+	int			ttl;
+	unsigned long		flags;
+
+	struct w1_master	*master;
+	struct w1_family	*family;
+	void			*family_data;
+	struct device		dev;
+};
+
+typedef void (*w1_slave_found_callback)(struct w1_master *, u64);
+
+/**
+ * struct w1_bus_master - operations available on a bus master
+ *
+ * @data: the first parameter in all the functions below
+ *
+ * @read_bit: Sample the line level @return the level read (0 or 1)
+ *
+ * @write_bit: Sets the line level
+ *
+ * @touch_bit: the lowest-level function for devices that really support the
+ * 1-wire protocol.
+ * touch_bit(0) = write-0 cycle
+ * touch_bit(1) = write-1 / read cycle
+ * @return the bit read (0 or 1)
+ *
+ * @read_byte: Reads a bytes. Same as 8 touch_bit(1) calls.
+ * @return the byte read
+ *
+ * @write_byte: Writes a byte. Same as 8 touch_bit(x) calls.
+ *
+ * @read_block: Same as a series of read_byte() calls
+ * @return the number of bytes read
+ *
+ * @write_block: Same as a series of write_byte() calls
+ *
+ * @triplet: Combines two reads and a smart write for ROM searches
+ * @return bit0=Id bit1=comp_id bit2=dir_taken
+ *
+ * @reset_bus: long write-0 with a read for the presence pulse detection
+ * @return -1=Error, 0=Device present, 1=No device present
+ *
+ * @set_pullup: Put out a strong pull-up pulse of the specified duration.
+ * @return -1=Error, 0=completed
+ *
+ * @search: Really nice hardware can handles the different types of ROM search
+ * w1_master* is passed to the slave found callback.
+ * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH
+ *
+ * Note: read_bit and write_bit are very low level functions and should only
+ * be used with hardware that doesn't really support 1-wire operations,
+ * like a parallel/serial port.
+ * Either define read_bit and write_bit OR define, at minimum, touch_bit and
+ * reset_bus.
+ *
+ */
+struct w1_bus_master {
+	void		*data;
+
+	u8		(*read_bit)(void *);
+
+	void		(*write_bit)(void *, u8);
+
+	u8		(*touch_bit)(void *, u8);
+
+	u8		(*read_byte)(void *);
+
+	void		(*write_byte)(void *, u8);
+
+	u8		(*read_block)(void *, u8 *, int);
+
+	void		(*write_block)(void *, const u8 *, int);
+
+	u8		(*triplet)(void *, u8);
+
+	u8		(*reset_bus)(void *);
+
+	u8		(*set_pullup)(void *, int);
+
+	void		(*search)(void *, struct w1_master *,
+		u8, w1_slave_found_callback);
+};
+
+/**
+ * enum w1_master_flags - bitfields used in w1_master.flags
+ * @W1_ABORT_SEARCH: abort searching early on shutdown
+ * @W1_WARN_MAX_COUNT: limit warning when the maximum count is reached
+ */
+enum w1_master_flags {
+	W1_ABORT_SEARCH = 0,
+	W1_WARN_MAX_COUNT = 1,
+};
+
+/**
+ * struct w1_master - one per bus master
+ * @w1_master_entry:	master linked list
+ * @owner:		module owner
+ * @name:		dynamically allocate bus name
+ * @list_mutex:		protect slist and async_list
+ * @slist:		linked list of slaves
+ * @async_list:		linked list of netlink commands to execute
+ * @max_slave_count:	maximum number of slaves to search for at a time
+ * @slave_count:	current number of slaves known
+ * @attempts:		number of searches ran
+ * @slave_ttl:		number of searches before a slave is timed out
+ * @initialized:	prevent init/removal race conditions
+ * @id:			w1 bus number
+ * @search_count:	number of automatic searches to run, -1 unlimited
+ * @search_id:		allows continuing a search
+ * @refcnt:		reference count
+ * @priv:		private data storage
+ * @enable_pullup:	allows a strong pullup
+ * @pullup_duration:	time for the next strong pullup
+ * @flags:		one of w1_master_flags
+ * @thread:		thread for bus search and netlink commands
+ * @mutex:		protect most of w1_master
+ * @bus_mutex:		pretect concurrent bus access
+ * @driver:		sysfs driver
+ * @dev:		sysfs device
+ * @bus_master:		io operations available
+ * @seq:		sequence number used for netlink broadcasts
+ */
+struct w1_master {
+	struct list_head	w1_master_entry;
+	struct module		*owner;
+	unsigned char		name[W1_MAXNAMELEN];
+	/* list_mutex protects just slist and async_list so slaves can be
+	 * searched for and async commands added while the master has
+	 * w1_master.mutex locked and is operating on the bus.
+	 * lock order w1_mlock, w1_master.mutex, w1_master.list_mutex
+	 */
+	struct mutex		list_mutex;
+	struct list_head	slist;
+	struct list_head	async_list;
+	int			max_slave_count, slave_count;
+	unsigned long		attempts;
+	int			slave_ttl;
+	int			initialized;
+	u32			id;
+	int			search_count;
+	/* id to start searching on, to continue a search or 0 to restart */
+	u64			search_id;
+
+	atomic_t		refcnt;
+
+	void			*priv;
+
+	/** 5V strong pullup enabled flag, 1 enabled, zero disabled. */
+	int			enable_pullup;
+	/** 5V strong pullup duration in milliseconds, zero disabled. */
+	int			pullup_duration;
+
+	long			flags;
+
+	struct task_struct	*thread;
+	struct mutex		mutex;
+	struct mutex		bus_mutex;
+
+	struct device_driver	*driver;
+	struct device		dev;
+
+	struct w1_bus_master	*bus_master;
+
+	u32			seq;
+};
+
+int w1_add_master_device(struct w1_bus_master *master);
+void w1_remove_master_device(struct w1_bus_master *master);
+
+/**
+ * struct w1_family_ops - operations for a family type
+ * @add_slave: add_slave
+ * @remove_slave: remove_slave
+ * @groups: sysfs group
+ */
+struct w1_family_ops {
+	int  (*add_slave)(struct w1_slave *sl);
+	void (*remove_slave)(struct w1_slave *sl);
+	const struct attribute_group **groups;
+};
+
+/**
+ * struct w1_family - reference counted family structure.
+ * @family_entry:	family linked list
+ * @fid:		8 bit family identifier
+ * @fops:		operations for this family
+ * @refcnt:		reference counter
+ */
+struct w1_family {
+	struct list_head	family_entry;
+	u8			fid;
+
+	struct w1_family_ops	*fops;
+
+	atomic_t		refcnt;
+};
+
+int w1_register_family(struct w1_family *family);
+void w1_unregister_family(struct w1_family *family);
+
+/**
+ * module_w1_driver() - Helper macro for registering a 1-Wire families
+ * @__w1_family: w1_family struct
+ *
+ * Helper macro for 1-Wire families which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_w1_family(__w1_family) \
+	module_driver(__w1_family, w1_register_family, \
+			w1_unregister_family)
+
+u8 w1_triplet(struct w1_master *dev, int bdir);
+void w1_write_8(struct w1_master *, u8);
+u8 w1_read_8(struct w1_master *);
+int w1_reset_bus(struct w1_master *);
+u8 w1_calc_crc8(u8 *, int);
+void w1_write_block(struct w1_master *, const u8 *, int);
+void w1_touch_block(struct w1_master *, u8 *, int);
+u8 w1_read_block(struct w1_master *, u8 *, int);
+int w1_reset_select_slave(struct w1_slave *sl);
+int w1_reset_resume_command(struct w1_master *);
+void w1_next_pullup(struct w1_master *, int);
+
+static inline struct w1_slave* dev_to_w1_slave(struct device *dev)
+{
+	return container_of(dev, struct w1_slave, dev);
+}
+
+static inline struct w1_slave* kobj_to_w1_slave(struct kobject *kobj)
+{
+	return dev_to_w1_slave(container_of(kobj, struct device, kobj));
+}
+
+static inline struct w1_master* dev_to_w1_master(struct device *dev)
+{
+	return container_of(dev, struct w1_master, dev);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __LINUX_W1_H */
-- 
cgit v1.2.3


From 4055351cdbb44e8646ff67b346c80097e1d2c04c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 09:37:58 +0200
Subject: fs: remove the unused error argument to dio_end_io()

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/btrfs/inode.c   | 6 +++---
 fs/direct-io.c     | 3 +--
 include/linux/fs.h | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17cbe9306faf..758b2666885e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8244,7 +8244,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	kfree(dip);
 
 	dio_bio->bi_error = bio->bi_error;
-	dio_end_io(dio_bio, bio->bi_error);
+	dio_end_io(dio_bio);
 
 	if (io_bio->end_io)
 		io_bio->end_io(io_bio, err);
@@ -8304,7 +8304,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	kfree(dip);
 
 	dio_bio->bi_error = bio->bi_error;
-	dio_end_io(dio_bio, bio->bi_error);
+	dio_end_io(dio_bio);
 	bio_put(bio);
 }
 
@@ -8673,7 +8673,7 @@ free_ordered:
 		 * Releases and cleans up our dio_bio, no need to bio_put()
 		 * nor bio_endio()/bio_io_error() against dio_bio.
 		 */
-		dio_end_io(dio_bio, ret);
+		dio_end_io(dio_bio);
 	}
 	if (io_bio)
 		bio_put(io_bio);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..04247a6c3f73 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
 /**
  * dio_end_io - handle the end io action for the given bio
  * @bio: The direct io bio thats being completed
- * @error: Error if there was one
  *
  * This is meant to be called by any filesystem that uses their own dio_submit_t
  * so that the DIO specific endio actions are dealt with after the filesystem
  * has done it's completion work.
  */
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
 {
 	struct dio *dio = bio->bi_private;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..4388ab58843d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2843,7 +2843,7 @@ enum {
 	DIO_SKIP_DIO_COUNT = 0x08,
 };
 
-void dio_end_io(struct bio *bio, int error);
+void dio_end_io(struct bio *bio);
 
 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 			     struct block_device *bdev, struct iov_iter *iter,
-- 
cgit v1.2.3


From 1be5690984588953e759af0a4c6ddac182a1806c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 09:38:03 +0200
Subject: dm: change ->end_io calling convention

Turn the error paramter into a pointer so that target drivers can change
the value, and make sure only DM_ENDIO_* values are returned from the
methods.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/dm-cache-target.c  |  4 ++--
 drivers/md/dm-flakey.c        |  8 ++++----
 drivers/md/dm-log-writes.c    |  4 ++--
 drivers/md/dm-mpath.c         | 11 ++++++-----
 drivers/md/dm-raid1.c         | 14 +++++++-------
 drivers/md/dm-snap.c          |  4 ++--
 drivers/md/dm-stripe.c        | 14 +++++++-------
 drivers/md/dm-thin.c          |  4 ++--
 drivers/md/dm.c               | 36 ++++++++++++++++++------------------
 include/linux/device-mapper.h |  2 +-
 10 files changed, 51 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d682a0511381..c48612e6d525 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2820,7 +2820,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	return r;
 }
 
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio, int *error)
 {
 	struct cache *cache = ti->private;
 	unsigned long flags;
@@ -2838,7 +2838,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 	bio_drop_shared_lock(cache, bio);
 	accounted_complete(cache, bio);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static int write_dirty_bitset(struct cache *cache)
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index e8f093b323ce..c9539917a59b 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -358,12 +358,12 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio, int *error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-	if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+	if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
 		if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
 		    all_corrupt_bio_flags_match(bio, fc)) {
 			/*
@@ -377,11 +377,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 			 * Error read during the down_interval if drop_writes
 			 * and error_writes were not configured.
 			 */
-			return -EIO;
+			*error = -EIO;
 		}
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static void flakey_status(struct dm_target *ti, status_type_t type,
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index e42264706c59..cc57c7fa1268 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -664,7 +664,7 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio, int *error)
 {
 	struct log_writes_c *lc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +686,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
 		spin_unlock_irqrestore(&lc->blocks_lock, flags);
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 /*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index bf6e49c780d5..ceeeb495d01c 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1517,14 +1517,15 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *error)
 {
 	struct multipath *m = ti->private;
 	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
 	struct pgpath *pgpath = mpio->pgpath;
 	unsigned long flags;
+	int r = DM_ENDIO_DONE;
 
-	if (!error || noretry_error(error))
+	if (!*error || noretry_error(*error))
 		goto done;
 
 	if (pgpath)
@@ -1533,7 +1534,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int err
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
 	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 		dm_report_EIO(m);
-		error = -EIO;
+		*error = -EIO;
 		goto done;
 	}
 
@@ -1546,7 +1547,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int err
 	if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
 		queue_work(kmultipathd, &m->process_queued_bios);
 
-	error = DM_ENDIO_INCOMPLETE;
+	r = DM_ENDIO_INCOMPLETE;
 done:
 	if (pgpath) {
 		struct path_selector *ps = &pgpath->pg->ps;
@@ -1555,7 +1556,7 @@ done:
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
 
-	return error;
+	return r;
 }
 
 /*
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d9c0c6a77eb5..77bcf50ce75f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1236,7 +1236,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error)
 {
 	int rw = bio_data_dir(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1252,16 +1252,16 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		if (!(bio->bi_opf & REQ_PREFLUSH) &&
 		    bio_op(bio) != REQ_OP_DISCARD)
 			dm_rh_dec(ms->rh, bio_record->write_region);
-		return error;
+		return DM_ENDIO_DONE;
 	}
 
-	if (error == -EOPNOTSUPP)
-		return error;
+	if (*error == -EOPNOTSUPP)
+		return DM_ENDIO_DONE;
 
 	if (bio->bi_opf & REQ_RAHEAD)
-		return error;
+		return DM_ENDIO_DONE;
 
-	if (unlikely(error)) {
+	if (unlikely(*error)) {
 		m = bio_record->m;
 
 		DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1285,7 +1285,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		DMERR("All replicated volumes dead, failing I/O");
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5a7f73f9a6fb..79a845798e2f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1851,14 +1851,14 @@ out_unlock:
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int *error)
 {
 	struct dm_snapshot *s = ti->private;
 
 	if (is_bio_tracked(bio))
 		stop_tracking_chunk(s, bio);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 780e95889a7c..49888bc2c909 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,20 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
 	}
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio, int *error)
 {
 	unsigned i;
 	char major_minor[16];
 	struct stripe_c *sc = ti->private;
 
-	if (!error)
-		return 0; /* I/O complete */
+	if (!*error)
+		return DM_ENDIO_DONE; /* I/O complete */
 
 	if (bio->bi_opf & REQ_RAHEAD)
-		return error;
+		return DM_ENDIO_DONE;
 
-	if (error == -EOPNOTSUPP)
-		return error;
+	if (*error == -EOPNOTSUPP)
+		return DM_ENDIO_DONE;
 
 	memset(major_minor, 0, sizeof(major_minor));
 	sprintf(major_minor, "%d:%d",
@@ -409,7 +409,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
 				schedule_work(&sc->trigger_event);
 		}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50daed08..22b1a64c44b7 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4177,7 +4177,7 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
 	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio, int *err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4212,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 	if (h->cell)
 		cell_defer_no_holder(h->tc, h->cell);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static void thin_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 499f8209bacf..7a7047211c64 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -845,24 +845,7 @@ static void clone_endio(struct bio *bio)
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
 
-	if (endio) {
-		r = endio(tio->ti, bio, error);
-		if (r < 0 || r == DM_ENDIO_REQUEUE)
-			/*
-			 * error and requeue request are handled
-			 * in dec_pending().
-			 */
-			error = r;
-		else if (r == DM_ENDIO_INCOMPLETE)
-			/* The target will handle the io */
-			return;
-		else if (r) {
-			DMWARN("unimplemented target endio return value: %d", r);
-			BUG();
-		}
-	}
-
-	if (unlikely(r == -EREMOTEIO)) {
+	if (unlikely(error == -EREMOTEIO)) {
 		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
 		    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
 			disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
 			disable_write_zeroes(md);
 	}
 
+	if (endio) {
+		r = endio(tio->ti, bio, &error);
+		switch (r) {
+		case DM_ENDIO_REQUEUE:
+			error = DM_ENDIO_REQUEUE;
+			/*FALLTHRU*/
+		case DM_ENDIO_DONE:
+			break;
+		case DM_ENDIO_INCOMPLETE:
+			/* The target will handle the io */
+			return;
+		default:
+			DMWARN("unimplemented target endio return value: %d", r);
+			BUG();
+		}
+	}
+
 	free_tio(tio);
 	dec_pending(io, error);
 }
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c0c362..dec227acc13b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,7 +72,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error);
+			    struct bio *bio, int *error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
 				    struct request *clone, int error,
 				    union map_info *map_context);
-- 
cgit v1.2.3


From 2a842acab109f40f0d7d10b38e9ca88390628996 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 09:38:04 +0200
Subject: block: introduce new block status code type

Currently we use nornal Linux errno values in the block layer, and while
we accept any error a few have overloaded magic meanings.  This patch
instead introduces a new  blk_status_t value that holds block layer specific
status codes and explicitly explains their meaning.  Helpers to convert from
and to the previous special meanings are provided for now, but I suspect
we want to get rid of them in the long run - those drivers that have a
errno input (e.g. networking) usually get errnos that don't know about
the special block layer overloads, and similarly returning them to userspace
will usually return somethings that strictly speaking isn't correct
for file system operations, but that's left as an exercise for later.

For now the set of errors is a very limited set that closely corresponds
to the previous overloaded errno values, but there is some low hanging
fruite to improve it.

blk_status_t (ab)uses the sparse __bitwise annotations to allow for sparse
typechecking, so that we can easily catch places passing the wrong values.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 arch/s390/include/asm/eadm.h        |   6 +-
 arch/um/drivers/ubd_kern.c          |   2 +-
 block/blk-core.c                    | 156 +++++++++++++++++++++---------------
 block/blk-exec.c                    |   4 +-
 block/blk-flush.c                   |   8 +-
 block/blk-mq.c                      |   8 +-
 block/bsg-lib.c                     |   4 +-
 block/bsg.c                         |   6 +-
 drivers/block/DAC960.c              |   2 +-
 drivers/block/amiflop.c             |  10 +--
 drivers/block/aoe/aoecmd.c          |   2 +-
 drivers/block/ataflop.c             |  16 ++--
 drivers/block/cciss.c               |   3 +-
 drivers/block/floppy.c              |   4 +-
 drivers/block/loop.c                |   2 +-
 drivers/block/mtip32xx/mtip32xx.c   |  16 ++--
 drivers/block/mtip32xx/mtip32xx.h   |   2 +-
 drivers/block/nbd.c                 |  14 ++--
 drivers/block/null_blk.c            |   9 ++-
 drivers/block/paride/pcd.c          |   8 +-
 drivers/block/paride/pd.c           |   2 +-
 drivers/block/paride/pf.c           |  18 ++---
 drivers/block/ps3disk.c             |  11 +--
 drivers/block/rbd.c                 |   8 +-
 drivers/block/skd_main.c            |  31 ++++---
 drivers/block/sunvdc.c              |   4 +-
 drivers/block/swim.c                |   6 +-
 drivers/block/swim3.c               |  26 +++---
 drivers/block/sx8.c                 |  20 ++---
 drivers/block/virtio_blk.c          |  10 +--
 drivers/block/xen-blkfront.c        |  16 ++--
 drivers/block/xsysace.c             |   8 +-
 drivers/block/z2ram.c               |   4 +-
 drivers/cdrom/gdrom.c               |   9 ++-
 drivers/ide/ide-atapi.c             |   9 ++-
 drivers/ide/ide-cd.c                |  10 +--
 drivers/ide/ide-dma.c               |   2 +-
 drivers/ide/ide-eh.c                |  16 ++--
 drivers/ide/ide-floppy.c            |   6 +-
 drivers/ide/ide-io.c                |  10 +--
 drivers/ide/ide-pm.c                |   6 +-
 drivers/ide/ide-tape.c              |   2 +-
 drivers/ide/ide-taskfile.c          |   6 +-
 drivers/ide/siimage.c               |   6 +-
 drivers/md/dm-mpath.c               |  27 +++----
 drivers/md/dm-rq.c                  |  20 ++---
 drivers/md/dm-rq.h                  |   2 +-
 drivers/memstick/core/ms_block.c    |   7 +-
 drivers/memstick/core/mspro_block.c |   8 +-
 drivers/mmc/core/block.c            |  37 +++++----
 drivers/mmc/core/queue.c            |   2 +-
 drivers/mtd/mtd_blkdevs.c           |  30 ++++---
 drivers/mtd/ubi/block.c             |   2 +-
 drivers/nvme/host/core.c            |  29 +++----
 drivers/nvme/host/lightnvm.c        |   2 +-
 drivers/nvme/host/pci.c             |   8 +-
 drivers/s390/block/dasd.c           |  36 +++++----
 drivers/s390/block/scm_blk.c        |   8 +-
 drivers/s390/block/scm_blk.h        |   4 +-
 drivers/s390/cio/eadm_sch.c         |   6 +-
 drivers/s390/cio/scm.c              |   2 +-
 drivers/sbus/char/jsflash.c         |   4 +-
 drivers/scsi/osd/osd_initiator.c    |  20 ++---
 drivers/scsi/osst.c                 |   2 +-
 drivers/scsi/scsi_error.c           |   2 +-
 drivers/scsi/scsi_lib.c             |  51 ++++--------
 drivers/scsi/scsi_transport_sas.c   |   2 +-
 drivers/scsi/sg.c                   |   6 +-
 drivers/scsi/st.c                   |   2 +-
 drivers/target/target_core_pscsi.c  |   4 +-
 include/linux/blk-mq.h              |   4 +-
 include/linux/blk_types.h           |  16 ++++
 include/linux/blkdev.h              |  21 ++---
 include/linux/device-mapper.h       |   2 +-
 include/linux/ide.h                 |   6 +-
 include/scsi/osd_initiator.h        |   2 +-
 76 files changed, 474 insertions(+), 428 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 67026300c88e..144809a3f4f6 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/blkdev.h>
 
 struct arqb {
 	u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
 	int (*probe) (struct scm_device *scmdev);
 	int (*remove) (struct scm_device *scmdev);
 	void (*notify) (struct scm_device *scmdev, enum scm_event event);
-	void (*handler) (struct scm_device *scmdev, void *data, int error);
+	void (*handler) (struct scm_device *scmdev, void *data,
+			blk_status_t error);
 };
 
 int scm_driver_register(struct scm_driver *scmdrv);
 void scm_driver_unregister(struct scm_driver *scmdrv);
 
 int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
 
 #endif /* _ASM_S390_EADM_H */
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 85410279beab..b55fe9bf5d3e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			blk_end_request(
 				(*irq_req_buffer)[count]->req,
-				0,
+				BLK_STS_OK,
 				(*irq_req_buffer)[count]->length
 			);
 			kfree((*irq_req_buffer)[count]);
diff --git a/block/blk-core.c b/block/blk-core.c
index c7068520794b..e942a9f814c7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -129,11 +129,66 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+static const struct {
+	int		errno;
+	const char	*name;
+} blk_errors[] = {
+	[BLK_STS_OK]		= { 0,		"" },
+	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
+	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
+	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
+	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
+	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
+	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
+	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
+	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
+	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
+
+	/* everything else not covered above: */
+	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
+};
+
+blk_status_t errno_to_blk_status(int errno)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
+		if (blk_errors[i].errno == errno)
+			return (__force blk_status_t)i;
+	}
+
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(errno_to_blk_status);
+
+int blk_status_to_errno(blk_status_t status)
+{
+	int idx = (__force int)status;
+
+	if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors)))
+		return -EIO;
+	return blk_errors[idx].errno;
+}
+EXPORT_SYMBOL_GPL(blk_status_to_errno);
+
+static void print_req_error(struct request *req, blk_status_t status)
+{
+	int idx = (__force int)status;
+
+	if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors)))
+		return;
+
+	printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+			   __func__, blk_errors[idx].name, req->rq_disk ?
+			   req->rq_disk->disk_name : "?",
+			   (unsigned long long)blk_rq_pos(req));
+}
+
 static void req_bio_endio(struct request *rq, struct bio *bio,
-			  unsigned int nbytes, int error)
+			  unsigned int nbytes, blk_status_t error)
 {
 	if (error)
-		bio->bi_error = error;
+		bio->bi_error = blk_status_to_errno(error);
 
 	if (unlikely(rq->rq_flags & RQF_QUIET))
 		bio_set_flag(bio, BIO_QUIET);
@@ -2177,29 +2232,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
  * @q:  the queue to submit the request
  * @rq: the request being queued
  */
-int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned long flags;
 	int where = ELEVATOR_INSERT_BACK;
 
 	if (blk_cloned_rq_check_limits(q, rq))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (rq->rq_disk &&
 	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
 		blk_mq_sched_insert_request(rq, false, true, false, false);
-		return 0;
+		return BLK_STS_OK;
 	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
-		return -ENODEV;
+		return BLK_STS_IOERR;
 	}
 
 	/*
@@ -2216,7 +2271,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return 0;
+	return BLK_STS_OK;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
@@ -2450,15 +2505,14 @@ struct request *blk_peek_request(struct request_queue *q)
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
-			int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
-
 			rq->rq_flags |= RQF_QUIET;
 			/*
 			 * Mark this request as started so we don't trigger
 			 * any debug logic in the end I/O path.
 			 */
 			blk_start_request(rq);
-			__blk_end_request_all(rq, err);
+			__blk_end_request_all(rq, ret == BLKPREP_INVALID ?
+					BLK_STS_TARGET : BLK_STS_IOERR);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
@@ -2547,7 +2601,7 @@ EXPORT_SYMBOL(blk_fetch_request);
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @req:      the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete @req
  *
  * Description:
@@ -2566,49 +2620,19 @@ EXPORT_SYMBOL(blk_fetch_request);
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
  **/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, blk_status_t error,
+		unsigned int nr_bytes)
 {
 	int total_bytes;
 
-	trace_block_rq_complete(req, error, nr_bytes);
+	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
 
 	if (!req->bio)
 		return false;
 
-	if (error && !blk_rq_is_passthrough(req) &&
-	    !(req->rq_flags & RQF_QUIET)) {
-		char *error_type;
-
-		switch (error) {
-		case -ENOLINK:
-			error_type = "recoverable transport";
-			break;
-		case -EREMOTEIO:
-			error_type = "critical target";
-			break;
-		case -EBADE:
-			error_type = "critical nexus";
-			break;
-		case -ETIMEDOUT:
-			error_type = "timeout";
-			break;
-		case -ENOSPC:
-			error_type = "critical space allocation";
-			break;
-		case -ENODATA:
-			error_type = "critical medium";
-			break;
-		case -EIO:
-		default:
-			error_type = "I/O";
-			break;
-		}
-		printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
-				   __func__, error_type, req->rq_disk ?
-				   req->rq_disk->disk_name : "?",
-				   (unsigned long long)blk_rq_pos(req));
-
-	}
+	if (unlikely(error && !blk_rq_is_passthrough(req) &&
+		     !(req->rq_flags & RQF_QUIET)))
+		print_req_error(req, error);
 
 	blk_account_io_completion(req, nr_bytes);
 
@@ -2674,7 +2698,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-static bool blk_update_bidi_request(struct request *rq, int error,
+static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
 				    unsigned int nr_bytes,
 				    unsigned int bidi_bytes)
 {
@@ -2715,7 +2739,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
 /*
  * queue lock must be held
  */
-void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, blk_status_t error)
 {
 	struct request_queue *q = req->q;
 
@@ -2752,7 +2776,7 @@ EXPORT_SYMBOL(blk_finish_request);
 /**
  * blk_end_bidi_request - Complete a bidi request
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2766,7 +2790,7 @@ EXPORT_SYMBOL(blk_finish_request);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool blk_end_bidi_request(struct request *rq, int error,
+static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
 				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
@@ -2785,7 +2809,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2797,7 +2821,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@ -2811,7 +2835,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2822,7 +2846,8 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, blk_status_t error,
+		unsigned int nr_bytes)
 {
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
@@ -2831,12 +2856,12 @@ EXPORT_SYMBOL(blk_end_request);
 /**
  * blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
  *
  * Description:
  *     Completely finish @rq.
  */
-void blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all(struct request *rq, blk_status_t error)
 {
 	bool pending;
 	unsigned int bidi_bytes = 0;
@@ -2852,7 +2877,7 @@ EXPORT_SYMBOL(blk_end_request_all);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2862,7 +2887,8 @@ EXPORT_SYMBOL(blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, blk_status_t error,
+		unsigned int nr_bytes)
 {
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
@@ -2871,12 +2897,12 @@ EXPORT_SYMBOL(__blk_end_request);
 /**
  * __blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error:    block status code
  *
  * Description:
  *     Completely finish @rq.  Must be called with queue lock held.
  */
-void __blk_end_request_all(struct request *rq, int error)
+void __blk_end_request_all(struct request *rq, blk_status_t error)
 {
 	bool pending;
 	unsigned int bidi_bytes = 0;
@@ -2892,7 +2918,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
+ * @error:    block status code
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
@@ -2902,7 +2928,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
-bool __blk_end_request_cur(struct request *rq, int error)
+bool __blk_end_request_cur(struct request *rq, blk_status_t error)
 {
 	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
@@ -3243,7 +3269,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		 * Short-circuit if @q is dead
 		 */
 		if (unlikely(blk_queue_dying(q))) {
-			__blk_end_request_all(rq, -ENODEV);
+			__blk_end_request_all(rq, BLK_STS_IOERR);
 			continue;
 		}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a9451e3b8587..5c0f3dc446dc 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
  * @rq: request to complete
  * @error: end I/O status of the request
  */
-static void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, blk_status_t error)
 {
 	struct completion *waiting = rq->end_io_data;
 
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
-		__blk_end_request_all(rq, -ENXIO);
+		__blk_end_request_all(rq, BLK_STS_IOERR);
 		spin_unlock_irq(q->queue_lock);
 		return;
 	}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c4e0880b54bb..a572b47fa059 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  */
 static bool blk_flush_complete_seq(struct request *rq,
 				   struct blk_flush_queue *fq,
-				   unsigned int seq, int error)
+				   unsigned int seq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
 	return kicked | queued;
 }
 
-static void flush_end_io(struct request *flush_rq, int error)
+static void flush_end_io(struct request *flush_rq, blk_status_t error)
 {
 	struct request_queue *q = flush_rq->q;
 	struct list_head *running;
@@ -341,7 +341,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	return blk_flush_queue_rq(flush_rq, false);
 }
 
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
@@ -382,7 +382,7 @@ static void flush_data_end_io(struct request *rq, int error)
 		blk_run_queue_async(q);
 }
 
-static void mq_flush_data_end_io(struct request *rq, int error)
+static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 22438d5036a3..adcc1c0dce6e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -394,7 +394,7 @@ void blk_mq_free_request(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
-inline void __blk_mq_end_request(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	blk_account_io_done(rq);
 
@@ -409,7 +409,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_mq_end_request);
 
-void blk_mq_end_request(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
 		BUG();
@@ -988,7 +988,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 			pr_err("blk-mq: bad return on queue: %d\n", ret);
 		case BLK_MQ_RQ_QUEUE_ERROR:
 			errors++;
-			blk_mq_end_request(rq, -EIO);
+			blk_mq_end_request(rq, BLK_STS_IOERR);
 			break;
 		}
 
@@ -1433,7 +1433,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 
 	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
 		*cookie = BLK_QC_T_NONE;
-		blk_mq_end_request(rq, -EIO);
+		blk_mq_end_request(rq, BLK_STS_IOERR);
 		return;
 	}
 
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 9b91daefcd9b..c4513b23f57a 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
 	struct request *rq = job->req;
 
-	blk_end_request_all(rq, scsi_req(rq)->result);
+	blk_end_request_all(rq, BLK_STS_OK);
 
 	put_device(job->dev);	/* release reference for the request */
 
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
 		ret = bsg_create_job(dev, req);
 		if (ret) {
 			scsi_req(req)->result = ret;
-			blk_end_request_all(req, ret);
+			blk_end_request_all(req, BLK_STS_OK);
 			spin_lock_irq(q->queue_lock);
 			continue;
 		}
diff --git a/block/bsg.c b/block/bsg.c
index 40db8ff4c618..59d02dd31b0c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -294,14 +294,14 @@ out:
  * async completion call-back from the block layer, when scsi/ide/whatever
  * calls end_that_request_last() on a request
  */
-static void bsg_rq_end_io(struct request *rq, int uptodate)
+static void bsg_rq_end_io(struct request *rq, blk_status_t status)
 {
 	struct bsg_command *bc = rq->end_io_data;
 	struct bsg_device *bd = bc->bd;
 	unsigned long flags;
 
-	dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
-		bd->name, rq, bc, bc->bio, uptodate);
+	dprintk("%s: finished rq %p bc %p, bio %p\n",
+		bd->name, rq, bc, bc->bio);
 
 	bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
 
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be77227..245a879b036e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
 						 bool SuccessfulIO)
 {
 	struct request *Request = Command->Request;
-	int Error = SuccessfulIO ? 0 : -EIO;
+	blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
 
 	pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
 		Command->SegmentCount, Command->DmaDirection);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f673adfe..49908c74bfcb 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
 	struct amiga_floppy_struct *floppy;
 	char *data;
 	unsigned long flags;
-	int err;
+	blk_status_t err;
 
 next_req:
 	rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
 
 next_segment:
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+	for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
 		       blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
 #endif
 		block = blk_rq_pos(rq) + cnt;
 		if ((int)block > floppy->blocks) {
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			break;
 		}
 
@@ -1413,7 +1413,7 @@ next_segment:
 #endif
 
 		if (get_track(drive, track) == -1) {
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			break;
 		}
 
@@ -1424,7 +1424,7 @@ next_segment:
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				err = -EIO;
+				err = BLK_STS_IOERR;
 				break;
 			}
 			/*
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c09fd5a..5bf0c9d21fc1 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1071,7 +1071,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
 	do {
 		bio = rq->bio;
 		bok = !fastfail && !bio->bi_error;
-	} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+	} while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
 
 	/* cf. http://lkml.org/lkml/2006/10/31/28 */
 	if (!fastfail)
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd52cb5..92da886180aa 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
 static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
 static DEFINE_TIMER(fd_timer, check_change, 0, 0);
 	
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
 {
 	if (!__blk_end_request_cur(fd_request, err))
 		fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
 	fd_request->error_count++;
 	if (fd_request->error_count >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 	}
 	else if (fd_request->error_count == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
 		    }
 		    else {
 			/* all sectors finished */
-			fd_end_request_cur(0);
+			fd_end_request_cur(BLK_STS_OK);
 			redo_fd_request();
 			return;
 		    }
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
 	}
 	else {
 		/* all sectors finished */
-		fd_end_request_cur(0);
+		fd_end_request_cur(BLK_STS_OK);
 		redo_fd_request();
 	}
 	return;
@@ -1445,7 +1445,7 @@ repeat:
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 		goto repeat;
 	}
 		
@@ -1461,12 +1461,12 @@ repeat:
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			fd_end_request_cur(-EIO);
+			fd_end_request_cur(BLK_STS_IOERR);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			fd_end_request_cur(-EIO);
+			fd_end_request_cur(BLK_STS_IOERR);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
 	}
 	
 	if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 		goto repeat;
 	}
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 3761066fe89d..02a611993bb4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
 	/* set the residual count for pc requests */
 	if (blk_rq_is_passthrough(rq))
 		scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-	blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+	blk_end_request_all(rq, scsi_req(rq)->result ?
+			BLK_STS_IOERR : BLK_STS_OK);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, c);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c7653178..cc75a5176057 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
  * =============================
  */
 
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
 {
 	unsigned int nr_sectors = current_count_sectors;
 	unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
 			DRWE->last_error_generation = DRS->generation;
 		}
 		spin_lock_irqsave(q->queue_lock, flags);
-		floppy_end_request(req, -EIO);
+		floppy_end_request(req, BLK_STS_IOERR);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
 }
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e288fb30100f..4caf6338c012 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -464,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
 		zero_fill_bio(bio);
 	}
 
-	blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+	blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4f5653..ee6f66bb50c7 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
 						struct smart_attr *attrib);
 
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
 
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
-		mtip_complete_command(cmd, -EIO);
+		mtip_complete_command(cmd, BLK_STS_IOERR);
 		return;
 	}
 
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 					tag,
 					fail_reason != NULL ?
 						fail_reason : "unknown");
-					mtip_complete_command(cmd, -ENODATA);
+					mtip_complete_command(cmd, BLK_STS_MEDIUM);
 					continue;
 				}
 			}
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			dev_warn(&port->dd->pdev->dev,
 				"retiring tag %d\n", tag);
 
-			mtip_complete_command(cmd, -EIO);
+			mtip_complete_command(cmd, BLK_STS_IOERR);
 		}
 	}
 	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -2753,7 +2753,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	mtip_softirq_done_fn(req);
 }
 
@@ -3597,7 +3597,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 		int err;
 
 		err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
-		blk_mq_end_request(rq, err);
+		blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
 		return 0;
 	}
 
@@ -3730,7 +3730,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
 	if (reserved) {
 		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-		cmd->status = -ETIME;
+		cmd->status = BLK_STS_TIMEOUT;
 		return BLK_EH_HANDLED;
 	}
 
@@ -3961,7 +3961,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	cmd->status = -ENODEV;
+	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(rq);
 }
 
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e0bb78..e8286af50e16 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
 	int retries; /* The number of retries left for this command. */
 
 	int direction; /* Data transfer direction */
-	int status;
+	blk_status_t status;
 };
 
 /* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 6de9f9943a0e..978d2d2d08d6 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
 	int index;
 	int cookie;
 	struct completion send_complete;
-	int status;
+	blk_status_t status;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	struct nbd_config *config;
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
-		cmd->status = -EIO;
+		cmd->status = BLK_STS_TIMEOUT;
 		return BLK_EH_HANDLED;
 	}
 
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 				    "Connection timed out\n");
 	}
 	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
 
@@ -578,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	if (ntohl(reply.error)) {
 		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
 			ntohl(reply.error));
-		cmd->status = -EIO;
+		cmd->status = BLK_STS_IOERR;
 		return cmd;
 	}
 
@@ -603,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 				 */
 				if (nbd_disconnected(config) ||
 				    config->num_connections <= 1) {
-					cmd->status = -EIO;
+					cmd->status = BLK_STS_IOERR;
 					return cmd;
 				}
 				return ERR_PTR(-EIO);
@@ -655,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 	if (!blk_mq_request_started(req))
 		return;
 	cmd = blk_mq_rq_to_pdu(req);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(req);
 }
 
@@ -744,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 		nbd_config_put(nbd);
 		return -EINVAL;
 	}
-	cmd->status = 0;
+	cmd->status = BLK_STS_OK;
 again:
 	nsock = config->socks[index];
 	mutex_lock(&nsock->tx_lock);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1eeac8e..e6b81d370882 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -229,11 +229,11 @@ static void end_cmd(struct nullb_cmd *cmd)
 
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
-		blk_mq_end_request(cmd->rq, 0);
+		blk_mq_end_request(cmd->rq, BLK_STS_OK);
 		return;
 	case NULL_Q_RQ:
 		INIT_LIST_HEAD(&cmd->rq->queuelist);
-		blk_end_request_all(cmd->rq, 0);
+		blk_end_request_all(cmd->rq, BLK_STS_OK);
 		break;
 	case NULL_Q_BIO:
 		bio_endio(cmd->bio);
@@ -422,11 +422,12 @@ static void cleanup_queues(struct nullb *nullb)
 
 #ifdef CONFIG_NVM
 
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
-	rqd->error = error;
+	/* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
+	rqd->error = status ? -EIO : 0;
 	nvm_end_io(rqd);
 
 	blk_put_request(rq);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef34d5a..cffe42d80ce9 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -783,7 +783,7 @@ static void pcd_request(void)
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
 		} else {
-			__blk_end_request_all(pcd_req, -EIO);
+			__blk_end_request_all(pcd_req, BLK_STS_IOERR);
 			pcd_req = NULL;
 		}
 	}
@@ -794,7 +794,7 @@ static void do_pcd_request(struct request_queue *q)
 	pcd_request();
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
 	unsigned long saved_flags;
 
@@ -837,7 +837,7 @@ static void pcd_start(void)
 
 	if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
 		pcd_bufblk = -1;
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
@@ -871,7 +871,7 @@ static void do_pcd_read_drq(void)
 			return;
 		}
 		pcd_bufblk = -1;
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f90978..c98983be4f9c 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
 				if (!__blk_end_request_cur(pd_req,
-						res == Ok ? 0 : -EIO)) {
+						res == Ok ? 0 : BLK_STS_IOERR)) {
 					if (!set_next_request())
 						stop = 1;
 				}
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca7315ddc..5f46da8d05cd 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -801,7 +801,7 @@ static int set_next_request(void)
 	return pf_req != NULL;
 }
 
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
 {
 	if (pf_req && !__blk_end_request_cur(pf_req, err))
 		pf_req = NULL;
@@ -821,7 +821,7 @@ repeat:
 	pf_count = blk_rq_cur_sectors(pf_req);
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-		pf_end_request(-EIO);
+		pf_end_request(BLK_STS_IOERR);
 		goto repeat;
 	}
 
@@ -836,7 +836,7 @@ repeat:
 		pi_do_claimed(pf_current->pi, do_pf_write);
 	else {
 		pf_busy = 0;
-		pf_end_request(-EIO);
+		pf_end_request(BLK_STS_IOERR);
 		goto repeat;
 	}
 }
@@ -868,7 +868,7 @@ static int pf_next_buf(void)
 	return 0;
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
 	unsigned long saved_flags;
 
@@ -896,7 +896,7 @@ static void do_pf_read_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_read_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 	pf_mask = STAT_DRQ;
@@ -915,7 +915,7 @@ static void do_pf_read_drq(void)
 				pi_do_claimed(pf_current->pi, do_pf_read_start);
 				return;
 			}
-			next_request(-EIO);
+			next_request(BLK_STS_IOERR);
 			return;
 		}
 		pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +942,7 @@ static void do_pf_write_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
@@ -955,7 +955,7 @@ static void do_pf_write_start(void)
 				pi_do_claimed(pf_current->pi, do_pf_write_start);
 				return;
 			}
-			next_request(-EIO);
+			next_request(BLK_STS_IOERR);
 			return;
 		}
 		pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +975,7 @@ static void do_pf_write_done(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 	pi_disconnect(pf_current->pi);
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e9feb8..075662f2cf46 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 		return 0;
 	}
 
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 		return 0;
 	}
 
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 			break;
 		default:
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		}
 	}
 }
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	struct ps3_storage_device *dev = data;
 	struct ps3disk_private *priv;
 	struct request *req;
-	int res, read, error;
+	int res, read;
+	blk_status_t error;
 	u64 tag, status;
 	const char *op;
 
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	if (status) {
 		dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
 			__LINE__, op, status);
-		error = -EIO;
+		error = BLK_STS_IOERR;
 	} else {
 		dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
 			__LINE__, op);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 454bf9c34882..3e8b43d792c2 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2293,11 +2293,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 		rbd_assert(img_request->obj_request != NULL);
 		more = obj_request->which < img_request->obj_request_count - 1;
 	} else {
+		blk_status_t status = errno_to_blk_status(result);
+
 		rbd_assert(img_request->rq != NULL);
 
-		more = blk_update_request(img_request->rq, result, xferred);
+		more = blk_update_request(img_request->rq, status, xferred);
 		if (!more)
-			__blk_mq_end_request(img_request->rq, result);
+			__blk_mq_end_request(img_request->rq, status);
 	}
 
 	return more;
@@ -4149,7 +4151,7 @@ err_rq:
 			 obj_op_name(op_type), length, offset, result);
 	ceph_put_snap_context(snapc);
 err:
-	blk_mq_end_request(rq, result);
+	blk_mq_end_request(rq, errno_to_blk_status(result));
 }
 
 static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4dae2a..e6c526861703 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
 				    struct skd_special_context *skspcl);
 static void skd_request_fn(struct request_queue *rq);
 static void skd_end_request(struct skd_device *skdev,
-			    struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+		struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq);
 static void skd_postop_sg_list(struct skd_device *skdev,
 			       struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
 		if (req == NULL)
 			break;
 		blk_start_request(req);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 }
 
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
 	struct request *req = NULL;
 	struct skd_scsi_request *scsi_req;
 	unsigned long io_flags;
-	int error;
 	u32 lba;
 	u32 count;
 	int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
 		if (!req->bio)
 			goto skip_sg;
 
-		error = skd_preop_sg_list(skdev, skreq);
-
-		if (error != 0) {
+		if (!skd_preop_sg_list(skdev, skreq)) {
 			/*
 			 * Complete the native request with error.
 			 * Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
 			 */
 			pr_debug("%s:%s:%d error Out\n",
 				 skdev->name, __func__, __LINE__);
-			skd_end_request(skdev, skreq, error);
+			skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
 			continue;
 		}
 
@@ -805,7 +802,7 @@ skip_sg:
 }
 
 static void skd_end_request(struct skd_device *skdev,
-			    struct skd_request_context *skreq, int error)
+		struct skd_request_context *skreq, blk_status_t error)
 {
 	if (unlikely(error)) {
 		struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
 	__blk_end_request_all(skreq->req, error);
 }
 
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq)
 {
 	struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 
 	n_sg = blk_rq_map_sg(skdev->queue, req, sg);
 	if (n_sg <= 0)
-		return -EINVAL;
+		return false;
 
 	/*
 	 * Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 	 */
 	n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
 	if (n_sg <= 0)
-		return -EINVAL;
+		return false;
 
 	SKD_ASSERT(n_sg <= skdev->sgs_per_request);
 
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 		}
 	}
 
-	return 0;
+	return true;
 }
 
 static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 	switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
 	case SKD_CHECK_STATUS_REPORT_GOOD:
 	case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
-		skd_end_request(skdev, skreq, 0);
+		skd_end_request(skdev, skreq, BLK_STS_OK);
 		break;
 
 	case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 
 	case SKD_CHECK_STATUS_REPORT_ERROR:
 	default:
-		skd_end_request(skdev, skreq, -EIO);
+		skd_end_request(skdev, skreq, BLK_STS_IOERR);
 		break;
 	}
 }
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 			 * native request.
 			 */
 			if (likely(cmp_status == SAM_STAT_GOOD))
-				skd_end_request(skdev, skreq, 0);
+				skd_end_request(skdev, skreq, BLK_STS_OK);
 			else
 				skd_resolve_req_exception(skdev, skreq);
 		}
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
 			    SKD_MAX_RETRIES)
 				blk_requeue_request(skdev->queue, skreq->req);
 			else
-				skd_end_request(skdev, skreq, -EIO);
+				skd_end_request(skdev, skreq, BLK_STS_IOERR);
 
 			skreq->req = NULL;
 
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab3d50a..6b16ead1da58 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
 	rqe->req = NULL;
 
-	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+	__blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
 
 	vdc_blk_queue_start(port);
 }
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
 	struct request *req;
 
 	while ((req = blk_fetch_request(port->disk->queue)) != NULL)
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 }
 
 static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6cf375..1633aaf24060 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
 	return ret;
 }
 
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
 			       int req_sector, int sectors_nb,
 			       unsigned char *buffer)
 {
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
 			ret = swim_read_sector(fs, side, track, sector,
 						buffer);
 			if (try-- == 0)
-				return -EIO;
+				return BLK_STS_IOERR;
 		} while (ret != 512);
 
 		buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
 
 	req = swim_next_request(swd);
 	while (req) {
-		int err = -EIO;
+		blk_status_t err = BLK_STS_IOERR;
 
 		fs = req->rq_disk->private_data;
 		if (blk_rq_pos(req) >= fs->total_secs)
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c9bdba..c7953860ce91 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
 					unsigned int clearing);
 static int floppy_revalidate(struct gendisk *disk);
 
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
 {
 	struct request *req = fs->cur_req;
 	int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
 		if (fs->mdev->media_bay &&
 		    check_media_bay(fs->mdev->media_bay) != MB_FD) {
 			swim3_dbg("%s", "  media bay absent, dropping req\n");
-			swim3_end_request(fs, -ENODEV, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
 		if (blk_rq_pos(req) >= fs->total_secs) {
 			swim3_dbg("  pos out of bounds (%ld, max is %ld)\n",
 				  (long)blk_rq_pos(req), (long)fs->total_secs);
-			swim3_end_request(fs, -EIO, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 		if (fs->ejected) {
 			swim3_dbg("%s", "  disk ejected\n");
-			swim3_end_request(fs, -EIO, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
 				swim3_dbg("%s", "  try to write, disk write protected\n");
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				continue;
 			}
 		}
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
 				if (fs->retries > 5) {
 					swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
 						  fs->req_cyl, fs->cur_cyl);
-					swim3_end_request(fs, -EIO, 0);
+					swim3_end_request(fs, BLK_STS_IOERR, 0);
 					fs->state = idle;
 					return;
 				}
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		swim3_end_request(fs, -EIO, 0);
+		swim3_end_request(fs, BLK_STS_IOERR, 0);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	swim3_err("%s", "Seek timeout\n");
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
 	spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
 		goto unlock;
 	}
 	swim3_err("%s", "Seek settle timeout\n");
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
  unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
 	swim3_err("Timeout %sing sector %ld\n",
 	       (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
 	       (long)blk_rq_pos(fs->cur_req));
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
 	spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("%s", "Seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					swim3_end_request(fs, -EIO, 0);
+					swim3_end_request(fs, BLK_STS_IOERR, 0);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("Error %sing block %ld (err=%x)\n",
 				       rq_data_dir(req) == WRITE? "writ": "read",
 				       (long)blk_rq_pos(req), err);
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				fs->state = idle;
 			}
 		} else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
 				swim3_err("  state=%d, dir=%x, intr=%x, err=%x\n",
 					  fs->state, rq_data_dir(req), intr, err);
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				fs->state = idle;
 				start_request(fs);
 				break;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072caf56f..08586dc14e85 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
 
 static inline void carm_end_request_queued(struct carm_host *host,
 					   struct carm_request *crq,
-					   int error)
+					   blk_status_t error)
 {
 	struct request *req = crq->rq;
 	int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
 }
 
 static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
-			       int error)
+			       blk_status_t error)
 {
 	carm_end_request_queued(host, crq, error);
 	if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
 	sg = &crq->sg[0];
 	n_elem = blk_rq_map_sg(q, rq, sg);
 	if (n_elem <= 0) {
-		carm_end_rq(host, crq, -EIO);
+		carm_end_rq(host, crq, BLK_STS_IOERR);
 		return;		/* request with no s/g entries? */
 	}
 
 	/* map scatterlist to PCI bus addresses */
 	n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
 	if (n_elem <= 0) {
-		carm_end_rq(host, crq, -EIO);
+		carm_end_rq(host, crq, BLK_STS_IOERR);
 		return;		/* request with no s/g entries? */
 	}
 	crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
 
 static void carm_handle_array_info(struct carm_host *host,
 				   struct carm_request *crq, u8 *mem,
-				   int error)
+				   blk_status_t error)
 {
 	struct carm_port *port;
 	u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
 
 static void carm_handle_scan_chan(struct carm_host *host,
 				  struct carm_request *crq, u8 *mem,
-				  int error)
+				  blk_status_t error)
 {
 	u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
 	unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
 }
 
 static void carm_handle_generic(struct carm_host *host,
-				struct carm_request *crq, int error,
+				struct carm_request *crq, blk_status_t error,
 				int cur_state, int next_state)
 {
 	DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
 }
 
 static inline void carm_handle_rw(struct carm_host *host,
-				  struct carm_request *crq, int error)
+				  struct carm_request *crq, blk_status_t error)
 {
 	int pci_dir;
 
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 	u32 handle = le32_to_cpu(ret_handle_le);
 	unsigned int msg_idx;
 	struct carm_request *crq;
-	int error = (status == RMSG_OK) ? 0 : -EIO;
+	blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
 	u8 *mem;
 
 	VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 err_out:
 	printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
 	       pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
-	carm_end_rq(host, crq, -EIO);
+	carm_end_rq(host, crq, BLK_STS_IOERR);
 }
 
 static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 553cc4c542b4..205b74d70efc 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
 	struct scatterlist sg[];
 };
 
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {
 	case VIRTIO_BLK_S_OK:
-		return 0;
+		return BLK_STS_OK;
 	case VIRTIO_BLK_S_UNSUPP:
-		return -ENOTTY;
+		return BLK_STS_NOTSUPP;
 	default:
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 		goto out;
 
 	blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
-	err = virtblk_result(blk_mq_rq_to_pdu(req));
+	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
 out:
 	blk_put_request(req);
 	return err;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 39459631667c..aedc3c759273 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1601,14 +1601,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}
 
-		blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		if (bret->status == BLKIF_RSP_OKAY)
+			blkif_req(req)->error = BLK_STS_OK;
+		else
+			blkif_req(req)->error = BLK_STS_IOERR;
+
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				blkif_req(req)->error = -EOPNOTSUPP;
+				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1630,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				blkif_req(req)->error = -EOPNOTSUPP;
+				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(blkif_req(req)->error)) {
-				if (blkif_req(req)->error == -EOPNOTSUPP)
-					blkif_req(req)->error = 0;
+				if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+					blkif_req(req)->error = BLK_STS_OK;
 				info->feature_fua = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
@@ -2137,7 +2141,7 @@ static int blkfront_resume(struct xenbus_device *dev)
 			merge_bio.tail = shadow[j].request->biotail;
 			bio_list_merge(&info->bio_list, &merge_bio);
 			shadow[j].request->bio = NULL;
-			blk_mq_end_request(shadow[j].request, 0);
+			blk_mq_end_request(shadow[j].request, BLK_STS_OK);
 		}
 	}
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2147e0..977fdf066017 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
 		if (!blk_rq_is_passthrough(req))
 			break;
 		blk_start_request(req);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 	return req;
 }
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Drop all in-flight and pending requests */
 		if (ace->req) {
-			__blk_end_request_all(ace->req, -EIO);
+			__blk_end_request_all(ace->req, BLK_STS_IOERR);
 			ace->req = NULL;
 		}
 		while ((req = blk_fetch_request(ace->queue)) != NULL)
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		}
 
 		/* bio finished; is there another one? */
-		if (__blk_end_request_cur(ace->req, 0)) {
+		if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
 			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
 			 *      blk_rq_sectors(ace->req),
 			 *      blk_rq_cur_sectors(ace->req));
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e52effa..41c95c9b2ab4 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
 	while (req) {
 		unsigned long start = blk_rq_pos(req) << 9;
 		unsigned long len  = blk_rq_cur_bytes(req);
-		int err = 0;
+		blk_status_t err = BLK_STS_OK;
 
 		if (start + len > z2ram_size) {
 			pr_err(DEVICE_NAME ": bad access: block=%llu, "
 			       "count=%u\n",
 			       (unsigned long long)blk_rq_pos(req),
 			       blk_rq_cur_sectors(req));
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			goto done;
 		}
 		while (len) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763a948f..53f8278e66f7 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
  */
 static void gdrom_readdisk_dma(struct work_struct *work)
 {
-	int err, block, block_cnt;
+	int block, block_cnt;
+	blk_status_t err;
 	struct packet_command *read_command;
 	struct list_head *elem, *next;
 	struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 		__raw_writeb(1, GDROM_DMA_STATUS_REG);
 		wait_event_interruptible_timeout(request_queue,
 			gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
-		err = gd.transfer ? -EIO : 0;
+		err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
 		gd.transfer = 0;
 		gd.pending = 0;
 		/* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
 			break;
 		case REQ_OP_WRITE:
 			pr_notice("Read only device - write request ignored\n");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			break;
 		default:
 			printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			break;
 		}
 	}
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937284e7..d7a49dcfa85e 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -273,7 +273,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	ide_requeue_and_plug(drive, failed_rq);
 	if (ide_queue_sense_rq(drive, pc)) {
 		blk_start_request(failed_rq);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
 	}
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +437,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 	/* No more interrupts */
 	if ((stat & ATA_DRQ) == 0) {
-		int uptodate, error;
+		int uptodate;
+		blk_status_t error;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
 			  blk_rq_bytes(rq));
@@ -490,7 +491,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		if (ata_misc_request(rq)) {
 			scsi_req(rq)->result = 0;
-			error = 0;
+			error = BLK_STS_OK;
 		} else {
 
 			if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +499,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					scsi_req(rq)->result = -EIO;
 			}
 
-			error = uptodate ? 0 : -EIO;
+			error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
 		}
 
 		ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3a64c3..d55e44ed82b5 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 		scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
 		cdrom_analyze_sense_data(drive, failed);
 
-		if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+		if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
 			BUG();
 	} else
 		cdrom_analyze_sense_data(drive, NULL);
@@ -508,7 +508,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
 		nr_bytes -= cmd->last_xfer_len;
 
 	if (nr_bytes > 0) {
-		ide_complete_rq(drive, 0, nr_bytes);
+		ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
 		return true;
 	}
 
@@ -674,7 +674,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 out_end:
 	if (blk_rq_is_scsi(rq) && rc == 0) {
 		scsi_req(rq)->resid_len = 0;
-		blk_end_request_all(rq, 0);
+		blk_end_request_all(rq, BLK_STS_OK);
 		hwif->rq = NULL;
 	} else {
 		if (sense && uptodate)
@@ -699,7 +699,7 @@ out_end:
 				scsi_req(rq)->resid_len += cmd->last_xfer_len;
 		}
 
-		ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+		ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
 
 		if (sense && rc == 2)
 			ide_error(drive, "request sense failure", stat);
@@ -844,7 +844,7 @@ out_end:
 	if (nsectors == 0)
 		nsectors = 1;
 
-	ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+	ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
 
 	return ide_stopped;
 }
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c81223e56d..54d4d78ca46a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
 			if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 				ide_finish_cmd(drive, cmd, stat);
 			else
-				ide_complete_rq(drive, 0,
+				ide_complete_rq(drive, BLK_STS_OK,
 						blk_rq_sectors(cmd->rq) << 9);
 			return ide_stopped;
 		}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7d158d..47d5f3379748 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 			return ide_stopped;
 		}
 		scsi_req(rq)->result = err;
-		ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+		ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 }
 EXPORT_SYMBOL_GPL(ide_error);
 
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
 {
 	struct request *rq = drive->hwif->rq;
 
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 	    scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
 		if (err <= 0 && scsi_req(rq)->result == 0)
 			scsi_req(rq)->result = -EIO;
-		ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+		ide_complete_rq(drive, err, blk_rq_bytes(rq));
 	}
 }
 
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
 	}
 	/* done polling */
 	hwif->polling = 0;
-	ide_complete_drive_reset(drive, 0);
+	ide_complete_drive_reset(drive, BLK_STS_OK);
 	return ide_stopped;
 }
 
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 	u8 tmp;
-	int err = 0;
+	blk_status_t err = BLK_STS_OK;
 
 	if (port_ops && port_ops->reset_poll) {
 		err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 		printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
 			hwif->name, tmp);
 		drive->failures++;
-		err = -EIO;
+		err = BLK_STS_IOERR;
 	} else  {
 		tmp = ide_read_error(drive);
 
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 		} else {
 			ide_reset_report_error(hwif, tmp);
 			drive->failures++;
-			err = -EIO;
+			err = BLK_STS_IOERR;
 		}
 	}
 out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 
 	if (io_ports->ctl_addr == 0) {
 		spin_unlock_irqrestore(&hwif->lock, flags);
-		ide_complete_drive_reset(drive, -ENXIO);
+		ide_complete_drive_reset(drive, BLK_STS_IOERR);
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048cd2df..627b1f62a749 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, done);
+		ide_complete_rq(drive, BLK_STS_IOERR, done);
 		return ide_stopped;
 	}
 
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 
 		if (ata_misc_request(rq)) {
 			scsi_req(rq)->result = 0;
-			ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+			ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 			return ide_stopped;
 		} else
 			goto out_end;
@@ -303,7 +303,7 @@ out_end:
 	drive->failed_pc = NULL;
 	if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
 		scsi_req(rq)->result = -EIO;
-	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 	return ide_stopped;
 }
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af721f8cb..3a234701d92c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
 #include <linux/uaccess.h>
 #include <asm/io.h>
 
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
 	       unsigned int nr_bytes)
 {
 	/*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
 	}
 }
 
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
 	 * if failfast is set on a request, override number of sectors
 	 * and complete the whole request right now
 	 */
-	if (blk_noretry_request(rq) && error <= 0)
+	if (blk_noretry_request(rq) && error)
 		nr_bytes = blk_rq_sectors(rq) << 9;
 
 	rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 			scsi_req(rq)->result = -EIO;
 	}
 
-	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 }
 
 static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  	printk("%s: DRIVE_CMD (null)\n", drive->name);
 #endif
 	scsi_req(rq)->result = 0;
-	ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 
  	return ide_stopped;
 }
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1f40ce..08b54bb3b705 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -40,7 +40,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	return ret;
 }
 
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
 {
 	complete(rq->end_io_data);
 }
@@ -57,7 +57,7 @@ static int ide_pm_execute_rq(struct request *rq)
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
 		scsi_req(rq)->result = -ENXIO;
-		__blk_end_request_all(rq, 0);
+		__blk_end_request_all(rq, BLK_STS_OK);
 		spin_unlock_irq(q->queue_lock);
 		return -ENXIO;
 	}
@@ -235,7 +235,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 	drive->hwif->rq = NULL;
 
-	if (blk_end_request(rq, 0, 0))
+	if (blk_end_request(rq, BLK_STS_OK, 0))
 		BUG();
 }
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f948b76..4d062c568777 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 	ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d23c9e..ab1a32cdcb0a 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
 		}
 
 		if (nr_bytes > 0)
-			ide_complete_rq(drive, 0, nr_bytes);
+			ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
 	}
 }
 
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
 		ide_driveid_update(drive);
 	}
 
-	ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+	ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 }
 
 /*
@@ -394,7 +394,7 @@ out_end:
 	if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 		ide_finish_cmd(drive, cmd, stat);
 	else
-		ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+		ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
 	return ide_stopped;
 out_err:
 	ide_error_cmd(drive, cmd);
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849bb476c..57eea5a9047f 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
  *	yet.
  */
 
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
 		if ((sata_stat & 0x03) != 0x03) {
 			printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
 					    hwif->name, sata_stat);
-			return -ENXIO;
+			return BLK_STS_IOERR;
 		}
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
 /**
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ceeeb495d01c..39262e344ae1 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1449,22 +1449,15 @@ static void activate_path_work(struct work_struct *work)
 	activate_or_offline_path(pgpath);
 }
 
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
 {
 	switch (error) {
-	case -EBADE:
-		/*
-		 * EBADE signals an reservation conflict.
-		 * We shouldn't fail the path here as we can communicate with
-		 * the target.  We should failover to the next path, but in
-		 * doing so we might be causing a ping-pong between paths.
-		 * So just return the reservation conflict error.
-		 */
-	case -EOPNOTSUPP:
-	case -EREMOTEIO:
-	case -EILSEQ:
-	case -ENODATA:
-	case -ENOSPC:
+	case BLK_STS_NOTSUPP:
+	case BLK_STS_NOSPC:
+	case BLK_STS_TARGET:
+	case BLK_STS_NEXUS:
+	case BLK_STS_MEDIUM:
+	case BLK_STS_RESOURCE:
 		return 1;
 	}
 
@@ -1473,7 +1466,7 @@ static int noretry_error(int error)
 }
 
 static int multipath_end_io(struct dm_target *ti, struct request *clone,
-			    int error, union map_info *map_context)
+			    blk_status_t error, union map_info *map_context)
 {
 	struct dm_mpath_io *mpio = get_mpio(map_context);
 	struct pgpath *pgpath = mpio->pgpath;
@@ -1500,7 +1493,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 
 		if (atomic_read(&m->nr_valid_paths) == 0 &&
 		    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-			if (error == -EIO)
+			if (error == BLK_STS_IOERR)
 				dm_report_EIO(m);
 			/* complete with the original error */
 			r = DM_ENDIO_DONE;
@@ -1525,7 +1518,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *er
 	unsigned long flags;
 	int r = DM_ENDIO_DONE;
 
-	if (!*error || noretry_error(*error))
+	if (!*error || noretry_error(errno_to_blk_status(*error)))
 		goto done;
 
 	if (pgpath)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7246ee..bee334389173 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
 	struct dm_rq_target_io *tio = info->tio;
 	struct bio *bio = info->orig;
 	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-	int error = clone->bi_error;
+	blk_status_t error = errno_to_blk_status(clone->bi_error);
 
 	bio_put(clone);
 
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
 	 * Do not use blk_end_request() here, because it may complete
 	 * the original request before the clone, and break the ordering.
 	 */
-	blk_update_request(tio->orig, 0, nr_bytes);
+	blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
 }
 
 static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  * Must be called without clone's queue lock held,
  * see end_clone_request() for more details.
  */
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
 {
 	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 	rq_completed(md, rw, false);
 }
 
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
 {
 	int r = DM_ENDIO_DONE;
 	struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
 			r = rq_end_io(tio->ti, clone, error, &tio->info);
 	}
 
-	if (unlikely(error == -EREMOTEIO)) {
+	if (unlikely(error == BLK_STS_TARGET)) {
 		if (req_op(clone) == REQ_OP_WRITE_SAME &&
 		    !clone->q->limits.max_write_same_sectors)
 			disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
  * Complete the clone and the original request with the error status
  * through softirq context.
  */
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
  * Target's rq_end_io() function isn't called.
  * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
  */
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
 {
 	rq->rq_flags |= RQF_FAILED;
 	dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 /*
  * Called with the clone's queue lock held (in the case of .request_fn)
  */
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
 
 static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
-	int r;
+	blk_status_t r;
 
 	if (blk_queue_io_stat(clone->q))
 		clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
 		break;
 	case DM_MAPIO_KILL:
 		/* The target wants to complete the I/O */
-		dm_kill_unmapped_request(rq, -EIO);
+		dm_kill_unmapped_request(rq, BLK_STS_IOERR);
 		break;
 	default:
 		DMWARN("unimplemented target map return value: %d", r);
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d21b95f..9813922e4fe5 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
 	struct dm_target *ti;
 	struct request *orig, *clone;
 	struct kthread_work work;
-	int error;
+	blk_status_t error;
 	union map_info info;
 	struct dm_stats_aux stats_aux;
 	unsigned long duration_jiffies;
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c27fb7..22de7f5ed032 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
 		spin_lock_irqsave(&msb->q_lock, flags);
 
 		if (len)
-			if (!__blk_end_request(msb->req, 0, len))
+			if (!__blk_end_request(msb->req, BLK_STS_OK, len))
 				msb->req = NULL;
 
 		if (error && msb->req) {
+			blk_status_t ret = errno_to_blk_status(error);
 			dbg_verbose("IO: ending one sector of the request with error");
-			if (!__blk_end_request(msb->req, error, msb->page_size))
+			if (!__blk_end_request(msb->req, ret, msb->page_size))
 				msb->req = NULL;
 		}
 
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
 		WARN_ON(!msb->io_queue_stopped);
 
 		while ((req = blk_fetch_request(q)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		return;
 	}
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a266878..8897962781bb 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ try_again:
 					       msb->req_sg);
 
 		if (!msb->seg_count) {
-			chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+			chunk = __blk_end_request_cur(msb->block_req,
+					BLK_STS_RESOURCE);
 			continue;
 		}
 
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
 		if (error && !t_len)
 			t_len = blk_rq_cur_bytes(msb->block_req);
 
-		chunk = __blk_end_request(msb->block_req, error, t_len);
+		chunk = __blk_end_request(msb->block_req,
+				errno_to_blk_status(error), t_len);
 
 		error = mspro_block_issue_req(card, chunk);
 
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
 
 	if (msb->eject) {
 		while ((req = blk_fetch_request(q)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 
 		return;
 	}
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b078686d..6ff94a948a4b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_card *card = md->queue.card;
 	unsigned int from, nr, arg;
 	int err = 0, type = MMC_BLK_DISCARD;
+	blk_status_t status = BLK_STS_OK;
 
 	if (!mmc_can_erase(card)) {
-		err = -EOPNOTSUPP;
+		status = BLK_STS_NOTSUPP;
 		goto fail;
 	}
 
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 		if (!err)
 			err = mmc_erase(card, from, nr, arg);
 	} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
-	if (!err)
+	if (err)
+		status = BLK_STS_IOERR;
+	else
 		mmc_blk_reset_success(md, type);
 fail:
-	blk_end_request(req, err, blk_rq_bytes(req));
+	blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 	struct mmc_card *card = md->queue.card;
 	unsigned int from, nr, arg;
 	int err = 0, type = MMC_BLK_SECDISCARD;
+	blk_status_t status = BLK_STS_OK;
 
 	if (!(mmc_can_secure_erase_trim(card))) {
-		err = -EOPNOTSUPP;
+		status = BLK_STS_NOTSUPP;
 		goto out;
 	}
 
@@ -1254,8 +1258,10 @@ retry:
 	err = mmc_erase(card, from, nr, arg);
 	if (err == -EIO)
 		goto out_retry;
-	if (err)
+	if (err) {
+		status = BLK_STS_IOERR;
 		goto out;
+	}
 
 	if (arg == MMC_SECURE_TRIM1_ARG) {
 		if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
 		err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
 		if (err == -EIO)
 			goto out_retry;
-		if (err)
+		if (err) {
+			status = BLK_STS_IOERR;
 			goto out;
+		}
 	}
 
 out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
 	if (!err)
 		mmc_blk_reset_success(md, type);
 out:
-	blk_end_request(req, err, blk_rq_bytes(req));
+	blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
 	int ret = 0;
 
 	ret = mmc_flush_cache(card);
-	if (ret)
-		ret = -EIO;
-
-	blk_end_request_all(req, ret);
+	blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 /*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
 {
 	if (mmc_card_removed(card))
 		req->rq_flags |= RQF_QUIET;
-	while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+	while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
 	mmc_queue_req_free(mq, mqrq);
 }
 
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
 	 */
 	if (mmc_card_removed(mq->card)) {
 		req->rq_flags |= RQF_QUIET;
-		blk_end_request_all(req, -EIO);
+		blk_end_request_all(req, BLK_STS_IOERR);
 		mmc_queue_req_free(mq, mqrq);
 		return;
 	}
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 */
 			mmc_blk_reset_success(md, type);
 
-			req_pending = blk_end_request(old_req, 0,
+			req_pending = blk_end_request(old_req, BLK_STS_OK,
 						      brq->data.bytes_xfered);
 			/*
 			 * If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 * time, so we only reach here after trying to
 			 * read a single sector.
 			 */
-			req_pending = blk_end_request(old_req, -EIO,
+			req_pending = blk_end_request(old_req, BLK_STS_IOERR,
 						      brq->data.blksz);
 			if (!req_pending) {
 				mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	ret = mmc_blk_part_switch(card, md);
 	if (ret) {
 		if (req) {
-			blk_end_request_all(req, -EIO);
+			blk_end_request_all(req, BLK_STS_IOERR);
 		}
 		goto out;
 	}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6be3e7b..7f20298d892b 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
 	if (!mq) {
 		while ((req = blk_fetch_request(q)) != NULL) {
 			req->rq_flags |= RQF_QUIET;
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		}
 		return;
 	}
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd7dbf6..91c17fba7659 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 }
 
 
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
 {
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
 	buf = bio_data(req->bio);
 
-	if (req_op(req) == REQ_OP_FLUSH)
-		return tr->flush(dev);
+	if (req_op(req) == REQ_OP_FLUSH) {
+		if (tr->flush(dev))
+			return BLK_STS_IOERR;
+		return BLK_STS_OK;
+	}
 
 	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
 	    get_capacity(req->rq_disk))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
-		return tr->discard(dev, block, nsect);
+		if (tr->discard(dev, block, nsect))
+			return BLK_STS_IOERR;
+		return BLK_STS_OK;
 	case REQ_OP_READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
-				return -EIO;
+				return BLK_STS_IOERR;
 		rq_flush_dcache_pages(req);
-		return 0;
+		return BLK_STS_OK;
 	case REQ_OP_WRITE:
 		if (!tr->writesect)
-			return -EIO;
+			return BLK_STS_IOERR;
 
 		rq_flush_dcache_pages(req);
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
-				return -EIO;
-		return 0;
+				return BLK_STS_IOERR;
 	default:
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
 	spin_lock_irq(rq->queue_lock);
 
 	while (1) {
-		int res;
+		blk_status_t res;
 
 		dev->bg_stop = false;
 		if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
 
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 	else
 		queue_work(dev->wq, &dev->work);
 }
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65439df..3ecdb39d1985 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,7 +313,7 @@ static void ubiblock_do_work(struct work_struct *work)
 	ret = ubiblock_read(pdu);
 	rq_flush_dcache_pages(req);
 
-	blk_mq_end_request(req, ret);
+	blk_mq_end_request(req, errno_to_blk_status(ret));
 }
 
 static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a60926410438..07e95c7d837a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -70,29 +70,21 @@ static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
-static int nvme_error_status(struct request *req)
+static blk_status_t nvme_error_status(struct request *req)
 {
 	switch (nvme_req(req)->status & 0x7ff) {
 	case NVME_SC_SUCCESS:
-		return 0;
+		return BLK_STS_OK;
 	case NVME_SC_CAP_EXCEEDED:
-		return -ENOSPC;
-	default:
-		return -EIO;
-
-	/*
-	 * XXX: these errors are a nasty side-band protocol to
-	 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
-	 * anywhere..
-	 */
-	case NVME_SC_CMD_SEQ_ERROR:
-		return -EILSEQ;
+		return BLK_STS_NOSPC;
 	case NVME_SC_ONCS_NOT_SUPPORTED:
-		return -EOPNOTSUPP;
+		return BLK_STS_NOTSUPP;
 	case NVME_SC_WRITE_FAULT:
 	case NVME_SC_READ_ERROR:
 	case NVME_SC_UNWRITTEN_BLOCK:
-		return -ENODATA;
+		return BLK_STS_MEDIUM;
+	default:
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -555,15 +547,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 			result, timeout);
 }
 
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvme_ctrl *ctrl = rq->end_io_data;
 
 	blk_mq_free_request(rq);
 
-	if (error) {
+	if (status) {
 		dev_err(ctrl->device,
-			"failed nvme_keep_alive_end_io error=%d\n", error);
+			"failed nvme_keep_alive_end_io error=%d\n",
+				status);
 		return;
 	}
 
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f3885b5e56bd..2d7a2889866f 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
 					rqd->bio->bi_iter.bi_sector));
 }
 
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d52701df7245..819898428763 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -706,7 +706,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ns && ns->ms && !blk_integrity_rq(req)) {
 		if (!(ns->pi_type && ns->ms == 8) &&
 		    !blk_rq_is_passthrough(req)) {
-			blk_mq_end_request(req, -EFAULT);
+			blk_mq_end_request(req, BLK_STS_NOTSUPP);
 			return BLK_MQ_RQ_QUEUE_OK;
 		}
 	}
@@ -939,7 +939,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
@@ -1586,7 +1586,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return nvme_create_io_queues(dev);
 }
 
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1594,7 +1594,7 @@ static void nvme_del_queue_end(struct request *req, int error)
 	complete(&nvmeq->dev->ioq_wait);
 }
 
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
 
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5efc11..b7cbd5d2cdea 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 	 */
 	if (basedev->state < DASD_STATE_READY) {
 		while ((req = blk_fetch_request(block->request_queue)))
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		return;
 	}
 
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting write request %p",
 				      req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			continue;
 		}
 		if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting failfast request %p",
 				      req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -ETIMEDOUT);
+			__blk_end_request_all(req, BLK_STS_TIMEOUT);
 			continue;
 		}
 		cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "on request %p",
 				      PTR_ERR(cqr), req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			continue;
 		}
 		/*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 {
 	struct request *req;
 	int status;
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 
 	req = (struct request *) cqr->callback_data;
 	dasd_profile_end(cqr->block, cqr, req);
+
 	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status < 0)
-		error = status;
+		error = errno_to_blk_status(status);
 	else if (status == 0) {
-		if (cqr->intrc == -EPERM)
-			error = -EBADE;
-		else if (cqr->intrc == -ENOLINK ||
-			 cqr->intrc == -ETIMEDOUT)
-			error = cqr->intrc;
-		else
-			error = -EIO;
+		switch (cqr->intrc) {
+		case -EPERM:
+			error = BLK_STS_NEXUS;
+			break;
+		case -ENOLINK:
+			error = BLK_STS_TRANSPORT;
+			break;
+		case -ETIMEDOUT:
+			error = BLK_STS_TIMEOUT;
+			break;
+		default:
+			error = BLK_STS_IOERR;
+			break;
+		}
 	}
 	__blk_end_request_all(req, error);
 }
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 
 	spin_lock_irq(&block->request_queue_lock);
 	while ((req = blk_fetch_request(block->request_queue)))
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	spin_unlock_irq(&block->request_queue_lock);
 }
 
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de6817875..3c2c84b72877 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	aob->request.data = (u64) aobrq;
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
-	scmrq->error = 0;
+	scmrq->error = BLK_STS_OK;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
 	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
 	scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 {
 	struct aob *aob = scmrq->aob;
 
-	if (scmrq->error == -ETIMEDOUT)
+	if (scmrq->error == BLK_STS_TIMEOUT)
 		SCM_LOG(1, "Request timeout");
 	else {
 		SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 		       scmrq->error);
 }
 
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 {
 	struct scm_request *scmrq = data;
 	struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	unsigned long flags;
 
-	if (scmrq->error != -EIO)
+	if (scmrq->error != BLK_STS_IOERR)
 		goto restart;
 
 	/* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cdc5129..cd598d1a4eae 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
-	int error;
+	blk_status_t error;
 #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
 	struct {
 		enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
 int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
 void scm_blk_dev_cleanup(struct scm_blk_dev *);
 void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
 
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc7f644..0f11f3bcac82 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 	struct eadm_private *private = get_eadm_private(sch);
 	struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
 	struct irb *irb = this_cpu_ptr(&cio_irb);
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 
 	EADM_LOG(6, "irq");
 	EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 
 	if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
 	    && scsw->eswf == 1 && irb->esw.eadm.erw.r)
-		error = -EIO;
+		error = BLK_STS_IOERR;
 
 	if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
-		error = -ETIMEDOUT;
+		error = BLK_STS_TIMEOUT;
 
 	eadm_subchannel_set_timeout(sch, 0);
 
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268edc54ae..1fa53ecdc2aa 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
 }
 EXPORT_SYMBOL_GPL(scm_driver_unregister);
 
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
 {
 	struct aob_rq_header *aobrq = (void *) aob->request.data;
 	struct scm_device *scmdev = aobrq->scmdev;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9dc893e..35a69949f92d 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
 		size_t len = blk_rq_cur_bytes(req);
-		int err = -EIO;
+		blk_status_t err = BLK_STS_IOERR;
 
 		if ((offset + len) > jdp->dsize)
 			goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
 		}
 
 		jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
-		err = 0;
+		err = BLK_STS_OK;
 	end:
 		if (!__blk_end_request_cur(req, err))
 			req = jsfd_next_request();
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 14785177ce7b..1e69a43b279d 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
 	 *       code paths.
 	 */
 	if (unlikely(rq->bio))
-		blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+		blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
 	else
 		blk_put_request(rq);
 }
@@ -474,7 +474,7 @@ void osd_end_request(struct osd_request *or)
 EXPORT_SYMBOL(osd_end_request);
 
 static void _set_error_resid(struct osd_request *or, struct request *req,
-			     int error)
+			     blk_status_t error)
 {
 	or->async_error = error;
 	or->req_errors = scsi_req(req)->result;
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 
 int osd_execute_request(struct osd_request *or)
 {
-	int error;
-
 	blk_execute_rq(or->request->q, NULL, or->request, 0);
-	error = scsi_req(or->request)->result ? -EIO : 0;
 
-	_set_error_resid(or, or->request, error);
-	return error;
+	if (scsi_req(or->request)->result) {
+		_set_error_resid(or, or->request, BLK_STS_IOERR);
+		return -EIO;
+	}
+
+	_set_error_resid(or, or->request, BLK_STS_OK);
+	return 0;
 }
 EXPORT_SYMBOL(osd_execute_request);
 
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
 {
 	struct osd_request *or = req->end_io_data;
 
@@ -1914,7 +1916,7 @@ analyze:
 		/* scsi sense is Empty, the request was never issued to target
 		 * linux return code might tell us what happened.
 		 */
-		if (or->async_error == -ENOMEM)
+		if (or->async_error == BLK_STS_RESOURCE)
 			osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
 		else
 			osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed92f07d..d54689c9216e 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
 
 
 /* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
 {
 	struct scsi_request *rq = scsi_req(req);
 	struct osst_request *SRpnt = req->end_io_data;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07dab893d..44904f41924c 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	}
 }
 
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
 {
 	__blk_put_request(req->q, req);
 }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 884aaa84c2dd..67a67191520f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -635,7 +635,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 	cmd->request->next_rq->special = NULL;
 }
 
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
 		unsigned int bytes, unsigned int bidi_bytes)
 {
 	struct scsi_cmnd *cmd = req->special;
@@ -694,45 +694,28 @@ static bool scsi_end_request(struct request *req, int error,
  * @cmd:	SCSI command (unused)
  * @result:	scsi error code
  *
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK	temporary transport failure
- * -EREMOTEIO	permanent target failure, do not retry
- * -EBADE	permanent nexus failure, retry on other path
- * -ENOSPC	No write space available
- * -ENODATA	Medium error
- * -EIO		unspecified I/O error
+ * Translate SCSI error code into block errors.
  */
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+		int result)
 {
-	int error = 0;
-
-	switch(host_byte(result)) {
+	switch (host_byte(result)) {
 	case DID_TRANSPORT_FAILFAST:
-		error = -ENOLINK;
-		break;
+		return BLK_STS_TRANSPORT;
 	case DID_TARGET_FAILURE:
 		set_host_byte(cmd, DID_OK);
-		error = -EREMOTEIO;
-		break;
+		return BLK_STS_TARGET;
 	case DID_NEXUS_FAILURE:
-		set_host_byte(cmd, DID_OK);
-		error = -EBADE;
-		break;
+		return BLK_STS_NEXUS;
 	case DID_ALLOC_FAILURE:
 		set_host_byte(cmd, DID_OK);
-		error = -ENOSPC;
-		break;
+		return BLK_STS_NOSPC;
 	case DID_MEDIUM_ERROR:
 		set_host_byte(cmd, DID_OK);
-		error = -ENODATA;
-		break;
+		return BLK_STS_MEDIUM;
 	default:
-		error = -EIO;
-		break;
+		return BLK_STS_IOERR;
 	}
-
-	return error;
 }
 
 /*
@@ -769,7 +752,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	int result = cmd->result;
 	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = cmd->request;
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid = false;
 	int sense_deferred = 0, level = 0;
@@ -808,7 +791,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			 * both sides at once.
 			 */
 			scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
-			if (scsi_end_request(req, 0, blk_rq_bytes(req),
+			if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
 					blk_rq_bytes(req->next_rq)))
 				BUG();
 			return;
@@ -850,7 +833,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_print_sense(cmd);
 		result = 0;
 		/* for passthrough error may be set */
-		error = 0;
+		error = BLK_STS_OK;
 	}
 
 	/*
@@ -922,18 +905,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				action = ACTION_REPREP;
 			} else if (sshdr.asc == 0x10) /* DIX */ {
 				action = ACTION_FAIL;
-				error = -EILSEQ;
+				error = BLK_STS_PROTECTION;
 			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
 			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
 				action = ACTION_FAIL;
-				error = -EREMOTEIO;
+				error = BLK_STS_TARGET;
 			} else
 				action = ACTION_FAIL;
 			break;
 		case ABORTED_COMMAND:
 			action = ACTION_FAIL;
 			if (sshdr.asc == 0x10) /* DIF */
-				error = -EILSEQ;
+				error = BLK_STS_PROTECTION;
 			break;
 		case NOT_READY:
 			/* If the device is in the process of becoming
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index d16414bfe2ef..cc970c811bcb 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -172,7 +172,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 			    struct sas_rphy *rphy)
 {
 	struct request *req;
-	int ret;
+	blk_status_t ret;
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
 	while ((req = blk_fetch_request(q)) != NULL) {
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82c33a6edbea..f3387c6089c5 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
 } Sg_device;
 
 /* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
 static int sg_start_req(Sg_request *srp, unsigned char *cmd);
 static int sg_finish_rem_req(Sg_request * srp);
 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 	if (atomic_read(&sdp->detaching)) {
 		if (srp->bio) {
 			scsi_req_free_cmd(scsi_req(srp->rq));
-			blk_end_request_all(srp->rq, -EIO);
+			blk_end_request_all(srp->rq, BLK_STS_IOERR);
 			srp->rq = NULL;
 		}
 
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
  * level when a command is completed (or has failed).
  */
 static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
 {
 	struct sg_request *srp = rq->end_io_data;
 	struct scsi_request *req = scsi_req(rq);
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6f5437..6b1c4ac54e66 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 	atomic64_dec(&STp->stats->in_flight);
 }
 
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
 {
 	struct st_request *SRpnt = req->end_io_data;
 	struct scsi_request *rq = scsi_req(req);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb13f8ea..323ab47645d0 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
 }
 
 static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
 
 /*	pscsi_attach_hba():
  *
@@ -1045,7 +1045,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
 	return 0;
 }
 
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
 {
 	struct se_cmd *cmd = req->end_io_data;
 	struct pscsi_plugin_task *pt = cmd->priv;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fcd641032f8d..0cf6735046d3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 
 int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
-void blk_mq_end_request(struct request *rq, int error);
-void __blk_mq_end_request(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, blk_status_t error);
+void __blk_mq_end_request(struct request *rq, blk_status_t error);
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 61339bc44400..59378939a8cd 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,22 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
+/*
+ * Block error status values.  See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define	BLK_STS_OK 0
+#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
+#define BLK_STS_NOSPC		((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
+#define BLK_STS_TARGET		((__force blk_status_t)5)
+#define BLK_STS_NEXUS		((__force blk_status_t)6)
+#define BLK_STS_MEDIUM		((__force blk_status_t)7)
+#define BLK_STS_PROTECTION	((__force blk_status_t)8)
+#define BLK_STS_RESOURCE	((__force blk_status_t)9)
+#define BLK_STS_IOERR		((__force blk_status_t)10)
+
 struct blk_issue_stat {
 	u64 stat;
 };
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 019f18c65098..2a8871638453 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,7 +55,7 @@ struct blk_stat_callback;
  */
 #define BLKCG_MAX_POLS		3
 
-typedef void (rq_end_io_fn)(struct request *, int);
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
 #define BLK_RL_SYNCFULL		(1U << 0)
 #define BLK_RL_ASYNCFULL	(1U << 1)
@@ -940,7 +940,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     int (*bio_ctr)(struct bio *, struct bio *, void *),
 			     void *data);
 extern void blk_rq_unprep_clone(struct request *rq);
-extern int blk_insert_cloned_request(struct request_queue *q,
+extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
@@ -980,6 +980,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
 bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1112,16 +1115,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern bool blk_update_request(struct request *rq, int error,
+extern bool blk_update_request(struct request *rq, blk_status_t error,
 			       unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, int error);
-extern bool blk_end_request(struct request *rq, int error,
+extern void blk_finish_request(struct request *rq, blk_status_t error);
+extern bool blk_end_request(struct request *rq, blk_status_t error,
 			    unsigned int nr_bytes);
-extern void blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request(struct request *rq, int error,
+extern void blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request(struct request *rq, blk_status_t error,
 			      unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request_cur(struct request *rq, int error);
+extern void __blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index dec227acc13b..5de5c53251ec 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -74,7 +74,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
 typedef int (*dm_endio_fn) (struct dm_target *ti,
 			    struct bio *bio, int *error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
-				    struct request *clone, int error,
+				    struct request *clone, blk_status_t error,
 				    union map_info *map_context);
 
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6980ca322074..dc152e4b7f73 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -671,7 +671,7 @@ struct ide_port_ops {
 	void	(*init_dev)(ide_drive_t *);
 	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
 	void	(*set_dma_mode)(struct hwif_s *, ide_drive_t *);
-	int	(*reset_poll)(ide_drive_t *);
+	blk_status_t (*reset_poll)(ide_drive_t *);
 	void	(*pre_reset)(ide_drive_t *);
 	void	(*resetproc)(ide_drive_t *);
 	void	(*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
 extern int ide_vlb_clk;
 extern int ide_pci_clk;
 
-int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int);
+int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
 void ide_kill_rq(ide_drive_t *, struct request *);
 
 void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
 			      const struct ide_devset *setting, int arg);
 
 void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, int, unsigned int);
+int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
 
 void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
 void ide_tf_dump(const char *, struct ide_cmd *);
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a09cca829082..a29d3086eb56 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -157,7 +157,7 @@ struct osd_request {
 
 	osd_req_done_fn *async_done;
 	void *async_private;
-	int async_error;
+	blk_status_t async_error;
 	int req_errors;
 };
 
-- 
cgit v1.2.3


From fc17b6534eb8395f0b3133eb31d87deec32c642b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 09:38:05 +0200
Subject: blk-mq: switch ->queue_rq return value to blk_status_t

Use the same values for use for request completion errors as the return
value from ->queue_rq.  BLK_STS_RESOURCE is special cased to cause
a requeue, and all the others are completed as-is.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c                    | 37 ++++++++++++++++------------------
 drivers/block/loop.c              |  6 +++---
 drivers/block/mtip32xx/mtip32xx.c | 17 ++++++++--------
 drivers/block/nbd.c               | 12 ++++-------
 drivers/block/null_blk.c          |  4 ++--
 drivers/block/rbd.c               |  4 ++--
 drivers/block/virtio_blk.c        | 10 +++++-----
 drivers/block/xen-blkfront.c      |  8 ++++----
 drivers/md/dm-rq.c                |  8 ++++----
 drivers/mtd/ubi/block.c           |  6 +++---
 drivers/nvme/host/core.c          | 14 ++++++-------
 drivers/nvme/host/fc.c            | 23 +++++++++++----------
 drivers/nvme/host/nvme.h          |  2 +-
 drivers/nvme/host/pci.c           | 42 +++++++++++++++++++--------------------
 drivers/nvme/host/rdma.c          | 26 +++++++++++++-----------
 drivers/nvme/target/loop.c        | 17 ++++++++--------
 drivers/scsi/scsi_lib.c           | 30 ++++++++++++++--------------
 include/linux/blk-mq.h            |  7 ++-----
 18 files changed, 131 insertions(+), 142 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index adcc1c0dce6e..7af78b1e9db9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -924,7 +924,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
-	int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
+	int errors, queued;
 
 	if (list_empty(list))
 		return false;
@@ -935,6 +935,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 	errors = queued = 0;
 	do {
 		struct blk_mq_queue_data bd;
+		blk_status_t ret;
 
 		rq = list_first_entry(list, struct request, queuelist);
 		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -975,25 +976,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 		}
 
 		ret = q->mq_ops->queue_rq(hctx, &bd);
-		switch (ret) {
-		case BLK_MQ_RQ_QUEUE_OK:
-			queued++;
-			break;
-		case BLK_MQ_RQ_QUEUE_BUSY:
+		if (ret == BLK_STS_RESOURCE) {
 			blk_mq_put_driver_tag_hctx(hctx, rq);
 			list_add(&rq->queuelist, list);
 			__blk_mq_requeue_request(rq);
 			break;
-		default:
-			pr_err("blk-mq: bad return on queue: %d\n", ret);
-		case BLK_MQ_RQ_QUEUE_ERROR:
+		}
+
+		if (unlikely(ret != BLK_STS_OK)) {
 			errors++;
 			blk_mq_end_request(rq, BLK_STS_IOERR);
-			break;
+			continue;
 		}
 
-		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
-			break;
+		queued++;
 	} while (!list_empty(list));
 
 	hctx->dispatched[queued_to_index(queued)]++;
@@ -1031,7 +1027,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 		 * - blk_mq_run_hw_queue() checks whether or not a queue has
 		 *   been stopped before rerunning a queue.
 		 * - Some but not all block drivers stop a queue before
-		 *   returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
 		 *   and dm-rq.
 		 */
 		if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1410,7 +1406,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 	};
 	struct blk_mq_hw_ctx *hctx;
 	blk_qc_t new_cookie;
-	int ret;
+	blk_status_t ret;
 
 	if (q->elevator)
 		goto insert;
@@ -1426,18 +1422,19 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 	 * would have done
 	 */
 	ret = q->mq_ops->queue_rq(hctx, &bd);
-	if (ret == BLK_MQ_RQ_QUEUE_OK) {
+	switch (ret) {
+	case BLK_STS_OK:
 		*cookie = new_cookie;
 		return;
-	}
-
-	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+	case BLK_STS_RESOURCE:
+		__blk_mq_requeue_request(rq);
+		goto insert;
+	default:
 		*cookie = BLK_QC_T_NONE;
-		blk_mq_end_request(rq, BLK_STS_IOERR);
+		blk_mq_end_request(rq, ret);
 		return;
 	}
 
-	__blk_mq_requeue_request(rq);
 insert:
 	blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
 }
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4caf6338c012..70fd7e0de0fa 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1674,7 +1674,7 @@ int loop_unregister_transfer(int number)
 EXPORT_SYMBOL(loop_register_transfer);
 EXPORT_SYMBOL(loop_unregister_transfer);
 
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1683,7 +1683,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(bd->rq);
 
 	if (lo->lo_state != Lo_bound)
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	switch (req_op(cmd->rq)) {
 	case REQ_OP_FLUSH:
@@ -1698,7 +1698,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	kthread_queue_work(&lo->worker, &cmd->work);
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index ee6f66bb50c7..d8618a71da74 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3633,8 +3633,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
-				   struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+		struct request *rq)
 {
 	struct driver_data *dd = hctx->queue->queuedata;
 	struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3642,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 	struct mtip_cmd_sg *command_sg;
 
 	if (mtip_commands_active(dd->port))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	/* Populate the SG list */
 	cmd->command_header->opts =
@@ -3666,10 +3666,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 	mtip_issue_non_ncq_command(dd->port, rq->tag);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return 0;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
@@ -3681,15 +3681,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return mtip_issue_reserved_cmd(hctx, rq);
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	blk_mq_start_request(rq);
 
 	ret = mtip_submit_request(hctx, rq);
 	if (likely(!ret))
-		return BLK_MQ_RQ_QUEUE_OK;
-
-	return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_OK;
+	return BLK_STS_IOERR;
 }
 
 static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 978d2d2d08d6..36839dc45472 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -469,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 				nsock->pending = req;
 				nsock->sent = sent;
 			}
-			return BLK_MQ_RQ_QUEUE_BUSY;
+			return BLK_STS_RESOURCE;
 		}
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
@@ -510,7 +510,7 @@ send_pages:
 					 */
 					nsock->pending = req;
 					nsock->sent = sent;
-					return BLK_MQ_RQ_QUEUE_BUSY;
+					return BLK_STS_RESOURCE;
 				}
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -798,7 +798,7 @@ out:
 	return ret;
 }
 
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -822,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * appropriate.
 	 */
 	ret = nbd_handle_cmd(cmd, hctx->queue_num);
-	if (ret < 0)
-		ret = BLK_MQ_RQ_QUEUE_ERROR;
-	if (!ret)
-		ret = BLK_MQ_RQ_QUEUE_OK;
 	complete(&cmd->send_complete);
 
-	return ret;
+	return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
 }
 
 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index e6b81d370882..586dfff5d53f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -356,7 +356,7 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,7 +373,7 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(bd->rq);
 
 	null_handle_cmd(cmd);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3e8b43d792c2..74a6791b15c8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4154,14 +4154,14 @@ err:
 	blk_mq_end_request(rq, errno_to_blk_status(result));
 }
 
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
 	struct work_struct *work = blk_mq_rq_to_pdu(rq);
 
 	queue_work(rbd_wq, work);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 205b74d70efc..e59bd4549a8a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		break;
 	default:
 		WARN_ON_ONCE(1);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		/* Out of mem doesn't actually happen, since we fall back
 		 * to direct descriptors */
 		if (err == -ENOMEM || err == -ENOSPC)
-			return BLK_MQ_RQ_QUEUE_BUSY;
-		return BLK_MQ_RQ_QUEUE_ERROR;
+			return BLK_STS_RESOURCE;
+		return BLK_STS_IOERR;
 	}
 
 	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (notify)
 		virtqueue_notify(vblk->vqs[qid].vq);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index aedc3c759273..2f468cf86dcf 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -881,7 +881,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
 		 !info->feature_fua));
 }
 
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *qd)
 {
 	unsigned long flags;
@@ -904,16 +904,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	flush_requests(rinfo);
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_err:
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_MQ_RQ_QUEUE_ERROR;
+	return BLK_STS_IOERR;
 
 out_busy:
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 	blk_mq_stop_hw_queue(hctx);
-	return BLK_MQ_RQ_QUEUE_BUSY;
+	return BLK_STS_RESOURCE;
 }
 
 static void blkif_complete_rq(struct request *rq)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index bee334389173..63402f8a38de 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	return __dm_rq_init_rq(set->driver_data, rq);
 }
 
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	}
 
 	if (ti->type->busy && ti->type->busy(ti))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	dm_start_request(md, rq);
 
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		rq_end_stats(md, rq);
 		rq_completed(md, rq_data_dir(rq), false);
 		blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops dm_mq_ops = {
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 3ecdb39d1985..c3963f880448 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -316,7 +316,7 @@ static void ubiblock_do_work(struct work_struct *work)
 	blk_mq_end_request(req, errno_to_blk_status(ret));
 }
 
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 			     const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_READ:
 		ubi_sgl_init(&pdu->usgl);
 		queue_work(dev->wq, &pdu->work);
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	default:
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 07e95c7d837a..4e193b93d1d9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -283,7 +283,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
 	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
 }
 
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
 	unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -292,7 +292,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
 	if (!range)
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	__rq_for_each_bio(bio, req) {
 		u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -306,7 +306,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	if (WARN_ON_ONCE(n != segments)) {
 		kfree(range);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	memset(cmnd, 0, sizeof(*cmnd));
@@ -320,7 +320,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	req->special_vec.bv_len = sizeof(*range) * segments;
 	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
@@ -364,10 +364,10 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 }
 
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd)
 {
-	int ret = BLK_MQ_RQ_QUEUE_OK;
+	blk_status_t ret = BLK_STS_OK;
 
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		nvme_req(req)->retries = 0;
@@ -394,7 +394,7 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		break;
 	default:
 		WARN_ON_ONCE(1);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	cmd->common.command_id = req->tag;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5b14cbefb724..eb0973ac9e17 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1873,7 +1873,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
  * level FC exchange resource that is also outstanding. This must be
  * considered in all cleanup operations.
  */
-static int
+static blk_status_t
 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	struct nvme_fc_fcp_op *op, u32 data_len,
 	enum nvmefc_fcp_datadir	io_dir)
@@ -1888,10 +1888,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	 * the target device is present
 	 */
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	if (!nvme_fc_ctrl_get(ctrl))
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	/* format the FC-NVME CMD IU and fcp_req */
 	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1939,8 +1939,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		if (ret < 0) {
 			nvme_cleanup_cmd(op->rq);
 			nvme_fc_ctrl_put(ctrl);
-			return (ret == -ENOMEM || ret == -EAGAIN) ?
-				BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+			if (ret == -ENOMEM || ret == -EAGAIN)
+				return BLK_STS_RESOURCE;
+			return BLK_STS_IOERR;
 		}
 	}
 
@@ -1966,19 +1967,19 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		nvme_fc_ctrl_put(ctrl);
 
 		if (ret != -EBUSY)
-			return BLK_MQ_RQ_QUEUE_ERROR;
+			return BLK_STS_IOERR;
 
 		if (op->rq) {
 			blk_mq_stop_hw_queues(op->rq->q);
 			blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
 		}
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
-static int
+static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
 {
@@ -1991,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *sqe = &cmdiu->sqe;
 	enum nvmefc_fcp_datadir	io_dir;
 	u32 data_len;
-	int ret;
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, rq, sqe);
 	if (ret)
@@ -2046,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	struct nvme_fc_fcp_op *aen_op;
 	unsigned long flags;
 	bool terminating = false;
-	int ret;
+	blk_status_t ret;
 
 	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
 		return;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9d6a070d4391..22ee60b2a3e8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -296,7 +296,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 819898428763..430d085af31c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -427,7 +427,7 @@ static __le64 **iod_list(struct request *req)
 	return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +436,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
 		if (!iod->sg)
-			return BLK_MQ_RQ_QUEUE_BUSY;
+			return BLK_STS_RESOURCE;
 	} else {
 		iod->sg = iod->inline_sg;
 	}
@@ -446,7 +446,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->nents = 0;
 	iod->length = size;
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +616,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 	return true;
 }
 
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct request_queue *q = req->q;
 	enum dma_data_direction dma_dir = rq_data_dir(req) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-	int ret = BLK_MQ_RQ_QUEUE_ERROR;
+	blk_status_t ret = BLK_STS_IOERR;
 
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
 
-	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	ret = BLK_STS_RESOURCE;
 	if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
 				DMA_ATTR_NO_WARN))
 		goto out;
@@ -638,7 +638,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 	if (!nvme_setup_prps(dev, req))
 		goto out_unmap;
 
-	ret = BLK_MQ_RQ_QUEUE_ERROR;
+	ret = BLK_STS_IOERR;
 	if (blk_integrity_rq(req)) {
 		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
 			goto out_unmap;
@@ -658,7 +658,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 	cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
 	if (blk_integrity_rq(req))
 		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_unmap:
 	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +688,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,7 +696,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_command cmnd;
-	int ret = BLK_MQ_RQ_QUEUE_OK;
+	blk_status_t ret = BLK_STS_OK;
 
 	/*
 	 * If formated with metadata, require the block layer provide a buffer
@@ -705,38 +705,36 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 */
 	if (ns && ns->ms && !blk_integrity_rq(req)) {
 		if (!(ns->pi_type && ns->ms == 8) &&
-		    !blk_rq_is_passthrough(req)) {
-			blk_mq_end_request(req, BLK_STS_NOTSUPP);
-			return BLK_MQ_RQ_QUEUE_OK;
-		}
+		    !blk_rq_is_passthrough(req))
+			return BLK_STS_NOTSUPP;
 	}
 
 	ret = nvme_setup_cmd(ns, req, &cmnd);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	ret = nvme_init_iod(req, dev);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		goto out_free_cmd;
 
-	if (blk_rq_nr_phys_segments(req))
+	if (blk_rq_nr_phys_segments(req)) {
 		ret = nvme_map_data(dev, req, &cmnd);
-
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
-		goto out_cleanup_iod;
+		if (ret)
+			goto out_cleanup_iod;
+	}
 
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (unlikely(nvmeq->cq_vector < 0)) {
-		ret = BLK_MQ_RQ_QUEUE_ERROR;
+		ret = BLK_STS_IOERR;
 		spin_unlock_irq(&nvmeq->q_lock);
 		goto out_cleanup_iod;
 	}
 	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 out_cleanup_iod:
 	nvme_free_iod(dev, req);
 out_free_cmd:
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 28bd255c144d..58d311e704e5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1448,7 +1448,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 	return true;
 }
 
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1459,27 +1459,28 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *c = sqe->data;
 	bool flush = false;
 	struct ib_device *dev;
-	int ret;
+	blk_status_t ret;
+	int err;
 
 	WARN_ON_ONCE(rq->tag < 0);
 
 	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	dev = queue->device->dev;
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
 	ret = nvme_setup_cmd(ns, rq, c);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	blk_mq_start_request(rq);
 
-	ret = nvme_rdma_map_data(queue, rq, c);
-	if (ret < 0) {
+	err = nvme_rdma_map_data(queue, rq, c);
+	if (err < 0) {
 		dev_err(queue->ctrl->ctrl.device,
-			     "Failed to map data (%d)\n", ret);
+			     "Failed to map data (%d)\n", err);
 		nvme_cleanup_cmd(rq);
 		goto err;
 	}
@@ -1489,17 +1490,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (req_op(rq) == REQ_OP_FLUSH)
 		flush = true;
-	ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
 			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
-	if (ret) {
+	if (err) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 err:
-	return (ret == -ENOMEM || ret == -EAGAIN) ?
-		BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+	if (err == -ENOMEM || err == -EAGAIN)
+		return BLK_STS_RESOURCE;
+	return BLK_STS_IOERR;
 }
 
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e503cfff0337..db8ebadf885b 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -159,17 +159,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
 	struct nvme_loop_queue *queue = hctx->driver_data;
 	struct request *req = bd->rq;
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
-	int ret;
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, req, &iod->cmd);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +179,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		nvme_cleanup_cmd(req);
 		blk_mq_start_request(req);
 		nvme_loop_queue_response(&iod->req);
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	}
 
 	if (blk_rq_bytes(req)) {
 		iod->sg_table.sgl = iod->first_sgl;
-		ret = sg_alloc_table_chained(&iod->sg_table,
+		if (sg_alloc_table_chained(&iod->sg_table,
 				blk_rq_nr_phys_segments(req),
-				iod->sg_table.sgl);
-		if (ret)
-			return BLK_MQ_RQ_QUEUE_BUSY;
+				iod->sg_table.sgl))
+			return BLK_STS_RESOURCE;
 
 		iod->req.sg = iod->sg_table.sgl;
 		iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +196,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(req);
 
 	schedule_work(&iod->work);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 67a67191520f..b5f310b9e910 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1812,15 +1812,15 @@ out_delay:
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
 {
 	switch (ret) {
 	case BLKPREP_OK:
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	case BLKPREP_DEFER:
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	default:
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -1892,7 +1892,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
@@ -1900,14 +1900,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost = sdev->host;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
-	int ret;
+	blk_status_t ret;
 	int reason;
 
 	ret = prep_to_mq(scsi_prep_state_check(sdev, req));
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret != BLK_STS_OK)
 		goto out;
 
-	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	ret = BLK_STS_RESOURCE;
 	if (!get_device(&sdev->sdev_gendev))
 		goto out;
 
@@ -1920,7 +1920,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		ret = prep_to_mq(scsi_mq_prep_fn(req));
-		if (ret != BLK_MQ_RQ_QUEUE_OK)
+		if (ret != BLK_STS_OK)
 			goto out_dec_host_busy;
 		req->rq_flags |= RQF_DONTPREP;
 	} else {
@@ -1938,11 +1938,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	reason = scsi_dispatch_cmd(cmd);
 	if (reason) {
 		scsi_set_blocked(cmd, reason);
-		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		ret = BLK_STS_RESOURCE;
 		goto out_dec_host_busy;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_dec_host_busy:
 	atomic_dec(&shost->host_busy);
@@ -1955,12 +1955,14 @@ out_put_device:
 	put_device(&sdev->sdev_gendev);
 out:
 	switch (ret) {
-	case BLK_MQ_RQ_QUEUE_BUSY:
+	case BLK_STS_OK:
+		break;
+	case BLK_STS_RESOURCE:
 		if (atomic_read(&sdev->device_busy) == 0 &&
 		    !scsi_device_blocked(sdev))
 			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
 		break;
-	case BLK_MQ_RQ_QUEUE_ERROR:
+	default:
 		/*
 		 * Make sure to release all allocated ressources when
 		 * we hit an error, as we will never see this command
@@ -1969,8 +1971,6 @@ out:
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
 		break;
-	default:
-		break;
 	}
 	return ret;
 }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0cf6735046d3..b144b7b0e104 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -87,7 +87,8 @@ struct blk_mq_queue_data {
 	bool last;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
+		const struct blk_mq_queue_data *);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -155,10 +156,6 @@ struct blk_mq_ops {
 };
 
 enum {
-	BLK_MQ_RQ_QUEUE_OK	= 0,	/* queued fine */
-	BLK_MQ_RQ_QUEUE_BUSY	= 1,	/* requeue IO for later */
-	BLK_MQ_RQ_QUEUE_ERROR	= 2,	/* end IO with error */
-
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_TAG_SHARED	= 1 << 1,
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
-- 
cgit v1.2.3


From 4e4cbee93d56137ebff722be022cae5f70ef84fb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Jun 2017 09:38:06 +0200
Subject: block: switch bios to blk_status_t

Replace bi_error with a new bi_status to allow for a clear conversion.
Note that device mapper overloaded bi_error with a private value, which
we'll have to keep arround at least for now and thus propagate to a
proper blk_status_t value.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c               |  8 ++--
 block/bio.c                         |  8 ++--
 block/blk-core.c                    | 20 ++++++----
 block/blk-integrity.c               |  4 +-
 block/bounce.c                      |  4 +-
 block/t10-pi.c                      | 30 +++++++--------
 drivers/block/aoe/aoecmd.c          | 10 ++---
 drivers/block/aoe/aoedev.c          |  2 +-
 drivers/block/drbd/drbd_actlog.c    |  2 +-
 drivers/block/drbd/drbd_bitmap.c    |  6 +--
 drivers/block/drbd/drbd_int.h       |  2 +-
 drivers/block/drbd/drbd_receiver.c  |  6 +--
 drivers/block/drbd/drbd_req.c       |  6 +--
 drivers/block/drbd/drbd_worker.c    | 16 ++++----
 drivers/block/floppy.c              |  4 +-
 drivers/block/pktcdvd.c             | 18 ++++-----
 drivers/block/ps3vram.c             | 14 +++----
 drivers/block/rsxx/dev.c            | 14 +++----
 drivers/block/rsxx/dma.c            | 13 +++----
 drivers/block/rsxx/rsxx_priv.h      |  2 +-
 drivers/block/umem.c                |  2 +-
 drivers/block/xen-blkback/blkback.c | 19 ++++------
 drivers/block/xen-blkfront.c        |  2 +-
 drivers/lightnvm/pblk-core.c        |  4 +-
 drivers/lightnvm/pblk-read.c        |  4 +-
 drivers/lightnvm/pblk-write.c       |  2 +-
 drivers/lightnvm/rrpc.c             |  8 ++--
 drivers/md/bcache/bcache.h          |  7 ++--
 drivers/md/bcache/btree.c           |  6 +--
 drivers/md/bcache/io.c              |  6 +--
 drivers/md/bcache/journal.c         |  2 +-
 drivers/md/bcache/movinggc.c        | 10 ++---
 drivers/md/bcache/request.c         | 28 +++++++-------
 drivers/md/bcache/request.h         |  2 +-
 drivers/md/bcache/super.c           |  6 +--
 drivers/md/bcache/writeback.c       |  4 +-
 drivers/md/dm-bio-prison-v1.c       |  4 +-
 drivers/md/dm-bio-prison-v1.h       |  2 +-
 drivers/md/dm-bufio.c               | 28 +++++++-------
 drivers/md/dm-cache-target.c        | 34 +++++++++--------
 drivers/md/dm-crypt.c               | 34 ++++++++---------
 drivers/md/dm-flakey.c              |  5 ++-
 drivers/md/dm-integrity.c           | 18 ++++-----
 drivers/md/dm-io.c                  | 10 ++---
 drivers/md/dm-log-writes.c          |  7 ++--
 drivers/md/dm-mpath.c               | 15 ++++----
 drivers/md/dm-raid1.c               | 13 ++++---
 drivers/md/dm-rq.c                  |  2 +-
 drivers/md/dm-snap.c                |  5 ++-
 drivers/md/dm-stripe.c              |  5 ++-
 drivers/md/dm-thin.c                | 65 ++++++++++++++++----------------
 drivers/md/dm-verity-target.c       | 10 ++---
 drivers/md/dm.c                     | 40 ++++++++++----------
 drivers/md/md.c                     |  8 ++--
 drivers/md/multipath.c              | 10 ++---
 drivers/md/raid1.c                  | 36 +++++++++---------
 drivers/md/raid10.c                 | 36 +++++++++---------
 drivers/md/raid5-cache.c            |  4 +-
 drivers/md/raid5-ppl.c              |  2 +-
 drivers/md/raid5.c                  | 22 +++++------
 drivers/nvdimm/blk.c                |  4 +-
 drivers/nvdimm/btt.c                |  4 +-
 drivers/nvdimm/pmem.c               | 28 +++++++-------
 drivers/nvme/target/io-cmd.c        |  4 +-
 drivers/target/target_core_iblock.c | 10 ++---
 fs/block_dev.c                      | 18 +++++----
 fs/btrfs/btrfs_inode.h              |  3 +-
 fs/btrfs/check-integrity.c          |  4 +-
 fs/btrfs/compression.c              | 44 +++++++++++-----------
 fs/btrfs/compression.h              |  4 +-
 fs/btrfs/ctree.h                    |  6 +--
 fs/btrfs/disk-io.c                  | 75 ++++++++++++++++++-------------------
 fs/btrfs/disk-io.h                  | 12 +++---
 fs/btrfs/extent_io.c                | 23 +++++++-----
 fs/btrfs/extent_io.h                |  6 +--
 fs/btrfs/file-item.c                | 14 +++----
 fs/btrfs/inode.c                    | 73 ++++++++++++++++++------------------
 fs/btrfs/raid56.c                   | 16 ++++----
 fs/btrfs/scrub.c                    | 26 ++++++-------
 fs/btrfs/volumes.c                  | 11 +++---
 fs/buffer.c                         |  2 +-
 fs/crypto/bio.c                     |  2 +-
 fs/direct-io.c                      |  8 ++--
 fs/ext4/page-io.c                   | 13 ++++---
 fs/ext4/readpage.c                  |  4 +-
 fs/f2fs/data.c                      | 10 ++---
 fs/f2fs/segment.c                   |  2 +-
 fs/gfs2/lops.c                      |  8 ++--
 fs/gfs2/meta_io.c                   |  2 +-
 fs/gfs2/ops_fstype.c                |  4 +-
 fs/iomap.c                          |  4 +-
 fs/jfs/jfs_logmgr.c                 |  2 +-
 fs/jfs/jfs_metapage.c               |  4 +-
 fs/mpage.c                          |  3 +-
 fs/nfs/blocklayout/blocklayout.c    |  4 +-
 fs/nilfs2/segbuf.c                  |  2 +-
 fs/ocfs2/cluster/heartbeat.c        |  6 +--
 fs/xfs/xfs_aops.c                   |  7 ++--
 fs/xfs/xfs_buf.c                    |  7 +++-
 include/linux/bio.h                 |  2 +-
 include/linux/blk_types.h           |  5 ++-
 include/linux/blkdev.h              |  2 +-
 include/linux/device-mapper.h       |  2 +-
 kernel/power/swap.c                 | 14 +++----
 kernel/trace/blktrace.c             |  4 +-
 mm/page_io.c                        |  4 +-
 106 files changed, 625 insertions(+), 603 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5384713d48bc..17b9740e138b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -221,7 +221,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
  * @bio:	bio to generate/verify integrity metadata for
  * @proc_fn:	Pointer to the relevant processing function
  */
-static int bio_integrity_process(struct bio *bio,
+static blk_status_t bio_integrity_process(struct bio *bio,
 				 integrity_processing_fn *proc_fn)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -229,7 +229,7 @@ static int bio_integrity_process(struct bio *bio,
 	struct bvec_iter bviter;
 	struct bio_vec bv;
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	unsigned int ret = 0;
+	blk_status_t ret = BLK_STS_OK;
 	void *prot_buf = page_address(bip->bip_vec->bv_page) +
 		bip->bip_vec->bv_offset;
 
@@ -366,7 +366,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
+	bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
@@ -395,7 +395,7 @@ void bio_integrity_endio(struct bio *bio)
 	 * integrity metadata.  Restore original bio end_io handler
 	 * and run it.
 	 */
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bio->bi_end_io = bip->bip_end_io;
 		bio_endio(bio);
 
diff --git a/block/bio.c b/block/bio.c
index 888e7801c638..7a5c8ed27f42 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -309,8 +309,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
 {
 	struct bio *parent = bio->bi_private;
 
-	if (!parent->bi_error)
-		parent->bi_error = bio->bi_error;
+	if (!parent->bi_status)
+		parent->bi_status = bio->bi_status;
 	bio_put(bio);
 	return parent;
 }
@@ -918,7 +918,7 @@ static void submit_bio_wait_endio(struct bio *bio)
 {
 	struct submit_bio_ret *ret = bio->bi_private;
 
-	ret->error = bio->bi_error;
+	ret->error = blk_status_to_errno(bio->bi_status);
 	complete(&ret->event);
 }
 
@@ -1818,7 +1818,7 @@ again:
 
 	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
 		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
-					 bio, bio->bi_error);
+					 bio, bio->bi_status);
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
 	}
 
diff --git a/block/blk-core.c b/block/blk-core.c
index e942a9f814c7..3d84820ace9e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -144,6 +144,9 @@ static const struct {
 	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
 	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
 
+	/* device mapper special case, should not leak out: */
+	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
+
 	/* everything else not covered above: */
 	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
 };
@@ -188,7 +191,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, blk_status_t error)
 {
 	if (error)
-		bio->bi_error = blk_status_to_errno(error);
+		bio->bi_status = error;
 
 	if (unlikely(rq->rq_flags & RQF_QUIET))
 		bio_set_flag(bio, BIO_QUIET);
@@ -1717,7 +1720,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	blk_queue_split(q, &bio, q->bio_split);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
@@ -1775,7 +1778,10 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
 	if (IS_ERR(req)) {
 		__wbt_done(q->rq_wb, wb_acct);
-		bio->bi_error = PTR_ERR(req);
+		if (PTR_ERR(req) == -ENOMEM)
+			bio->bi_status = BLK_STS_RESOURCE;
+		else
+			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		goto out_unlock;
 	}
@@ -1930,7 +1936,7 @@ generic_make_request_checks(struct bio *bio)
 {
 	struct request_queue *q;
 	int nr_sectors = bio_sectors(bio);
-	int err = -EIO;
+	blk_status_t status = BLK_STS_IOERR;
 	char b[BDEVNAME_SIZE];
 	struct hd_struct *part;
 
@@ -1973,7 +1979,7 @@ generic_make_request_checks(struct bio *bio)
 	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
 		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
 		if (!nr_sectors) {
-			err = 0;
+			status = BLK_STS_OK;
 			goto end_io;
 		}
 	}
@@ -2025,9 +2031,9 @@ generic_make_request_checks(struct bio *bio)
 	return true;
 
 not_supported:
-	err = -EOPNOTSUPP;
+	status = BLK_STS_NOTSUPP;
 end_io:
-	bio->bi_error = err;
+	bio->bi_status = status;
 	bio_endio(bio);
 	return false;
 }
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f891a9aff4d..feb30570eaf5 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
 	.sysfs_ops	= &integrity_ops,
 };
 
-static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
+static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
 {
-	return 0;
+	return BLK_STS_OK;
 }
 
 static const struct blk_integrity_profile nop_profile = {
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3a5da1..e4703181d97f 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -143,7 +143,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 		mempool_free(bvec->bv_page, pool);
 	}
 
-	bio_orig->bi_error = bio->bi_error;
+	bio_orig->bi_status = bio->bi_status;
 	bio_endio(bio_orig);
 	bio_put(bio);
 }
@@ -163,7 +163,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
 {
 	struct bio *bio_orig = bio->bi_private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		copy_to_high_bio_irq(bio_orig, bio);
 
 	bounce_end_io(bio, pool);
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 680c6d636298..350b3cbcf9e5 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
  * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
  * tag.
  */
-static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
-			   unsigned int type)
+static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
+		csum_fn *fn, unsigned int type)
 {
 	unsigned int i;
 
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
 		iter->seed++;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
-				unsigned int type)
+static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
+		csum_fn *fn, unsigned int type)
 {
 	unsigned int i;
 
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
 			       "(rcvd %04x, want %04x)\n", iter->disk_name,
 			       (unsigned long long)iter->seed,
 			       be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
-			return -EILSEQ;
+			return BLK_STS_PROTECTION;
 		}
 
 next:
@@ -117,45 +117,45 @@ next:
 		iter->seed++;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_ip_fn, 3);
 }
 
-static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_ip_fn, 3);
 }
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 5bf0c9d21fc1..dc43254e05a4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,7 +1070,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
 		d->ip.rq = NULL;
 	do {
 		bio = rq->bio;
-		bok = !fastfail && !bio->bi_error;
+		bok = !fastfail && !bio->bi_status;
 	} while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
 
 	/* cf. http://lkml.org/lkml/2006/10/31/28 */
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
 			ahout->cmdstat, ahin->cmdstat,
 			d->aoemajor, d->aoeminor);
 noskb:		if (buf)
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -1144,7 +1144,7 @@ noskb:		if (buf)
 				"aoe: runt data size in read from",
 				(long) d->aoemajor, d->aoeminor,
 			       skb->len, n);
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 			break;
 		}
 		if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb:		if (buf)
 				"aoe: too-large data size in read from",
 				(long) d->aoemajor, d->aoeminor,
 				n, f->iter.bi_size);
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 			break;
 		}
 		bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
 	if (buf == NULL)
 		return;
 	buf->iter.bi_size = 0;
-	buf->bio->bi_error = -EIO;
+	buf->bio->bi_status = BLK_STS_IOERR;
 	if (buf->nframesout == 0)
 		aoe_end_buf(d, buf);
 }
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947500c6..b28fefb90391 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
 	if (rq == NULL)
 		return;
 	while ((bio = d->ip.nxbio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		d->ip.nxbio = bio->bi_next;
 		n = (unsigned long) rq->special;
 		rq->special = (void *) --n;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa49c12..e02c45cd3c5a 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 	else
 		submit_bio(bio);
 	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		err = device->md_io.error;
 
  out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a4107fbc..809fd245c3dc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
 	    !bm_test_page_unchanged(b->bm_pages[idx]))
 		drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* ctx error will hold the completed-last non-zero error code,
 		 * in case error codes differ. */
-		ctx->error = bio->bi_error;
+		ctx->error = blk_status_to_errno(bio->bi_status);
 		bm_set_page_io_err(b->bm_pages[idx]);
 		/* Not identical to on disk version of it.
 		 * Is BM_PAGE_IO_ERROR enough? */
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
-					bio->bi_error, idx);
+					bio->bi_status, idx);
 	} else {
 		bm_clear_page_io_err(b->bm_pages[idx]);
 		dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45bb03a6..76761b4ca13e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1627,7 +1627,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
 	__release(local);
 	if (!bio->bi_bdev) {
 		drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
-		bio->bi_error = -ENODEV;
+		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return;
 	}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be24f39..c7e95e6380fb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
 	struct drbd_device *device = octx->device;
 	struct issue_flush_context *ctx = octx->ctx;
 
-	if (bio->bi_error) {
-		ctx->error = bio->bi_error;
-		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+	if (bio->bi_status) {
+		ctx->error = blk_status_to_errno(bio->bi_status);
+		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
 	}
 	kfree(octx);
 	bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 656624314f0d..fca6b9914948 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
 void complete_master_bio(struct drbd_device *device,
 		struct bio_and_error *m)
 {
-	m->bio->bi_error = m->error;
+	m->bio->bi_status = errno_to_blk_status(m->error);
 	bio_endio(m->bio);
 	dec_ap_bio(device);
 }
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
 
 	if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
 			GFP_NOIO, 0))
-		req->private_bio->bi_error = -EIO;
+		req->private_bio->bi_status = BLK_STS_IOERR;
 	bio_endio(req->private_bio);
 }
 
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 		/* only pass the error to the upper layers.
 		 * if user cannot handle io errors, that's not our business. */
 		drbd_err(device, "could not kmalloc() req\n");
-		bio->bi_error = -ENOMEM;
+		bio->bi_status = BLK_STS_RESOURCE;
 		bio_endio(bio);
 		return ERR_PTR(-ENOMEM);
 	}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e02d8d..1d8726a8df34 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
 	struct drbd_device *device;
 
 	device = bio->bi_private;
-	device->md_io.error = bio->bi_error;
+	device->md_io.error = blk_status_to_errno(bio->bi_status);
 
 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
 	 * to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
 	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
 			  bio_op(bio) == REQ_OP_DISCARD;
 
-	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+	if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
 				is_write ? (is_discard ? "discard" : "write")
-					: "read", bio->bi_error,
+					: "read", bio->bi_status,
 				(unsigned long long)peer_req->i.sector);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
 
 	bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
 
-		if (!bio->bi_error)
+		if (!bio->bi_status)
 			drbd_panic_after_delayed_completion_of_aborted_request(device);
 	}
 
 	/* to avoid recursion in __req_mod */
-	if (unlikely(bio->bi_error)) {
+	if (unlikely(bio->bi_status)) {
 		switch (bio_op(bio)) {
 		case REQ_OP_WRITE_ZEROES:
 		case REQ_OP_DISCARD:
-			if (bio->bi_error == -EOPNOTSUPP)
+			if (bio->bi_status == BLK_STS_NOTSUPP)
 				what = DISCARD_COMPLETED_NOTSUPP;
 			else
 				what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
 	}
 
 	bio_put(req->private_bio);
-	req->private_bio = ERR_PTR(bio->bi_error);
+	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
 
 	/* not req_mod(), we need irqsave here! */
 	spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cc75a5176057..9e3cb32e365d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
 	struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
 	int drive = cbdata->drive;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		pr_info("floppy: error %d while reading block 0\n",
-			bio->bi_error);
+			bio->bi_status);
 		set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
 	}
 	complete(&cbdata->complete);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 42e3c880a8a5..e8a381161db6 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
 
 	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
 		bio, (unsigned long long)pkt->sector,
-		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		atomic_inc(&pkt->io_errors);
 	if (atomic_dec_and_test(&pkt->io_wait)) {
 		atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
 	struct pktcdvd_device *pd = pkt->pd;
 	BUG_ON(!pd);
 
-	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
 
 	pd->stats.pkt_ended++;
 
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	pkt_queue_bio(pd, pkt->w_bio);
 }
 
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
 {
 	struct bio *bio;
 
-	if (error)
+	if (status)
 		pkt->cache_valid = 0;
 
 	/* Finish all bios corresponding to this packet */
 	while ((bio = bio_list_pop(&pkt->orig_bios))) {
-		bio->bi_error = error;
+		bio->bi_status = status;
 		bio_endio(bio);
 	}
 }
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 			if (atomic_read(&pkt->io_wait) > 0)
 				return;
 
-			if (!pkt->w_bio->bi_error) {
+			if (!pkt->w_bio->bi_status) {
 				pkt_set_state(pkt, PACKET_FINISHED_STATE);
 			} else {
 				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 			break;
 
 		case PACKET_FINISHED_STATE:
-			pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+			pkt_finish_packet(pkt, pkt->w_bio->bi_status);
 			return;
 
 		default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 	struct packet_stacked_data *psd = bio->bi_private;
 	struct pktcdvd_device *pd = psd->pd;
 
-	psd->bio->bi_error = bio->bi_error;
+	psd->bio->bi_status = bio->bi_status;
 	bio_put(bio);
 	bio_endio(psd->bio);
 	mempool_free(psd, psd_pool);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe21559..6fa2b8197013 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
 	kfree(priv->cache.tags);
 }
 
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 			size_t len, size_t *retlen, u_char *buf)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 		(unsigned int)from, len);
 
 	if (from >= priv->size)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (len > priv->size - from)
 		len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 	return 0;
 }
 
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
 			 size_t len, size_t *retlen, const u_char *buf)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 	unsigned int cached, count;
 
 	if (to >= priv->size)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (len > priv->size - to)
 		len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 	int write = bio_data_dir(bio) == WRITE;
 	const char *op = write ? "write" : "read";
 	loff_t offset = bio->bi_iter.bi_sector << 9;
-	int error = 0;
+	blk_status_t error = 0;
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 
 		if (retlen != len) {
 			dev_err(&dev->core, "Short %s\n", op);
-			error = -EIO;
+			error = BLK_STS_IOERR;
 			goto out;
 		}
 
@@ -593,7 +593,7 @@ out:
 	next = bio_list_peek(&priv->list);
 	spin_unlock_irq(&priv->lock);
 
-	bio->bi_error = error;
+	bio->bi_status = error;
 	bio_endio(bio);
 	return next;
 }
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c566364ac9c..0b0a0a902355 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,7 +149,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct rsxx_cardinfo *card = q->queuedata;
 	struct rsxx_bio_meta *bio_meta;
-	int st = -EINVAL;
+	blk_status_t st = BLK_STS_IOERR;
 
 	blk_queue_split(q, &bio, q->bio_split);
 
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 	if (bio_end_sector(bio) > get_capacity(card->gendisk))
 		goto req_err;
 
-	if (unlikely(card->halt)) {
-		st = -EFAULT;
+	if (unlikely(card->halt))
 		goto req_err;
-	}
 
-	if (unlikely(card->dma_fault)) {
-		st = (-EFAULT);
+	if (unlikely(card->dma_fault))
 		goto req_err;
-	}
 
 	if (bio->bi_iter.bi_size == 0) {
 		dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 
 	bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
 	if (!bio_meta) {
-		st = -ENOMEM;
+		st = BLK_STS_RESOURCE;
 		goto req_err;
 	}
 
@@ -205,7 +201,7 @@ queue_err:
 	kmem_cache_free(bio_meta_pool, bio_meta);
 req_err:
 	if (st)
-		bio->bi_error = st;
+		bio->bi_status = st;
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f87d0..6a1b2177951c 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
 	mutex_unlock(&ctrl->work_lock);
 }
 
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
 				  struct list_head *q,
 				  unsigned int laddr,
 				  rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 
 	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	dma->cmd          = HW_CMD_BLK_DISCARD;
 	dma->laddr        = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 	return 0;
 }
 
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
 			      struct list_head *q,
 			      int dir,
 			      unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 
 	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	dma->cmd          = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
 	dma->laddr        = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 	return 0;
 }
 
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   struct bio *bio,
 			   atomic_t *n_dmas,
 			   rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	unsigned int dma_len;
 	int dma_cnt[RSXX_MAX_TARGETS];
 	int tgt;
-	int st;
+	blk_status_t st;
 	int i;
 
 	addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
 	for (i = 0; i < card->n_targets; i++)
 		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
 					FREE_DMA);
-
 	return st;
 }
 
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d0f690..277f27e673a2 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   struct bio *bio,
 			   atomic_t *n_dmas,
 			   rsxx_dma_cb cb,
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3be22b..4b3c947697b1 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
 				PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
 		if (control & DMASCR_HARD_ERROR) {
 			/* error */
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			dev_printk(KERN_WARNING, &card->dev->dev,
 				"I/O error on sector %d/%d\n",
 				le32_to_cpu(desc->local_addr)>>9,
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 726c32e35db9..746bd8c8c09a 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1069,20 +1069,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
 	atomic_set(&blkif->drain, 0);
 }
 
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+		blk_status_t error)
 {
 	/* An error fails the entire request. */
-	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
-	    (error == -EOPNOTSUPP)) {
+	if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+	    error == BLK_STS_NOTSUPP) {
 		pr_debug("flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
-	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
-		    (error == -EOPNOTSUPP)) {
+	} else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+		   error == BLK_STS_NOTSUPP) {
 		pr_debug("write barrier op failed, not supported\n");
 		xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1106,7 +1103,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
  */
 static void end_block_io_op(struct bio *bio)
 {
-	__end_block_io_op(bio->bi_private, bio->bi_error);
+	__end_block_io_op(bio->bi_private, bio->bi_status);
 	bio_put(bio);
 }
 
@@ -1423,7 +1420,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	for (i = 0; i < nbio; i++)
 		bio_put(biolist[i]);
 	atomic_set(&pending_req->pendcnt, 1);
-	__end_block_io_op(pending_req, -EINVAL);
+	__end_block_io_op(pending_req, BLK_STS_RESOURCE);
 	msleep(1); /* back off a bit */
 	return -EIO;
 }
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f468cf86dcf..e3be666c2776 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2006,7 +2006,7 @@ static void split_bio_end(struct bio *bio)
 
 	if (atomic_dec_and_test(&split_bio->pending)) {
 		split_bio->bio->bi_phys_segments = 0;
-		split_bio->bio->bi_error = bio->bi_error;
+		split_bio->bio->bi_status = bio->bi_status;
 		bio_endio(split_bio->bio);
 		kfree(split_bio);
 	}
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..4e0de995cd90 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -296,8 +296,8 @@ void pblk_flush_writer(struct pblk *pblk)
 		pr_err("pblk: tear down bio failed\n");
 	}
 
-	if (bio->bi_error)
-		pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("pblk: flush sync write failed (%u)\n", bio->bi_status);
 
 	bio_put(bio);
 }
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..762c0b73cb67 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -114,7 +114,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 		pblk_log_read_err(pblk, rqd);
 #ifdef CONFIG_NVM_DEBUG
 	else
-		WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+		WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
 #endif
 
 	if (rqd->nr_ppas > 1)
@@ -123,7 +123,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 	bio_put(bio);
 	if (r_ctx->orig_bio) {
 #ifdef CONFIG_NVM_DEBUG
-		WARN_ONCE(r_ctx->orig_bio->bi_error,
+		WARN_ONCE(r_ctx->orig_bio->bi_status,
 						"pblk: corrupted read bio\n");
 #endif
 		bio_endio(r_ctx->orig_bio);
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..79b90d8dbcb3 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -186,7 +186,7 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
 	}
 #ifdef CONFIG_NVM_DEBUG
 	else
-		WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
 #endif
 
 	pblk_complete_write(pblk, rqd, c_ctx);
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..8d3b53bb3307 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
 {
 	struct completion *waiting = bio->bi_private;
 
-	if (bio->bi_error)
-		pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
 
 	complete(waiting);
 }
@@ -359,7 +359,7 @@ try:
 			goto finished;
 		}
 		wait_for_completion_io(&wait);
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			rrpc_inflight_laddr_release(rrpc, rqd);
 			goto finished;
 		}
@@ -385,7 +385,7 @@ try:
 		wait_for_completion_io(&wait);
 
 		rrpc_inflight_laddr_release(rrpc, rqd);
-		if (bio->bi_error)
+		if (bio->bi_status)
 			goto finished;
 
 		bio_reset(bio);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c9a1a8..dee542fff68e 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
 
 /* Forward declarations */
 
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
-			      int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+			      blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+		const char *);
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e848ae4..866dcf78ff8e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_btree_node_io_error(b);
 
 	bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct btree *b = container_of(cl, struct btree, io);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_btree_node_io_error(b);
 
-	bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+	bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
 	closure_put(cl);
 }
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88c0ce9..6a9b85095e7b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 
 /* IO errors */
 
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
 {
 	/*
 	 * The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
 }
 
 void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
-			      int error, const char *m)
+			      blk_status_t error, const char *m)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
 	struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 }
 
 void bch_bbio_endio(struct cache_set *c, struct bio *bio,
-		    int error, const char *m)
+		    blk_status_t error, const char *m)
 {
 	struct closure *cl = bio->bi_private;
 
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53d5670..0352d05e495c 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
 {
 	struct journal_write *w = bio->bi_private;
 
-	cache_set_err_on(bio->bi_error, w->c, "journal io error");
+	cache_set_err_on(bio->bi_status, w->c, "journal io error");
 	closure_put(&w->c->journal.io);
 }
 
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a907006d..f633b30c962e 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
 	struct moving_io *io = container_of(bio->bi_private,
 					    struct moving_io, cl);
 
-	if (bio->bi_error)
-		io->op.error = bio->bi_error;
+	if (bio->bi_status)
+		io->op.status = bio->bi_status;
 	else if (!KEY_DIRTY(&b->key) &&
 		 ptr_stale(io->op.c, &b->key, 0)) {
-		io->op.error = -EINTR;
+		io->op.status = BLK_STS_IOERR;
 	}
 
-	bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+	bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
 }
 
 static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 	struct data_insert_op *op = &io->op;
 
-	if (!op->error) {
+	if (!op->status) {
 		moving_init(io);
 
 		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc34369..019b3df9f1c6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
 	if (ret == -ESRCH) {
 		op->replace_collision = true;
 	} else if (ret) {
-		op->error		= -ENOMEM;
+		op->status		= BLK_STS_RESOURCE;
 		op->insert_data_done	= true;
 	}
 
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* TODO: We could try to recover from this. */
 		if (op->writeback)
-			op->error = bio->bi_error;
+			op->status = bio->bi_status;
 		else if (!op->replace)
 			set_closure_fn(cl, bch_data_insert_error, op->wq);
 		else
 			set_closure_fn(cl, NULL, NULL);
 	}
 
-	bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+	bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
 }
 
 static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
 	 * from the backing device.
 	 */
 
-	if (bio->bi_error)
-		s->iop.error = bio->bi_error;
+	if (bio->bi_status)
+		s->iop.status = bio->bi_status;
 	else if (!KEY_DIRTY(&b->key) &&
 		 ptr_stale(s->iop.c, &b->key, 0)) {
 		atomic_long_inc(&s->iop.c->cache_read_races);
-		s->iop.error = -EINTR;
+		s->iop.status = BLK_STS_IOERR;
 	}
 
-	bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+	bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
 }
 
 /*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
 {
 	struct closure *cl = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct search *s = container_of(cl, struct search, cl);
-		s->iop.error = bio->bi_error;
+		s->iop.status = bio->bi_status;
 		/* Only cache read errors are recoverable */
 		s->recoverable = false;
 	}
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
 				    &s->d->disk->part0, s->start_time);
 
 		trace_bcache_request_end(s->d, s->orig_bio);
-		s->orig_bio->bi_error = s->iop.error;
+		s->orig_bio->bi_status = s->iop.status;
 		bio_endio(s->orig_bio);
 		s->orig_bio = NULL;
 	}
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
 	s->iop.inode		= d->id;
 	s->iop.write_point	= hash_long((unsigned long) current, 16);
 	s->iop.write_prio	= 0;
-	s->iop.error		= 0;
+	s->iop.status		= 0;
 	s->iop.flags		= 0;
 	s->iop.flush_journal	= op_is_flush(bio->bi_opf);
 	s->iop.wq		= bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
 		/* Retry from the backing device: */
 		trace_bcache_read_retry(s->orig_bio);
 
-		s->iop.error = 0;
+		s->iop.status = 0;
 		do_bio_hook(s, s->orig_bio);
 
 		/* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
 				  !s->cache_miss, s->iop.bypass);
 	trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
 
-	if (s->iop.error)
+	if (s->iop.status)
 		continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
 	else if (s->iop.bio || verify(dc, &s->bio.bio))
 		continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff36875c2b3..7689176951ce 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
 	unsigned		inode;
 	uint16_t		write_point;
 	uint16_t		write_prio;
-	short			error;
+	blk_status_t		status;
 
 	union {
 		uint16_t	flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e39168..fbc4f5412dec 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+	bch_count_io_errors(ca, bio->bi_status, "writing superblock");
 	closure_put(&ca->set->sb_write);
 }
 
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
 
-	cache_set_err_on(bio->bi_error, c, "accessing uuids");
+	cache_set_err_on(bio->bi_status, c, "accessing uuids");
 	bch_bbio_free(bio, c);
 	closure_put(cl);
 }
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+	cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
 	bch_bbio_free(bio, ca->set);
 	closure_put(&ca->prio);
 }
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48b9235..42c66e76f05e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
 	struct keybuf_key *w = bio->bi_private;
 	struct dirty_io *io = w->private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		SET_KEY_DIRTY(&w->key, false);
 
 	closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
 	struct dirty_io *io = w->private;
 
 	bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
-			    bio->bi_error, "reading dirty data from cache");
+			    bio->bi_status, "reading dirty data from cache");
 
 	dirty_endio(bio);
 }
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c30a57..82d27384d31f 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
 
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell, int error)
+		   struct dm_bio_prison_cell *cell, blk_status_t error)
 {
 	struct bio_list bios;
 	struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
 	dm_cell_release(prison, cell, &bios);
 
 	while ((bio = bio_list_pop(&bios))) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	}
 }
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac07e2c..cec52ac5e1ae 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 			       struct dm_bio_prison_cell *cell,
 			       struct bio_list *inmates);
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell, int error);
+		   struct dm_bio_prison_cell *cell, blk_status_t error);
 
 /*
  * Visits the cell and then releases.  Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index cd8139593ccd..0902d2fd1743 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
 	enum data_mode data_mode;
 	unsigned char list_mode;		/* LIST_* */
 	unsigned hold_count;
-	int read_error;
-	int write_error;
+	blk_status_t read_error;
+	blk_status_t write_error;
 	unsigned long state;
 	unsigned long last_accessed;
 	struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
 {
 	struct dm_buffer *b = context;
 
-	b->bio.bi_error = error ? -EIO : 0;
+	b->bio.bi_status = error ? BLK_STS_IOERR : 0;
 	b->bio.bi_end_io(&b->bio);
 }
 
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 
 	r = dm_io(&io_req, 1, &region, NULL);
 	if (r) {
-		b->bio.bi_error = r;
+		b->bio.bi_status = errno_to_blk_status(r);
 		end_io(&b->bio);
 	}
 }
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 static void inline_endio(struct bio *bio)
 {
 	bio_end_io_t *end_fn = bio->bi_private;
-	int error = bio->bi_error;
+	blk_status_t status = bio->bi_status;
 
 	/*
 	 * Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
 	 */
 	bio_reset(bio);
 
-	bio->bi_error = error;
+	bio->bi_status = status;
 	end_fn(bio);
 }
 
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-	b->write_error = bio->bi_error;
-	if (unlikely(bio->bi_error)) {
+	b->write_error = bio->bi_status;
+	if (unlikely(bio->bi_status)) {
 		struct dm_bufio_client *c = b->c;
-		int error = bio->bi_error;
-		(void)cmpxchg(&c->async_write_error, 0, error);
+
+		(void)cmpxchg(&c->async_write_error, 0,
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-	b->read_error = bio->bi_error;
+	b->read_error = bio->bi_status;
 
 	BUG_ON(!test_bit(B_READING, &b->state));
 
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
 	if (b->read_error) {
-		int error = b->read_error;
+		int error = blk_status_to_errno(b->read_error);
 
 		dm_bufio_release(b);
 
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
  */
 int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 {
-	int a, f;
+	blk_status_t a;
+	int f;
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index c48612e6d525..c5ea03fc7ee1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
  */
 struct continuation {
 	struct work_struct ws;
-	int input;
+	blk_status_t input;
 };
 
 static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
 	/*
 	 * The operation that everyone is waiting for.
 	 */
-	int (*commit_op)(void *context);
+	blk_status_t (*commit_op)(void *context);
 	void *commit_context;
 
 	/*
@@ -171,8 +171,7 @@ struct batcher {
 static void __commit(struct work_struct *_ws)
 {
 	struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
-	int r;
+	blk_status_t r;
 	unsigned long flags;
 	struct list_head work_items;
 	struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
 
 	while ((bio = bio_list_pop(&bios))) {
 		if (r) {
-			bio->bi_error = r;
+			bio->bi_status = r;
 			bio_endio(bio);
 		} else
 			b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
 }
 
 static void batcher_init(struct batcher *b,
-			 int (*commit_op)(void *),
+			 blk_status_t (*commit_op)(void *),
 			 void *commit_context,
 			 void (*issue_op)(struct bio *bio, void *),
 			 void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
 
 	dm_unhook_bio(&pb->hook_info, bio);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bio_endio(bio);
 		return;
 	}
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 	struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
 
 	if (read_err || write_err)
-		mg->k.input = -EIO;
+		mg->k.input = BLK_STS_IOERR;
 
 	queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
 
 	dm_unhook_bio(&pb->hook_info, bio);
 
-	if (bio->bi_error)
-		mg->k.input = bio->bi_error;
+	if (bio->bi_status)
+		mg->k.input = bio->bi_status;
 
 	queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
 		if (mg->overwrite_bio) {
 			if (success)
 				force_set_dirty(cache, cblock);
+			else if (mg->k.input)
+				mg->overwrite_bio->bi_status = mg->k.input;
 			else
-				mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+				mg->overwrite_bio->bi_status = BLK_STS_IOERR;
 			bio_endio(mg->overwrite_bio);
 		} else {
 			if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
 		r = copy(mg, is_policy_promote);
 		if (r) {
 			DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-			mg->k.input = -EIO;
+			mg->k.input = BLK_STS_IOERR;
 			mg_complete(mg, false);
 		}
 	}
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
 /*
  * Used by the batcher.
  */
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
 {
 	struct cache *cache = context;
 
 	if (dm_cache_changed_this_transaction(cache->cmd))
-		return commit(cache, false);
+		return errno_to_blk_status(commit(cache, false));
 
 	return 0;
 }
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
 	bio_list_init(&cache->deferred_bios);
 
 	while ((bio = bio_list_pop(&bios))) {
-		bio->bi_error = DM_ENDIO_REQUEUE;
+		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
 	}
 }
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	return r;
 }
 
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int *error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct cache *cache = ti->private;
 	unsigned long flags;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f4b51809db21..586cef085c6a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
 	struct convert_context ctx;
 
 	atomic_t io_pending;
-	int error;
+	blk_status_t error;
 	sector_t sector;
 
 	struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 /*
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
 	unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
 		 */
 		case -EBADMSG:
 			atomic_dec(&ctx->cc_pending);
-			return -EILSEQ;
+			return BLK_STS_PROTECTION;
 		/*
 		 * There was an error while processing the request.
 		 */
 		default:
 			atomic_dec(&ctx->cc_pending);
-			return -EIO;
+			return BLK_STS_IOERR;
 		}
 	}
 
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	int error = io->error;
+	blk_status_t error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
 		return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	else
 		kfree(io->integrity_metadata);
 
-	base_bio->bi_error = error;
+	base_bio->bi_status = error;
 	bio_endio(base_bio);
 }
 
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
 	struct dm_crypt_io *io = clone->bi_private;
 	struct crypt_config *cc = io->cc;
 	unsigned rw = bio_data_dir(clone);
-	int error;
+	blk_status_t error;
 
 	/*
 	 * free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
 	if (rw == WRITE)
 		crypt_free_buffer_pages(cc, clone);
 
-	error = clone->bi_error;
+	error = clone->bi_status;
 	bio_put(clone);
 
 	if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
 
 	crypt_inc_pending(io);
 	if (kcryptd_io_read(io, GFP_NOIO))
-		io->error = -ENOMEM;
+		io->error = BLK_STS_RESOURCE;
 	crypt_dec_pending(io);
 }
 
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 	sector_t sector;
 	struct rb_node **rbp, *parent;
 
-	if (unlikely(io->error < 0)) {
+	if (unlikely(io->error)) {
 		crypt_free_buffer_pages(cc, clone);
 		bio_put(clone);
 		crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	struct bio *clone;
 	int crypt_finished;
 	sector_t sector = io->sector;
-	int r;
+	blk_status_t r;
 
 	/*
 	 * Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
 	if (unlikely(!clone)) {
-		io->error = -EIO;
+		io->error = BLK_STS_IOERR;
 		goto dec;
 	}
 
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	crypt_inc_pending(io);
 	r = crypt_convert(cc, &io->ctx);
-	if (r < 0)
+	if (r)
 		io->error = r;
 	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
 static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
-	int r = 0;
+	blk_status_t r;
 
 	crypt_inc_pending(io);
 
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 			   io->sector);
 
 	r = crypt_convert(cc, &io->ctx);
-	if (r < 0)
+	if (r)
 		io->error = r;
 
 	if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 	if (error == -EBADMSG) {
 		DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
 			    (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
-		io->error = -EILSEQ;
+		io->error = BLK_STS_PROTECTION;
 	} else if (error < 0)
-		io->error = -EIO;
+		io->error = BLK_STS_IOERR;
 
 	crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
 
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index c9539917a59b..3d04d5ce19d9 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -358,7 +358,8 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int *error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -377,7 +378,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int *error)
 			 * Error read during the down_interval if drop_writes
 			 * and error_writes were not configured.
 			 */
-			*error = -EIO;
+			*error = BLK_STS_IOERR;
 		}
 	}
 
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index ee78fb471229..ccc6ef4d00b9 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
 	unsigned metadata_offset;
 
 	atomic_t in_flight;
-	int bi_error;
+	blk_status_t bi_status;
 
 	struct completion *completion;
 
@@ -1114,8 +1114,8 @@ static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *
 static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
 {
 	int r = dm_integrity_failed(ic);
-	if (unlikely(r) && !bio->bi_error)
-		bio->bi_error = r;
+	if (unlikely(r) && !bio->bi_status)
+		bio->bi_status = errno_to_blk_status(r);
 	bio_endio(bio);
 }
 
@@ -1123,7 +1123,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
 {
 	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-	if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
 		submit_flush_bio(ic, dio);
 	else
 		do_endio(ic, bio);
@@ -1142,9 +1142,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
 
 		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-		if (unlikely(dio->bi_error) && !bio->bi_error)
-			bio->bi_error = dio->bi_error;
-		if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+		if (unlikely(dio->bi_status) && !bio->bi_status)
+			bio->bi_status = dio->bi_status;
+		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
 			dio->range.logical_sector += dio->range.n_sectors;
 			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
 			INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1318,7 +1318,7 @@ skip_io:
 	dec_in_flight(dio);
 	return;
 error:
-	dio->bi_error = r;
+	dio->bi_status = errno_to_blk_status(r);
 	dec_in_flight(dio);
 }
 
@@ -1331,7 +1331,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 	sector_t area, offset;
 
 	dio->ic = ic;
-	dio->bi_error = 0;
+	dio->bi_status = 0;
 
 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
 		submit_flush_bio(ic, dio);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e502466d..c8f8f3004085 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -124,7 +124,7 @@ static void complete_io(struct io *io)
 	fn(error_bits, context);
 }
 
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
 {
 	if (error)
 		set_bit(region, &io->error_bits);
@@ -137,9 +137,9 @@ static void endio(struct bio *bio)
 {
 	struct io *io;
 	unsigned region;
-	int error;
+	blk_status_t error;
 
-	if (bio->bi_error && bio_data_dir(bio) == READ)
+	if (bio->bi_status && bio_data_dir(bio) == READ)
 		zero_fill_bio(bio);
 
 	/*
@@ -147,7 +147,7 @@ static void endio(struct bio *bio)
 	 */
 	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	error = bio->bi_error;
+	error = bio->bi_status;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -319,7 +319,7 @@ static void do_region(int op, int op_flags, unsigned region,
 	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
 	     op == REQ_OP_WRITE_SAME)  &&
 	    special_cmd_max_sectors == 0) {
-		dec_count(io, region, -EOPNOTSUPP);
+		dec_count(io, region, BLK_STS_NOTSUPP);
 		return;
 	}
 
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index cc57c7fa1268..a1da0eb58a93 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
 {
 	struct log_writes_c *lc = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		unsigned long flags;
 
-		DMERR("Error writing log block, error=%d", bio->bi_error);
+		DMERR("Error writing log block, error=%d", bio->bi_status);
 		spin_lock_irqsave(&lc->blocks_lock, flags);
 		lc->logging_enabled = false;
 		spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -664,7 +664,8 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int *error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct log_writes_c *lc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 39262e344ae1..a7d2e0840cc5 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -565,7 +565,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 	mpio->pgpath = pgpath;
 	mpio->nr_bytes = nr_bytes;
 
-	bio->bi_error = 0;
+	bio->bi_status = 0;
 	bio->bi_bdev = pgpath->path.dev->bdev;
 	bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
 
@@ -623,10 +623,10 @@ static void process_queued_bios(struct work_struct *work)
 		r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
 		switch (r) {
 		case DM_MAPIO_KILL:
-			r = -EIO;
-			/*FALLTHRU*/
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
 		case DM_MAPIO_REQUEUE:
-			bio->bi_error = r;
+			bio->bi_status = BLK_STS_DM_REQUEUE;
 			bio_endio(bio);
 			break;
 		case DM_MAPIO_REMAPPED:
@@ -1510,7 +1510,8 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *error)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+		blk_status_t *error)
 {
 	struct multipath *m = ti->private;
 	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
@@ -1518,7 +1519,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *er
 	unsigned long flags;
 	int r = DM_ENDIO_DONE;
 
-	if (!*error || noretry_error(errno_to_blk_status(*error)))
+	if (!*error || noretry_error(*error))
 		goto done;
 
 	if (pgpath)
@@ -1527,7 +1528,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *er
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
 	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 		dm_report_EIO(m);
-		*error = -EIO;
+		*error = BLK_STS_IOERR;
 		goto done;
 	}
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 77bcf50ce75f..0822e4a6f67d 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -490,9 +490,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
 		 * If device is suspended, complete the bio.
 		 */
 		if (dm_noflush_suspending(ms->ti))
-			bio->bi_error = DM_ENDIO_REQUEUE;
+			bio->bi_status = BLK_STS_DM_REQUEUE;
 		else
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 
 		bio_endio(bio);
 		return;
@@ -626,7 +626,7 @@ static void write_callback(unsigned long error, void *context)
 	 * degrade the array.
 	 */
 	if (bio_op(bio) == REQ_OP_DISCARD) {
-		bio->bi_error = -EOPNOTSUPP;
+		bio->bi_status = BLK_STS_NOTSUPP;
 		bio_endio(bio);
 		return;
 	}
@@ -1236,7 +1236,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	int rw = bio_data_dir(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1255,7 +1256,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error)
 		return DM_ENDIO_DONE;
 	}
 
-	if (*error == -EOPNOTSUPP)
+	if (*error == BLK_STS_NOTSUPP)
 		return DM_ENDIO_DONE;
 
 	if (bio->bi_opf & REQ_RAHEAD)
@@ -1277,7 +1278,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error)
 			bd = &bio_record->details;
 
 			dm_bio_restore(bd, bio);
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 
 			queue_bio(ms, bio, rw);
 			return DM_ENDIO_INCOMPLETE;
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 63402f8a38de..fafd5326e572 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
 	struct dm_rq_target_io *tio = info->tio;
 	struct bio *bio = info->orig;
 	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-	blk_status_t error = errno_to_blk_status(clone->bi_error);
+	blk_status_t error = clone->bi_status;
 
 	bio_put(clone);
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 79a845798e2f..1ba41048b438 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
 {
 	void *callback_data = bio->bi_private;
 
-	dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+	dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
 }
 
 static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1851,7 +1851,8 @@ out_unlock:
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int *error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct dm_snapshot *s = ti->private;
 
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 49888bc2c909..11621a0af887 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,7 +375,8 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
 	}
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int *error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	unsigned i;
 	char major_minor[16];
@@ -387,7 +388,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int *error)
 	if (bio->bi_opf & REQ_RAHEAD)
 		return DM_ENDIO_DONE;
 
-	if (*error == -EOPNOTSUPP)
+	if (*error == BLK_STS_NOTSUPP)
 		return DM_ENDIO_DONE;
 
 	memset(major_minor, 0, sizeof(major_minor));
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 22b1a64c44b7..3490b300cbff 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
 	 * Even if r is set, there could be sub discards in flight that we
 	 * need to wait for.
 	 */
-	if (r && !op->parent_bio->bi_error)
-		op->parent_bio->bi_error = r;
+	if (r && !op->parent_bio->bi_status)
+		op->parent_bio->bi_status = errno_to_blk_status(r);
 	bio_endio(op->parent_bio);
 }
 
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
 }
 
 static void cell_error_with_code(struct pool *pool,
-				 struct dm_bio_prison_cell *cell, int error_code)
+		struct dm_bio_prison_cell *cell, blk_status_t error_code)
 {
 	dm_cell_error(pool->prison, cell, error_code);
 	dm_bio_prison_free_cell(pool->prison, cell);
 }
 
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
 {
-	return pool->out_of_data_space ? -ENOSPC : -EIO;
+	return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
 }
 
 static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-	int error = get_pool_io_error_code(pool);
-
-	cell_error_with_code(pool, cell, error);
+	cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
 }
 
 static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
 
 static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-	cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+	cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
 }
 
 /*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
 	bio_list_init(master);
 }
 
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
 {
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(bios))) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	}
 }
 
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+		blk_status_t error)
 {
 	struct bio_list bios;
 	unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
 	__merge_bio_list(&bios, &tc->retry_on_resume_list);
 	spin_unlock_irqrestore(&tc->lock, flags);
 
-	error_bio_list(&bios, DM_ENDIO_REQUEUE);
+	error_bio_list(&bios, BLK_STS_DM_REQUEUE);
 	requeue_deferred_cells(tc);
 }
 
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
 {
 	struct thin_c *tc;
 
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
 
 static void error_retry_list(struct pool *pool)
 {
-	int error = get_pool_io_error_code(pool);
-
-	error_retry_list_with_code(pool, error);
+	error_retry_list_with_code(pool, get_pool_io_error_code(pool));
 }
 
 /*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
 	 */
 	atomic_t prepare_actions;
 
-	int err;
+	blk_status_t status;
 	struct thin_c *tc;
 	dm_block_t virt_begin, virt_end;
 	dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_thin_new_mapping *m = context;
 
-	m->err = read_err || write_err ? -EIO : 0;
+	m->status = read_err || write_err ? BLK_STS_IOERR : 0;
 	complete_mapping_preparation(m);
 }
 
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
 
 	bio->bi_end_io = m->saved_bi_end_io;
 
-	m->err = bio->bi_error;
+	m->status = bio->bi_status;
 	complete_mapping_preparation(m);
 }
 
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	struct bio *bio = m->bio;
 	int r;
 
-	if (m->err) {
+	if (m->status) {
 		cell_error(pool, m->cell);
 		goto out;
 	}
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
 	spin_unlock_irqrestore(&tc->lock, flags);
 }
 
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
 {
 	enum pool_mode m = get_pool_mode(pool);
 
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
 	case PM_WRITE:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	case PM_OUT_OF_DATA_SPACE:
-		return pool->pf.error_if_no_space ? -ENOSPC : 0;
+		return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
 
 	case PM_READ_ONLY:
 	case PM_FAIL:
-		return -EIO;
+		return BLK_STS_IOERR;
 	default:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
 {
-	int error = should_error_unserviceable_bio(pool);
+	blk_status_t error = should_error_unserviceable_bio(pool);
 
 	if (error) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	} else
 		retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 {
 	struct bio *bio;
 	struct bio_list bios;
-	int error;
+	blk_status_t error;
 
 	error = should_error_unserviceable_bio(pool);
 	if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 	unsigned count = 0;
 
 	if (tc->requeue_mode) {
-		error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+		error_thin_bio_list(tc, &tc->deferred_bio_list,
+				BLK_STS_DM_REQUEUE);
 		return;
 	}
 
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
 		notify_of_pool_mode_change_to_oods(pool);
-		error_retry_list_with_code(pool, -ENOSPC);
+		error_retry_list_with_code(pool, BLK_STS_NOSPC);
 	}
 }
 
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 	thin_hook_bio(tc, bio);
 
 	if (tc->requeue_mode) {
-		bio->bi_error = DM_ENDIO_REQUEUE;
+		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
 	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti, struct bio *bio, int *err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+		blk_status_t *err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 9ed55468b98b..2dca66eb67e1 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
 /*
  * End one "io" structure with a given error.
  */
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
 {
 	struct dm_verity *v = io->v;
 	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
 
 	bio->bi_end_io = io->orig_bi_end_io;
-	bio->bi_error = error;
+	bio->bi_status = status;
 
 	verity_fec_finish_io(io);
 
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
 {
 	struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
 
-	verity_finish_io(io, verity_verify_io(io));
+	verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
 }
 
 static void verity_end_io(struct bio *bio)
 {
 	struct dm_verity_io *io = bio->bi_private;
 
-	if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
-		verity_finish_io(io, bio->bi_error);
+	if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+		verity_finish_io(io, bio->bi_status);
 		return;
 	}
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7a7047211c64..f38f9dd5cbdd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
  */
 struct dm_io {
 	struct mapped_device *md;
-	int error;
+	blk_status_t status;
 	atomic_t io_count;
 	struct bio *bio;
 	unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
  */
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
 {
 	unsigned long flags;
-	int io_error;
+	blk_status_t io_error;
 	struct bio *bio;
 	struct mapped_device *md = io->md;
 
 	/* Push-back supersedes any I/O errors */
 	if (unlikely(error)) {
 		spin_lock_irqsave(&io->endio_lock, flags);
-		if (!(io->error > 0 && __noflush_suspending(md)))
-			io->error = error;
+		if (!(io->status == BLK_STS_DM_REQUEUE &&
+				__noflush_suspending(md)))
+			io->status = error;
 		spin_unlock_irqrestore(&io->endio_lock, flags);
 	}
 
 	if (atomic_dec_and_test(&io->io_count)) {
-		if (io->error == DM_ENDIO_REQUEUE) {
+		if (io->status == BLK_STS_DM_REQUEUE) {
 			/*
 			 * Target requested pushing back the I/O.
 			 */
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
 				bio_list_add_head(&md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
-				io->error = -EIO;
+				io->status = BLK_STS_IOERR;
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
-		io_error = io->error;
+		io_error = io->status;
 		bio = io->bio;
 		end_io_acct(io);
 		free_io(md, io);
 
-		if (io_error == DM_ENDIO_REQUEUE)
+		if (io_error == BLK_STS_DM_REQUEUE)
 			return;
 
 		if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
 			queue_io(md, bio);
 		} else {
 			/* done with normal IO or empty flush */
-			bio->bi_error = io_error;
+			bio->bi_status = io_error;
 			bio_endio(bio);
 		}
 	}
@@ -838,14 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
 
 static void clone_endio(struct bio *bio)
 {
-	int error = bio->bi_error;
-	int r = error;
+	blk_status_t error = bio->bi_status;
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
 
-	if (unlikely(error == -EREMOTEIO)) {
+	if (unlikely(error == BLK_STS_TARGET)) {
 		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
 		    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
 			disable_write_same(md);
@@ -855,10 +855,10 @@ static void clone_endio(struct bio *bio)
 	}
 
 	if (endio) {
-		r = endio(tio->ti, bio, &error);
+		int r = endio(tio->ti, bio, &error);
 		switch (r) {
 		case DM_ENDIO_REQUEUE:
-			error = DM_ENDIO_REQUEUE;
+			error = BLK_STS_DM_REQUEUE;
 			/*FALLTHRU*/
 		case DM_ENDIO_DONE:
 			break;
@@ -1094,11 +1094,11 @@ static void __map_bio(struct dm_target_io *tio)
 		generic_make_request(clone);
 		break;
 	case DM_MAPIO_KILL:
-		r = -EIO;
-		/*FALLTHRU*/
+		dec_pending(tio->io, BLK_STS_IOERR);
+		free_tio(tio);
+		break;
 	case DM_MAPIO_REQUEUE:
-		/* error the io and bail out, or requeue it if needed */
-		dec_pending(tio->io, r);
+		dec_pending(tio->io, BLK_STS_DM_REQUEUE);
 		free_tio(tio);
 		break;
 	default:
@@ -1366,7 +1366,7 @@ static void __split_and_process_bio(struct mapped_device *md,
 	ci.map = map;
 	ci.md = md;
 	ci.io = alloc_io(md);
-	ci.io->error = 0;
+	ci.io->status = 0;
 	atomic_set(&ci.io->io_count, 1);
 	ci.io->bio = bio;
 	ci.io->md = md;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 10367ffe92e3..6452e83fd650 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	}
 	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
 		if (bio_sectors(bio) != 0)
-			bio->bi_error = -EROFS;
+			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
 	struct md_rdev *rdev = bio->bi_private;
 	struct mddev *mddev = rdev->mddev;
 
-	if (bio->bi_error) {
-		pr_err("md: super_written gets error=%d\n", bio->bi_error);
+	if (bio->bi_status) {
+		pr_err("md: super_written gets error=%d\n", bio->bi_status);
 		md_error(mddev, rdev);
 		if (!test_bit(Faulty, &rdev->flags)
 		    && (bio->bi_opf & MD_FAILFAST)) {
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 
 	submit_bio_wait(bio);
 
-	ret = !bio->bi_error;
+	ret = !bio->bi_status;
 	bio_put(bio);
 	return ret;
 }
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521d93e9..68d036e64041 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
 {
 	struct bio *bio = mp_bh->master_bio;
 	struct mpconf *conf = mp_bh->mddev->private;
 
-	bio->bi_error = err;
+	bio->bi_status = status;
 	bio_endio(bio);
 	mempool_free(mp_bh, conf->pool);
 }
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
 	struct mpconf *conf = mp_bh->mddev->private;
 	struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		multipath_end_bh_io(mp_bh, 0);
 	else if (!(bio->bi_opf & REQ_RAHEAD)) {
 		/*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
 			(unsigned long long)bio->bi_iter.bi_sector);
 		multipath_reschedule_retry(mp_bh);
 	} else
-		multipath_end_bh_io(mp_bh, bio->bi_error);
+		multipath_end_bh_io(mp_bh, bio->bi_status);
 	rdev_dec_pending(rdev, conf->mddev);
 }
 
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
 			pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
 			       bdevname(bio->bi_bdev,b),
 			       (unsigned long long)bio->bi_iter.bi_sector);
-			multipath_end_bh_io(mp_bh, -EIO);
+			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
 		} else {
 			pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
 			       bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index af5056d56878..94b87c4d0f7b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
 	struct r1conf *conf = r1_bio->mddev->private;
 
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
 	/*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
 
 static void raid1_end_read_request(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r1bio *r1_bio = bio->bi_private;
 	struct r1conf *conf = r1_bio->mddev->private;
 	struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
 	struct md_rdev *rdev = conf->mirrors[mirror].rdev;
 	bool discard_error;
 
-	discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
 	/*
 	 * 'one mirror IO has finished' event handler:
 	 */
-	if (bio->bi_error && !discard_error) {
+	if (bio->bi_status && !discard_error) {
 		set_bit(WriteErrorSeen,	&rdev->flags);
 		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 		bio->bi_next = NULL;
 		bio->bi_bdev = rdev->bdev;
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
 	 * or re-read if the read failed.
 	 * We don't do much here, just schedule handling by raid1d
 	 */
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
 	if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
 
 static void end_sync_write(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r1bio *r1_bio = get_resync_r1bio(bio);
 	struct mddev *mddev = r1_bio->mddev;
 	struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 		idx ++;
 	}
 	set_bit(R1BIO_Uptodate, &r1_bio->state);
-	bio->bi_error = 0;
+	bio->bi_status = 0;
 	return 1;
 }
 
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 		int j;
 		int size;
-		int error;
+		blk_status_t status;
 		struct bio_vec *bi;
 		struct bio *b = r1_bio->bios[i];
 		struct resync_pages *rp = get_resync_pages(b);
 		if (b->bi_end_io != end_sync_read)
 			continue;
 		/* fixup the bio for reuse, but preserve errno */
-		error = b->bi_error;
+		status = b->bi_status;
 		bio_reset(b);
-		b->bi_error = error;
+		b->bi_status = status;
 		b->bi_vcnt = vcnt;
 		b->bi_iter.bi_size = r1_bio->sectors << 9;
 		b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
 	}
 	for (primary = 0; primary < conf->raid_disks * 2; primary++)
 		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-		    !r1_bio->bios[primary]->bi_error) {
+		    !r1_bio->bios[primary]->bi_status) {
 			r1_bio->bios[primary]->bi_end_io = NULL;
 			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
 			break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
 		int j;
 		struct bio *pbio = r1_bio->bios[primary];
 		struct bio *sbio = r1_bio->bios[i];
-		int error = sbio->bi_error;
+		blk_status_t status = sbio->bi_status;
 		struct page **ppages = get_resync_pages(pbio)->pages;
 		struct page **spages = get_resync_pages(sbio)->pages;
 		struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
 		if (sbio->bi_end_io != end_sync_read)
 			continue;
 		/* Now we can 'fixup' the error value */
-		sbio->bi_error = 0;
+		sbio->bi_status = 0;
 
 		bio_for_each_segment_all(bi, sbio, j)
 			page_len[j] = bi->bv_len;
 
-		if (!error) {
+		if (!status) {
 			for (j = vcnt; j-- ; ) {
 				if (memcmp(page_address(ppages[j]),
 					   page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
 		if (j >= 0)
 			atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
 		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-			      && !error)) {
+			      && !status)) {
 			/* No need to write to this device. */
 			sbio->bi_end_io = NULL;
 			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
 		struct bio *bio = r1_bio->bios[m];
 		if (bio->bi_end_io == NULL)
 			continue;
-		if (!bio->bi_error &&
+		if (!bio->bi_status &&
 		    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
 			rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
 		}
-		if (bio->bi_error &&
+		if (bio->bi_status &&
 		    test_bit(R1BIO_WriteError, &r1_bio->state)) {
 			if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
 				md_error(conf->mddev, rdev);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 4343d7ff9916..89ad1cd29037 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	struct r10conf *conf = r10_bio->mddev->private;
 
 	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
 	/*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
 
 static void raid10_end_read_request(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r10bio *r10_bio = bio->bi_private;
 	int slot, dev;
 	struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
 	struct bio *to_put = NULL;
 	bool discard_error;
 
-	discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
 	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (bio->bi_error && !discard_error) {
+	if (bio->bi_status && !discard_error) {
 		if (repl)
 			/* Never record new bad blocks to replacement,
 			 * just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
 			bio->bi_next = NULL;
 			bio->bi_bdev = rdev->bdev;
 			if (test_bit(Faulty, &rdev->flags)) {
-				bio->bi_error = -EIO;
+				bio->bi_status = BLK_STS_IOERR;
 				bio_endio(bio);
 			} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 					    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 		bio->bi_next = NULL;
 		bio->bi_bdev = rdev->bdev;
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
 {
 	struct r10conf *conf = r10_bio->mddev->private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 	else
 		/* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
 	else
 		rdev = conf->mirrors[d].rdev;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		if (repl)
 			md_error(mddev, rdev);
 		else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 
 	/* find the first device with a block */
 	for (i=0; i<conf->copies; i++)
-		if (!r10_bio->devs[i].bio->bi_error)
+		if (!r10_bio->devs[i].bio->bi_status)
 			break;
 
 	if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		tpages = get_resync_pages(tbio)->pages;
 		d = r10_bio->devs[i].devnum;
 		rdev = conf->mirrors[d].rdev;
-		if (!r10_bio->devs[i].bio->bi_error) {
+		if (!r10_bio->devs[i].bio->bi_status) {
 			/* We know that the bi_io_vec layout is the same for
 			 * both 'first' and 'i', so we just compare them.
 			 * All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			rdev = conf->mirrors[dev].rdev;
 			if (r10_bio->devs[m].bio == NULL)
 				continue;
-			if (!r10_bio->devs[m].bio->bi_error) {
+			if (!r10_bio->devs[m].bio->bi_status) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			if (r10_bio->devs[m].repl_bio == NULL)
 				continue;
 
-			if (!r10_bio->devs[m].repl_bio->bi_error) {
+			if (!r10_bio->devs[m].repl_bio->bi_status) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 					r10_bio->devs[m].addr,
 					r10_bio->sectors, 0);
 				rdev_dec_pending(rdev, conf->mddev);
-			} else if (bio != NULL && bio->bi_error) {
+			} else if (bio != NULL && bio->bi_status) {
 				fail = true;
 				if (!narrow_write_error(r10_bio, m)) {
 					md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				r10_bio->devs[i].repl_bio->bi_end_io = NULL;
 
 			bio = r10_bio->devs[i].bio;
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			rcu_read_lock();
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 			/* Need to set up for writing to the replacement */
 			bio = r10_bio->devs[i].repl_bio;
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 
 			sector = r10_bio->devs[i].addr;
 			bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 		if (bio->bi_end_io == end_sync_read) {
 			md_sync_acct(bio->bi_bdev, nr_sectors);
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 			generic_make_request(bio);
 		}
 	}
@@ -4394,7 +4394,7 @@ read_more:
 	read_bio->bi_end_io = end_reshape_read;
 	bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
 	read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-	read_bio->bi_error = 0;
+	read_bio->bi_status = 0;
 	read_bio->bi_vcnt = 0;
 	read_bio->bi_iter.bi_size = 0;
 	r10_bio->master_bio = read_bio;
@@ -4638,7 +4638,7 @@ static void end_reshape_write(struct bio *bio)
 		rdev = conf->mirrors[d].rdev;
 	}
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* FIXME should record badblock */
 		md_error(mddev, rdev);
 	}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 4c00bc248287..3ed6a0d89db8 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
 	struct r5l_log *log = io->log;
 	unsigned long flags;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(log->rdev->mddev, log->rdev);
 
 	bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
 	unsigned long flags;
 	struct r5l_io_unit *io;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(log->rdev->mddev, log->rdev);
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 5d25bebf3328..09e04be34e5f 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
 
 	pr_debug("%s: seq: %llu\n", __func__, io->seq);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(ppl_conf->mddev, log->rdev);
 
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9c4f7659f8b1..e1bdc320f664 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
 
 	pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		bi->bi_error);
+		bi->bi_status);
 	if (i == disks) {
 		bio_reset(bi);
 		BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
 		s = sh->sector + rdev->new_data_offset;
 	else
 		s = sh->sector + rdev->data_offset;
-	if (!bi->bi_error) {
+	if (!bi->bi_status) {
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			/* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 	pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		bi->bi_error);
+		bi->bi_status);
 	if (i == disks) {
 		bio_reset(bi);
 		BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 
 	if (replacement) {
-		if (bi->bi_error)
+		if (bi->bi_status)
 			md_error(conf->mddev, rdev);
 		else if (is_badblock(rdev, sh->sector,
 				     STRIPE_SECTORS,
 				     &first_bad, &bad_sectors))
 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
 	} else {
-		if (bi->bi_error) {
+		if (bi->bi_status) {
 			set_bit(STRIPE_DEGRADED, &sh->state);
 			set_bit(WriteErrorSeen, &rdev->flags);
 			set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 	rdev_dec_pending(rdev, conf->mddev);
 
-	if (sh->batch_head && bi->bi_error && !replacement)
+	if (sh->batch_head && bi->bi_status && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
 	bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			md_write_end(conf->mddev);
 			bio_endio(bi);
 			bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			md_write_end(conf->mddev);
 			bio_endio(bi);
 			bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
 
-				bi->bi_error = -EIO;
+				bi->bi_status = BLK_STS_IOERR;
 				bio_endio(bi);
 				bi = nextbi;
 			}
@@ -5144,7 +5144,7 @@ static void raid5_align_endio(struct bio *bi)
 	struct mddev *mddev;
 	struct r5conf *conf;
 	struct md_rdev *rdev;
-	int error = bi->bi_error;
+	blk_status_t error = bi->bi_status;
 
 	bio_put(bi);
 
@@ -5721,7 +5721,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			break;
 		}
 	}
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 822198a75e96..79eb9fb358d5 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	 * another kernel subsystem, and we just pass it through.
 	 */
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 					"io error in %s sector %lld, len %d,\n",
 					(rw == READ) ? "READ" : "WRITE",
 					(unsigned long long) iter.bi_sector, len);
-			bio->bi_error = err;
+			bio->bi_status = errno_to_blk_status(err);
 			break;
 		}
 	}
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b8fd9b..31b2d14e210d 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	 * another kernel subsystem, and we just pass it through.
 	 */
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 					(op_is_write(bio_op(bio))) ? "WRITE" :
 					"READ",
 					(unsigned long long) iter.bi_sector, len);
-			bio->bi_error = err;
+			bio->bi_status = errno_to_blk_status(err);
 			break;
 		}
 	}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..7bd383aeea14 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
 	return to_nd_region(to_dev(pmem)->parent);
 }
 
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
-		unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+		phys_addr_t offset, unsigned int len)
 {
 	struct device *dev = to_dev(pmem);
 	sector_t sector;
 	long cleared;
-	int rc = 0;
+	blk_status_t rc = BLK_STS_OK;
 
 	sector = (offset - pmem->data_offset) / 512;
 
 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
 	if (cleared < len)
-		rc = -EIO;
+		rc = BLK_STS_IOERR;
 	if (cleared > 0 && cleared / 512) {
 		cleared /= 512;
 		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
 	kunmap_atomic(mem);
 }
 
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
 		void *pmem_addr, unsigned int len)
 {
 	int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
 	rc = memcpy_mcsafe(mem + off, pmem_addr, len);
 	kunmap_atomic(mem);
 	if (rc)
-		return -EIO;
-	return 0;
+		return BLK_STS_IOERR;
+	return BLK_STS_OK;
 }
 
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, bool is_write,
 			sector_t sector)
 {
-	int rc = 0;
+	blk_status_t rc = BLK_STS_OK;
 	bool bad_pmem = false;
 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
 	void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 	if (!is_write) {
 		if (unlikely(bad_pmem))
-			rc = -EIO;
+			rc = BLK_STS_IOERR;
 		else {
 			rc = read_pmem(page, off, pmem_addr, len);
 			flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
-	int rc = 0;
+	blk_status_t rc = 0;
 	bool do_acct;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 				bvec.bv_offset, op_is_write(bio_op(bio)),
 				iter.bi_sector);
 		if (rc) {
-			bio->bi_error = rc;
+			bio->bi_status = rc;
 			break;
 		}
 	}
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 		       struct page *page, bool is_write)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	int rc;
+	blk_status_t rc;
 
 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
 
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	if (rc == 0)
 		page_endio(page, is_write, 0);
 
-	return rc;
+	return blk_status_to_errno(rc);
 }
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d80fc8..40128793e613 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
 	struct nvmet_req *req = bio->bi_private;
 
 	nvmet_req_complete(req,
-		bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
 
 	if (bio != &req->inline_bio)
 		bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
 		bio->bi_private = req;
 		bio->bi_end_io = nvmet_bio_done;
 		if (status) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else {
 			submit_bio(bio);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069ebe4aa6..75373624604b 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
 	struct se_cmd *cmd = bio->bi_private;
 	struct iblock_req *ibr = cmd->priv;
 
-	if (bio->bi_error) {
-		pr_err("bio error: %p,  err: %d\n", bio, bio->bi_error);
+	if (bio->bi_status) {
+		pr_err("bio error: %p,  err: %d\n", bio, bio->bi_status);
 		/*
 		 * Bump the ib_bio_err_cnt and release bio.
 		 */
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
 {
 	struct se_cmd *cmd = bio->bi_private;
 
-	if (bio->bi_error)
-		pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
 
 	if (cmd) {
-		if (bio->bi_error)
+		if (bio->bi_status)
 			target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
 		else
 			target_complete_cmd(cmd, SAM_STAT_GOOD);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c1dc393ad6b9..bcd8e16a34e1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -262,8 +262,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	if (vecs != inline_vecs)
 		kfree(vecs);
 
-	if (unlikely(bio.bi_error))
-		return bio.bi_error;
+	if (unlikely(bio.bi_status))
+		return blk_status_to_errno(bio.bi_status);
 	return ret;
 }
 
@@ -288,16 +288,18 @@ static void blkdev_bio_end_io(struct bio *bio)
 	bool should_dirty = dio->should_dirty;
 
 	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
-		if (bio->bi_error && !dio->bio.bi_error)
-			dio->bio.bi_error = bio->bi_error;
+		if (bio->bi_status && !dio->bio.bi_status)
+			dio->bio.bi_status = bio->bi_status;
 	} else {
 		if (!dio->is_sync) {
 			struct kiocb *iocb = dio->iocb;
-			ssize_t ret = dio->bio.bi_error;
+			ssize_t ret;
 
-			if (likely(!ret)) {
+			if (likely(!dio->bio.bi_status)) {
 				ret = dio->size;
 				iocb->ki_pos += ret;
+			} else {
+				ret = blk_status_to_errno(dio->bio.bi_status);
 			}
 
 			dio->iocb->ki_complete(iocb, ret, 0);
@@ -363,7 +365,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 
 		ret = bio_iov_iter_get_pages(bio, iter);
 		if (unlikely(ret)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 			break;
 		}
@@ -413,7 +415,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	__set_current_state(TASK_RUNNING);
 
 	if (!ret)
-		ret = dio->bio.bi_error;
+		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
 		ret = dio->size;
 
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4d1744..d87ac27a5f2b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
 	 * The original bio may be split to several sub-bios, this is
 	 * done during endio of sub-bios
 	 */
-	int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+	blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+			blk_status_t);
 };
 
 /*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e635ca..4ded1c3f92b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
 	/* mutex is not held! This is not save if IO is not yet completed
 	 * on umount */
 	iodone_w_error = 0;
-	if (bp->bi_error)
+	if (bp->bi_status)
 		iodone_w_error = 1;
 
 	BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
 		if ((dev_state->state->print_mask &
 		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
 			pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
-			       bp->bi_error,
+			       bp->bi_status,
 			       btrfsic_get_block_type(dev_state->state, block),
 			       block->logical_bytenr, dev_state->name,
 			       block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b282d09d..9ac55b266e78 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	unsigned long index;
 	int ret;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	struct page *page;
 	unsigned long index;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
 					 cb->start,
 					 cb->start + cb->len - 1,
 					 NULL,
-					 bio->bi_error ? 0 : 1);
+					 bio->bi_status ? 0 : 1);
 	cb->compressed_pages[0]->mapping = NULL;
 
 	end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
  * This also checksums the file bytes and gets things ready for
  * the end io hooks.
  */
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				 unsigned long len, u64 disk_start,
 				 unsigned long compressed_len,
 				 struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct page *page;
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
-	int ret;
+	blk_status_t ret;
 	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
 	if (!bio) {
 		kfree(cb);
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 	}
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 	bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+		int submit = 0;
+
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_iter.bi_size)
-			ret = io_tree->ops->merge_bio_hook(page, 0,
+			submit = io_tree->ops->merge_bio_hook(page, 0,
 							   PAGE_SIZE,
 							   bio, 0);
-		else
-			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
 			bio_get(bio);
 
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 			ret = btrfs_map_bio(fs_info, bio, 0, 1);
 			if (ret) {
-				bio->bi_error = ret;
+				bio->bi_status = ret;
 				bio_endio(bio);
 			}
 
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	ret = btrfs_map_bio(fs_info, bio, 0, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 
@@ -569,7 +569,7 @@ next:
  * After the compressed pages are read, we copy the bytes into the
  * bio we were passed and then call the bio end_io calls
  */
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret = -ENOMEM;
+	blk_status_t ret = BLK_STS_RESOURCE;
 	int faili = 0;
 	u32 *sums;
 
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				   PAGE_SIZE);
 	read_unlock(&em_tree->lock);
 	if (!em)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	refcount_set(&cb->pending_bios, 1);
 
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		int submit = 0;
+
 		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_SHIFT;
 
 		if (comp_bio->bi_iter.bi_size)
-			ret = tree->ops->merge_bio_hook(page, 0,
+			submit = tree->ops->merge_bio_hook(page, 0,
 							PAGE_SIZE,
 							comp_bio, 0);
-		else
-			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
 			bio_get(comp_bio);
 
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
 			if (ret) {
-				comp_bio->bi_error = ret;
+				comp_bio->bi_status = ret;
 				bio_endio(comp_bio);
 			}
 
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
 	if (ret) {
-		comp_bio->bi_error = ret;
+		comp_bio->bi_status = ret;
 		bio_endio(comp_bio);
 	}
 
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43ab8df1..680d4265d601 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio *bio);
 
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long len, u64 disk_start,
 				  unsigned long compressed_len,
 				  struct page **compressed_pages,
 				  unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
 enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 643c70d2b2e6..d2da0a52d560 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
 struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		    struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
 			      u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 		       u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8685d67185d0..46accc75ad5a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
 	bio_end_io_t *end_io;
 	void *private;
 	struct btrfs_fs_info *info;
-	int error;
+	blk_status_t status;
 	enum btrfs_wq_endio_type metadata;
 	struct list_head list;
 	struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
 	 */
 	u64 bio_offset;
 	struct btrfs_work work;
-	int error;
+	blk_status_t status;
 };
 
 /*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
 	btrfs_work_func_t func;
 
 	fs_info = end_io_wq->info;
-	end_io_wq->error = bio->bi_error;
+	end_io_wq->status = bio->bi_status;
 
 	if (bio_op(bio) == REQ_OP_WRITE) {
 		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
 	btrfs_queue_work(wq, &end_io_wq->work);
 }
 
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			enum btrfs_wq_endio_type metadata)
 {
 	struct btrfs_end_io_wq *end_io_wq;
 
 	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
 	if (!end_io_wq)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	end_io_wq->private = bio->bi_private;
 	end_io_wq->end_io = bio->bi_end_io;
 	end_io_wq->info = info;
-	end_io_wq->error = 0;
+	end_io_wq->status = 0;
 	end_io_wq->bio = bio;
 	end_io_wq->metadata = metadata;
 
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 static void run_one_async_start(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
-	int ret;
+	blk_status_t ret;
 
 	async = container_of(work, struct  async_submit_bio, work);
 	ret = async->submit_bio_start(async->inode, async->bio,
 				      async->mirror_num, async->bio_flags,
 				      async->bio_offset);
 	if (ret)
-		async->error = ret;
+		async->status = ret;
 }
 
 static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
 		wake_up(&fs_info->async_submit_wait);
 
 	/* If an error occurred we just want to clean up the bio and move on */
-	if (async->error) {
-		async->bio->bi_error = async->error;
+	if (async->status) {
+		async->bio->bi_status = async->status;
 		bio_endio(async->bio);
 		return;
 	}
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
 	kfree(async);
 }
 
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-			struct bio *bio, int mirror_num,
-			unsigned long bio_flags,
-			u64 bio_offset,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+		struct inode *inode, struct bio *bio, int mirror_num,
+		unsigned long bio_flags, u64 bio_offset,
+		extent_submit_bio_hook_t *submit_bio_start,
+		extent_submit_bio_hook_t *submit_bio_done)
 {
 	struct async_submit_bio *async;
 
 	async = kmalloc(sizeof(*async), GFP_NOFS);
 	if (!async)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	async->inode = inode;
 	async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->bio_flags = bio_flags;
 	async->bio_offset = bio_offset;
 
-	async->error = 0;
+	async->status = 0;
 
 	atomic_inc(&fs_info->nr_async_submits);
 
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	return 0;
 }
 
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
 			break;
 	}
 
-	return ret;
+	return errno_to_blk_status(ret);
 }
 
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
-				    u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
 	/*
 	 * when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
 	return btree_csum_one_bio(bio);
 }
 
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
-				 int mirror_num, unsigned long bio_flags,
-				 u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
-	int ret;
+	blk_status_t ret;
 
 	/*
 	 * when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
 	 */
 	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
 	return 1;
 }
 
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	int async = check_async_write(bio_flags);
-	int ret;
+	blk_status_t ret;
 
 	if (bio_op(bio) != REQ_OP_WRITE) {
 		/*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
 	return 0;
 
 out_w_error:
-	bio->bi_error = ret;
+	bio->bi_status = ret;
 	bio_endio(bio);
 	return ret;
 }
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 	end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
 	bio = end_io_wq->bio;
 
-	bio->bi_error = end_io_wq->error;
+	bio->bi_status = end_io_wq->status;
 	bio->bi_private = end_io_wq->private;
 	bio->bi_end_io = end_io_wq->end_io;
 	kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3495,11 +3494,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  * any device where the flush fails with eopnotsupp are flagged as not-barrier
  * capable
  */
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
 {
 	struct request_queue *q = bdev_get_queue(device->bdev);
 	struct bio *bio;
-	int ret = 0;
+	blk_status_t ret = 0;
 
 	if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 		return 0;
@@ -3511,8 +3510,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 
 		wait_for_completion(&device->flush_wait);
 
-		if (bio->bi_error) {
-			ret = bio->bi_error;
+		if (bio->bi_status) {
+			ret = bio->bi_status;
 			btrfs_dev_stat_inc_and_print(device,
 				BTRFS_DEV_STAT_FLUSH_ERRS);
 		}
@@ -3531,7 +3530,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 	device->flush_bio = NULL;
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
 	if (!bio)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	bio->bi_end_io = btrfs_end_empty_barrier;
 	bio->bi_bdev = device->bdev;
@@ -3556,7 +3555,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 	struct btrfs_device *dev;
 	int errors_send = 0;
 	int errors_wait = 0;
-	int ret;
+	blk_status_t ret;
 
 	/* send down all the barriers */
 	head = &info->fs_devices->devices;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb85b76..c581927555f3 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-			struct bio *bio, int mirror_num,
-			unsigned long bio_flags, u64 bio_offset,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+		struct inode *inode, struct bio *bio, int mirror_num,
+		unsigned long bio_flags, u64 bio_offset,
+		extent_submit_bio_hook_t *submit_bio_start,
+		extent_submit_bio_hook_t *submit_bio_done);
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d8da3edf2ac3..35cbb6ceb70d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2399,6 +2399,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct bio *bio;
 	int read_mode = 0;
+	blk_status_t status;
 	int ret;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2432,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 		"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
 		read_mode, failrec->this_mirror, failrec->in_validation);
 
-	ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+	status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
 					 failrec->bio_flags, 0);
-	if (ret) {
+	if (status) {
 		free_io_failure(BTRFS_I(inode), failrec);
 		bio_put(bio);
+		ret = blk_status_to_errno(status);
 	}
 
 	return ret;
@@ -2474,6 +2476,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
  */
 static void end_bio_extent_writepage(struct bio *bio)
 {
+	int error = blk_status_to_errno(bio->bi_status);
 	struct bio_vec *bvec;
 	u64 start;
 	u64 end;
@@ -2503,7 +2506,7 @@ static void end_bio_extent_writepage(struct bio *bio)
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-		end_extent_writepage(page, bio->bi_error, start, end);
+		end_extent_writepage(page, error, start, end);
 		end_page_writeback(page);
 	}
 
@@ -2536,7 +2539,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
 static void end_bio_extent_readpage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree;
 	u64 offset = 0;
@@ -2556,7 +2559,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 
 		btrfs_debug(fs_info,
 			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
-			(u64)bio->bi_iter.bi_sector, bio->bi_error,
+			(u64)bio->bi_iter.bi_sector, bio->bi_status,
 			io_bio->mirror_num);
 		tree = &BTRFS_I(inode)->io_tree;
 
@@ -2615,7 +2618,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 				ret = bio_readpage_error(bio, offset, page,
 							 start, end, mirror);
 				if (ret == 0) {
-					uptodate = !bio->bi_error;
+					uptodate = !bio->bi_status;
 					offset += len;
 					continue;
 				}
@@ -2673,7 +2676,7 @@ readpage_ok:
 		endio_readpage_release_extent(tree, extent_start, extent_len,
 					      uptodate);
 	if (io_bio->end_io)
-		io_bio->end_io(io_bio, bio->bi_error);
+		io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
 	bio_put(bio);
 }
 
@@ -2743,7 +2746,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 				       unsigned long bio_flags)
 {
-	int ret = 0;
+	blk_status_t ret = 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2764,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 		btrfsic_submit_bio(bio);
 
 	bio_put(bio);
-	return ret;
+	return blk_status_to_errno(ret);
 }
 
 static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -3707,7 +3710,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 		BUG_ON(!eb);
 		done = atomic_dec_and_test(&eb->io_pages);
 
-		if (bio->bi_error ||
+		if (bio->bi_status ||
 		    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
 			ClearPageUptodate(page);
 			set_btree_ioerr(page);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f0ede3..487ca0207cb6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,9 @@ struct btrfs_inode;
 struct btrfs_io_bio;
 struct io_failure_record;
 
-typedef	int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
-				       int mirror_num, unsigned long bio_flags,
-				       u64 bio_offset);
+typedef	blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset);
 struct extent_io_ops {
 	/*
 	 * The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31d7163..5b1c7090e546 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
 	kfree(bio->csum_allocated);
 }
 
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 				   u64 logical_offset, u32 *dst, int dio)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
 	if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 					csum_size, GFP_NOFS);
 			if (!btrfs_bio->csum_allocated) {
 				btrfs_free_path(path);
-				return -ENOMEM;
+				return BLK_STS_RESOURCE;
 			}
 			btrfs_bio->csum = btrfs_bio->csum_allocated;
 			btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
 	return 0;
 }
 
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
 {
 	return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
 }
 
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
 {
 	return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
 }
@@ -433,7 +433,7 @@ fail:
 	return ret;
 }
 
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 		       u64 file_start, int contig)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 	sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
 		       GFP_NOFS);
 	if (!sums)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	sums->len = bio->bi_iter.bi_size;
 	INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 758b2666885e..ea7cae1003eb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -842,13 +842,12 @@ retry:
 				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
 				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
 				PAGE_SET_WRITEBACK);
-		ret = btrfs_submit_compressed_write(inode,
+		if (btrfs_submit_compressed_write(inode,
 				    async_extent->start,
 				    async_extent->ram_size,
 				    ins.objectid,
 				    ins.offset, async_extent->pages,
-				    async_extent->nr_pages);
-		if (ret) {
+				    async_extent->nr_pages)) {
 			struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 			struct page *p = async_extent->pages[0];
 			const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
-				    u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
-	int ret = 0;
+	blk_status_t ret = 0;
 
 	ret = btrfs_csum_one_bio(inode, bio, 0, 0);
 	BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags,
-			  u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	int ret;
+	blk_status_t ret;
 
 	ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
  * extent_io.c submission hook. This does the right thing for csum calculation
  * on write, or reading the csums from the tree before a read
  */
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
 			  int mirror_num, unsigned long bio_flags,
 			  u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	int ret = 0;
+	blk_status_t ret = 0;
 	int skip_sum;
 	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
@@ -1991,8 +1990,8 @@ mapit:
 	ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
 
 out:
-	if (ret < 0) {
-		bio->bi_error = ret;
+	if (ret) {
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 	struct bio_vec *bvec;
 	int i;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		goto end;
 
 	ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
 	int ret;
 	int i;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		goto end;
 
 	uptodate = 1;
@@ -8141,8 +8140,8 @@ end:
 	bio_put(bio);
 }
 
-static int __btrfs_subio_endio_read(struct inode *inode,
-				    struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+		struct btrfs_io_bio *io_bio, blk_status_t err)
 {
 	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ try_again:
 				io_bio->mirror_num,
 				btrfs_retry_endio, &done);
 		if (ret) {
-			err = ret;
+			err = errno_to_blk_status(ret);
 			goto next;
 		}
 
@@ -8211,8 +8210,8 @@ next:
 	return err;
 }
 
-static int btrfs_subio_endio_read(struct inode *inode,
-				  struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+		struct btrfs_io_bio *io_bio, blk_status_t err)
 {
 	bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	struct inode *inode = dip->inode;
 	struct bio *dio_bio;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
 		err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
 	kfree(dip);
 
-	dio_bio->bi_error = bio->bi_error;
+	dio_bio->bi_status = bio->bi_status;
 	dio_end_io(dio_bio);
 
 	if (io_bio->end_io)
-		io_bio->end_io(io_bio, err);
+		io_bio->end_io(io_bio, blk_status_to_errno(err));
 	bio_put(bio);
 }
 
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	struct bio *dio_bio = dip->dio_bio;
 
 	__endio_write_update_ordered(dip->inode, dip->logical_offset,
-				     dip->bytes, !bio->bi_error);
+				     dip->bytes, !bio->bi_status);
 
 	kfree(dip);
 
-	dio_bio->bi_error = bio->bi_error;
+	dio_bio->bi_status = bio->bi_status;
 	dio_end_io(dio_bio);
 	bio_put(bio);
 }
 
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
 				    struct bio *bio, int mirror_num,
 				    unsigned long bio_flags, u64 offset)
 {
-	int ret;
+	blk_status_t ret;
 	ret = btrfs_csum_one_bio(inode, bio, offset, 1);
 	BUG_ON(ret); /* -ENOMEM */
 	return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
 static void btrfs_end_dio_bio(struct bio *bio)
 {
 	struct btrfs_dio_private *dip = bio->bi_private;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (err)
 		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
 	if (dip->errors) {
 		bio_io_error(dip->orig_bio);
 	} else {
-		dip->dio_bio->bi_error = 0;
+		dip->dio_bio->bi_status = 0;
 		bio_endio(dip->orig_bio);
 	}
 out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 	return bio;
 }
 
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
 						 struct btrfs_dio_private *dip,
 						 struct bio *bio,
 						 u64 file_offset)
 {
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
-	int ret;
+	blk_status_t ret;
 
 	/*
 	 * We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
 	bool write = bio_op(bio) == REQ_OP_WRITE;
-	int ret;
+	blk_status_t ret;
 
 	if (async_submit)
 		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ free_ordered:
 	 * callbacks - they require an allocated dip and a clone of dio_bio.
 	 */
 	if (io_bio && dip) {
-		io_bio->bi_error = -EIO;
+		io_bio->bi_status = BLK_STS_IOERR;
 		bio_endio(io_bio);
 		/*
 		 * The end io callbacks free our dip, do the final put on io_bio
@@ -8668,7 +8667,7 @@ free_ordered:
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
 			      file_offset + dio_bio->bi_iter.bi_size - 1);
 
-		dio_bio->bi_error = -EIO;
+		dio_bio->bi_status = BLK_STS_IOERR;
 		/*
 		 * Releases and cleans up our dio_bio, no need to bio_put()
 		 * nor bio_endio()/bio_io_error() against dio_bio.
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb76325..f3d30d9ea8f9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
  * this frees the rbio and runs through all the bios in the
  * bio_list and calls end_io on them
  */
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
 {
 	struct bio *cur = bio_list_get(&rbio->bio_list);
 	struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 	while (cur) {
 		next = cur->bi_next;
 		cur->bi_next = NULL;
-		cur->bi_error = err;
+		cur->bi_status = err;
 		bio_endio(cur);
 		cur = next;
 	}
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 static void raid_write_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 	int max_errors;
 
 	if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
 	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
 		     0 : rbio->bbio->max_errors;
 	if (atomic_read(&rbio->error) > max_errors)
-		err = -EIO;
+		err = BLK_STS_IOERR;
 
 	rbio_orig_end_io(rbio, err);
 }
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 		 * devices or if they are not contiguous
 		 */
 		if (last_end == disk_start && stripe->dev->bdev &&
-		    !last->bi_error &&
+		    !last->bi_status &&
 		    last->bi_bdev == stripe->dev->bdev) {
 			ret = bio_add_page(last, page, PAGE_SIZE, 0);
 			if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
 	 * we only read stripe pages off the disk, set them
 	 * up to date if there were no errors
 	 */
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb2403d..ba5595d19de1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -95,7 +95,7 @@ struct scrub_bio {
 	struct scrub_ctx	*sctx;
 	struct btrfs_device	*dev;
 	struct bio		*bio;
-	int			err;
+	blk_status_t		status;
 	u64			logical;
 	u64			physical;
 #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ leave_nomem:
 
 struct scrub_bio_ret {
 	struct completion event;
-	int error;
+	blk_status_t status;
 };
 
 static void scrub_bio_wait_endio(struct bio *bio)
 {
 	struct scrub_bio_ret *ret = bio->bi_private;
 
-	ret->error = bio->bi_error;
+	ret->status = bio->bi_status;
 	complete(&ret->event);
 }
 
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	int ret;
 
 	init_completion(&done.event);
-	done.error = 0;
+	done.status = 0;
 	bio->bi_iter.bi_sector = page->logical >> 9;
 	bio->bi_private = &done;
 	bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 		return ret;
 
 	wait_for_completion(&done.event);
-	if (done.error)
+	if (done.status)
 		return -EIO;
 
 	return 0;
@@ -1937,7 +1937,7 @@ again:
 		bio->bi_bdev = sbio->dev->bdev;
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-		sbio->err = 0;
+		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical_for_dev_replace ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-	sbio->err = bio->bi_error;
+	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
 	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
 	int i;
 
 	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
-	if (sbio->err) {
+	if (sbio->status) {
 		struct btrfs_dev_replace *dev_replace =
 			&sbio->sctx->fs_info->dev_replace;
 
@@ -2341,7 +2341,7 @@ again:
 		bio->bi_bdev = sbio->dev->bdev;
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio_set_op_attrs(bio, REQ_OP_READ, 0);
-		sbio->err = 0;
+		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
 	struct scrub_block *sblock = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		sblock->no_io_error_seen = 0;
 
 	bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-	sbio->err = bio->bi_error;
+	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
 	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
 	int i;
 
 	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
-	if (sbio->err) {
+	if (sbio->status) {
 		for (i = 0; i < sbio->page_count; i++) {
 			struct scrub_page *spage = sbio->pagev[i];
 
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
 	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
 	struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 			  sparity->nsectors);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67daa3bb..84a495967e0a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
 	struct btrfs_bio *bbio = bio->bi_private;
 	int is_orig_bio = 0;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		atomic_inc(&bbio->error);
-		if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+		if (bio->bi_status == BLK_STS_IOERR ||
+		    bio->bi_status == BLK_STS_TARGET) {
 			unsigned int stripe_index =
 				btrfs_io_bio(bio)->stripe_index;
 			struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
 		 * beyond the tolerance of the btrfs bio
 		 */
 		if (atomic_read(&bbio->error) > bbio->max_errors) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 		} else {
 			/*
 			 * this bio is actually up to date, we didn't
 			 * go over the max number of errors
 			 */
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 		}
 
 		btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 
 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
 		bio->bi_iter.bi_sector = logical >> 9;
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		btrfs_end_bbio(bbio, bio);
 	}
 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..306b720f7383 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3038,7 +3038,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	if (unlikely(bio_flagged(bio, BIO_QUIET)))
 		set_bit(BH_Quiet, &bh->b_state);
 
-	bh->b_end_io(bh, !bio->bi_error);
+	bh->b_end_io(bh, !bio->bi_status);
 	bio_put(bio);
 }
 
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84f1bca..6181e9526860 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 			goto errout;
 		}
 		err = submit_bio_wait(bio);
-		if ((err == 0) && bio->bi_error)
+		if (err == 0 && bio->bi_status)
 			err = -EIO;
 		bio_put(bio);
 		if (err)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index bb711e4b86c2..e8baaabebf13 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
 	dio_complete(dio, 0, true);
 }
 
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
 
 /*
  * Asynchronous IO callback. 
@@ -473,11 +473,11 @@ static struct bio *dio_await_one(struct dio *dio)
 /*
  * Process one completed BIO.  No locks are held.
  */
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	unsigned i;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (err)
 		dio->io_error = -EIO;
@@ -536,7 +536,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 			bio = dio->bio_list;
 			dio->bio_list = bio->bi_private;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			ret2 = dio_bio_complete(dio, bio);
+			ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
 			if (ret == 0)
 				ret = ret2;
 		}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138ba739..930ca0fc9a0f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
 		}
 #endif
 
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			SetPageError(page);
 			mapping_set_error(page->mapping, -EIO);
 		}
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
 				continue;
 			}
 			clear_buffer_async_write(bh);
-			if (bio->bi_error)
+			if (bio->bi_status)
 				buffer_io_error(bh);
 		} while ((bh = bh->b_this_page) != head);
 		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
 		      bdevname(bio->bi_bdev, b),
 		      (long long) bio->bi_iter.bi_sector,
 		      (unsigned) bio_sectors(bio),
-		      bio->bi_error)) {
+		      bio->bi_status)) {
 		ext4_finish_bio(bio);
 		bio_put(bio);
 		return;
 	}
 	bio->bi_end_io = NULL;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct inode *inode = io_end->inode;
 
 		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
 			     "(offset %llu size %ld starting block %llu)",
-			     bio->bi_error, inode->i_ino,
+			     bio->bi_status, inode->i_ino,
 			     (unsigned long long) io_end->offset,
 			     (long) io_end->size,
 			     (unsigned long long)
 			     bi_sector >> (inode->i_blkbits - 9));
-		mapping_set_error(inode->i_mapping, bio->bi_error);
+		mapping_set_error(inode->i_mapping,
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..40a5497b0f60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
 	int i;
 
 	if (ext4_bio_encrypted(bio)) {
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
 		} else {
 			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
 
-		if (!bio->bi_error) {
+		if (!bio->bi_status) {
 			SetPageUptodate(page);
 		} else {
 			ClearPageUptodate(page);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d..36fe82012a33 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
 		f2fs_show_injection_info(FAULT_IO);
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 	}
 #endif
 
 	if (f2fs_bio_encrypted(bio)) {
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
 		} else {
 			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		if (!bio->bi_error) {
+		if (!bio->bi_status) {
 			if (!PageUptodate(page))
 				SetPageUptodate(page);
 		} else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
 			unlock_page(page);
 			mempool_free(page, sbi->write_io_dummy);
 
-			if (unlikely(bio->bi_error))
+			if (unlikely(bio->bi_status))
 				f2fs_stop_checkpoint(sbi, true);
 			continue;
 		}
 
 		fscrypt_pullback_bio_page(&page, true);
 
-		if (unlikely(bio->bi_error)) {
+		if (unlikely(bio->bi_status)) {
 			mapping_set_error(page->mapping, -EIO);
 			f2fs_stop_checkpoint(sbi, true);
 		}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 96845854e7ee..ea9f455d94ba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 {
 	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 
-	dc->error = bio->bi_error;
+	dc->error = blk_status_to_errno(bio->bi_status);
 	dc->state = D_DONE;
 	complete(&dc->wait);
 	bio_put(bio);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 13ebf15a4db0..885d36e7a29f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
  */
 
 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
-				  int error)
+				  blk_status_t error)
 {
 	struct buffer_head *bh, *next;
 	struct page *page = bvec->bv_page;
@@ -209,13 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
 	struct page *page;
 	int i;
 
-	if (bio->bi_error)
-		fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
+	if (bio->bi_status)
+		fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		page = bvec->bv_page;
 		if (page_has_buffers(page))
-			gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
 		else
 			mempool_free(page, gfs2_page_pool);
 	}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc135ef3..fabe1614f879 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
 		do {
 			struct buffer_head *next = bh->b_this_page;
 			len -= bh->b_size;
-			bh->b_end_io(bh, !bio->bi_error);
+			bh->b_end_io(bh, !bio->bi_status);
 			bh = next;
 		} while (bh && len);
 	}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ed67548b286c..83953cdbbc6c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
 {
 	struct page *page = bio->bi_private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		SetPageUptodate(page);
 	else
-		pr_warn("error %d reading superblock\n", bio->bi_error);
+		pr_warn("error %d reading superblock\n", bio->bi_status);
 	unlock_page(page);
 }
 
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..18f2f2b8ba2c 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	struct iomap_dio *dio = bio->bi_private;
 	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
 
-	if (bio->bi_error)
-		iomap_dio_set_error(dio, bio->bi_error);
+	if (bio->bi_status)
+		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
 	if (atomic_dec_and_test(&dio->ref)) {
 		if (is_sync_kiocb(dio->iocb)) {
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1feafeb..a21f0e9eecd4 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
 
 	bp->l_flag |= lbmDONE;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bp->l_flag |= lbmERROR;
 
 		jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..ce93db3aef3c 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
 {
 	struct page *page = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
 		SetPageError(page);
 	}
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
 
 	BUG_ON(!PagePrivate(page));
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
 		SetPageError(page);
 	}
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f820c29..9524fdde00c2 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+		page_endio(page, op_is_write(bio_op(bio)),
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	bio_put(bio);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d23ddb..d8863a804b15 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
 {
 	struct parallel_io *par = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct nfs_pgio_header *header = par->data;
 
 		if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
 	struct parallel_io *par = bio->bi_private;
 	struct nfs_pgio_header *header = par->data;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		if (!header->pnfs_error)
 			header->pnfs_error = -EIO;
 		pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2ac1aeb..e73c86d9855c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
 {
 	struct nilfs_segment_buffer *segbuf = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		atomic_inc(&segbuf->sb_err);
 
 	bio_put(bio);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332725aa..ffe003982d95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
 {
 	struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
 
-	if (bio->bi_error) {
-		mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
-		wc->wc_error = bio->bi_error;
+	if (bio->bi_status) {
+		mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+		wc->wc_error = blk_status_to_errno(bio->bi_status);
 	}
 
 	o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e..76b6f988e2fa 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
-	int			error = ioend->io_bio->bi_error;
+	int			error;
 
 	/*
 	 * Just clean up the in-memory strutures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
 	/*
 	 * Clean up any COW blocks on an I/O error.
 	 */
+	error = blk_status_to_errno(ioend->io_bio->bi_status);
 	if (unlikely(error)) {
 		switch (ioend->io_type) {
 		case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
 	else if (ioend->io_append_trans)
 		queue_work(mp->m_data_workqueue, &ioend->io_work);
 	else
-		xfs_destroy_ioend(ioend, bio->bi_error);
+		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
 }
 
 STATIC int
@@ -500,7 +501,7 @@ xfs_submit_ioend(
 	 * time.
 	 */
 	if (status) {
-		ioend->io_bio->bi_error = status;
+		ioend->io_bio->bi_status = errno_to_blk_status(status);
 		bio_endio(ioend->io_bio);
 		return status;
 	}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 62fa39276a24..15c7a484a5d2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1213,8 +1213,11 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (bio->bi_error)
-		cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+	if (bio->bi_status) {
+		int error = blk_status_to_errno(bio->bi_status);
+
+		cmpxchg(&bp->b_io_error, 0, error);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0e99cf..9455aada1399 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -414,7 +414,7 @@ extern void bio_endio(struct bio *);
 
 static inline void bio_io_error(struct bio *bio)
 {
-	bio->bi_error = -EIO;
+	bio->bi_status = BLK_STS_IOERR;
 	bio_endio(bio);
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 59378939a8cd..dcd45b15a3a5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -33,6 +33,9 @@ typedef u8 __bitwise blk_status_t;
 #define BLK_STS_RESOURCE	((__force blk_status_t)9)
 #define BLK_STS_IOERR		((__force blk_status_t)10)
 
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
+
 struct blk_issue_stat {
 	u64 stat;
 };
@@ -44,7 +47,7 @@ struct blk_issue_stat {
 struct bio {
 	struct bio		*bi_next;	/* request queue link */
 	struct block_device	*bi_bdev;
-	int			bi_error;
+	blk_status_t		bi_status;
 	unsigned int		bi_opf;		/* bottom bits req flags,
 						 * top bits REQ_OP. Use
 						 * accessors.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2a8871638453..76b6df862a12 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1782,7 +1782,7 @@ struct blk_integrity_iter {
 	const char		*disk_name;
 };
 
-typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
 
 struct blk_integrity_profile {
 	integrity_processing_fn		*generate_fn;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 5de5c53251ec..456da5017b32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,7 +72,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int *error);
+			    struct bio *bio, blk_status_t *error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
 				    struct request *clone, blk_status_t error,
 				    union map_info *map_context);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f80fd33639e0..57d22571f306 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
 struct hib_bio_batch {
 	atomic_t		count;
 	wait_queue_head_t	wait;
-	int			error;
+	blk_status_t		error;
 };
 
 static void hib_init_batch(struct hib_bio_batch *hb)
 {
 	atomic_set(&hb->count, 0);
 	init_waitqueue_head(&hb->wait);
-	hb->error = 0;
+	hb->error = BLK_STS_OK;
 }
 
 static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
 	struct hib_bio_batch *hb = bio->bi_private;
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
 				imajor(bio->bi_bdev->bd_inode),
 				iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
 		flush_icache_range((unsigned long)page_address(page),
 				   (unsigned long)page_address(page) + PAGE_SIZE);
 
-	if (bio->bi_error && !hb->error)
-		hb->error = bio->bi_error;
+	if (bio->bi_status && !hb->error)
+		hb->error = bio->bi_status;
 	if (atomic_dec_and_test(&hb->count))
 		wake_up(&hb->wait);
 
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
 	return error;
 }
 
-static int hib_wait_io(struct hib_bio_batch *hb)
+static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
 {
 	wait_event(hb->wait, atomic_read(&hb->count) == 0);
-	return hb->error;
+	return blk_status_to_errno(hb->error);
 }
 
 /*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 193c5f5e3f79..bc364f86100a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
 
 		__blk_add_trace(bt, bio->bi_iter.bi_sector,
 				bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
-				BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
+				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
 				&rpdu);
 	}
 }
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
 	r.sector_from = cpu_to_be64(from);
 
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
+			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
 			sizeof(r), &r);
 }
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..2da71e627812 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		SetPageError(page);
 		/*
 		 * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		SetPageError(page);
 		ClearPageUptodate(page);
 		pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
-- 
cgit v1.2.3


From 0aed55af88345b5d673240f90e671d79662fb01e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 12:22:50 -0700
Subject: x86, uaccess: introduce copy_from_iter_flushcache for pmem /
 cache-bypass operations

The pmem driver has a need to transfer data with a persistent memory
destination and be able to rely on the fact that the destination writes are not
cached. It is sufficient for the writes to be flushed to a cpu-store-buffer
(non-temporal / "movnt" in x86 terms), as we expect userspace to call fsync()
to ensure data-writes have reached a power-fail-safe zone in the platform. The
fsync() triggers a REQ_FUA or REQ_FLUSH to the pmem driver which will turn
around and fence previous writes with an "sfence".

Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache, and
memcpy_flushcache, that guarantee that the destination buffer is not dirty in
the cpu cache on completion. The new copy_from_iter_flushcache and sub-routines
will be used to replace the "pmem api" (include/linux/pmem.h +
arch/x86/include/asm/pmem.h). The availability of copy_from_iter_flushcache()
and memcpy_flushcache() are gated by the CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
config symbol, and fallback to copy_from_iter_nocache() and plain memcpy()
otherwise.

This is meant to satisfy the concern from Linus that if a driver wants to do
something beyond the normal nocache semantics it should be something private to
that driver [1], and Al's concern that anything uaccess related belongs with
the rest of the uaccess code [2].

The first consumer of this interface is a new 'copy_from_iter' dax operation so
that pmem can inject cache maintenance operations without imposing this
overhead on other dax-capable drivers.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html
[2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html

Cc: <x86@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/Kconfig                  |   1 +
 arch/x86/include/asm/string_64.h  |   5 ++
 arch/x86/include/asm/uaccess_64.h |  11 ++++
 arch/x86/lib/usercopy_64.c        | 128 ++++++++++++++++++++++++++++++++++++++
 drivers/acpi/nfit/core.c          |   3 +-
 drivers/nvdimm/claim.c            |   2 +-
 drivers/nvdimm/pmem.c             |  13 +++-
 drivers/nvdimm/region_devs.c      |   4 +-
 include/linux/dax.h               |   3 +
 include/linux/string.h            |   6 ++
 include/linux/uio.h               |  15 +++++
 lib/Kconfig                       |   3 +
 lib/iov_iter.c                    |  22 +++++++
 13 files changed, 209 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4ccfacc7232a..bb273b2f50b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,6 +54,7 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_PMEM_API		if X86_64
+	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 733bae07fb29..1f22bc277c45 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
+void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index c5504b9a472e..b16f6a1d8b26 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne
 extern long __copy_user_nocache(void *dst, const void __user *src,
 				unsigned size, int zerorest);
 
+extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
+extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+			   size_t len);
+
 static inline int
 __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
 				  unsigned size)
@@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
 	return __copy_user_nocache(dst, src, size, 0);
 }
 
+static inline int
+__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+	kasan_check_write(dst, size);
+	return __copy_user_flushcache(dst, src, size);
+}
+
 unsigned long
 copy_user_handle_tail(char *to, char *from, unsigned len);
 
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 3b7c40a2e3e1..f42d2fd86ca3 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -7,6 +7,7 @@
  */
 #include <linux/export.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 /*
  * Zero Userspace
@@ -73,3 +74,130 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
 	clac();
 	return len;
 }
+
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+/**
+ * clean_cache_range - write back a cache range with CLWB
+ * @vaddr:	virtual start address
+ * @size:	number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
+ */
+static void clean_cache_range(void *addr, size_t size)
+{
+	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+	unsigned long clflush_mask = x86_clflush_size - 1;
+	void *vend = addr + size;
+	void *p;
+
+	for (p = (void *)((unsigned long)addr & ~clflush_mask);
+	     p < vend; p += x86_clflush_size)
+		clwb(p);
+}
+
+long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+	unsigned long flushed, dest = (unsigned long) dst;
+	long rc = __copy_user_nocache(dst, src, size, 0);
+
+	/*
+	 * __copy_user_nocache() uses non-temporal stores for the bulk
+	 * of the transfer, but we need to manually flush if the
+	 * transfer is unaligned. A cached memory copy is used when
+	 * destination or size is not naturally aligned. That is:
+	 *   - Require 8-byte alignment when size is 8 bytes or larger.
+	 *   - Require 4-byte alignment when size is 4 bytes.
+	 */
+	if (size < 8) {
+		if (!IS_ALIGNED(dest, 4) || size != 4)
+			clean_cache_range(dst, 1);
+	} else {
+		if (!IS_ALIGNED(dest, 8)) {
+			dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+			clean_cache_range(dst, 1);
+		}
+
+		flushed = dest - (unsigned long) dst;
+		if (size > flushed && !IS_ALIGNED(size - flushed, 8))
+			clean_cache_range(dst + size - 1, 1);
+	}
+
+	return rc;
+}
+
+void memcpy_flushcache(void *_dst, const void *_src, size_t size)
+{
+	unsigned long dest = (unsigned long) _dst;
+	unsigned long source = (unsigned long) _src;
+
+	/* cache copy and flush to align dest */
+	if (!IS_ALIGNED(dest, 8)) {
+		unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
+
+		memcpy((void *) dest, (void *) source, len);
+		clean_cache_range((void *) dest, len);
+		dest += len;
+		source += len;
+		size -= len;
+		if (!size)
+			return;
+	}
+
+	/* 4x8 movnti loop */
+	while (size >= 32) {
+		asm("movq    (%0), %%r8\n"
+		    "movq   8(%0), %%r9\n"
+		    "movq  16(%0), %%r10\n"
+		    "movq  24(%0), %%r11\n"
+		    "movnti  %%r8,   (%1)\n"
+		    "movnti  %%r9,  8(%1)\n"
+		    "movnti %%r10, 16(%1)\n"
+		    "movnti %%r11, 24(%1)\n"
+		    :: "r" (source), "r" (dest)
+		    : "memory", "r8", "r9", "r10", "r11");
+		dest += 32;
+		source += 32;
+		size -= 32;
+	}
+
+	/* 1x8 movnti loop */
+	while (size >= 8) {
+		asm("movq    (%0), %%r8\n"
+		    "movnti  %%r8,   (%1)\n"
+		    :: "r" (source), "r" (dest)
+		    : "memory", "r8");
+		dest += 8;
+		source += 8;
+		size -= 8;
+	}
+
+	/* 1x4 movnti loop */
+	while (size >= 4) {
+		asm("movl    (%0), %%r8d\n"
+		    "movnti  %%r8d,   (%1)\n"
+		    :: "r" (source), "r" (dest)
+		    : "memory", "r8");
+		dest += 4;
+		source += 4;
+		size -= 4;
+	}
+
+	/* cache copy for remaining bytes */
+	if (size) {
+		memcpy((void *) dest, (void *) source, size);
+		clean_cache_range((void *) dest, size);
+	}
+}
+EXPORT_SYMBOL_GPL(memcpy_flushcache);
+
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+		size_t len)
+{
+	char *from = kmap_atomic(page);
+
+	memcpy_flushcache(to, from + offset, len);
+	kunmap_atomic(from);
+}
+#endif
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 656acb5d7166..cbd5596e7562 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1842,8 +1842,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
 		}
 
 		if (rw)
-			memcpy_to_pmem(mmio->addr.aperture + offset,
-					iobuf + copied, c);
+			memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
 		else {
 			if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
 				mmio_flush_range((void __force *)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 7ceb5fa4f2a1..b8b9c8ca7862 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -277,7 +277,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 			rc = -EIO;
 	}
 
-	memcpy_to_pmem(nsio->addr + offset, buf, size);
+	memcpy_flushcache(nsio->addr + offset, buf, size);
 	nvdimm_flush(to_nd_region(ndns->dev.parent));
 
 	return rc;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..2f3aefe565c6 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,6 +29,7 @@
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
+#include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/nd.h>
 #include "pmem.h"
@@ -80,7 +81,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
 {
 	void *mem = kmap_atomic(page);
 
-	memcpy_to_pmem(pmem_addr, mem + off, len);
+	memcpy_flushcache(pmem_addr, mem + off, len);
 	kunmap_atomic(mem);
 }
 
@@ -235,8 +236,15 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
 	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
 }
 
+static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter_flushcache(addr, bytes, i);
+}
+
 static const struct dax_operations pmem_dax_ops = {
 	.direct_access = pmem_dax_direct_access,
+	.copy_from_iter = pmem_copy_from_iter,
 };
 
 static void pmem_release_queue(void *q)
@@ -294,7 +302,8 @@ static int pmem_attach_disk(struct device *dev,
 	dev_set_drvdata(dev, pmem);
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
-	if (nvdimm_has_flush(nd_region) < 0)
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE)
+			|| nvdimm_has_flush(nd_region) < 0)
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index b550edf2571f..985b0e11bd73 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1015,8 +1015,8 @@ void nvdimm_flush(struct nd_region *nd_region)
 	 * The first wmb() is needed to 'sfence' all previous writes
 	 * such that they are architecturally visible for the platform
 	 * buffer flush.  Note that we've already arranged for pmem
-	 * writes to avoid the cache via arch_memcpy_to_pmem().  The
-	 * final wmb() ensures ordering for the NVDIMM flush write.
+	 * writes to avoid the cache via memcpy_flushcache().  The final
+	 * wmb() ensures ordering for the NVDIMM flush write.
 	 */
 	wmb();
 	for (i = 0; i < nd_region->ndr_mappings; i++)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 5ec1f6c47716..bbe79ed90e2b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -16,6 +16,9 @@ struct dax_operations {
 	 */
 	long (*direct_access)(struct dax_device *, pgoff_t, long,
 			void **, pfn_t *);
+	/* copy_from_iter: dax-driver override for default copy_from_iter */
+	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
+			struct iov_iter *);
 };
 
 #if IS_ENABLED(CONFIG_DAX)
diff --git a/include/linux/string.h b/include/linux/string.h
index 537918f8a98e..7439d83eaa33 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
 	return 0;
 }
 #endif
+#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
+static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
+{
+	memcpy(dst, src, cnt);
+}
+#endif
 void *memchr_inv(const void *s, int c, size_t n);
 char *strreplace(char *s, char old, char new);
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index f2d36a3d3005..55cd54a0e941 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
 bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+/*
+ * Note, users like pmem that depend on the stricter semantics of
+ * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
+ * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
+ * destination is flushed from the cache on return.
+ */
+size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
+#else
+static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes,
+				       struct iov_iter *i)
+{
+	return copy_from_iter_nocache(addr, bytes, i);
+}
+#endif
 bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
diff --git a/lib/Kconfig b/lib/Kconfig
index 0c8b78a9ae2e..2d1c4b3a085c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -548,6 +548,9 @@ config ARCH_HAS_SG_CHAIN
 config ARCH_HAS_PMEM_API
 	bool
 
+config ARCH_HAS_UACCESS_FLUSHCACHE
+	bool
+
 config ARCH_HAS_MMIO_FLUSH
 	bool
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f835964c9485..c9a69064462f 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -615,6 +615,28 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter_nocache);
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
+	iterate_and_advance(i, bytes, v,
+		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
+					 v.iov_base, v.iov_len),
+		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
+			v.iov_len)
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(copy_from_iter_flushcache);
+#endif
+
 bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-- 
cgit v1.2.3


From 7e026c8c0a4200da86bc51edeaad79dcdccf78ca Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 12:57:56 -0700
Subject: dm: add ->copy_from_iter() dax operation support

Allow device-mapper to route copy_from_iter operations to the
per-target implementation. In order for the device stacking to work we
need a dax_dev and a pgoff relative to that device. This gives each
layer of the stack the information it needs to look up the operation
pointer for the next level.

This conceptually allows for an array of mixed device drivers with
varying copy_from_iter implementations.

Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c           | 13 +++++++++++++
 drivers/md/dm-linear.c        | 15 +++++++++++++++
 drivers/md/dm-stripe.c        | 20 ++++++++++++++++++++
 drivers/md/dm.c               | 26 ++++++++++++++++++++++++++
 include/linux/dax.h           |  2 ++
 include/linux/device-mapper.h |  3 +++
 6 files changed, 79 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 6ed32aac8bbe..dd299e55f65d 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -18,6 +18,7 @@
 #include <linux/cdev.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
+#include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
 
@@ -172,6 +173,18 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 }
 EXPORT_SYMBOL_GPL(dax_direct_access);
 
+size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t bytes, struct iov_iter *i)
+{
+	if (!dax_alive(dax_dev))
+		return 0;
+
+	if (!dax_dev->ops->copy_from_iter)
+		return copy_from_iter(addr, bytes, i);
+	return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
+}
+EXPORT_SYMBOL_GPL(dax_copy_from_iter);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 7d42a9d9f406..0841ec1bfbad 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -159,6 +159,20 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }
 
+static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	struct linear_c *lc = ti->private;
+	struct block_device *bdev = lc->dev->bdev;
+	struct dax_device *dax_dev = lc->dev->dax_dev;
+	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+	dev_sector = linear_map_sector(ti, sector);
+	if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+		return 0;
+	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
 static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 3, 0},
@@ -171,6 +185,7 @@ static struct target_type linear_target = {
 	.prepare_ioctl = linear_prepare_ioctl,
 	.iterate_devices = linear_iterate_devices,
 	.direct_access = linear_dax_direct_access,
+	.dax_copy_from_iter = linear_dax_copy_from_iter,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 75152482f3ad..1ef914f9ca72 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -332,6 +332,25 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }
 
+static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+	struct stripe_c *sc = ti->private;
+	struct dax_device *dax_dev;
+	struct block_device *bdev;
+	uint32_t stripe;
+
+	stripe_map_sector(sc, sector, &stripe, &dev_sector);
+	dev_sector += sc->stripe[stripe].physical_start;
+	dax_dev = sc->stripe[stripe].dev->dax_dev;
+	bdev = sc->stripe[stripe].dev->bdev;
+
+	if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+		return 0;
+	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
 /*
  * Stripe status:
  *
@@ -451,6 +470,7 @@ static struct target_type stripe_target = {
 	.iterate_devices = stripe_iterate_devices,
 	.io_hints = stripe_io_hints,
 	.direct_access = stripe_dax_direct_access,
+	.dax_copy_from_iter = stripe_dax_copy_from_iter,
 };
 
 int __init dm_stripe_init(void)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 37ccd73c79ec..7faaceb52819 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -19,6 +19,7 @@
 #include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/uio.h>
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/wait.h>
@@ -969,6 +970,30 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 	return ret;
 }
 
+static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	struct mapped_device *md = dax_get_private(dax_dev);
+	sector_t sector = pgoff * PAGE_SECTORS;
+	struct dm_target *ti;
+	long ret = 0;
+	int srcu_idx;
+
+	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+	if (!ti)
+		goto out;
+	if (!ti->type->dax_copy_from_iter) {
+		ret = copy_from_iter(addr, bytes, i);
+		goto out;
+	}
+	ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
+ out:
+	dm_put_live_table(md, srcu_idx);
+
+	return ret;
+}
+
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
  * allowed for all bio types except REQ_PREFLUSH.
@@ -2859,6 +2884,7 @@ static const struct block_device_operations dm_blk_dops = {
 
 static const struct dax_operations dm_dax_ops = {
 	.direct_access = dm_dax_direct_access,
+	.copy_from_iter = dm_dax_copy_from_iter,
 };
 
 /*
diff --git a/include/linux/dax.h b/include/linux/dax.h
index bbe79ed90e2b..28e398f8c59e 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -78,6 +78,8 @@ void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
+size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t bytes, struct iov_iter *i);
 
 /*
  * We use lowest available bit in exceptional entry for locking, one bit for
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c0c362..11c8a0a92f9c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -132,6 +132,8 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  */
 typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn);
+typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i);
 #define PAGE_SECTORS (PAGE_SIZE / 512)
 
 void dm_error(const char *message);
@@ -181,6 +183,7 @@ struct target_type {
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
 	dm_dax_direct_access_fn direct_access;
+	dm_dax_copy_from_iter_fn dax_copy_from_iter;
 
 	/* For internal device-mapper use. */
 	struct list_head list;
-- 
cgit v1.2.3


From 3ad7d2468f79fc13215eb941f766a692d34b1381 Mon Sep 17 00:00:00 2001
From: Krister Johansen <kjlx@templeofstupid.com>
Date: Thu, 8 Jun 2017 13:12:14 -0700
Subject: Ipvlan should return an error when an address is already in use.

The ipvlan code already knows how to detect when a duplicate address is
about to be assigned to an ipvlan device.  However, that failure is not
propogated outward and leads to a silent failure.

Introduce a validation step at ip address creation time and allow device
drivers to register to validate the incoming ip addresses.  The ipvlan
code is the first consumer.  If it detects an address in use, we can
return an error to the user before beginning to commit the new ifa in
the networking code.

This can be especially useful if it is necessary to provision many
ipvlans in containers.  The provisioning software (or operator) can use
this to detect situations where an ip address is unexpectedly in use.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_main.c | 69 ++++++++++++++++++++++++++++++++++++++++
 include/linux/inetdevice.h       |  7 ++++
 include/net/addrconf.h           | 10 +++++-
 net/ipv4/devinet.c               | 33 +++++++++++++++++++
 net/ipv6/addrconf.c              | 17 +++++++++-
 net/ipv6/addrconf_core.c         | 19 +++++++++++
 6 files changed, 153 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 618ed88fad0f..e4141d62b5c3 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -824,6 +824,33 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
 	return NOTIFY_OK;
 }
 
+static int ipvlan_addr6_validator_event(struct notifier_block *unused,
+					unsigned long event, void *ptr)
+{
+	struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
+	struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	/* FIXME IPv6 autoconf calls us from bh without RTNL */
+	if (in_softirq())
+		return NOTIFY_DONE;
+
+	if (!netif_is_ipvlan(dev))
+		return NOTIFY_DONE;
+
+	if (!ipvlan || !ipvlan->port)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true))
+			return notifier_from_errno(-EADDRINUSE);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
 	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
@@ -871,10 +898,37 @@ static int ipvlan_addr4_event(struct notifier_block *unused,
 	return NOTIFY_OK;
 }
 
+static int ipvlan_addr4_validator_event(struct notifier_block *unused,
+					unsigned long event, void *ptr)
+{
+	struct in_validator_info *ivi = (struct in_validator_info *)ptr;
+	struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!netif_is_ipvlan(dev))
+		return NOTIFY_DONE;
+
+	if (!ipvlan || !ipvlan->port)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false))
+			return notifier_from_errno(-EADDRINUSE);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
 	.notifier_call = ipvlan_addr4_event,
 };
 
+static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_addr4_validator_event,
+};
+
 static struct notifier_block ipvlan_notifier_block __read_mostly = {
 	.notifier_call = ipvlan_device_event,
 };
@@ -883,6 +937,10 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
 	.notifier_call = ipvlan_addr6_event,
 };
 
+static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_addr6_validator_event,
+};
+
 static void ipvlan_ns_exit(struct net *net)
 {
 	struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
@@ -907,7 +965,10 @@ static int __init ipvlan_init_module(void)
 	ipvlan_init_secret();
 	register_netdevice_notifier(&ipvlan_notifier_block);
 	register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
+	register_inet6addr_validator_notifier(
+	    &ipvlan_addr6_vtor_notifier_block);
 	register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
+	register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
 
 	err = register_pernet_subsys(&ipvlan_net_ops);
 	if (err < 0)
@@ -922,7 +983,11 @@ static int __init ipvlan_init_module(void)
 	return 0;
 error:
 	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
+	unregister_inetaddr_validator_notifier(
+	    &ipvlan_addr4_vtor_notifier_block);
 	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
+	unregister_inet6addr_validator_notifier(
+	    &ipvlan_addr6_vtor_notifier_block);
 	unregister_netdevice_notifier(&ipvlan_notifier_block);
 	return err;
 }
@@ -933,7 +998,11 @@ static void __exit ipvlan_cleanup_module(void)
 	unregister_pernet_subsys(&ipvlan_net_ops);
 	unregister_netdevice_notifier(&ipvlan_notifier_block);
 	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
+	unregister_inetaddr_validator_notifier(
+	    &ipvlan_addr4_vtor_notifier_block);
 	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
+	unregister_inet6addr_validator_notifier(
+	    &ipvlan_addr6_vtor_notifier_block);
 }
 
 module_init(ipvlan_init_module);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index a2e9d6ea1349..e7c04c4e4bcd 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -150,8 +150,15 @@ struct in_ifaddr {
 	unsigned long		ifa_tstamp; /* updated timestamp */
 };
 
+struct in_validator_info {
+	__be32			ivi_addr;
+	struct in_device	*ivi_dev;
+};
+
 int register_inetaddr_notifier(struct notifier_block *nb);
 int unregister_inetaddr_notifier(struct notifier_block *nb);
+int register_inetaddr_validator_notifier(struct notifier_block *nb);
+int unregister_inetaddr_validator_notifier(struct notifier_block *nb);
 
 void inet_netconf_notify_devconf(struct net *net, int event, int type,
 				 int ifindex, struct ipv4_devconf *devconf);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index b43a4eec3cec..d0889cb50172 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -48,11 +48,15 @@ struct prefix_info {
 	struct in6_addr		prefix;
 };
 
-
 #include <linux/netdevice.h>
 #include <net/if_inet6.h>
 #include <net/ipv6.h>
 
+struct in6_validator_info {
+	struct in6_addr		i6vi_addr;
+	struct inet6_dev	*i6vi_dev;
+};
+
 #define IN6_ADDR_HSIZE_SHIFT	4
 #define IN6_ADDR_HSIZE		(1 << IN6_ADDR_HSIZE_SHIFT)
 
@@ -278,6 +282,10 @@ int register_inet6addr_notifier(struct notifier_block *nb);
 int unregister_inet6addr_notifier(struct notifier_block *nb);
 int inet6addr_notifier_call_chain(unsigned long val, void *v);
 
+int register_inet6addr_validator_notifier(struct notifier_block *nb);
+int unregister_inet6addr_validator_notifier(struct notifier_block *nb);
+int inet6addr_validator_notifier_call_chain(unsigned long val, void *v);
+
 void inet6_netconf_notify_devconf(struct net *net, int event, int type,
 				  int ifindex, struct ipv6_devconf *devconf);
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df14815a3b8c..a7dd088d5fc9 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -176,6 +176,7 @@ EXPORT_SYMBOL(__ip_dev_find);
 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
+static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
@@ -441,6 +442,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
+	struct in_validator_info ivi;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -471,6 +474,23 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 		}
 	}
 
+	/* Allow any devices that wish to register ifaddr validtors to weigh
+	 * in now, before changes are committed.  The rntl lock is serializing
+	 * access here, so the state should not change between a validator call
+	 * and a final notify on commit.  This isn't invoked on promotion under
+	 * the assumption that validators are checking the address itself, and
+	 * not the flags.
+	 */
+	ivi.ivi_addr = ifa->ifa_address;
+	ivi.ivi_dev = ifa->ifa_dev;
+	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
+					   NETDEV_UP, &ivi);
+	ret = notifier_to_errno(ret);
+	if (ret) {
+		inet_free_ifa(ifa);
+		return ret;
+	}
+
 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 		prandom_seed((__force u32) ifa->ifa_local);
 		ifap = last_primary;
@@ -1356,6 +1376,19 @@ int unregister_inetaddr_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_inetaddr_notifier);
 
+int register_inetaddr_validator_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
+}
+EXPORT_SYMBOL(register_inetaddr_validator_notifier);
+
+int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
+	    nb);
+}
+EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
+
 /* Rename ifa_labels for a device name change. Make some effort to preserve
  * existing alias numbering and to create unique labels if possible.
 */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 25443fd946a8..0aa36b093013 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -963,6 +963,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	struct net *net = dev_net(idev->dev);
 	struct inet6_ifaddr *ifa = NULL;
 	struct rt6_info *rt;
+	struct in6_validator_info i6vi;
 	unsigned int hash;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
@@ -974,6 +975,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		return ERR_PTR(-EADDRNOTAVAIL);
 
 	rcu_read_lock_bh();
+
+	in6_dev_hold(idev);
+
 	if (idev->dead) {
 		err = -ENODEV;			/*XXX*/
 		goto out2;
@@ -984,6 +988,17 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		goto out2;
 	}
 
+	i6vi.i6vi_addr = *addr;
+	i6vi.i6vi_dev = idev;
+	rcu_read_unlock_bh();
+
+	err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
+
+	rcu_read_lock_bh();
+	err = notifier_to_errno(err);
+	if (err)
+		goto out2;
+
 	spin_lock(&addrconf_hash_lock);
 
 	/* Ignore adding duplicate addresses on an interface */
@@ -1034,7 +1049,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	ifa->rt = rt;
 
 	ifa->idev = idev;
-	in6_dev_hold(idev);
 	/* For caller */
 	in6_ifa_hold(ifa);
 
@@ -1062,6 +1076,7 @@ out2:
 		inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 	else {
 		kfree(ifa);
+		in6_dev_put(idev);
 		ifa = ERR_PTR(err);
 	}
 
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index bfa941fc1165..9e3488d50b15 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,6 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
 EXPORT_SYMBOL(__ipv6_addr_type);
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain);
 
 int register_inet6addr_notifier(struct notifier_block *nb)
 {
@@ -107,6 +108,24 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
 }
 EXPORT_SYMBOL(inet6addr_notifier_call_chain);
 
+int register_inet6addr_validator_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&inet6addr_validator_chain, nb);
+}
+EXPORT_SYMBOL(register_inet6addr_validator_notifier);
+
+int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb);
+}
+EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
+
+int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v);
+}
+EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
+
 static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
 					struct dst_entry **u2,
 					struct flowi6 *u3)
-- 
cgit v1.2.3


From 7c7973b2ae277c6e89dceda2246fff2472c8ffdb Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 9 Jun 2017 17:13:18 +0300
Subject: qed: LL2 to use packed information for tx

First step in revising the LL2 interface, this declares
qed_ll2_tx_pkt_info as part of the ll2 interface, and uses it for
transmission instead of receiving lots of parameters.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c  | 127 +++++++++++++----------------
 drivers/net/ethernet/qlogic/qed/qed_ll2.h  |  41 ++--------
 drivers/net/ethernet/qlogic/qed/qed_roce.c |  18 ++--
 include/linux/qed/qed_ll2_if.h             |  27 ++++++
 4 files changed, 102 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index f67ed6d39dfd..0b75f5dd25f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -740,12 +740,13 @@ static void
 qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 			  struct qed_ll2_info *p_ll2_conn)
 {
+	struct qed_ll2_tx_pkt_info tx_pkt;
 	struct qed_ooo_buffer *p_buffer;
-	int rc;
 	u16 l4_hdr_offset_w;
 	dma_addr_t first_frag;
 	u16 parse_flags;
 	u8 bd_flags;
+	int rc;
 
 	/* Submit Tx buffers here */
 	while ((p_buffer = qed_ooo_get_ready_buffer(p_hwfn,
@@ -760,13 +761,18 @@ qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 		SET_FIELD(bd_flags, CORE_TX_BD_DATA_FORCE_VLAN_MODE, 1);
 		SET_FIELD(bd_flags, CORE_TX_BD_DATA_L4_PROTOCOL, 1);
 
-		rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1,
-					       p_buffer->vlan, bd_flags,
-					       l4_hdr_offset_w,
-					       p_ll2_conn->conn.tx_dest, 0,
-					       first_frag,
-					       p_buffer->packet_length,
-					       p_buffer, true);
+		memset(&tx_pkt, 0, sizeof(tx_pkt));
+		tx_pkt.num_of_bds = 1;
+		tx_pkt.vlan = p_buffer->vlan;
+		tx_pkt.bd_flags = bd_flags;
+		tx_pkt.l4_hdr_offset_w = l4_hdr_offset_w;
+		tx_pkt.tx_dest = p_ll2_conn->conn.tx_dest;
+		tx_pkt.first_frag = first_frag;
+		tx_pkt.first_frag_len = p_buffer->packet_length;
+		tx_pkt.cookie = p_buffer;
+
+		rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id,
+					       &tx_pkt, true);
 		if (rc) {
 			qed_ooo_put_ready_buffer(p_hwfn, p_hwfn->p_ooo_info,
 						 p_buffer, false);
@@ -1593,20 +1599,18 @@ out:
 static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn,
 					  struct qed_ll2_tx_queue *p_tx,
 					  struct qed_ll2_tx_packet *p_curp,
-					  u8 num_of_bds,
-					  dma_addr_t first_frag,
-					  u16 first_frag_len, void *p_cookie,
+					  struct qed_ll2_tx_pkt_info *pkt,
 					  u8 notify_fw)
 {
 	list_del(&p_curp->list_entry);
-	p_curp->cookie = p_cookie;
-	p_curp->bd_used = num_of_bds;
+	p_curp->cookie = pkt->cookie;
+	p_curp->bd_used = pkt->num_of_bds;
 	p_curp->notify_fw = notify_fw;
 	p_tx->cur_send_packet = p_curp;
 	p_tx->cur_send_frag_num = 0;
 
-	p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = first_frag;
-	p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = first_frag_len;
+	p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = pkt->first_frag;
+	p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = pkt->first_frag_len;
 	p_tx->cur_send_frag_num++;
 }
 
@@ -1614,32 +1618,33 @@ static void
 qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 				 struct qed_ll2_info *p_ll2,
 				 struct qed_ll2_tx_packet *p_curp,
-				 u8 num_of_bds,
-				 enum core_tx_dest tx_dest,
-				 u16 vlan,
-				 u8 bd_flags,
-				 u16 l4_hdr_offset_w,
-				 enum core_roce_flavor_type roce_flavor,
-				 dma_addr_t first_frag,
-				 u16 first_frag_len)
+				 struct qed_ll2_tx_pkt_info *pkt)
 {
 	struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain;
 	u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain);
 	struct core_tx_bd *start_bd = NULL;
+	enum core_roce_flavor_type roce_flavor;
+	enum core_tx_dest tx_dest;
 	u16 bd_data = 0, frag_idx;
 
+	roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE
+							     : CORE_RROCE;
+
+	tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW
+						       : CORE_TX_DEST_LB;
+
 	start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
-	start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan);
+	start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
 	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
-		  cpu_to_le16(l4_hdr_offset_w));
+		  cpu_to_le16(pkt->l4_hdr_offset_w));
 	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
-	bd_data |= bd_flags;
+	bd_data |= pkt->bd_flags;
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1);
-	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, num_of_bds);
+	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds);
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor);
 	start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
-	DMA_REGPAIR_LE(start_bd->addr, first_frag);
-	start_bd->nbytes = cpu_to_le16(first_frag_len);
+	DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag);
+	start_bd->nbytes = cpu_to_le16(pkt->first_frag_len);
 
 	DP_VERBOSE(p_hwfn,
 		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
@@ -1648,17 +1653,17 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 		   p_ll2->cid,
 		   p_ll2->conn.conn_type,
 		   prod_idx,
-		   first_frag_len,
-		   num_of_bds,
+		   pkt->first_frag_len,
+		   pkt->num_of_bds,
 		   le32_to_cpu(start_bd->addr.hi),
 		   le32_to_cpu(start_bd->addr.lo));
 
-	if (p_ll2->tx_queue.cur_send_frag_num == num_of_bds)
+	if (p_ll2->tx_queue.cur_send_frag_num == pkt->num_of_bds)
 		return;
 
 	/* Need to provide the packet with additional BDs for frags */
 	for (frag_idx = p_ll2->tx_queue.cur_send_frag_num;
-	     frag_idx < num_of_bds; frag_idx++) {
+	     frag_idx < pkt->num_of_bds; frag_idx++) {
 		struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd;
 
 		*p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
@@ -1726,21 +1731,13 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
 
 int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 			      u8 connection_handle,
-			      u8 num_of_bds,
-			      u16 vlan,
-			      u8 bd_flags,
-			      u16 l4_hdr_offset_w,
-			      enum qed_ll2_tx_dest e_tx_dest,
-			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
-			      dma_addr_t first_frag,
-			      u16 first_frag_len, void *cookie, u8 notify_fw)
+			      struct qed_ll2_tx_pkt_info *pkt,
+			      bool notify_fw)
 {
 	struct qed_ll2_tx_packet *p_curp = NULL;
 	struct qed_ll2_info *p_ll2_conn = NULL;
-	enum core_roce_flavor_type roce_flavor;
 	struct qed_ll2_tx_queue *p_tx;
 	struct qed_chain *p_tx_chain;
-	enum core_tx_dest tx_dest;
 	unsigned long flags;
 	int rc = 0;
 
@@ -1750,7 +1747,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 	p_tx = &p_ll2_conn->tx_queue;
 	p_tx_chain = &p_tx->txq_chain;
 
-	if (num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
+	if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
 		return -EIO;
 
 	spin_lock_irqsave(&p_tx->lock, flags);
@@ -1763,7 +1760,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 	if (!list_empty(&p_tx->free_descq))
 		p_curp = list_first_entry(&p_tx->free_descq,
 					  struct qed_ll2_tx_packet, list_entry);
-	if (p_curp && qed_chain_get_elem_left(p_tx_chain) < num_of_bds)
+	if (p_curp && qed_chain_get_elem_left(p_tx_chain) < pkt->num_of_bds)
 		p_curp = NULL;
 
 	if (!p_curp) {
@@ -1771,26 +1768,10 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 		goto out;
 	}
 
-	tx_dest = e_tx_dest == QED_LL2_TX_DEST_NW ? CORE_TX_DEST_NW :
-						    CORE_TX_DEST_LB;
-	if (qed_roce_flavor == QED_LL2_ROCE) {
-		roce_flavor = CORE_ROCE;
-	} else if (qed_roce_flavor == QED_LL2_RROCE) {
-		roce_flavor = CORE_RROCE;
-	} else {
-		rc = -EINVAL;
-		goto out;
-	}
-
 	/* Prepare packet and BD, and perhaps send a doorbell to FW */
-	qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp,
-				      num_of_bds, first_frag,
-				      first_frag_len, cookie, notify_fw);
-	qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp,
-					 num_of_bds, tx_dest,
-					 vlan, bd_flags, l4_hdr_offset_w,
-					 roce_flavor,
-					 first_frag, first_frag_len);
+	qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, pkt, notify_fw);
+
+	qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, pkt);
 
 	qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn);
 
@@ -2245,6 +2226,7 @@ fail:
 
 static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
 {
+	struct qed_ll2_tx_pkt_info pkt;
 	const skb_frag_t *frag;
 	int rc = -EINVAL, i;
 	dma_addr_t mapping;
@@ -2279,12 +2261,17 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
 		flags |= BIT(CORE_TX_BD_DATA_VLAN_INSERTION_SHIFT);
 	}
 
-	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
-				       cdev->ll2->handle,
-				       1 + skb_shinfo(skb)->nr_frags,
-				       vlan, flags, 0, QED_LL2_TX_DEST_NW,
-				       0 /* RoCE FLAVOR */,
-				       mapping, skb->len, skb, 1);
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags;
+	pkt.vlan = vlan;
+	pkt.bd_flags = flags;
+	pkt.tx_dest = QED_LL2_TX_DEST_NW;
+	pkt.first_frag = mapping;
+	pkt.first_frag_len = skb->len;
+	pkt.cookie = skb;
+
+	rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
+				       &pkt, 1);
 	if (rc)
 		goto err;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index 2c07d0ed971a..96af50b733e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -47,12 +47,6 @@
 
 #define QED_MAX_NUM_OF_LL2_CONNECTIONS                    (4)
 
-enum qed_ll2_roce_flavor_type {
-	QED_LL2_ROCE,
-	QED_LL2_RROCE,
-	MAX_QED_LL2_ROCE_FLAVOR_TYPE
-};
-
 enum qed_ll2_conn_type {
 	QED_LL2_TYPE_FCOE,
 	QED_LL2_TYPE_ISCSI,
@@ -64,12 +58,6 @@ enum qed_ll2_conn_type {
 	MAX_QED_LL2_RX_CONN_TYPE
 };
 
-enum qed_ll2_tx_dest {
-	QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
-	QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
-	QED_LL2_TX_DEST_MAX
-};
-
 struct qed_ll2_rx_packet {
 	struct list_head list_entry;
 	struct core_rx_bd_with_buff_len *rxq_bd;
@@ -216,35 +204,16 @@ int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
  *				      to prepare Tx packet submission to FW.
  *
  * @param p_hwfn
- * @param connection_handle	LL2 connection's handle obtained from
- *				qed_ll2_require_connection
- * @param num_of_bds		a number of requested BD equals a number of
- *				fragments in Tx packet
- * @param vlan			VLAN to insert to packet (if insertion set)
- * @param bd_flags
- * @param l4_hdr_offset_w	L4 Header Offset from start of packet
- *				(in words). This is needed if both l4_csum
- *				and ipv6_ext are set
- * @param e_tx_dest             indicates if the packet is to be transmitted via
- *                              loopback or to the network
- * @param first_frag
- * @param first_frag_len
- * @param cookie
- *
- * @param notify_fw
+ * @param connection_handle
+ * @param pkt - info regarding the tx packet
+ * @param notify_fw - issue doorbell to fw for this packet
  *
  * @return 0 on success, failure otherwise
  */
 int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 			      u8 connection_handle,
-			      u8 num_of_bds,
-			      u16 vlan,
-			      u8 bd_flags,
-			      u16 l4_hdr_offset_w,
-			      enum qed_ll2_tx_dest e_tx_dest,
-			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
-			      dma_addr_t first_frag,
-			      u16 first_frag_len, void *cookie, u8 notify_fw);
+			      struct qed_ll2_tx_pkt_info *pkt,
+			      bool notify_fw);
 
 /**
  * @brief qed_ll2_release_connection -	releases resources
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index b9434b707b08..afc63c0e7c44 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -2937,6 +2937,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev,
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
 	enum qed_ll2_roce_flavor_type qed_roce_flavor;
+	struct qed_ll2_tx_pkt_info ll2_pkt;
 	u8 flags = 0;
 	int rc;
 	int i;
@@ -2955,11 +2956,18 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev,
 		flags |= BIT(CORE_TX_BD_DATA_IP_CSUM_SHIFT);
 
 	/* Tx header */
-	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle,
-				       1 + pkt->n_seg, 0, flags, 0,
-				       QED_LL2_TX_DEST_NW,
-				       qed_roce_flavor, pkt->header.baddr,
-				       pkt->header.len, pkt, 1);
+	memset(&ll2_pkt, 0, sizeof(ll2_pkt));
+	ll2_pkt.num_of_bds = 1 + pkt->n_seg;
+	ll2_pkt.bd_flags = flags;
+	ll2_pkt.tx_dest = QED_LL2_TX_DEST_NW;
+	ll2_pkt.qed_roce_flavor = qed_roce_flavor;
+	ll2_pkt.first_frag = pkt->header.baddr;
+	ll2_pkt.first_frag_len = pkt->header.len;
+	ll2_pkt.cookie = pkt;
+
+	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
+				       roce_ll2->handle,
+				       &ll2_pkt, 1);
 	if (rc) {
 		DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc);
 		return QED_ROCE_TX_HEAD_FAILURE;
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 4fb4666ea879..78d9ed6c6b6f 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -43,6 +43,18 @@
 #include <linux/slab.h>
 #include <linux/qed/qed_if.h>
 
+enum qed_ll2_roce_flavor_type {
+	QED_LL2_ROCE,
+	QED_LL2_RROCE,
+	MAX_QED_LL2_ROCE_FLAVOR_TYPE
+};
+
+enum qed_ll2_tx_dest {
+	QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
+	QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
+	QED_LL2_TX_DEST_MAX
+};
+
 struct qed_ll2_stats {
 	u64 gsi_invalid_hdr;
 	u64 gsi_invalid_pkt_length;
@@ -67,6 +79,21 @@ struct qed_ll2_stats {
 	u64 sent_bcast_pkts;
 };
 
+struct qed_ll2_tx_pkt_info {
+	void *cookie;
+	dma_addr_t first_frag;
+	enum qed_ll2_tx_dest tx_dest;
+	enum qed_ll2_roce_flavor_type qed_roce_flavor;
+	u16 vlan;
+	u16 l4_hdr_offset_w;	/* from start of packet */
+	u16 first_frag_len;
+	u8 num_of_bds;
+	u8 bd_flags;
+	bool enable_ip_cksum;
+	bool enable_l4_cksum;
+	bool calc_ip_len;
+};
+
 #define QED_LL2_UNUSED_HANDLE   (0xff)
 
 struct qed_ll2_cb_ops {
-- 
cgit v1.2.3


From 68be910cd2fa3f58587438af7ce3def6e03731fa Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 9 Jun 2017 17:13:19 +0300
Subject: qed: Revise ll2 Rx completion

This introduces qed_ll2_comp_rx_data as a public struct
and moves handling of Rx packets in LL2 into using it.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 84 ++++++++++++++++++-------------
 include/linux/qed/qed_ll2_if.h            | 26 ++++++++++
 2 files changed, 76 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 0b75f5dd25f8..46b71a668892 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -165,41 +165,33 @@ static void qed_ll2_kill_buffers(struct qed_dev *cdev)
 }
 
 static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
-					u8 connection_handle,
-					struct qed_ll2_rx_packet *p_pkt,
-					struct core_rx_fast_path_cqe *p_cqe,
-					bool b_last_packet)
+				 struct qed_ll2_comp_rx_data *data)
 {
-	u16 packet_length = le16_to_cpu(p_cqe->packet_length);
-	struct qed_ll2_buffer *buffer = p_pkt->cookie;
+	struct qed_ll2_buffer *buffer = data->cookie;
 	struct qed_dev *cdev = p_hwfn->cdev;
-	u16 vlan = le16_to_cpu(p_cqe->vlan);
-	u32 opaque_data_0, opaque_data_1;
-	u8 pad = p_cqe->placement_offset;
 	dma_addr_t new_phys_addr;
 	struct sk_buff *skb;
 	bool reuse = false;
 	int rc = -EINVAL;
 	u8 *new_data;
 
-	opaque_data_0 = le32_to_cpu(p_cqe->opaque_data.data[0]);
-	opaque_data_1 = le32_to_cpu(p_cqe->opaque_data.data[1]);
-
 	DP_VERBOSE(p_hwfn,
 		   (NETIF_MSG_RX_STATUS | QED_MSG_STORAGE | NETIF_MSG_PKTDATA),
 		   "Got an LL2 Rx completion: [Buffer at phys 0x%llx, offset 0x%02x] Length 0x%04x Parse_flags 0x%04x vlan 0x%04x Opaque data [0x%08x:0x%08x]\n",
-		   (u64)p_pkt->rx_buf_addr, pad, packet_length,
-		   le16_to_cpu(p_cqe->parse_flags.flags), vlan,
-		   opaque_data_0, opaque_data_1);
+		   (u64)data->rx_buf_addr,
+		   data->u.placement_offset,
+		   data->length.packet_length,
+		   data->parse_flags,
+		   data->vlan, data->opaque_data_0, data->opaque_data_1);
 
 	if ((cdev->dp_module & NETIF_MSG_PKTDATA) && buffer->data) {
 		print_hex_dump(KERN_INFO, "",
 			       DUMP_PREFIX_OFFSET, 16, 1,
-			       buffer->data, packet_length, false);
+			       buffer->data, data->length.packet_length, false);
 	}
 
 	/* Determine if data is valid */
-	if (packet_length < ETH_HLEN)
+	if (data->length.packet_length < ETH_HLEN)
 		reuse = true;
 
 	/* Allocate a replacement for buffer; Reuse upon failure */
@@ -219,9 +211,9 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
 		goto out_post;
 	}
 
-	pad += NET_SKB_PAD;
-	skb_reserve(skb, pad);
-	skb_put(skb, packet_length);
+	data->u.placement_offset += NET_SKB_PAD;
+	skb_reserve(skb, data->u.placement_offset);
+	skb_put(skb, data->length.packet_length);
 	skb_checksum_none_assert(skb);
 
 	/* Get parital ethernet information instead of eth_type_trans(),
@@ -232,10 +224,12 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
 
 	/* Pass SKB onward */
 	if (cdev->ll2->cbs && cdev->ll2->cbs->rx_cb) {
-		if (vlan)
-			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
+		if (data->vlan)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       data->vlan);
 		cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb,
-				      opaque_data_0, opaque_data_1);
+				      data->opaque_data_0,
+				      data->opaque_data_1);
 	}
 
 	/* Update Buffer information and update FW producer */
@@ -472,33 +466,55 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
-static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
-				      struct qed_ll2_info *p_ll2_conn,
-				      union core_rx_cqe_union *p_cqe,
-				      unsigned long *p_lock_flags,
-				      bool b_last_cqe)
+static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
+				  union core_rx_cqe_union *p_cqe,
+				  struct qed_ll2_comp_rx_data *data)
+{
+	data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_fp.parse_flags.flags);
+	data->length.packet_length =
+	    le16_to_cpu(p_cqe->rx_cqe_fp.packet_length);
+	data->vlan = le16_to_cpu(p_cqe->rx_cqe_fp.vlan);
+	data->opaque_data_0 = le32_to_cpu(p_cqe->rx_cqe_fp.opaque_data.data[0]);
+	data->opaque_data_1 = le32_to_cpu(p_cqe->rx_cqe_fp.opaque_data.data[1]);
+	data->u.placement_offset = p_cqe->rx_cqe_fp.placement_offset;
+}
+
+static int
+qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
+			      struct qed_ll2_info *p_ll2_conn,
+			      union core_rx_cqe_union *p_cqe,
+			      unsigned long *p_lock_flags, bool b_last_cqe)
 {
 	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
 	struct qed_ll2_rx_packet *p_pkt = NULL;
+	struct qed_ll2_comp_rx_data data;
 
 	if (!list_empty(&p_rx->active_descq))
 		p_pkt = list_first_entry(&p_rx->active_descq,
 					 struct qed_ll2_rx_packet, list_entry);
 	if (!p_pkt) {
 		DP_NOTICE(p_hwfn,
-			  "LL2 Rx completion but active_descq is empty\n");
+			  "[%d] LL2 Rx completion but active_descq is empty\n",
+			  p_ll2_conn->conn.conn_type);
+
 		return -EIO;
 	}
 	list_del(&p_pkt->list_entry);
 
+	qed_ll2_rxq_parse_reg(p_hwfn, p_cqe, &data);
 	if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd)
 		DP_NOTICE(p_hwfn,
 			  "Mismatch between active_descq and the LL2 Rx chain\n");
+
 	list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
+	data.connection_handle = p_ll2_conn->my_id;
+	data.cookie = p_pkt->cookie;
+	data.rx_buf_addr = p_pkt->rx_buf_addr;
+	data.b_last_packet = b_last_cqe;
+
 	spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
-	qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
-				    p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
+	qed_ll2b_complete_rx_packet(p_hwfn, &data);
 	spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
 
 	return 0;
@@ -538,9 +554,9 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 							cqe, flags, b_last_cqe);
 			break;
 		case CORE_RX_CQE_TYPE_REGULAR:
-			rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
-							cqe, &flags,
-							b_last_cqe);
+			rc = qed_ll2_rxq_handle_completion(p_hwfn, p_ll2_conn,
+							   cqe, &flags,
+							   b_last_cqe);
 			break;
 		default:
 			rc = -EIO;
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 78d9ed6c6b6f..056ac007dd12 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -79,6 +79,32 @@ struct qed_ll2_stats {
 	u64 sent_bcast_pkts;
 };
 
+struct qed_ll2_comp_rx_data {
+	void *cookie;
+	dma_addr_t rx_buf_addr;
+	u16 parse_flags;
+	u16 vlan;
+	bool b_last_packet;
+	u8 connection_handle;
+
+	union {
+		u16 packet_length;
+		u16 data_length;
+	} length;
+
+	u32 opaque_data_0;
+	u32 opaque_data_1;
+
+	/* GSI only */
+	u32 gid_dst[4];
+	u16 qp_id;
+
+	union {
+		u8 placement_offset;
+		u8 data_length_error;
+	} u;
+};
+
 struct qed_ll2_tx_pkt_info {
 	void *cookie;
 	dma_addr_t first_frag;
-- 
cgit v1.2.3


From 13c547717231aad7e1635004ae3f698e5e78d6d1 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 9 Jun 2017 17:13:20 +0300
Subject: qed: Cleaner seperation of LL2 inputs

A LL2 connection [qed_ll2_info] has a sub-structure of type qed_ll2_conn
that contain various inputs for ll2 acquisition, but the connection also
utilizes a couple of other inputs.

Restructure the input structure to include all the inputs and refactor
the code necessary to populate those.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c  | 379 +++++++++++++++--------------
 drivers/net/ethernet/qlogic/qed/qed_ll2.h  |  42 +---
 drivers/net/ethernet/qlogic/qed/qed_roce.c |  31 +--
 include/linux/qed/qed_ll2_if.h             |  39 +++
 4 files changed, 262 insertions(+), 229 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 46b71a668892..fcf4ea98e0bf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -315,7 +315,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 		list_del(&p_pkt->list_entry);
 		b_last_packet = list_empty(&p_tx->active_descq);
 		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
-		if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
 			struct qed_ooo_buffer *p_buffer;
 
 			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -327,7 +327,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 			b_last_frag =
 				p_tx->cur_completing_bd_idx == p_pkt->bd_used;
 			tx_frag = p_pkt->bds_set[0].tx_frag;
-			if (p_ll2_conn->conn.gsi_enable)
+			if (p_ll2_conn->input.gsi_enable)
 				qed_ll2b_release_tx_gsi_packet(p_hwfn,
 							       p_ll2_conn->
 							       my_id,
@@ -396,7 +396,7 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
 
 		spin_unlock_irqrestore(&p_tx->lock, flags);
 		tx_frag = p_pkt->bds_set[0].tx_frag;
-		if (p_ll2_conn->conn.gsi_enable)
+		if (p_ll2_conn->input.gsi_enable)
 			qed_ll2b_complete_tx_gsi_packet(p_hwfn,
 							p_ll2_conn->my_id,
 							p_pkt->cookie,
@@ -495,7 +495,7 @@ qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
 	if (!p_pkt) {
 		DP_NOTICE(p_hwfn,
 			  "[%d] LL2 Rx completion but active_descq is empty\n",
-			  p_ll2_conn->conn.conn_type);
+			  p_ll2_conn->input.conn_type);
 
 		return -EIO;
 	}
@@ -522,7 +522,7 @@ qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
 
 static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 {
-	struct qed_ll2_info *p_ll2_conn = cookie;
+	struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)cookie;
 	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
 	union core_rx_cqe_union *cqe = NULL;
 	u16 cq_new_idx = 0, cq_old_idx = 0;
@@ -536,7 +536,9 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 	while (cq_new_idx != cq_old_idx) {
 		bool b_last_cqe = (cq_new_idx == cq_old_idx);
 
-		cqe = qed_chain_consume(&p_rx->rcq_chain);
+		cqe =
+		    (union core_rx_cqe_union *)
+		    qed_chain_consume(&p_rx->rcq_chain);
 		cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
 
 		DP_VERBOSE(p_hwfn,
@@ -591,7 +593,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
 		list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-		if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
 			struct qed_ooo_buffer *p_buffer;
 
 			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -606,7 +608,6 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 	}
 }
 
-#if IS_ENABLED(CONFIG_QED_ISCSI)
 static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags)
 {
 	u8 bd_flags = 0;
@@ -782,7 +783,7 @@ qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 		tx_pkt.vlan = p_buffer->vlan;
 		tx_pkt.bd_flags = bd_flags;
 		tx_pkt.l4_hdr_offset_w = l4_hdr_offset_w;
-		tx_pkt.tx_dest = p_ll2_conn->conn.tx_dest;
+		tx_pkt.tx_dest = p_ll2_conn->tx_dest;
 		tx_pkt.first_frag = first_frag;
 		tx_pkt.first_frag_len = p_buffer->packet_length;
 		tx_pkt.cookie = p_buffer;
@@ -895,60 +896,11 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
 	return 0;
 }
 
-static int
-qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_info *p_ll2_info,
-			       u16 rx_num_ooo_buffers, u16 mtu)
-{
-	struct qed_ooo_buffer *p_buf = NULL;
-	void *p_virt;
-	u16 buf_idx;
-	int rc = 0;
-
-	if (p_ll2_info->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO)
-		return rc;
-
-	if (!rx_num_ooo_buffers)
-		return -EINVAL;
-
-	for (buf_idx = 0; buf_idx < rx_num_ooo_buffers; buf_idx++) {
-		p_buf = kzalloc(sizeof(*p_buf), GFP_KERNEL);
-		if (!p_buf) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		p_buf->rx_buffer_size = mtu + 26 + ETH_CACHE_LINE_SIZE;
-		p_buf->rx_buffer_size = (p_buf->rx_buffer_size +
-					 ETH_CACHE_LINE_SIZE - 1) &
-					~(ETH_CACHE_LINE_SIZE - 1);
-		p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-					    p_buf->rx_buffer_size,
-					    &p_buf->rx_buffer_phys_addr,
-					    GFP_KERNEL);
-		if (!p_virt) {
-			kfree(p_buf);
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		p_buf->rx_buffer_virt_addr = p_virt;
-		qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info, p_buf);
-	}
-
-	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
-		   "Allocated [%04x] LL2 OOO buffers [each of size 0x%08x]\n",
-		   rx_num_ooo_buffers, p_buf->rx_buffer_size);
-
-out:
-	return rc;
-}
-
 static void
 qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
 				 struct qed_ll2_info *p_ll2_conn)
 {
-	if (p_ll2_conn->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
 		return;
 
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -960,7 +912,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
 {
 	struct qed_ooo_buffer *p_buffer;
 
-	if (p_ll2_conn->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
 		return;
 
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -987,69 +939,11 @@ static void qed_ll2_stop_ooo(struct qed_dev *cdev)
 	*handle = QED_LL2_UNUSED_HANDLE;
 }
 
-static int qed_ll2_start_ooo(struct qed_dev *cdev,
-			     struct qed_ll2_params *params)
-{
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
-	struct qed_ll2_conn ll2_info = { 0 };
-	int rc;
-
-	ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
-	ll2_info.mtu = params->mtu;
-	ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets;
-	ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping;
-	ll2_info.tx_tc = OOO_LB_TC;
-	ll2_info.tx_dest = CORE_TX_DEST_LB;
-
-	rc = qed_ll2_acquire_connection(hwfn, &ll2_info,
-					QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
-					handle);
-	if (rc) {
-		DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
-		goto out;
-	}
-
-	rc = qed_ll2_establish_connection(hwfn, *handle);
-	if (rc) {
-		DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
-		goto fail;
-	}
-
-	return 0;
-
-fail:
-	qed_ll2_release_connection(hwfn, *handle);
-out:
-	*handle = QED_LL2_UNUSED_HANDLE;
-	return rc;
-}
-#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
-static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn,
-				     void *p_cookie) { return -EINVAL; }
-static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn,
-				     void *p_cookie) { return -EINVAL; }
-static inline int
-qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_info *p_ll2_info,
-			       u16 rx_num_ooo_buffers, u16 mtu) { return 0; }
-static inline void
-qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
-				 struct qed_ll2_info *p_ll2_conn) { return; }
-static inline void
-qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_info *p_ll2_conn) { return; }
-static inline void qed_ll2_stop_ooo(struct qed_dev *cdev) { return; }
-static inline int qed_ll2_start_ooo(struct qed_dev *cdev,
-				    struct qed_ll2_params *params)
-				    { return -EINVAL; }
-#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
-
 static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 				     struct qed_ll2_info *p_ll2_conn,
 				     u8 action_on_error)
 {
-	enum qed_ll2_conn_type conn_type = p_ll2_conn->conn.conn_type;
+	enum qed_ll2_conn_type conn_type = p_ll2_conn->input.conn_type;
 	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
 	struct core_rx_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
@@ -1075,16 +969,15 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->sb_index = p_rx->rx_sb_index;
 	p_ramrod->complete_event_flg = 1;
 
-	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->conn.mtu);
-	DMA_REGPAIR_LE(p_ramrod->bd_base,
-		       p_rx->rxq_chain.p_phys_addr);
+	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->input.mtu);
+	DMA_REGPAIR_LE(p_ramrod->bd_base, p_rx->rxq_chain.p_phys_addr);
 	cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain);
 	p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr,
 		       qed_chain_get_pbl_phys(&p_rx->rcq_chain));
 
-	p_ramrod->drop_ttl0_flg = p_ll2_conn->conn.rx_drop_ttl0_flg;
-	p_ramrod->inner_vlan_removal_en = p_ll2_conn->conn.rx_vlan_removal_en;
+	p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
+	p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
 	p_ramrod->queue_id = p_ll2_conn->queue_id;
 	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
 									  : 1;
@@ -1099,14 +992,14 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	}
 
 	p_ramrod->action_on_error.error_type = action_on_error;
-	p_ramrod->gsi_offload_flag = p_ll2_conn->conn.gsi_enable;
+	p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable;
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 				     struct qed_ll2_info *p_ll2_conn)
 {
-	enum qed_ll2_conn_type conn_type = p_ll2_conn->conn.conn_type;
+	enum qed_ll2_conn_type conn_type = p_ll2_conn->input.conn_type;
 	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
 	struct core_tx_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
@@ -1117,7 +1010,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
 		return 0;
 
-	if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
 		p_ll2_conn->tx_stats_en = 0;
 	else
 		p_ll2_conn->tx_stats_en = 1;
@@ -1138,7 +1031,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 
 	p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn));
 	p_ramrod->sb_index = p_tx->tx_sb_index;
-	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->conn.mtu);
+	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->input.mtu);
 	p_ramrod->stats_en = p_ll2_conn->tx_stats_en;
 	p_ramrod->stats_id = p_ll2_conn->tx_stats_id;
 
@@ -1147,7 +1040,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	pbl_size = qed_chain_get_page_cnt(&p_tx->txq_chain);
 	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 
-	switch (p_ll2_conn->conn.tx_tc) {
+	switch (p_ll2_conn->input.tx_tc) {
 	case LB_TC:
 		pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
 		break;
@@ -1177,7 +1070,8 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 		DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type);
 	}
 
-	p_ramrod->gsi_offload_flag = p_ll2_conn->conn.gsi_enable;
+	p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable;
+
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
@@ -1233,20 +1127,20 @@ static int qed_sp_ll2_tx_queue_stop(struct qed_hwfn *p_hwfn,
 
 static int
 qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
-			      struct qed_ll2_info *p_ll2_info, u16 rx_num_desc)
+			      struct qed_ll2_info *p_ll2_info)
 {
 	struct qed_ll2_rx_packet *p_descq;
 	u32 capacity;
 	int rc = 0;
 
-	if (!rx_num_desc)
+	if (!p_ll2_info->input.rx_num_desc)
 		goto out;
 
 	rc = qed_chain_alloc(p_hwfn->cdev,
 			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
 			     QED_CHAIN_MODE_NEXT_PTR,
 			     QED_CHAIN_CNT_TYPE_U16,
-			     rx_num_desc,
+			     p_ll2_info->input.rx_num_desc,
 			     sizeof(struct core_rx_bd),
 			     &p_ll2_info->rx_queue.rxq_chain);
 	if (rc) {
@@ -1268,7 +1162,7 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
 			     QED_CHAIN_MODE_PBL,
 			     QED_CHAIN_CNT_TYPE_U16,
-			     rx_num_desc,
+			     p_ll2_info->input.rx_num_desc,
 			     sizeof(struct core_rx_fast_path_cqe),
 			     &p_ll2_info->rx_queue.rcq_chain);
 	if (rc) {
@@ -1278,28 +1172,27 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
 
 	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
 		   "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n",
-		   p_ll2_info->conn.conn_type, rx_num_desc);
+		   p_ll2_info->input.conn_type, p_ll2_info->input.rx_num_desc);
 
 out:
 	return rc;
 }
 
 static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
-					 struct qed_ll2_info *p_ll2_info,
-					 u16 tx_num_desc)
+					 struct qed_ll2_info *p_ll2_info)
 {
 	struct qed_ll2_tx_packet *p_descq;
 	u32 capacity;
 	int rc = 0;
 
-	if (!tx_num_desc)
+	if (!p_ll2_info->input.tx_num_desc)
 		goto out;
 
 	rc = qed_chain_alloc(p_hwfn->cdev,
 			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
 			     QED_CHAIN_MODE_PBL,
 			     QED_CHAIN_CNT_TYPE_U16,
-			     tx_num_desc,
+			     p_ll2_info->input.tx_num_desc,
 			     sizeof(struct core_tx_bd),
 			     &p_ll2_info->tx_queue.txq_chain);
 	if (rc)
@@ -1316,28 +1209,95 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
 
 	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
 		   "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n",
-		   p_ll2_info->conn.conn_type, tx_num_desc);
+		   p_ll2_info->input.conn_type, p_ll2_info->input.tx_num_desc);
 
 out:
 	if (rc)
 		DP_NOTICE(p_hwfn,
 			  "Can't allocate memory for Tx LL2 with 0x%08x buffers\n",
-			  tx_num_desc);
+			  p_ll2_info->input.tx_num_desc);
+	return rc;
+}
+
+static int
+qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
+			       struct qed_ll2_info *p_ll2_info, u16 mtu)
+{
+	struct qed_ooo_buffer *p_buf = NULL;
+	void *p_virt;
+	u16 buf_idx;
+	int rc = 0;
+
+	if (p_ll2_info->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+		return rc;
+
+	/* Correct number of requested OOO buffers if needed */
+	if (!p_ll2_info->input.rx_num_ooo_buffers) {
+		u16 num_desc = p_ll2_info->input.rx_num_desc;
+
+		if (!num_desc)
+			return -EINVAL;
+		p_ll2_info->input.rx_num_ooo_buffers = num_desc * 2;
+	}
+
+	for (buf_idx = 0; buf_idx < p_ll2_info->input.rx_num_ooo_buffers;
+	     buf_idx++) {
+		p_buf = kzalloc(sizeof(*p_buf), GFP_KERNEL);
+		if (!p_buf) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		p_buf->rx_buffer_size = mtu + 26 + ETH_CACHE_LINE_SIZE;
+		p_buf->rx_buffer_size = (p_buf->rx_buffer_size +
+					 ETH_CACHE_LINE_SIZE - 1) &
+					~(ETH_CACHE_LINE_SIZE - 1);
+		p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					    p_buf->rx_buffer_size,
+					    &p_buf->rx_buffer_phys_addr,
+					    GFP_KERNEL);
+		if (!p_virt) {
+			kfree(p_buf);
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		p_buf->rx_buffer_virt_addr = p_virt;
+		qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info, p_buf);
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
+		   "Allocated [%04x] LL2 OOO buffers [each of size 0x%08x]\n",
+		   p_ll2_info->input.rx_num_ooo_buffers, p_buf->rx_buffer_size);
+
+out:
 	return rc;
 }
 
+static enum core_error_handle
+qed_ll2_get_error_choice(enum qed_ll2_error_handle err)
+{
+	switch (err) {
+	case QED_LL2_DROP_PACKET:
+		return LL2_DROP_PACKET;
+	case QED_LL2_DO_NOTHING:
+		return LL2_DO_NOTHING;
+	case QED_LL2_ASSERT:
+		return LL2_ASSERT;
+	default:
+		return LL2_DO_NOTHING;
+	}
+}
+
 int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_conn *p_params,
-			       u16 rx_num_desc,
-			       u16 tx_num_desc,
-			       u8 *p_connection_handle)
+			       struct qed_ll2_acquire_data *data)
 {
 	qed_int_comp_cb_t comp_rx_cb, comp_tx_cb;
 	struct qed_ll2_info *p_ll2_info = NULL;
+	u8 i, *p_tx_max;
 	int rc;
-	u8 i;
 
-	if (!p_connection_handle || !p_hwfn->p_ll2_info)
+	if (!data->p_connection_handle || !p_hwfn->p_ll2_info)
 		return -EINVAL;
 
 	/* Find a free connection to be used */
@@ -1356,23 +1316,33 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
 	if (!p_ll2_info)
 		return -EBUSY;
 
-	p_ll2_info->conn = *p_params;
+	memcpy(&p_ll2_info->input, &data->input, sizeof(p_ll2_info->input));
 
-	rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc);
+	p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ?
+			      CORE_TX_DEST_NW : CORE_TX_DEST_LB;
+
+	/* Correct maximum number of Tx BDs */
+	p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet;
+	if (*p_tx_max == 0)
+		*p_tx_max = CORE_LL2_TX_MAX_BDS_PER_PACKET;
+	else
+		*p_tx_max = min_t(u8, *p_tx_max,
+				  CORE_LL2_TX_MAX_BDS_PER_PACKET);
+	rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info);
 	if (rc)
 		goto q_allocate_fail;
 
-	rc = qed_ll2_acquire_connection_tx(p_hwfn, p_ll2_info, tx_num_desc);
+	rc = qed_ll2_acquire_connection_tx(p_hwfn, p_ll2_info);
 	if (rc)
 		goto q_allocate_fail;
 
 	rc = qed_ll2_acquire_connection_ooo(p_hwfn, p_ll2_info,
-					    rx_num_desc * 2, p_params->mtu);
+					    data->input.mtu);
 	if (rc)
 		goto q_allocate_fail;
 
 	/* Register callbacks for the Rx/Tx queues */
-	if (p_params->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+	if (data->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
 		comp_rx_cb = qed_ll2_lb_rxq_completion;
 		comp_tx_cb = qed_ll2_lb_txq_completion;
 	} else {
@@ -1380,7 +1350,7 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
 		comp_tx_cb = qed_ll2_txq_completion;
 	}
 
-	if (rx_num_desc) {
+	if (data->input.rx_num_desc) {
 		qed_int_register_cb(p_hwfn, comp_rx_cb,
 				    &p_hwfn->p_ll2_info[i],
 				    &p_ll2_info->rx_queue.rx_sb_index,
@@ -1388,7 +1358,7 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
 		p_ll2_info->rx_queue.b_cb_registred = true;
 	}
 
-	if (tx_num_desc) {
+	if (data->input.tx_num_desc) {
 		qed_int_register_cb(p_hwfn,
 				    comp_tx_cb,
 				    &p_hwfn->p_ll2_info[i],
@@ -1397,7 +1367,7 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
 		p_ll2_info->tx_queue.b_cb_registred = true;
 	}
 
-	*p_connection_handle = i;
+	*data->p_connection_handle = i;
 	return rc;
 
 q_allocate_fail:
@@ -1408,18 +1378,21 @@ q_allocate_fail:
 static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn,
 					   struct qed_ll2_info *p_ll2_conn)
 {
+	enum qed_ll2_error_handle error_input;
+	enum core_error_handle error_mode;
 	u8 action_on_error = 0;
 
 	if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
 		return 0;
 
 	DIRECT_REG_WR(p_ll2_conn->rx_queue.set_prod_addr, 0x0);
-
+	error_input = p_ll2_conn->input.ai_err_packet_too_big;
+	error_mode = qed_ll2_get_error_choice(error_input);
 	SET_FIELD(action_on_error,
-		  CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG,
-		  p_ll2_conn->conn.ai_err_packet_too_big);
-	SET_FIELD(action_on_error,
-		  CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->conn.ai_err_no_buf);
+		  CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, error_mode);
+	error_input = p_ll2_conn->input.ai_err_no_buf;
+	error_mode = qed_ll2_get_error_choice(error_input);
+	SET_FIELD(action_on_error, CORE_RX_ACTION_ON_ERROR_NO_BUFF, error_mode);
 
 	return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error);
 }
@@ -1503,7 +1476,7 @@ int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
 	qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
 
-	if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_FCOE) {
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
 		qed_llh_add_protocol_filter(p_hwfn, p_ptt,
 					    0x8906, 0,
 					    QED_LLH_FILTER_ETHERTYPE);
@@ -1667,7 +1640,7 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 		   "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n",
 		   p_ll2->queue_id,
 		   p_ll2->cid,
-		   p_ll2->conn.conn_type,
+		   p_ll2->input.conn_type,
 		   prod_idx,
 		   pkt->first_frag_len,
 		   pkt->num_of_bds,
@@ -1742,7 +1715,8 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
 		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
 		   "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n",
 		   p_ll2_conn->queue_id,
-		   p_ll2_conn->cid, p_ll2_conn->conn.conn_type, db_msg.spq_prod);
+		   p_ll2_conn->cid,
+		   p_ll2_conn->input.conn_type, db_msg.spq_prod);
 }
 
 int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
@@ -1866,10 +1840,10 @@ int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
 		qed_ll2_rxq_flush(p_hwfn, connection_handle);
 	}
 
-	if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
 		qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
 
-	if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_FCOE) {
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
 		qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
 					       0x8906, 0,
 					       QED_LLH_FILTER_ETHERTYPE);
@@ -2054,11 +2028,68 @@ static void qed_ll2_register_cb_ops(struct qed_dev *cdev,
 	cdev->ll2->cb_cookie = cookie;
 }
 
+static void qed_ll2_set_conn_data(struct qed_dev *cdev,
+				  struct qed_ll2_acquire_data *data,
+				  struct qed_ll2_params *params,
+				  enum qed_ll2_conn_type conn_type,
+				  u8 *handle, bool lb, u8 gsi_enable)
+{
+	memset(data, 0, sizeof(*data));
+
+	data->input.conn_type = conn_type;
+	data->input.mtu = params->mtu;
+	data->input.rx_num_desc = QED_LL2_RX_SIZE;
+	data->input.rx_drop_ttl0_flg = params->drop_ttl0_packets;
+	data->input.rx_vlan_removal_en = params->rx_vlan_stripping;
+	data->input.tx_num_desc = QED_LL2_TX_SIZE;
+	data->input.gsi_enable = gsi_enable;
+	data->p_connection_handle = handle;
+	if (lb) {
+		data->input.tx_tc = OOO_LB_TC;
+		data->input.tx_dest = QED_LL2_TX_DEST_LB;
+	} else {
+		data->input.tx_tc = 0;
+		data->input.tx_dest = QED_LL2_TX_DEST_NW;
+	}
+}
+
+static int qed_ll2_start_ooo(struct qed_dev *cdev,
+			     struct qed_ll2_params *params)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+	struct qed_ll2_acquire_data data;
+	int rc;
+
+	qed_ll2_set_conn_data(cdev, &data, params,
+			      QED_LL2_TYPE_ISCSI_OOO, handle, true, 0);
+
+	rc = qed_ll2_acquire_connection(hwfn, &data);
+	if (rc) {
+		DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
+		goto out;
+	}
+
+	rc = qed_ll2_establish_connection(hwfn, *handle);
+	if (rc) {
+		DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	qed_ll2_release_connection(hwfn, *handle);
+out:
+	*handle = QED_LL2_UNUSED_HANDLE;
+	return rc;
+}
+
 static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 {
-	struct qed_ll2_conn ll2_info;
 	struct qed_ll2_buffer *buffer, *tmp_buffer;
 	enum qed_ll2_conn_type conn_type;
+	struct qed_ll2_acquire_data data;
 	struct qed_ptt *p_ptt;
 	int rc, i;
 	u8 gsi_enable = 1;
@@ -2106,20 +2137,10 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 		conn_type = QED_LL2_TYPE_TEST;
 	}
 
-	/* Prepare the temporary ll2 information */
-	memset(&ll2_info, 0, sizeof(ll2_info));
-
-	ll2_info.conn_type = conn_type;
-	ll2_info.mtu = params->mtu;
-	ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets;
-	ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping;
-	ll2_info.tx_tc = 0;
-	ll2_info.tx_dest = CORE_TX_DEST_NW;
-	ll2_info.gsi_enable = gsi_enable;
+	qed_ll2_set_conn_data(cdev, &data, params, conn_type,
+			      &cdev->ll2->handle, false, gsi_enable);
 
-	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info,
-					QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
-					&cdev->ll2->handle);
+	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
 	if (rc) {
 		DP_INFO(cdev, "Failed to acquire LL2 connection\n");
 		goto fail;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index 96af50b733e8..3caaad53c958 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -47,17 +47,6 @@
 
 #define QED_MAX_NUM_OF_LL2_CONNECTIONS                    (4)
 
-enum qed_ll2_conn_type {
-	QED_LL2_TYPE_FCOE,
-	QED_LL2_TYPE_ISCSI,
-	QED_LL2_TYPE_TEST,
-	QED_LL2_TYPE_ISCSI_OOO,
-	QED_LL2_TYPE_RESERVED2,
-	QED_LL2_TYPE_ROCE,
-	QED_LL2_TYPE_RESERVED3,
-	MAX_QED_LL2_RX_CONN_TYPE
-};
-
 struct qed_ll2_rx_packet {
 	struct list_head list_entry;
 	struct core_rx_bd_with_buff_len *rxq_bd;
@@ -123,27 +112,17 @@ struct qed_ll2_tx_queue {
 	bool b_completing_packet;
 };
 
-struct qed_ll2_conn {
-	enum qed_ll2_conn_type conn_type;
-	u16 mtu;
-	u8 rx_drop_ttl0_flg;
-	u8 rx_vlan_removal_en;
-	u8 tx_tc;
-	enum core_tx_dest tx_dest;
-	enum core_error_handle ai_err_packet_too_big;
-	enum core_error_handle ai_err_no_buf;
-	u8 gsi_enable;
-};
-
 struct qed_ll2_info {
 	/* Lock protecting the state of LL2 */
 	struct mutex mutex;
-	struct qed_ll2_conn conn;
+
+	struct qed_ll2_acquire_data_inputs input;
 	u32 cid;
 	u8 my_id;
 	u8 queue_id;
 	u8 tx_stats_id;
 	bool b_active;
+	enum core_tx_dest tx_dest;
 	u8 tx_stats_en;
 	struct qed_ll2_rx_queue rx_queue;
 	struct qed_ll2_tx_queue tx_queue;
@@ -154,20 +133,13 @@ struct qed_ll2_info {
  *        starts rx & tx (if relevant) queues pair. Provides
  *        connecion handler as output parameter.
  *
- * @param p_hwfn
- * @param p_params		Contain various configuration properties
- * @param rx_num_desc
- * @param tx_num_desc
- *
- * @param p_connection_handle  Output container for LL2 connection's handle
  *
- * @return 0 on success, failure otherwise
+ * @param p_hwfn
+ * @param data - describes connection parameters
+ * @return int
  */
 int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_conn *p_params,
-			       u16 rx_num_desc,
-			       u16 tx_num_desc,
-			       u8 *p_connection_handle);
+			       struct qed_ll2_acquire_data *data);
 
 /**
  * @brief qed_ll2_establish_connection - start previously
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index afc63c0e7c44..33abcf110260 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -2818,7 +2818,7 @@ static int qed_roce_ll2_start(struct qed_dev *cdev,
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_roce_ll2_info *roce_ll2;
-	struct qed_ll2_conn ll2_params;
+	struct qed_ll2_acquire_data data;
 	int rc;
 
 	if (!params) {
@@ -2844,25 +2844,26 @@ static int qed_roce_ll2_start(struct qed_dev *cdev,
 		DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n");
 		return -ENOMEM;
 	}
+
 	roce_ll2->handle = QED_LL2_UNUSED_HANDLE;
 	roce_ll2->cbs = params->cbs;
 	roce_ll2->cb_cookie = params->cb_cookie;
 	mutex_init(&roce_ll2->lock);
 
-	memset(&ll2_params, 0, sizeof(ll2_params));
-	ll2_params.conn_type = QED_LL2_TYPE_ROCE;
-	ll2_params.mtu = params->mtu;
-	ll2_params.rx_drop_ttl0_flg = true;
-	ll2_params.rx_vlan_removal_en = false;
-	ll2_params.tx_dest = CORE_TX_DEST_NW;
-	ll2_params.ai_err_packet_too_big = LL2_DROP_PACKET;
-	ll2_params.ai_err_no_buf = LL2_DROP_PACKET;
-	ll2_params.gsi_enable = true;
-
-	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_params,
-					params->max_rx_buffers,
-					params->max_tx_buffers,
-					&roce_ll2->handle);
+	memset(&data, 0, sizeof(data));
+	data.input.conn_type = QED_LL2_TYPE_ROCE;
+	data.input.mtu = params->mtu;
+	data.input.rx_num_desc = params->max_rx_buffers;
+	data.input.tx_num_desc = params->max_tx_buffers;
+	data.input.rx_drop_ttl0_flg = true;
+	data.input.rx_vlan_removal_en = false;
+	data.input.tx_dest = QED_LL2_TX_DEST_NW;
+	data.input.ai_err_packet_too_big = LL2_DROP_PACKET;
+	data.input.ai_err_no_buf = LL2_DROP_PACKET;
+	data.p_connection_handle = &roce_ll2->handle;
+	data.input.gsi_enable = true;
+
+	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
 	if (rc) {
 		DP_ERR(cdev,
 		       "qed roce ll2 start: failed to acquire LL2 connection (rc=%d)\n",
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 056ac007dd12..a1f63eca430a 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -43,6 +43,17 @@
 #include <linux/slab.h>
 #include <linux/qed/qed_if.h>
 
+enum qed_ll2_conn_type {
+	QED_LL2_TYPE_FCOE,
+	QED_LL2_TYPE_ISCSI,
+	QED_LL2_TYPE_TEST,
+	QED_LL2_TYPE_ISCSI_OOO,
+	QED_LL2_TYPE_RESERVED2,
+	QED_LL2_TYPE_ROCE,
+	QED_LL2_TYPE_RESERVED3,
+	MAX_QED_LL2_RX_CONN_TYPE
+};
+
 enum qed_ll2_roce_flavor_type {
 	QED_LL2_ROCE,
 	QED_LL2_RROCE,
@@ -55,6 +66,12 @@ enum qed_ll2_tx_dest {
 	QED_LL2_TX_DEST_MAX
 };
 
+enum qed_ll2_error_handle {
+	QED_LL2_DROP_PACKET,
+	QED_LL2_DO_NOTHING,
+	QED_LL2_ASSERT,
+};
+
 struct qed_ll2_stats {
 	u64 gsi_invalid_hdr;
 	u64 gsi_invalid_pkt_length;
@@ -105,6 +122,28 @@ struct qed_ll2_comp_rx_data {
 	} u;
 };
 
+struct qed_ll2_acquire_data_inputs {
+	enum qed_ll2_conn_type conn_type;
+	u16 mtu;
+	u16 rx_num_desc;
+	u16 rx_num_ooo_buffers;
+	u8 rx_drop_ttl0_flg;
+	u8 rx_vlan_removal_en;
+	u16 tx_num_desc;
+	u8 tx_max_bds_per_packet;
+	u8 tx_tc;
+	enum qed_ll2_tx_dest tx_dest;
+	enum qed_ll2_error_handle ai_err_packet_too_big;
+	enum qed_ll2_error_handle ai_err_no_buf;
+	u8 gsi_enable;
+};
+
+struct qed_ll2_acquire_data {
+	struct qed_ll2_acquire_data_inputs input;
+	/* Output container for LL2 connection's handle */
+	u8 *p_connection_handle;
+};
+
 struct qed_ll2_tx_pkt_info {
 	void *cookie;
 	dma_addr_t first_frag;
-- 
cgit v1.2.3


From 0518c12f1f79dc2f2020836974c577404e42ae89 Mon Sep 17 00:00:00 2001
From: Michal Kalderon <Michal.Kalderon@cavium.com>
Date: Fri, 9 Jun 2017 17:13:22 +0300
Subject: qed*: LL2 callback operations

LL2 today is interrupt driven - when tx/rx completion arrives [or any
other indication], qed needs to operate on the connection and pass
the information to the protocol-driver [or internal qed consumer].
Since we have several flavors of ll2 employeed by the driver,
each handler needs to do an if-else to determine the right functionality
to use based on the connection type.

In order to make things more scalable [given that we're going to add
additional types of ll2 flavors] move the infrastrucutre into using
a callback-based approach - the callbacks would be provided as part
of the connection's initialization parameters.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/main.c          |   6 +-
 drivers/infiniband/hw/qedr/qedr.h          |   2 +
 drivers/infiniband/hw/qedr/qedr_cm.c       | 238 ++++++++++++++++------
 drivers/net/ethernet/qlogic/qed/qed.h      |   1 -
 drivers/net/ethernet/qlogic/qed/qed_ll2.c  | 197 +++++++++----------
 drivers/net/ethernet/qlogic/qed/qed_ll2.h  |  34 ++--
 drivers/net/ethernet/qlogic/qed/qed_roce.c | 304 ++---------------------------
 drivers/net/ethernet/qlogic/qed/qed_roce.h |  43 ----
 include/linux/qed/qed_ll2_if.h             |  36 ++++
 include/linux/qed/qed_roce_if.h            |  80 +++-----
 10 files changed, 384 insertions(+), 557 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 6a72095d6c7a..485c1fef238b 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -886,9 +886,9 @@ static void qedr_mac_address_change(struct qedr_dev *dev)
 	memcpy(&sgid->raw[8], guid, sizeof(guid));
 
 	/* Update LL2 */
-	rc = dev->ops->roce_ll2_set_mac_filter(dev->cdev,
-					       dev->gsi_ll2_mac_address,
-					       dev->ndev->dev_addr);
+	rc = dev->ops->ll2_set_mac_filter(dev->cdev,
+					  dev->gsi_ll2_mac_address,
+					  dev->ndev->dev_addr);
 
 	ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
 
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index aa08c76a4245..80333ec2c8b6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -150,6 +150,8 @@ struct qedr_dev {
 	u32			dp_module;
 	u8			dp_level;
 	u8			num_hwfns;
+	u8			gsi_ll2_handle;
+
 	uint			wq_multiplier;
 	u8			gsi_ll2_mac_address[ETH_ALEN];
 	int			gsi_qp_created;
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index d86dbe814d98..eb3dce72fc21 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -64,9 +64,14 @@ void qedr_store_gsi_qp_cq(struct qedr_dev *dev, struct qedr_qp *qp,
 	dev->gsi_qp = qp;
 }
 
-void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
+void qedr_ll2_complete_tx_packet(void *cxt,
+				 u8 connection_handle,
+				 void *cookie,
+				 dma_addr_t first_frag_addr,
+				 bool b_last_fragment, bool b_last_packet)
 {
-	struct qedr_dev *dev = (struct qedr_dev *)_qdev;
+	struct qedr_dev *dev = (struct qedr_dev *)cxt;
+	struct qed_roce_ll2_packet *pkt = cookie;
 	struct qedr_cq *cq = dev->gsi_sqcq;
 	struct qedr_qp *qp = dev->gsi_qp;
 	unsigned long flags;
@@ -88,20 +93,26 @@ void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
 		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
 }
 
-void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt,
-		    struct qed_roce_ll2_rx_params *params)
+void qedr_ll2_complete_rx_packet(void *cxt,
+				 struct qed_ll2_comp_rx_data *data)
 {
-	struct qedr_dev *dev = (struct qedr_dev *)_dev;
+	struct qedr_dev *dev = (struct qedr_dev *)cxt;
 	struct qedr_cq *cq = dev->gsi_rqcq;
 	struct qedr_qp *qp = dev->gsi_qp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->q_lock, flags);
 
-	qp->rqe_wr_id[qp->rq.gsi_cons].rc = params->rc;
-	qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = params->vlan_id;
-	qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = pkt->payload[0].len;
-	ether_addr_copy(qp->rqe_wr_id[qp->rq.gsi_cons].smac, params->smac);
+	qp->rqe_wr_id[qp->rq.gsi_cons].rc = data->u.data_length_error ?
+		-EINVAL : 0;
+	qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan;
+	/* note: length stands for data length i.e. GRH is excluded */
+	qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length =
+		data->length.data_length;
+	*((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) =
+		ntohl(data->opaque_data_0);
+	*((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) =
+		ntohs((u16)data->opaque_data_1);
 
 	qedr_inc_sw_gsi_cons(&qp->rq);
 
@@ -111,6 +122,14 @@ void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt,
 		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
 }
 
+void qedr_ll2_release_rx_packet(void *cxt,
+				u8 connection_handle,
+				void *cookie,
+				dma_addr_t rx_buf_addr, bool b_last_packet)
+{
+	/* Do nothing... */
+}
+
 static void qedr_destroy_gsi_cq(struct qedr_dev *dev,
 				struct ib_qp_init_attr *attrs)
 {
@@ -159,27 +178,159 @@ static inline int qedr_check_gsi_qp_attrs(struct qedr_dev *dev,
 	return 0;
 }
 
+static int qedr_ll2_post_tx(struct qedr_dev *dev,
+			    struct qed_roce_ll2_packet *pkt)
+{
+	enum qed_ll2_roce_flavor_type roce_flavor;
+	struct qed_ll2_tx_pkt_info ll2_tx_pkt;
+	int rc;
+	int i;
+
+	memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt));
+
+	roce_flavor = (pkt->roce_mode == ROCE_V1) ?
+	    QED_LL2_ROCE : QED_LL2_RROCE;
+
+	if (pkt->roce_mode == ROCE_V2_IPV4)
+		ll2_tx_pkt.enable_ip_cksum = 1;
+
+	ll2_tx_pkt.num_of_bds = 1 /* hdr */  + pkt->n_seg;
+	ll2_tx_pkt.vlan = 0;
+	ll2_tx_pkt.tx_dest = pkt->tx_dest;
+	ll2_tx_pkt.qed_roce_flavor = roce_flavor;
+	ll2_tx_pkt.first_frag = pkt->header.baddr;
+	ll2_tx_pkt.first_frag_len = pkt->header.len;
+	ll2_tx_pkt.cookie = pkt;
+
+	/* tx header */
+	rc = dev->ops->ll2_prepare_tx_packet(dev->rdma_ctx,
+					     dev->gsi_ll2_handle,
+					     &ll2_tx_pkt, 1);
+	if (rc) {
+		/* TX failed while posting header - release resources */
+		dma_free_coherent(&dev->pdev->dev, pkt->header.len,
+				  pkt->header.vaddr, pkt->header.baddr);
+		kfree(pkt);
+
+		DP_ERR(dev, "roce ll2 tx: header failed (rc=%d)\n", rc);
+		return rc;
+	}
+
+	/* tx payload */
+	for (i = 0; i < pkt->n_seg; i++) {
+		rc = dev->ops->ll2_set_fragment_of_tx_packet(
+			dev->rdma_ctx,
+			dev->gsi_ll2_handle,
+			pkt->payload[i].baddr,
+			pkt->payload[i].len);
+
+		if (rc) {
+			/* if failed not much to do here, partial packet has
+			 * been posted we can't free memory, will need to wait
+			 * for completion
+			 */
+			DP_ERR(dev, "ll2 tx: payload failed (rc=%d)\n", rc);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+int qedr_ll2_stop(struct qedr_dev *dev)
+{
+	int rc;
+
+	if (dev->gsi_ll2_handle == QED_LL2_UNUSED_HANDLE)
+		return 0;
+
+	/* remove LL2 MAC address filter */
+	rc = dev->ops->ll2_set_mac_filter(dev->cdev,
+					  dev->gsi_ll2_mac_address, NULL);
+
+	rc = dev->ops->ll2_terminate_connection(dev->rdma_ctx,
+						dev->gsi_ll2_handle);
+	if (rc)
+		DP_ERR(dev, "Failed to terminate LL2 connection (rc=%d)\n", rc);
+
+	dev->ops->ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+	dev->gsi_ll2_handle = QED_LL2_UNUSED_HANDLE;
+
+	return rc;
+}
+
+int qedr_ll2_start(struct qedr_dev *dev,
+		   struct ib_qp_init_attr *attrs, struct qedr_qp *qp)
+{
+	struct qed_ll2_acquire_data data;
+	struct qed_ll2_cbs cbs;
+	int rc;
+
+	/* configure and start LL2 */
+	cbs.rx_comp_cb = qedr_ll2_complete_rx_packet;
+	cbs.tx_comp_cb = qedr_ll2_complete_tx_packet;
+	cbs.rx_release_cb = qedr_ll2_release_rx_packet;
+	cbs.tx_release_cb = qedr_ll2_complete_tx_packet;
+	cbs.cookie = dev;
+
+	memset(&data, 0, sizeof(data));
+	data.input.conn_type = QED_LL2_TYPE_ROCE;
+	data.input.mtu = dev->ndev->mtu;
+	data.input.rx_num_desc = attrs->cap.max_recv_wr;
+	data.input.rx_drop_ttl0_flg = true;
+	data.input.rx_vlan_removal_en = false;
+	data.input.tx_num_desc = attrs->cap.max_send_wr;
+	data.input.tx_tc = 0;
+	data.input.tx_dest = QED_LL2_TX_DEST_NW;
+	data.input.ai_err_packet_too_big = QED_LL2_DROP_PACKET;
+	data.input.ai_err_no_buf = QED_LL2_DROP_PACKET;
+	data.input.gsi_enable = 1;
+	data.p_connection_handle = &dev->gsi_ll2_handle;
+	data.cbs = &cbs;
+
+	rc = dev->ops->ll2_acquire_connection(dev->rdma_ctx, &data);
+	if (rc) {
+		DP_ERR(dev,
+		       "ll2 start: failed to acquire LL2 connection (rc=%d)\n",
+		       rc);
+		return rc;
+	}
+
+	rc = dev->ops->ll2_establish_connection(dev->rdma_ctx,
+						dev->gsi_ll2_handle);
+	if (rc) {
+		DP_ERR(dev,
+		       "ll2 start: failed to establish LL2 connection (rc=%d)\n",
+		       rc);
+		goto err1;
+	}
+
+	rc = dev->ops->ll2_set_mac_filter(dev->cdev, NULL, dev->ndev->dev_addr);
+	if (rc)
+		goto err2;
+
+	return 0;
+
+err2:
+	dev->ops->ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+err1:
+	dev->ops->ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+	return rc;
+}
+
 struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
 				 struct ib_qp_init_attr *attrs,
 				 struct qedr_qp *qp)
 {
-	struct qed_roce_ll2_params ll2_params;
 	int rc;
 
 	rc = qedr_check_gsi_qp_attrs(dev, attrs);
 	if (rc)
 		return ERR_PTR(rc);
 
-	/* configure and start LL2 */
-	memset(&ll2_params, 0, sizeof(ll2_params));
-	ll2_params.max_tx_buffers = attrs->cap.max_send_wr;
-	ll2_params.max_rx_buffers = attrs->cap.max_recv_wr;
-	ll2_params.cbs.tx_cb = qedr_ll2_tx_cb;
-	ll2_params.cbs.rx_cb = qedr_ll2_rx_cb;
-	ll2_params.cb_cookie = (void *)dev;
-	ll2_params.mtu = dev->ndev->mtu;
-	ether_addr_copy(ll2_params.mac_address, dev->ndev->dev_addr);
-	rc = dev->ops->roce_ll2_start(dev->cdev, &ll2_params);
+	rc = qedr_ll2_start(dev, attrs, qp);
 	if (rc) {
 		DP_ERR(dev, "create gsi qp: failed on ll2 start. rc=%d\n", rc);
 		return ERR_PTR(rc);
@@ -214,7 +365,7 @@ struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
 err:
 	kfree(qp->rqe_wr_id);
 
-	rc = dev->ops->roce_ll2_stop(dev->cdev);
+	rc = qedr_ll2_stop(dev);
 	if (rc)
 		DP_ERR(dev, "create gsi qp: failed destroy on create\n");
 
@@ -223,15 +374,7 @@ err:
 
 int qedr_destroy_gsi_qp(struct qedr_dev *dev)
 {
-	int rc;
-
-	rc = dev->ops->roce_ll2_stop(dev->cdev);
-	if (rc)
-		DP_ERR(dev, "destroy gsi qp: failed (rc=%d)\n", rc);
-	else
-		DP_DEBUG(dev, QEDR_MSG_GSI, "destroy gsi qp: success\n");
-
-	return rc;
+	return qedr_ll2_stop(dev);
 }
 
 #define QEDR_MAX_UD_HEADER_SIZE	(100)
@@ -421,7 +564,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 {
 	struct qed_roce_ll2_packet *pkt = NULL;
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
-	struct qed_roce_ll2_tx_params params;
 	struct qedr_dev *dev = qp->dev;
 	unsigned long flags;
 	int rc;
@@ -449,8 +591,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		goto err;
 	}
 
-	memset(&params, 0, sizeof(params));
-
 	spin_lock_irqsave(&qp->q_lock, flags);
 
 	rc = qedr_gsi_build_packet(dev, qp, wr, &pkt);
@@ -459,7 +599,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		goto err;
 	}
 
-	rc = dev->ops->roce_ll2_tx(dev->cdev, pkt, &params);
+	rc = qedr_ll2_post_tx(dev, pkt);
+
 	if (!rc) {
 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
 		qedr_inc_sw_prod(&qp->sq);
@@ -467,17 +608,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			 "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n",
 			 wr->opcode, in_irq(), irqs_disabled(), wr->wr_id);
 	} else {
-		if (rc == QED_ROCE_TX_HEAD_FAILURE) {
-			/* TX failed while posting header - release resources */
-			dma_free_coherent(&dev->pdev->dev, pkt->header.len,
-					  pkt->header.vaddr, pkt->header.baddr);
-			kfree(pkt);
-		} else if (rc == QED_ROCE_TX_FRAG_FAILURE) {
-			/* NTD since TX failed while posting a fragment. We will
-			 * release the resources on TX callback
-			 */
-		}
-
 		DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc);
 		rc = -EAGAIN;
 		*bad_wr = wr;
@@ -504,10 +634,8 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
-	struct qed_roce_ll2_buffer buf;
 	unsigned long flags;
-	int status = 0;
-	int rc;
+	int rc = 0;
 
 	if ((qp->state != QED_ROCE_QP_STATE_RTR) &&
 	    (qp->state != QED_ROCE_QP_STATE_RTS)) {
@@ -518,8 +646,6 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		return -EINVAL;
 	}
 
-	memset(&buf, 0, sizeof(buf));
-
 	spin_lock_irqsave(&qp->q_lock, flags);
 
 	while (wr) {
@@ -530,10 +656,12 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			goto err;
 		}
 
-		buf.baddr = wr->sg_list[0].addr;
-		buf.len = wr->sg_list[0].length;
-
-		rc = dev->ops->roce_ll2_post_rx_buffer(dev->cdev, &buf, 0, 1);
+		rc = dev->ops->ll2_post_rx_buffer(dev->rdma_ctx,
+						  dev->gsi_ll2_handle,
+						  wr->sg_list[0].addr,
+						  wr->sg_list[0].length,
+						  0 /* cookie */,
+						  1 /* notify_fw */);
 		if (rc) {
 			DP_ERR(dev,
 			       "gsi post recv: failed to post rx buffer (rc=%d)\n",
@@ -553,7 +681,7 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 
-	return status;
+	return rc;
 err:
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 	*bad_wr = wr;
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index d7afc42f766c..14b08ee9e3ad 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -552,7 +552,6 @@ struct qed_hwfn {
 #endif
 
 	struct z_stream_s		*stream;
-	struct qed_roce_ll2_info	*ll2;
 };
 
 struct pci_params {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index f3aad61e6394..b222b2b471e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -89,13 +89,14 @@ struct qed_ll2_buffer {
 	dma_addr_t phys_addr;
 };
 
-static void qed_ll2b_complete_tx_packet(struct qed_hwfn *p_hwfn,
+static void qed_ll2b_complete_tx_packet(void *cxt,
 					u8 connection_handle,
 					void *cookie,
 					dma_addr_t first_frag_addr,
 					bool b_last_fragment,
 					bool b_last_packet)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_dev *cdev = p_hwfn->cdev;
 	struct sk_buff *skb = cookie;
 
@@ -164,9 +165,9 @@ static void qed_ll2_kill_buffers(struct qed_dev *cdev)
 		qed_ll2_dealloc_buffer(cdev, buffer);
 }
 
-static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
-				 struct qed_ll2_comp_rx_data *data)
+void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_buffer *buffer = data->cookie;
 	struct qed_dev *cdev = p_hwfn->cdev;
 	dma_addr_t new_phys_addr;
@@ -327,21 +328,12 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 			b_last_frag =
 				p_tx->cur_completing_bd_idx == p_pkt->bd_used;
 			tx_frag = p_pkt->bds_set[0].tx_frag;
-			if (p_ll2_conn->input.gsi_enable)
-				qed_ll2b_release_tx_gsi_packet(p_hwfn,
-							       p_ll2_conn->
-							       my_id,
-							       p_pkt->cookie,
-							       tx_frag,
-							       b_last_frag,
-							       b_last_packet);
-			else
-				qed_ll2b_complete_tx_packet(p_hwfn,
-							    p_ll2_conn->my_id,
-							    p_pkt->cookie,
-							    tx_frag,
-							    b_last_frag,
-							    b_last_packet);
+			p_ll2_conn->cbs.tx_release_cb(p_ll2_conn->cbs.cookie,
+						      p_ll2_conn->my_id,
+						      p_pkt->cookie,
+						      tx_frag,
+						      b_last_frag,
+						      b_last_packet);
 		}
 	}
 }
@@ -354,7 +346,6 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
 	struct qed_ll2_tx_packet *p_pkt;
 	bool b_last_frag = false;
 	unsigned long flags;
-	dma_addr_t tx_frag;
 	int rc = -EINVAL;
 
 	spin_lock_irqsave(&p_tx->lock, flags);
@@ -395,19 +386,13 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
 		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
 
 		spin_unlock_irqrestore(&p_tx->lock, flags);
-		tx_frag = p_pkt->bds_set[0].tx_frag;
-		if (p_ll2_conn->input.gsi_enable)
-			qed_ll2b_complete_tx_gsi_packet(p_hwfn,
-							p_ll2_conn->my_id,
-							p_pkt->cookie,
-							tx_frag,
-							b_last_frag, !num_bds);
-		else
-			qed_ll2b_complete_tx_packet(p_hwfn,
-						    p_ll2_conn->my_id,
-						    p_pkt->cookie,
-						    tx_frag,
-						    b_last_frag, !num_bds);
+
+		p_ll2_conn->cbs.tx_comp_cb(p_ll2_conn->cbs.cookie,
+					   p_ll2_conn->my_id,
+					   p_pkt->cookie,
+					   p_pkt->bds_set[0].tx_frag,
+					   b_last_frag, !num_bds);
+
 		spin_lock_irqsave(&p_tx->lock, flags);
 	}
 
@@ -418,52 +403,16 @@ out:
 	return rc;
 }
 
-static int
-qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
-			   struct qed_ll2_info *p_ll2_info,
-			   union core_rx_cqe_union *p_cqe,
-			   unsigned long lock_flags, bool b_last_cqe)
+static void qed_ll2_rxq_parse_gsi(struct qed_hwfn *p_hwfn,
+				  union core_rx_cqe_union *p_cqe,
+				  struct qed_ll2_comp_rx_data *data)
 {
-	struct qed_ll2_rx_queue *p_rx = &p_ll2_info->rx_queue;
-	struct qed_ll2_rx_packet *p_pkt = NULL;
-	u16 packet_length, parse_flags, vlan;
-	u32 src_mac_addrhi;
-	u16 src_mac_addrlo;
-
-	if (!list_empty(&p_rx->active_descq))
-		p_pkt = list_first_entry(&p_rx->active_descq,
-					 struct qed_ll2_rx_packet, list_entry);
-	if (!p_pkt) {
-		DP_NOTICE(p_hwfn,
-			  "GSI Rx completion but active_descq is empty\n");
-		return -EIO;
-	}
-
-	list_del(&p_pkt->list_entry);
-	parse_flags = le16_to_cpu(p_cqe->rx_cqe_gsi.parse_flags.flags);
-	packet_length = le16_to_cpu(p_cqe->rx_cqe_gsi.data_length);
-	vlan = le16_to_cpu(p_cqe->rx_cqe_gsi.vlan);
-	src_mac_addrhi = le32_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrhi);
-	src_mac_addrlo = le16_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrlo);
-	if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd)
-		DP_NOTICE(p_hwfn,
-			  "Mismatch between active_descq and the LL2 Rx chain\n");
-	list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
-
-	spin_unlock_irqrestore(&p_rx->lock, lock_flags);
-	qed_ll2b_complete_rx_gsi_packet(p_hwfn,
-					p_ll2_info->my_id,
-					p_pkt->cookie,
-					p_pkt->rx_buf_addr,
-					packet_length,
-					p_cqe->rx_cqe_gsi.data_length_error,
-					parse_flags,
-					vlan,
-					src_mac_addrhi,
-					src_mac_addrlo, b_last_cqe);
-	spin_lock_irqsave(&p_rx->lock, lock_flags);
-
-	return 0;
+	data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_gsi.parse_flags.flags);
+	data->length.data_length = le16_to_cpu(p_cqe->rx_cqe_gsi.data_length);
+	data->vlan = le16_to_cpu(p_cqe->rx_cqe_gsi.vlan);
+	data->opaque_data_0 = le32_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrhi);
+	data->opaque_data_1 = le16_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrlo);
+	data->u.data_length_error = p_cqe->rx_cqe_gsi.data_length_error;
 }
 
 static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
@@ -501,7 +450,10 @@ qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
 	}
 	list_del(&p_pkt->list_entry);
 
-	qed_ll2_rxq_parse_reg(p_hwfn, p_cqe, &data);
+	if (p_cqe->rx_cqe_sp.type == CORE_RX_CQE_TYPE_REGULAR)
+		qed_ll2_rxq_parse_reg(p_hwfn, p_cqe, &data);
+	else
+		qed_ll2_rxq_parse_gsi(p_hwfn, p_cqe, &data);
 	if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd)
 		DP_NOTICE(p_hwfn,
 			  "Mismatch between active_descq and the LL2 Rx chain\n");
@@ -514,7 +466,8 @@ qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
 	data.b_last_packet = b_last_cqe;
 
 	spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
-	qed_ll2b_complete_rx_packet(p_hwfn, &data);
+	p_ll2_conn->cbs.rx_comp_cb(p_ll2_conn->cbs.cookie, &data);
+
 	spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
 
 	return 0;
@@ -552,9 +505,6 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 			rc = -EINVAL;
 			break;
 		case CORE_RX_CQE_TYPE_GSI_OFFLOAD:
-			rc = qed_ll2_rxq_completion_gsi(p_hwfn, p_ll2_conn,
-							cqe, flags, b_last_cqe);
-			break;
 		case CORE_RX_CQE_TYPE_REGULAR:
 			rc = qed_ll2_rxq_handle_completion(p_hwfn, p_ll2_conn,
 							   cqe, &flags,
@@ -1244,6 +1194,23 @@ out:
 	return rc;
 }
 
+static int
+qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs)
+{
+	if (!cbs || (!cbs->rx_comp_cb ||
+		     !cbs->rx_release_cb ||
+		     !cbs->tx_comp_cb || !cbs->tx_release_cb || !cbs->cookie))
+		return -EINVAL;
+
+	p_ll2_info->cbs.rx_comp_cb = cbs->rx_comp_cb;
+	p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb;
+	p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb;
+	p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb;
+	p_ll2_info->cbs.cookie = cbs->cookie;
+
+	return 0;
+}
+
 static enum core_error_handle
 qed_ll2_get_error_choice(enum qed_ll2_error_handle err)
 {
@@ -1259,9 +1226,9 @@ qed_ll2_get_error_choice(enum qed_ll2_error_handle err)
 	}
 }
 
-int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_acquire_data *data)
+int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	qed_int_comp_cb_t comp_rx_cb, comp_tx_cb;
 	struct qed_ll2_info *p_ll2_info = NULL;
 	u8 i, *p_tx_max;
@@ -1298,6 +1265,13 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
 	else
 		*p_tx_max = min_t(u8, *p_tx_max,
 				  CORE_LL2_TX_MAX_BDS_PER_PACKET);
+
+	rc = qed_ll2_set_cbs(p_ll2_info, data->cbs);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Invalid callback functions\n");
+		goto q_allocate_fail;
+	}
+
 	rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info);
 	if (rc)
 		goto q_allocate_fail;
@@ -1377,8 +1351,10 @@ qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
 	qed_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn);
 }
-int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
+
+int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn;
 	struct qed_ll2_rx_queue *p_rx;
 	struct qed_ll2_tx_queue *p_tx;
@@ -1505,11 +1481,12 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn,
 	DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod));
 }
 
-int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
+int qed_ll2_post_rx_buffer(void *cxt,
 			   u8 connection_handle,
 			   dma_addr_t addr,
 			   u16 buf_len, void *cookie, u8 notify_fw)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct core_rx_bd_with_buff_len *p_curb = NULL;
 	struct qed_ll2_rx_packet *p_curp = NULL;
 	struct qed_ll2_info *p_ll2_conn;
@@ -1699,11 +1676,12 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
 		   p_ll2_conn->input.conn_type, db_msg.spq_prod);
 }
 
-int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
+int qed_ll2_prepare_tx_packet(void *cxt,
 			      u8 connection_handle,
 			      struct qed_ll2_tx_pkt_info *pkt,
 			      bool notify_fw)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_tx_packet *p_curp = NULL;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 	struct qed_ll2_tx_queue *p_tx;
@@ -1750,11 +1728,12 @@ out:
 	return rc;
 }
 
-int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn,
+int qed_ll2_set_fragment_of_tx_packet(void *cxt,
 				      u8 connection_handle,
 				      dma_addr_t addr, u16 nbytes)
 {
 	struct qed_ll2_tx_packet *p_cur_send_packet = NULL;
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 	u16 cur_send_frag_num = 0;
 	struct core_tx_bd *p_bd;
@@ -1789,8 +1768,9 @@ int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
-int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
+int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 	int rc = -EINVAL;
 	struct qed_ptt *p_ptt;
@@ -1855,8 +1835,10 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
 		kfree(p_buffer);
 	}
 }
-void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
+
+void qed_ll2_release_connection(void *cxt, u8 connection_handle)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 
 	p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle);
@@ -1989,9 +1971,10 @@ static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
 	p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts);
 }
 
-int qed_ll2_get_stats(struct qed_hwfn *p_hwfn,
+int qed_ll2_get_stats(void *cxt,
 		      u8 connection_handle, struct qed_ll2_stats *p_stats)
 {
+	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 	struct qed_ptt *p_ptt;
 
@@ -2018,6 +2001,17 @@ int qed_ll2_get_stats(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+static void qed_ll2b_release_rx_packet(void *cxt,
+				       u8 connection_handle,
+				       void *cookie,
+				       dma_addr_t rx_buf_addr,
+				       bool b_last_packet)
+{
+	struct qed_hwfn *p_hwfn = cxt;
+
+	qed_ll2_dealloc_buffer(p_hwfn->cdev, cookie);
+}
+
 static void qed_ll2_register_cb_ops(struct qed_dev *cdev,
 				    const struct qed_ll2_cb_ops *ops,
 				    void *cookie)
@@ -2026,11 +2020,18 @@ static void qed_ll2_register_cb_ops(struct qed_dev *cdev,
 	cdev->ll2->cb_cookie = cookie;
 }
 
+struct qed_ll2_cbs ll2_cbs = {
+	.rx_comp_cb = &qed_ll2b_complete_rx_packet,
+	.rx_release_cb = &qed_ll2b_release_rx_packet,
+	.tx_comp_cb = &qed_ll2b_complete_tx_packet,
+	.tx_release_cb = &qed_ll2b_complete_tx_packet,
+};
+
 static void qed_ll2_set_conn_data(struct qed_dev *cdev,
 				  struct qed_ll2_acquire_data *data,
 				  struct qed_ll2_params *params,
 				  enum qed_ll2_conn_type conn_type,
-				  u8 *handle, bool lb, u8 gsi_enable)
+				  u8 *handle, bool lb)
 {
 	memset(data, 0, sizeof(*data));
 
@@ -2040,8 +2041,10 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
 	data->input.rx_drop_ttl0_flg = params->drop_ttl0_packets;
 	data->input.rx_vlan_removal_en = params->rx_vlan_stripping;
 	data->input.tx_num_desc = QED_LL2_TX_SIZE;
-	data->input.gsi_enable = gsi_enable;
 	data->p_connection_handle = handle;
+	data->cbs = &ll2_cbs;
+	ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
+
 	if (lb) {
 		data->input.tx_tc = OOO_LB_TC;
 		data->input.tx_dest = QED_LL2_TX_DEST_LB;
@@ -2060,7 +2063,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
 	int rc;
 
 	qed_ll2_set_conn_data(cdev, &data, params,
-			      QED_LL2_TYPE_ISCSI_OOO, handle, true, 0);
+			      QED_LL2_TYPE_ISCSI_OOO, handle, true);
 
 	rc = qed_ll2_acquire_connection(hwfn, &data);
 	if (rc) {
@@ -2090,7 +2093,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	struct qed_ll2_acquire_data data;
 	struct qed_ptt *p_ptt;
 	int rc, i;
-	u8 gsi_enable = 1;
+
 
 	/* Initialize LL2 locks & lists */
 	INIT_LIST_HEAD(&cdev->ll2->list);
@@ -2122,11 +2125,9 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
 	case QED_PCI_FCOE:
 		conn_type = QED_LL2_TYPE_FCOE;
-		gsi_enable = 0;
 		break;
 	case QED_PCI_ISCSI:
 		conn_type = QED_LL2_TYPE_ISCSI;
-		gsi_enable = 0;
 		break;
 	case QED_PCI_ETH_ROCE:
 		conn_type = QED_LL2_TYPE_ROCE;
@@ -2136,7 +2137,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	}
 
 	qed_ll2_set_conn_data(cdev, &data, params, conn_type,
-			      &cdev->ll2->handle, false, gsi_enable);
+			      &cdev->ll2->handle, false);
 
 	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
 	if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index 3caaad53c958..a822528e9c63 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -126,6 +126,7 @@ struct qed_ll2_info {
 	u8 tx_stats_en;
 	struct qed_ll2_rx_queue rx_queue;
 	struct qed_ll2_tx_queue tx_queue;
+	struct qed_ll2_cbs cbs;
 };
 
 /**
@@ -134,30 +135,29 @@ struct qed_ll2_info {
  *        connecion handler as output parameter.
  *
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param data - describes connection parameters
  * @return int
  */
-int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
-			       struct qed_ll2_acquire_data *data);
+int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data);
 
 /**
  * @brief qed_ll2_establish_connection - start previously
  *        allocated LL2 queues pair
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param p_ptt
  * @param connection_handle	LL2 connection's handle obtained from
  *                              qed_ll2_require_connection
  *
  * @return 0 on success, failure otherwise
  */
-int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
+int qed_ll2_establish_connection(void *cxt, u8 connection_handle);
 
 /**
  * @brief qed_ll2_post_rx_buffers - submit buffers to LL2 Rx queue.
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param connection_handle	LL2 connection's handle obtained from
  *				qed_ll2_require_connection
  * @param addr			rx (physical address) buffers to submit
@@ -166,7 +166,7 @@ int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
  *
  * @return 0 on success, failure otherwise
  */
-int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
+int qed_ll2_post_rx_buffer(void *cxt,
 			   u8 connection_handle,
 			   dma_addr_t addr,
 			   u16 buf_len, void *cookie, u8 notify_fw);
@@ -175,14 +175,14 @@ int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
  * @brief qed_ll2_prepare_tx_packet - request for start Tx BD
  *				      to prepare Tx packet submission to FW.
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param connection_handle
  * @param pkt - info regarding the tx packet
  * @param notify_fw - issue doorbell to fw for this packet
  *
  * @return 0 on success, failure otherwise
  */
-int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
+int qed_ll2_prepare_tx_packet(void *cxt,
 			      u8 connection_handle,
 			      struct qed_ll2_tx_pkt_info *pkt,
 			      bool notify_fw);
@@ -191,18 +191,18 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
  * @brief qed_ll2_release_connection -	releases resources
  *					allocated for LL2 connection
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param connection_handle		LL2 connection's handle obtained from
  *					qed_ll2_require_connection
  */
-void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
+void qed_ll2_release_connection(void *cxt, u8 connection_handle);
 
 /**
  * @brief qed_ll2_set_fragment_of_tx_packet -	provides fragments to fill
  *						Tx BD of BDs requested by
  *						qed_ll2_prepare_tx_packet
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param connection_handle			LL2 connection's handle
  *						obtained from
  *						qed_ll2_require_connection
@@ -211,7 +211,7 @@ void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
  *
  * @return 0 on success, failure otherwise
  */
-int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn,
+int qed_ll2_set_fragment_of_tx_packet(void *cxt,
 				      u8 connection_handle,
 				      dma_addr_t addr, u16 nbytes);
 
@@ -219,27 +219,27 @@ int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn,
  * @brief qed_ll2_terminate_connection -	stops Tx/Rx queues
  *
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param connection_handle			LL2 connection's handle
  *						obtained from
  *						qed_ll2_require_connection
  *
  * @return 0 on success, failure otherwise
  */
-int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
+int qed_ll2_terminate_connection(void *cxt, u8 connection_handle);
 
 /**
  * @brief qed_ll2_get_stats -	get LL2 queue's statistics
  *
  *
- * @param p_hwfn
+ * @param cxt - pointer to the hw-function [opaque to some]
  * @param connection_handle	LL2 connection's handle obtained from
  *				qed_ll2_require_connection
  * @param p_stats
  *
  * @return 0 on success, failure otherwise
  */
-int qed_ll2_get_stats(struct qed_hwfn *p_hwfn,
+int qed_ll2_get_stats(void *cxt,
 		      u8 connection_handle, struct qed_ll2_stats *p_stats);
 
 /**
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index 33abcf110260..4bc2f6c47f69 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -35,7 +35,6 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
-#include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/io.h>
@@ -65,6 +64,7 @@
 #include "qed_sp.h"
 #include "qed_roce.h"
 #include "qed_ll2.h"
+#include <linux/qed/qed_ll2_if.h>
 
 static void qed_roce_free_real_icid(struct qed_hwfn *p_hwfn, u16 icid);
 
@@ -2709,310 +2709,35 @@ static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
 	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 }
 
-void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-				     u8 connection_handle,
-				     void *cookie,
-				     dma_addr_t first_frag_addr,
-				     bool b_last_fragment, bool b_last_packet)
-{
-	struct qed_roce_ll2_packet *packet = cookie;
-	struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2;
-
-	roce_ll2->cbs.tx_cb(roce_ll2->cb_cookie, packet);
-}
-
-void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-				    u8 connection_handle,
-				    void *cookie,
-				    dma_addr_t first_frag_addr,
-				    bool b_last_fragment, bool b_last_packet)
-{
-	qed_ll2b_complete_tx_gsi_packet(p_hwfn, connection_handle,
-					cookie, first_frag_addr,
-					b_last_fragment, b_last_packet);
-}
-
-void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
-				     u8 connection_handle,
-				     void *cookie,
-				     dma_addr_t rx_buf_addr,
-				     u16 data_length,
-				     u8 data_length_error,
-				     u16 parse_flags,
-				     u16 vlan,
-				     u32 src_mac_addr_hi,
-				     u16 src_mac_addr_lo, bool b_last_packet)
-{
-	struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2;
-	struct qed_roce_ll2_rx_params params;
-	struct qed_dev *cdev = p_hwfn->cdev;
-	struct qed_roce_ll2_packet pkt;
-
-	DP_VERBOSE(cdev,
-		   QED_MSG_LL2,
-		   "roce ll2 rx complete: bus_addr=%p, len=%d, data_len_err=%d\n",
-		   (void *)(uintptr_t)rx_buf_addr,
-		   data_length, data_length_error);
-
-	memset(&pkt, 0, sizeof(pkt));
-	pkt.n_seg = 1;
-	pkt.payload[0].baddr = rx_buf_addr;
-	pkt.payload[0].len = data_length;
-
-	memset(&params, 0, sizeof(params));
-	params.vlan_id = vlan;
-	*((u32 *)&params.smac[0]) = ntohl(src_mac_addr_hi);
-	*((u16 *)&params.smac[4]) = ntohs(src_mac_addr_lo);
-
-	if (data_length_error) {
-		DP_ERR(cdev,
-		       "roce ll2 rx complete: data length error %d, length=%d\n",
-		       data_length_error, data_length);
-		params.rc = -EINVAL;
-	}
-
-	roce_ll2->cbs.rx_cb(roce_ll2->cb_cookie, &pkt, &params);
-}
-
 static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
 				       u8 *old_mac_address,
 				       u8 *new_mac_address)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_ptt *p_ptt;
 	int rc = 0;
 
-	if (!hwfn->ll2 || hwfn->ll2->handle == QED_LL2_UNUSED_HANDLE) {
-		DP_ERR(cdev,
-		       "qed roce mac filter failed - roce_info/ll2 NULL\n");
-		return -EINVAL;
-	}
-
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+	p_ptt = qed_ptt_acquire(p_hwfn);
 	if (!p_ptt) {
 		DP_ERR(cdev,
 		       "qed roce ll2 mac filter set: failed to acquire PTT\n");
 		return -EINVAL;
 	}
 
-	mutex_lock(&hwfn->ll2->lock);
 	if (old_mac_address)
-		qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
-					  old_mac_address);
+		qed_llh_remove_mac_filter(p_hwfn, p_ptt, old_mac_address);
 	if (new_mac_address)
-		rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
-					    new_mac_address);
-	mutex_unlock(&hwfn->ll2->lock);
-
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
-
-	if (rc)
-		DP_ERR(cdev,
-		       "qed roce ll2 mac filter set: failed to add mac filter\n");
-
-	return rc;
-}
-
-static int qed_roce_ll2_start(struct qed_dev *cdev,
-			      struct qed_roce_ll2_params *params)
-{
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_roce_ll2_info *roce_ll2;
-	struct qed_ll2_acquire_data data;
-	int rc;
-
-	if (!params) {
-		DP_ERR(cdev, "qed roce ll2 start: failed due to NULL params\n");
-		return -EINVAL;
-	}
-	if (!params->cbs.tx_cb || !params->cbs.rx_cb) {
-		DP_ERR(cdev,
-		       "qed roce ll2 start: failed due to NULL tx/rx. tx_cb=%p, rx_cb=%p\n",
-		       params->cbs.tx_cb, params->cbs.rx_cb);
-		return -EINVAL;
-	}
-	if (!is_valid_ether_addr(params->mac_address)) {
-		DP_ERR(cdev,
-		       "qed roce ll2 start: failed due to invalid Ethernet address %pM\n",
-		       params->mac_address);
-		return -EINVAL;
-	}
-
-	/* Initialize */
-	roce_ll2 = kzalloc(sizeof(*roce_ll2), GFP_ATOMIC);
-	if (!roce_ll2) {
-		DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n");
-		return -ENOMEM;
-	}
+		rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, new_mac_address);
 
-	roce_ll2->handle = QED_LL2_UNUSED_HANDLE;
-	roce_ll2->cbs = params->cbs;
-	roce_ll2->cb_cookie = params->cb_cookie;
-	mutex_init(&roce_ll2->lock);
-
-	memset(&data, 0, sizeof(data));
-	data.input.conn_type = QED_LL2_TYPE_ROCE;
-	data.input.mtu = params->mtu;
-	data.input.rx_num_desc = params->max_rx_buffers;
-	data.input.tx_num_desc = params->max_tx_buffers;
-	data.input.rx_drop_ttl0_flg = true;
-	data.input.rx_vlan_removal_en = false;
-	data.input.tx_dest = QED_LL2_TX_DEST_NW;
-	data.input.ai_err_packet_too_big = LL2_DROP_PACKET;
-	data.input.ai_err_no_buf = LL2_DROP_PACKET;
-	data.p_connection_handle = &roce_ll2->handle;
-	data.input.gsi_enable = true;
-
-	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
-	if (rc) {
-		DP_ERR(cdev,
-		       "qed roce ll2 start: failed to acquire LL2 connection (rc=%d)\n",
-		       rc);
-		goto err;
-	}
-
-	rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev),
-					  roce_ll2->handle);
-	if (rc) {
-		DP_ERR(cdev,
-		       "qed roce ll2 start: failed to establish LL2 connection (rc=%d)\n",
-		       rc);
-		goto err1;
-	}
-
-	hwfn->ll2 = roce_ll2;
-
-	rc = qed_roce_ll2_set_mac_filter(cdev, NULL, params->mac_address);
-	if (rc) {
-		hwfn->ll2 = NULL;
-		goto err2;
-	}
-	ether_addr_copy(roce_ll2->mac_address, params->mac_address);
-
-	return 0;
-
-err2:
-	qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle);
-err1:
-	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle);
-err:
-	kfree(roce_ll2);
-	return rc;
-}
-
-static int qed_roce_ll2_stop(struct qed_dev *cdev)
-{
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
-	int rc;
-
-	if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) {
-		DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n");
-		return -EINVAL;
-	}
-
-	/* remove LL2 MAC address filter */
-	rc = qed_roce_ll2_set_mac_filter(cdev, roce_ll2->mac_address, NULL);
-	eth_zero_addr(roce_ll2->mac_address);
+	qed_ptt_release(p_hwfn, p_ptt);
 
-	rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
-					  roce_ll2->handle);
 	if (rc)
 		DP_ERR(cdev,
-		       "qed roce ll2 stop: failed to terminate LL2 connection (rc=%d)\n",
-		       rc);
-
-	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle);
-
-	roce_ll2->handle = QED_LL2_UNUSED_HANDLE;
-
-	kfree(roce_ll2);
+		       "qed roce ll2 mac filter set: failed to add MAC filter\n");
 
 	return rc;
 }
 
-static int qed_roce_ll2_tx(struct qed_dev *cdev,
-			   struct qed_roce_ll2_packet *pkt,
-			   struct qed_roce_ll2_tx_params *params)
-{
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
-	enum qed_ll2_roce_flavor_type qed_roce_flavor;
-	struct qed_ll2_tx_pkt_info ll2_pkt;
-	u8 flags = 0;
-	int rc;
-	int i;
-
-	if (!pkt || !params) {
-		DP_ERR(cdev,
-		       "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n",
-		       cdev, pkt, params);
-		return -EINVAL;
-	}
-
-	qed_roce_flavor = (pkt->roce_mode == ROCE_V1) ? QED_LL2_ROCE
-						      : QED_LL2_RROCE;
-
-	if (pkt->roce_mode == ROCE_V2_IPV4)
-		flags |= BIT(CORE_TX_BD_DATA_IP_CSUM_SHIFT);
-
-	/* Tx header */
-	memset(&ll2_pkt, 0, sizeof(ll2_pkt));
-	ll2_pkt.num_of_bds = 1 + pkt->n_seg;
-	ll2_pkt.bd_flags = flags;
-	ll2_pkt.tx_dest = QED_LL2_TX_DEST_NW;
-	ll2_pkt.qed_roce_flavor = qed_roce_flavor;
-	ll2_pkt.first_frag = pkt->header.baddr;
-	ll2_pkt.first_frag_len = pkt->header.len;
-	ll2_pkt.cookie = pkt;
-
-	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
-				       roce_ll2->handle,
-				       &ll2_pkt, 1);
-	if (rc) {
-		DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc);
-		return QED_ROCE_TX_HEAD_FAILURE;
-	}
-
-	/* Tx payload */
-	for (i = 0; i < pkt->n_seg; i++) {
-		rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev),
-						       roce_ll2->handle,
-						       pkt->payload[i].baddr,
-						       pkt->payload[i].len);
-		if (rc) {
-			/* If failed not much to do here, partial packet has
-			 * been posted * we can't free memory, will need to wait
-			 * for completion
-			 */
-			DP_ERR(cdev,
-			       "roce ll2 tx: payload failed (rc=%d)\n", rc);
-			return QED_ROCE_TX_FRAG_FAILURE;
-		}
-	}
-
-	return 0;
-}
-
-static int qed_roce_ll2_post_rx_buffer(struct qed_dev *cdev,
-				       struct qed_roce_ll2_buffer *buf,
-				       u64 cookie, u8 notify_fw)
-{
-	return qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev),
-				      QED_LEADING_HWFN(cdev)->ll2->handle,
-				      buf->baddr, buf->len,
-				      (void *)(uintptr_t)cookie, notify_fw);
-}
-
-static int qed_roce_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats)
-{
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
-
-	return qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
-				 roce_ll2->handle, stats);
-}
-
 static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.common = &qed_common_ops_pass,
 	.fill_dev_info = &qed_fill_rdma_dev_info,
@@ -3040,12 +2765,15 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.rdma_free_tid = &qed_rdma_free_tid,
 	.rdma_register_tid = &qed_rdma_register_tid,
 	.rdma_deregister_tid = &qed_rdma_deregister_tid,
-	.roce_ll2_start = &qed_roce_ll2_start,
-	.roce_ll2_stop = &qed_roce_ll2_stop,
-	.roce_ll2_tx = &qed_roce_ll2_tx,
-	.roce_ll2_post_rx_buffer = &qed_roce_ll2_post_rx_buffer,
-	.roce_ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
-	.roce_ll2_stats = &qed_roce_ll2_stats,
+	.ll2_acquire_connection = &qed_ll2_acquire_connection,
+	.ll2_establish_connection = &qed_ll2_establish_connection,
+	.ll2_terminate_connection = &qed_ll2_terminate_connection,
+	.ll2_release_connection = &qed_ll2_release_connection,
+	.ll2_post_rx_buffer = &qed_ll2_post_rx_buffer,
+	.ll2_prepare_tx_packet = &qed_ll2_prepare_tx_packet,
+	.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
+	.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
+	.ll2_get_stats = &qed_ll2_get_stats,
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h
index 9742af516183..94be3b5a39c4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h
@@ -170,53 +170,10 @@ struct qed_rdma_qp {
 void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 void qed_roce_async_event(struct qed_hwfn *p_hwfn,
 			  u8 fw_event_code, union rdma_eqe_data *rdma_data);
-void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-				     u8 connection_handle,
-				     void *cookie,
-				     dma_addr_t first_frag_addr,
-				     bool b_last_fragment, bool b_last_packet);
-void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-				    u8 connection_handle,
-				    void *cookie,
-				    dma_addr_t first_frag_addr,
-				    bool b_last_fragment, bool b_last_packet);
-void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
-				     u8 connection_handle,
-				     void *cookie,
-				     dma_addr_t rx_buf_addr,
-				     u16 data_length,
-				     u8 data_length_error,
-				     u16 parse_flags,
-				     u16 vlan,
-				     u32 src_mac_addr_hi,
-				     u16 src_mac_addr_lo, bool b_last_packet);
 #else
 static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {}
 static inline void qed_roce_async_event(struct qed_hwfn *p_hwfn,
 					u8 fw_event_code,
 					union rdma_eqe_data *rdma_data) {}
-static inline void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-						   u8 connection_handle,
-						   void *cookie,
-						   dma_addr_t first_frag_addr,
-						   bool b_last_fragment,
-						   bool b_last_packet) {}
-static inline void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-						  u8 connection_handle,
-						  void *cookie,
-						  dma_addr_t first_frag_addr,
-						  bool b_last_fragment,
-						  bool b_last_packet) {}
-static inline void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
-						   u8 connection_handle,
-						   void *cookie,
-						   dma_addr_t rx_buf_addr,
-						   u16 data_length,
-						   u8 data_length_error,
-						   u16 parse_flags,
-						   u16 vlan,
-						   u32 src_mac_addr_hi,
-						   u16 src_mac_addr_lo,
-						   bool b_last_packet) {}
 #endif
 #endif
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index a1f63eca430a..5958b45eb699 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -122,6 +122,40 @@ struct qed_ll2_comp_rx_data {
 	} u;
 };
 
+typedef
+void (*qed_ll2_complete_rx_packet_cb)(void *cxt,
+				      struct qed_ll2_comp_rx_data *data);
+
+typedef
+void (*qed_ll2_release_rx_packet_cb)(void *cxt,
+				     u8 connection_handle,
+				     void *cookie,
+				     dma_addr_t rx_buf_addr,
+				     bool b_last_packet);
+
+typedef
+void (*qed_ll2_complete_tx_packet_cb)(void *cxt,
+				      u8 connection_handle,
+				      void *cookie,
+				      dma_addr_t first_frag_addr,
+				      bool b_last_fragment,
+				      bool b_last_packet);
+
+typedef
+void (*qed_ll2_release_tx_packet_cb)(void *cxt,
+				     u8 connection_handle,
+				     void *cookie,
+				     dma_addr_t first_frag_addr,
+				     bool b_last_fragment, bool b_last_packet);
+
+struct qed_ll2_cbs {
+	qed_ll2_complete_rx_packet_cb rx_comp_cb;
+	qed_ll2_release_rx_packet_cb rx_release_cb;
+	qed_ll2_complete_tx_packet_cb tx_comp_cb;
+	qed_ll2_release_tx_packet_cb tx_release_cb;
+	void *cookie;
+};
+
 struct qed_ll2_acquire_data_inputs {
 	enum qed_ll2_conn_type conn_type;
 	u16 mtu;
@@ -140,6 +174,8 @@ struct qed_ll2_acquire_data_inputs {
 
 struct qed_ll2_acquire_data {
 	struct qed_ll2_acquire_data_inputs input;
+	const struct qed_ll2_cbs *cbs;
+
 	/* Output container for LL2 connection's handle */
 	u8 *p_connection_handle;
 };
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
index cbb2ff0ce4bc..8e70f5ee05af 100644
--- a/include/linux/qed/qed_roce_if.h
+++ b/include/linux/qed/qed_roce_if.h
@@ -34,8 +34,6 @@
 #include <linux/types.h>
 #include <linux/delay.h>
 #include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
@@ -491,42 +489,6 @@ struct qed_roce_ll2_packet {
 	enum qed_roce_ll2_tx_dest tx_dest;
 };
 
-struct qed_roce_ll2_tx_params {
-	int reserved;
-};
-
-struct qed_roce_ll2_rx_params {
-	u16 vlan_id;
-	u8 smac[ETH_ALEN];
-	int rc;
-};
-
-struct qed_roce_ll2_cbs {
-	void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt);
-
-	void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt,
-		      struct qed_roce_ll2_rx_params *params);
-};
-
-struct qed_roce_ll2_params {
-	u16 max_rx_buffers;
-	u16 max_tx_buffers;
-	u16 mtu;
-	u8 mac_address[ETH_ALEN];
-	struct qed_roce_ll2_cbs cbs;
-	void *cb_cookie;
-};
-
-struct qed_roce_ll2_info {
-	u8 handle;
-	struct qed_roce_ll2_cbs cbs;
-	u8 mac_address[ETH_ALEN];
-	void *cb_cookie;
-
-	/* Lock to protect ll2 */
-	struct mutex lock;
-};
-
 enum qed_rdma_type {
 	QED_RDMA_TYPE_ROCE,
 };
@@ -579,26 +541,40 @@ struct qed_rdma_ops {
 	int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp,
 			     struct qed_rdma_query_qp_out_params *oparams);
 	int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp);
+
 	int
 	(*rdma_register_tid)(void *rdma_cxt,
 			     struct qed_rdma_register_tid_in_params *iparams);
+
 	int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid);
 	int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid);
 	void (*rdma_free_tid)(void *rdma_cxt, u32 itid);
-	int (*roce_ll2_start)(struct qed_dev *cdev,
-			      struct qed_roce_ll2_params *params);
-	int (*roce_ll2_stop)(struct qed_dev *cdev);
-	int (*roce_ll2_tx)(struct qed_dev *cdev,
-			   struct qed_roce_ll2_packet *packet,
-			   struct qed_roce_ll2_tx_params *params);
-	int (*roce_ll2_post_rx_buffer)(struct qed_dev *cdev,
-				       struct qed_roce_ll2_buffer *buf,
-				       u64 cookie, u8 notify_fw);
-	int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev,
-				       u8 *old_mac_address,
-				       u8 *new_mac_address);
-	int (*roce_ll2_stats)(struct qed_dev *cdev,
-			      struct qed_ll2_stats *stats);
+
+	int (*ll2_acquire_connection)(void *rdma_cxt,
+				      struct qed_ll2_acquire_data *data);
+
+	int (*ll2_establish_connection)(void *rdma_cxt, u8 connection_handle);
+	int (*ll2_terminate_connection)(void *rdma_cxt, u8 connection_handle);
+	void (*ll2_release_connection)(void *rdma_cxt, u8 connection_handle);
+
+	int (*ll2_prepare_tx_packet)(void *rdma_cxt,
+				     u8 connection_handle,
+				     struct qed_ll2_tx_pkt_info *pkt,
+				     bool notify_fw);
+
+	int (*ll2_set_fragment_of_tx_packet)(void *rdma_cxt,
+					     u8 connection_handle,
+					     dma_addr_t addr,
+					     u16 nbytes);
+	int (*ll2_post_rx_buffer)(void *rdma_cxt, u8 connection_handle,
+				  dma_addr_t addr, u16 buf_len, void *cookie,
+				  u8 notify_fw);
+	int (*ll2_get_stats)(void *rdma_cxt,
+			     u8 connection_handle,
+			     struct qed_ll2_stats *p_stats);
+	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
+				  u8 *old_mac_address, u8 *new_mac_address);
+
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void);
-- 
cgit v1.2.3


From 1e1fc133483ef3b56c20bf3cd9241146c41042f8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 May 2017 00:29:38 -0400
Subject: compat_{get,put}_bitmap(): use unsafe_{get,put}_user()

unroll the inner loops, while we are at it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h |  3 +-
 kernel/compat.c        | 81 +++++++++++++++++---------------------------------
 2 files changed, 29 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 1c5f3152cbb5..94ceb0348a25 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -388,8 +388,7 @@ asmlinkage long compat_sys_wait4(compat_pid_t pid,
 
 #define BITS_PER_COMPAT_LONG    (8*sizeof(compat_long_t))
 
-#define BITS_TO_COMPAT_LONGS(bits) \
-	(((bits)+BITS_PER_COMPAT_LONG-1)/BITS_PER_COMPAT_LONG)
+#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
 
 long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 		       unsigned long bitmap_size);
diff --git a/kernel/compat.c b/kernel/compat.c
index 860f674fa556..9c2a8f3788d5 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -871,84 +871,59 @@ int get_compat_sigevent(struct sigevent *event,
 long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 		       unsigned long bitmap_size)
 {
-	int i, j;
-	unsigned long m;
-	compat_ulong_t um;
 	unsigned long nr_compat_longs;
 
 	/* align bitmap up to nearest compat_long_t boundary */
 	bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
+	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
 
 	if (!access_ok(VERIFY_READ, umask, bitmap_size / 8))
 		return -EFAULT;
 
-	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
-
-	for (i = 0; i < BITS_TO_LONGS(bitmap_size); i++) {
-		m = 0;
-
-		for (j = 0; j < sizeof(m)/sizeof(um); j++) {
-			/*
-			 * We dont want to read past the end of the userspace
-			 * bitmap. We must however ensure the end of the
-			 * kernel bitmap is zeroed.
-			 */
-			if (nr_compat_longs) {
-				nr_compat_longs--;
-				if (__get_user(um, umask))
-					return -EFAULT;
-			} else {
-				um = 0;
-			}
-
-			umask++;
-			m |= (long)um << (j * BITS_PER_COMPAT_LONG);
-		}
-		*mask++ = m;
+	user_access_begin();
+	while (nr_compat_longs > 1) {
+		compat_ulong_t l1, l2;
+		unsafe_get_user(l1, umask++, Efault);
+		unsafe_get_user(l2, umask++, Efault);
+		*mask++ = ((unsigned long)l2 << BITS_PER_COMPAT_LONG) | l1;
+		nr_compat_longs -= 2;
 	}
-
+	if (nr_compat_longs)
+		unsafe_get_user(*mask, umask++, Efault);
+	user_access_end();
 	return 0;
+
+Efault:
+	user_access_end();
+	return -EFAULT;
 }
 
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size)
 {
-	int i, j;
-	unsigned long m;
-	compat_ulong_t um;
 	unsigned long nr_compat_longs;
 
 	/* align bitmap up to nearest compat_long_t boundary */
 	bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
+	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
 
 	if (!access_ok(VERIFY_WRITE, umask, bitmap_size / 8))
 		return -EFAULT;
 
-	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
-
-	for (i = 0; i < BITS_TO_LONGS(bitmap_size); i++) {
-		m = *mask++;
-
-		for (j = 0; j < sizeof(m)/sizeof(um); j++) {
-			um = m;
-
-			/*
-			 * We dont want to write past the end of the userspace
-			 * bitmap.
-			 */
-			if (nr_compat_longs) {
-				nr_compat_longs--;
-				if (__put_user(um, umask))
-					return -EFAULT;
-			}
-
-			umask++;
-			m >>= 4*sizeof(um);
-			m >>= 4*sizeof(um);
-		}
+	user_access_begin();
+	while (nr_compat_longs > 1) {
+		unsigned long m = *mask++;
+		unsafe_put_user((compat_ulong_t)m, umask++, Efault);
+		unsafe_put_user(m >> BITS_PER_COMPAT_LONG, umask++, Efault);
+		nr_compat_longs -= 2;
 	}
-
+	if (nr_compat_longs)
+		unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
+	user_access_end();
 	return 0;
+Efault:
+	user_access_end();
+	return -EFAULT;
 }
 
 void
-- 
cgit v1.2.3


From ca2406ed58fef3f7c8ef6470cba807bfc3415605 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 31 May 2017 04:22:44 -0400
Subject: times(2): move compat to native

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/time.h |  3 ---
 kernel/compat.c      | 24 ------------------------
 kernel/sys.c         | 28 +++++++++++++++++++++++++++-
 3 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index c0543f5f25de..f769ea88250d 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -171,9 +171,6 @@ extern int do_getitimer(int which, struct itimerval *value);
 
 extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
 
-struct tms;
-extern void do_sys_times(struct tms *);
-
 /*
  * Similar to the struct tm in userspace <time.h>, but it needs to be here so
  * that the kernel source is self contained.
diff --git a/kernel/compat.c b/kernel/compat.c
index 9c2a8f3788d5..99408252762e 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -350,30 +350,6 @@ COMPAT_SYSCALL_DEFINE3(setitimer, int, which,
 	return 0;
 }
 
-static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
-{
-	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
-}
-
-COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
-{
-	if (tbuf) {
-		struct tms tms;
-		struct compat_tms tmp;
-
-		do_sys_times(&tms);
-		/* Convert our struct tms to the compat version. */
-		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
-		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
-		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
-		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
-		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
-			return -EFAULT;
-	}
-	force_successful_syscall_return();
-	return compat_jiffies_to_clock_t(jiffies);
-}
-
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index 3778a8a417b6..161b5eae9c77 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -886,7 +886,7 @@ SYSCALL_DEFINE0(getegid)
 	return from_kgid_munged(current_user_ns(), current_egid());
 }
 
-void do_sys_times(struct tms *tms)
+static void do_sys_times(struct tms *tms)
 {
 	u64 tgutime, tgstime, cutime, cstime;
 
@@ -912,6 +912,32 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
 }
 
+#ifdef CONFIG_COMPAT
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
+COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
+{
+	if (tbuf) {
+		struct tms tms;
+		struct compat_tms tmp;
+
+		do_sys_times(&tms);
+		/* Convert our struct tms to the compat version. */
+		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
+		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
+			return -EFAULT;
+	}
+	force_successful_syscall_return();
+	return compat_jiffies_to_clock_t(jiffies);
+}
+#endif
+
 /*
  * This needs some heavy checking ...
  * I just haven't the stomach for it. I also don't fully
-- 
cgit v1.2.3


From 1b3c872c8342803d0fcd8042e4e007d173191db6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 31 May 2017 04:46:17 -0400
Subject: rt_sigtimedwait(): move compat to native

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h |  2 --
 kernel/compat.c        | 32 --------------------------------
 kernel/signal.c        | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 1f5a16620693..231603ac20a3 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -246,8 +246,6 @@ extern int do_send_sig_info(int sig, struct siginfo *info,
 				struct task_struct *p, bool group);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
-				const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
diff --git a/kernel/compat.c b/kernel/compat.c
index 64e772aabdb5..36e6e7c405e3 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -866,38 +866,6 @@ sigset_to_compat(compat_sigset_t *compat, const sigset_t *set)
 	}
 }
 
-COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
-		struct compat_siginfo __user *, uinfo,
-		struct compat_timespec __user *, uts, compat_size_t, sigsetsize)
-{
-	compat_sigset_t s32;
-	sigset_t s;
-	struct timespec t;
-	siginfo_t info;
-	long ret;
-
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
-		return -EFAULT;
-	sigset_from_compat(&s, &s32);
-
-	if (uts) {
-		if (compat_get_timespec(&t, uts))
-			return -EFAULT;
-	}
-
-	ret = do_sigtimedwait(&s, &info, uts ? &t : NULL);
-
-	if (ret > 0 && uinfo) {
-		if (copy_siginfo_to_user32(uinfo, &info))
-			ret = -EFAULT;
-	}
-
-	return ret;
-}
-
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
diff --git a/kernel/signal.c b/kernel/signal.c
index 6237f492adfc..fe5b3608c31c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2768,7 +2768,7 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
  *  @info: if non-null, the signal's siginfo is returned here
  *  @ts: upper bound on process time suspension
  */
-int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
+static int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
 		    const struct timespec *ts)
 {
 	ktime_t *to = NULL, timeout = KTIME_MAX;
@@ -2857,6 +2857,40 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
+		struct compat_siginfo __user *, uinfo,
+		struct compat_timespec __user *, uts, compat_size_t, sigsetsize)
+{
+	compat_sigset_t s32;
+	sigset_t s;
+	struct timespec t;
+	siginfo_t info;
+	long ret;
+
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
+		return -EFAULT;
+	sigset_from_compat(&s, &s32);
+
+	if (uts) {
+		if (compat_get_timespec(&t, uts))
+			return -EFAULT;
+	}
+
+	ret = do_sigtimedwait(&s, &info, uts ? &t : NULL);
+
+	if (ret > 0 && uinfo) {
+		if (copy_siginfo_to_user32(uinfo, &info))
+			ret = -EFAULT;
+	}
+
+	return ret;
+}
+#endif
+
 /**
  *  sys_kill - send a signal to a process
  *  @pid: the PID of the process
-- 
cgit v1.2.3


From 2c8f8afa7f92acb07641bf95b940d384ed1d0294 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:10 +0900
Subject: mtd: nand: add generic helpers to check, match, maximize ECC settings

Driver are responsible for setting up ECC parameters correctly.
Those include:
  - Check if ECC parameters specified (usually by DT) are valid
  - Meet the chip's ECC requirement
  - Maximize ECC strength if NAND_ECC_MAXIMIZE flag is set

The logic can be generalized by factoring out common code.

This commit adds 3 helpers to the NAND framework:
nand_check_ecc_caps - Check if preset step_size and strength are valid
nand_match_ecc_req - Match the chip's requirement
nand_maximize_ecc - Maximize the ECC strength

To use the helpers above, a driver needs to provide:
  - Data array of supported ECC step size and strength
  - A hook that calculates ECC bytes from the combination of
    step_size and strength.

By using those helpers, code duplication among drivers will be
reduced.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 220 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h     |  33 +++++++
 2 files changed, 253 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 2404bb046b69..85edac9b2bb5 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4523,6 +4523,226 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 	}
 }
 
+/**
+ * nand_check_ecc_caps - check the sanity of preset ECC settings
+ * @chip: nand chip info structure
+ * @caps: ECC caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * When ECC step size and strength are already set, check if they are supported
+ * by the controller and the calculated ECC bytes fit within the chip's OOB.
+ * On success, the calculated ECC bytes is set.
+ */
+int nand_check_ecc_caps(struct nand_chip *chip,
+			const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int preset_step = chip->ecc.size;
+	int preset_strength = chip->ecc.strength;
+	int nsteps, ecc_bytes;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	if (!preset_step || !preset_strength)
+		return -ENODATA;
+
+	nsteps = mtd->writesize / preset_step;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+
+		if (stepinfo->stepsize != preset_step)
+			continue;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			if (stepinfo->strengths[j] != preset_strength)
+				continue;
+
+			ecc_bytes = caps->calc_ecc_bytes(preset_step,
+							 preset_strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				return ecc_bytes;
+
+			if (ecc_bytes * nsteps > oobavail) {
+				pr_err("ECC (step, strength) = (%d, %d) does not fit in OOB",
+				       preset_step, preset_strength);
+				return -ENOSPC;
+			}
+
+			chip->ecc.bytes = ecc_bytes;
+
+			return 0;
+		}
+	}
+
+	pr_err("ECC (step, strength) = (%d, %d) not supported on this controller",
+	       preset_step, preset_strength);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
+
+/**
+ * nand_match_ecc_req - meet the chip's requirement with least ECC bytes
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * If a chip's ECC requirement is provided, try to meet it with the least
+ * number of ECC bytes (i.e. with the largest number of OOB-free bytes).
+ * On success, the chosen ECC settings are set.
+ */
+int nand_match_ecc_req(struct nand_chip *chip,
+		       const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int req_step = chip->ecc_step_ds;
+	int req_strength = chip->ecc_strength_ds;
+	int req_corr, step_size, strength, nsteps, ecc_bytes, ecc_bytes_total;
+	int best_step, best_strength, best_ecc_bytes;
+	int best_ecc_bytes_total = INT_MAX;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	/* No information provided by the NAND chip */
+	if (!req_step || !req_strength)
+		return -ENOTSUPP;
+
+	/* number of correctable bits the chip requires in a page */
+	req_corr = mtd->writesize / req_step * req_strength;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+		step_size = stepinfo->stepsize;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			strength = stepinfo->strengths[j];
+
+			/*
+			 * If both step size and strength are smaller than the
+			 * chip's requirement, it is not easy to compare the
+			 * resulted reliability.
+			 */
+			if (step_size < req_step && strength < req_strength)
+				continue;
+
+			if (mtd->writesize % step_size)
+				continue;
+
+			nsteps = mtd->writesize / step_size;
+
+			ecc_bytes = caps->calc_ecc_bytes(step_size, strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				continue;
+			ecc_bytes_total = ecc_bytes * nsteps;
+
+			if (ecc_bytes_total > oobavail ||
+			    strength * nsteps < req_corr)
+				continue;
+
+			/*
+			 * We assume the best is to meet the chip's requrement
+			 * with the least number of ECC bytes.
+			 */
+			if (ecc_bytes_total < best_ecc_bytes_total) {
+				best_ecc_bytes_total = ecc_bytes_total;
+				best_step = step_size;
+				best_strength = strength;
+				best_ecc_bytes = ecc_bytes;
+			}
+		}
+	}
+
+	if (best_ecc_bytes_total == INT_MAX)
+		return -ENOTSUPP;
+
+	chip->ecc.size = best_step;
+	chip->ecc.strength = best_strength;
+	chip->ecc.bytes = best_ecc_bytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nand_match_ecc_req);
+
+/**
+ * nand_maximize_ecc - choose the max ECC strength available
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * Choose the max ECC strength that is supported on the controller, and can fit
+ * within the chip's OOB.  On success, the chosen ECC settings are set.
+ */
+int nand_maximize_ecc(struct nand_chip *chip,
+		      const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int step_size, strength, nsteps, ecc_bytes, corr;
+	int best_corr = 0;
+	int best_step = 0;
+	int best_strength, best_ecc_bytes;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+		step_size = stepinfo->stepsize;
+
+		/* If chip->ecc.size is already set, respect it */
+		if (chip->ecc.size && step_size != chip->ecc.size)
+			continue;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			strength = stepinfo->strengths[j];
+
+			if (mtd->writesize % step_size)
+				continue;
+
+			nsteps = mtd->writesize / step_size;
+
+			ecc_bytes = caps->calc_ecc_bytes(step_size, strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				continue;
+
+			if (ecc_bytes * nsteps > oobavail)
+				continue;
+
+			corr = strength * nsteps;
+
+			/*
+			 * If the number of correctable bits is the same,
+			 * bigger step_size has more reliability.
+			 */
+			if (corr > best_corr ||
+			    (corr == best_corr && step_size > best_step)) {
+				best_corr = corr;
+				best_step = step_size;
+				best_strength = strength;
+				best_ecc_bytes = ecc_bytes;
+			}
+		}
+	}
+
+	if (!best_corr)
+		return -ENOTSUPP;
+
+	chip->ecc.size = best_step;
+	chip->ecc.strength = best_strength;
+	chip->ecc.bytes = best_ecc_bytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nand_maximize_ecc);
+
 /*
  * Check if the chip configuration meet the datasheet requirements.
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 8b3607bde1b5..568f53e812cd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -481,6 +481,30 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc)
 	init_waitqueue_head(&nfc->wq);
 }
 
+/**
+ * struct nand_ecc_step_info - ECC step information of ECC engine
+ * @stepsize: data bytes per ECC step
+ * @strengths: array of supported strengths
+ * @nstrengths: number of supported strengths
+ */
+struct nand_ecc_step_info {
+	int stepsize;
+	const int *strengths;
+	int nstrengths;
+};
+
+/**
+ * struct nand_ecc_caps - capability of ECC engine
+ * @stepinfos: array of ECC step information
+ * @nstepinfos: number of ECC step information
+ * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step
+ */
+struct nand_ecc_caps {
+	const struct nand_ecc_step_info *stepinfos;
+	int nstepinfos;
+	int (*calc_ecc_bytes)(int step_size, int strength);
+};
+
 /**
  * struct nand_ecc_ctrl - Control structure for ECC
  * @mode:	ECC mode
@@ -1246,6 +1270,15 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *extraoob, int extraooblen,
 				int threshold);
 
+int nand_check_ecc_caps(struct nand_chip *chip,
+			const struct nand_ecc_caps *caps, int oobavail);
+
+int nand_match_ecc_req(struct nand_chip *chip,
+		       const struct nand_ecc_caps *caps,  int oobavail);
+
+int nand_maximize_ecc(struct nand_chip *chip,
+		      const struct nand_ecc_caps *caps, int oobavail);
+
 /* Default write_oob implementation */
 int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 
-- 
cgit v1.2.3


From a03c60178c181767ecfb26fb311a88742d228118 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:11 +0900
Subject: mtd: nand: add a shorthand to generate nand_ecc_caps structure

struct nand_ecc_caps was designed as flexible as possible to support
multiple stepsizes (like sunxi_nand.c).

So, we need to write multiple arrays even for the simplest case.
I guess many controllers support a single stepsize, so here is a
shorthand macro for the case.

It allows to describe like ...

NAND_ECC_CAPS_SINGLE(denali_pci_ecc_caps, denali_calc_ecc_bytes, 512, 8, 15);

... instead of

static const int denali_pci_ecc_strengths[] = {8, 15};
static const struct nand_ecc_step_info denali_pci_ecc_stepinfo = {
        .stepsize = 512,
        .strengths = denali_pci_ecc_strengths,
        .nstrengths = ARRAY_SIZE(denali_pci_ecc_strengths),
};
static const struct nand_ecc_caps denali_pci_ecc_caps = {
        .stepinfos = &denali_pci_ecc_stepinfo,
        .nstepinfos = 1,
        .calc_ecc_bytes = denali_calc_ecc_bytes,
};

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/linux/mtd/nand.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 568f53e812cd..dc8fbc033442 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -505,6 +505,20 @@ struct nand_ecc_caps {
 	int (*calc_ecc_bytes)(int step_size, int strength);
 };
 
+/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */
+#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...)	\
+static const int __name##_strengths[] = { __VA_ARGS__ };	\
+static const struct nand_ecc_step_info __name##_stepinfo = {	\
+	.stepsize = __step,					\
+	.strengths = __name##_strengths,			\
+	.nstrengths = ARRAY_SIZE(__name##_strengths),		\
+};								\
+static const struct nand_ecc_caps __name = {			\
+	.stepinfos = &__name##_stepinfo,			\
+	.nstepinfos = 1,					\
+	.calc_ecc_bytes = __calc,				\
+}
+
 /**
  * struct nand_ecc_ctrl - Control structure for ECC
  * @mode:	ECC mode
-- 
cgit v1.2.3


From 6576ff740f5c1e6705d33b9d6a922526f20fa0dc Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Thu, 27 Apr 2017 17:30:08 +0200
Subject: iio: adc: twl4030: Drop twl4030_get_madc_conversion()

Drop legacy twl4030_get_madc_conversion() method. It has been
used by drivers to get madc data before it conversion to IIO
API. There are no users in the mainline kernel anymore.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/twl4030-madc.c   | 21 ---------------------
 include/linux/i2c/twl4030-madc.h |  1 -
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c
index 0c74869a540a..88e44066ef82 100644
--- a/drivers/iio/adc/twl4030-madc.c
+++ b/drivers/iio/adc/twl4030-madc.c
@@ -642,27 +642,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(twl4030_madc_conversion);
 
-int twl4030_get_madc_conversion(int channel_no)
-{
-	struct twl4030_madc_request req;
-	int temp = 0;
-	int ret;
-
-	req.channels = (1 << channel_no);
-	req.method = TWL4030_MADC_SW2;
-	req.active = 0;
-	req.raw = 0;
-	req.func_cb = NULL;
-	ret = twl4030_madc_conversion(&req);
-	if (ret < 0)
-		return ret;
-	if (req.rbuf[channel_no] > 0)
-		temp = req.rbuf[channel_no];
-
-	return temp;
-}
-EXPORT_SYMBOL_GPL(twl4030_get_madc_conversion);
-
 /**
  * twl4030_madc_set_current_generator() - setup bias current
  *
diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h
index 1c0134dd3271..0c919ebb31e0 100644
--- a/include/linux/i2c/twl4030-madc.h
+++ b/include/linux/i2c/twl4030-madc.h
@@ -143,5 +143,4 @@ struct twl4030_madc_user_parms {
 };
 
 int twl4030_madc_conversion(struct twl4030_madc_request *conv);
-int twl4030_get_madc_conversion(int channel_no);
 #endif
-- 
cgit v1.2.3


From 42ab9278eb3abfe8b500abe6681c8549c408ec8f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Thu, 27 Apr 2017 17:30:09 +0200
Subject: iio: adc: twl4030: Unexport twl4030_madc_conversion()

All madc users have been converted to IIO API, so drop the
legacy API. The function is still used inside of the driver.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/twl4030-madc.c   | 5 +++--
 include/linux/i2c/twl4030-madc.h | 2 --
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c
index 88e44066ef82..be60f76d1a50 100644
--- a/drivers/iio/adc/twl4030-madc.c
+++ b/drivers/iio/adc/twl4030-madc.c
@@ -72,6 +72,8 @@ struct twl4030_madc_data {
 	u8 isr;
 };
 
+static int twl4030_madc_conversion(struct twl4030_madc_request *req);
+
 static int twl4030_madc_read(struct iio_dev *iio_dev,
 			     const struct iio_chan_spec *chan,
 			     int *val, int *val2, long mask)
@@ -568,7 +570,7 @@ static int twl4030_madc_wait_conversion_ready(struct twl4030_madc_data *madc,
  * be a negative error value in the corresponding array element.
  * returns 0 if succeeds else error value
  */
-int twl4030_madc_conversion(struct twl4030_madc_request *req)
+static int twl4030_madc_conversion(struct twl4030_madc_request *req)
 {
 	const struct twl4030_madc_conversion_method *method;
 	int ret;
@@ -640,7 +642,6 @@ out:
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(twl4030_madc_conversion);
 
 /**
  * twl4030_madc_set_current_generator() - setup bias current
diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h
index 0c919ebb31e0..be9260e261ac 100644
--- a/include/linux/i2c/twl4030-madc.h
+++ b/include/linux/i2c/twl4030-madc.h
@@ -141,6 +141,4 @@ struct twl4030_madc_user_parms {
 	int status;
 	u16 result;
 };
-
-int twl4030_madc_conversion(struct twl4030_madc_request *conv);
 #endif
-- 
cgit v1.2.3


From 1adf5a3c0d60ad76a873e90a25a456fd51971774 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Thu, 27 Apr 2017 17:30:10 +0200
Subject: iio: adc: twl4030: Drop struct twl4030_madc_user_parms

This struct is no longer used by anything in the kernel.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/i2c/twl4030-madc.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h
index be9260e261ac..f395700fb933 100644
--- a/include/linux/i2c/twl4030-madc.h
+++ b/include/linux/i2c/twl4030-madc.h
@@ -135,10 +135,4 @@ enum sample_type {
 #define TWL4030_REG_GPBR1		0x0c
 #define TWL4030_GPBR1_MADC_HFCLK_EN	(1 << 7)
 
-struct twl4030_madc_user_parms {
-	int channel;
-	int average;
-	int status;
-	u16 result;
-};
 #endif
-- 
cgit v1.2.3


From 842eb60b044454f186b4e1da87a4333a9827b62f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Thu, 27 Apr 2017 17:30:11 +0200
Subject: iio: adc: twl4030: Remove twl4030_madc_request.func_cb

This functionality is not used by the IIO subsystem. Due
to removal of legacy API it can also be removed.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/twl4030-madc.c   | 70 ----------------------------------------
 include/linux/i2c/twl4030-madc.h |  1 -
 2 files changed, 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c
index be60f76d1a50..21df5b932bd1 100644
--- a/drivers/iio/adc/twl4030-madc.c
+++ b/drivers/iio/adc/twl4030-madc.c
@@ -86,7 +86,6 @@ static int twl4030_madc_read(struct iio_dev *iio_dev,
 
 	req.channels = BIT(chan->channel);
 	req.active = false;
-	req.func_cb = NULL;
 	req.type = TWL4030_MADC_WAIT;
 	req.raw = !(mask == IIO_CHAN_INFO_PROCESSED);
 	req.do_avg = (mask == IIO_CHAN_INFO_AVERAGE_RAW);
@@ -342,37 +341,6 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc,
 	return count;
 }
 
-/*
- * Enables irq.
- * @madc - pointer to twl4030_madc_data struct
- * @id - irq number to be enabled
- * can take one of TWL4030_MADC_RT, TWL4030_MADC_SW1, TWL4030_MADC_SW2
- * corresponding to RT, SW1, SW2 conversion requests.
- * If the i2c read fails it returns an error else returns 0.
- */
-static int twl4030_madc_enable_irq(struct twl4030_madc_data *madc, u8 id)
-{
-	u8 val;
-	int ret;
-
-	ret = twl_i2c_read_u8(TWL4030_MODULE_MADC, &val, madc->imr);
-	if (ret) {
-		dev_err(madc->dev, "unable to read imr register 0x%X\n",
-			madc->imr);
-		return ret;
-	}
-
-	val &= ~(1 << id);
-	ret = twl_i2c_write_u8(TWL4030_MODULE_MADC, val, madc->imr);
-	if (ret) {
-		dev_err(madc->dev,
-			"unable to write imr register 0x%X\n", madc->imr);
-		return ret;
-	}
-
-	return 0;
-}
-
 /*
  * Disables irq.
  * @madc - pointer to twl4030_madc_data struct
@@ -442,11 +410,6 @@ static irqreturn_t twl4030_madc_threaded_irq_handler(int irq, void *_madc)
 		/* Read results */
 		len = twl4030_madc_read_channels(madc, method->rbase,
 						 r->channels, r->rbuf, r->raw);
-		/* Return results to caller */
-		if (r->func_cb != NULL) {
-			r->func_cb(len, r->channels, r->rbuf);
-			r->func_cb = NULL;
-		}
 		/* Free request */
 		r->result_pending = 0;
 		r->active = 0;
@@ -468,11 +431,6 @@ err_i2c:
 		/* Read results */
 		len = twl4030_madc_read_channels(madc, method->rbase,
 						 r->channels, r->rbuf, r->raw);
-		/* Return results to caller */
-		if (r->func_cb != NULL) {
-			r->func_cb(len, r->channels, r->rbuf);
-			r->func_cb = NULL;
-		}
 		/* Free request */
 		r->result_pending = 0;
 		r->active = 0;
@@ -482,23 +440,6 @@ err_i2c:
 	return IRQ_HANDLED;
 }
 
-static int twl4030_madc_set_irq(struct twl4030_madc_data *madc,
-				struct twl4030_madc_request *req)
-{
-	struct twl4030_madc_request *p;
-	int ret;
-
-	p = &madc->requests[req->method];
-	memcpy(p, req, sizeof(*req));
-	ret = twl4030_madc_enable_irq(madc, req->method);
-	if (ret < 0) {
-		dev_err(madc->dev, "enable irq failed!!\n");
-		return ret;
-	}
-
-	return 0;
-}
-
 /*
  * Function which enables the madc conversion
  * by writing to the control register.
@@ -607,17 +548,6 @@ static int twl4030_madc_conversion(struct twl4030_madc_request *req)
 			goto out;
 		}
 	}
-	if (req->type == TWL4030_MADC_IRQ_ONESHOT && req->func_cb != NULL) {
-		ret = twl4030_madc_set_irq(twl4030_madc, req);
-		if (ret < 0)
-			goto out;
-		ret = twl4030_madc_start_conversion(twl4030_madc, req->method);
-		if (ret < 0)
-			goto out;
-		twl4030_madc->requests[req->method].active = 1;
-		ret = 0;
-		goto out;
-	}
 	/* With RT method we should not be here anymore */
 	if (req->method == TWL4030_MADC_RT) {
 		ret = -EINVAL;
diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h
index f395700fb933..34e94747b61e 100644
--- a/include/linux/i2c/twl4030-madc.h
+++ b/include/linux/i2c/twl4030-madc.h
@@ -51,7 +51,6 @@ struct twl4030_madc_request {
 	bool result_pending;
 	bool raw;
 	int rbuf[TWL4030_MADC_MAX_CHANNELS];
-	void (*func_cb)(int len, int channels, int *buf);
 };
 
 enum conversion_methods {
-- 
cgit v1.2.3


From 7af1a06776da9e943126c2dd740a18710ecd66df Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Thu, 27 Apr 2017 17:30:12 +0200
Subject: iio: adc: twl4030: Fold twl4030-madc.h into driver

twl4030-madc.h is no longer used by anything outside of
the iio driver, so it can be merged into the driver.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/twl4030-madc.c   | 113 +++++++++++++++++++++++++++++++-
 include/linux/i2c/twl4030-madc.h | 137 ---------------------------------------
 2 files changed, 112 insertions(+), 138 deletions(-)
 delete mode 100644 include/linux/i2c/twl4030-madc.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c
index 21df5b932bd1..bd3d37fc2144 100644
--- a/drivers/iio/adc/twl4030-madc.c
+++ b/drivers/iio/adc/twl4030-madc.c
@@ -36,7 +36,6 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/i2c/twl.h>
-#include <linux/i2c/twl4030-madc.h>
 #include <linux/module.h>
 #include <linux/stddef.h>
 #include <linux/mutex.h>
@@ -49,9 +48,121 @@
 
 #include <linux/iio/iio.h>
 
+#define TWL4030_MADC_MAX_CHANNELS 16
+
+#define TWL4030_MADC_CTRL1		0x00
+#define TWL4030_MADC_CTRL2		0x01
+
+#define TWL4030_MADC_RTSELECT_LSB	0x02
+#define TWL4030_MADC_SW1SELECT_LSB	0x06
+#define TWL4030_MADC_SW2SELECT_LSB	0x0A
+
+#define TWL4030_MADC_RTAVERAGE_LSB	0x04
+#define TWL4030_MADC_SW1AVERAGE_LSB	0x08
+#define TWL4030_MADC_SW2AVERAGE_LSB	0x0C
+
+#define TWL4030_MADC_CTRL_SW1		0x12
+#define TWL4030_MADC_CTRL_SW2		0x13
+
+#define TWL4030_MADC_RTCH0_LSB		0x17
+#define TWL4030_MADC_GPCH0_LSB		0x37
+
+#define TWL4030_MADC_MADCON	(1 << 0)	/* MADC power on */
+#define TWL4030_MADC_BUSY	(1 << 0)	/* MADC busy */
+/* MADC conversion completion */
+#define TWL4030_MADC_EOC_SW	(1 << 1)
+/* MADC SWx start conversion */
+#define TWL4030_MADC_SW_START	(1 << 5)
+#define TWL4030_MADC_ADCIN0	(1 << 0)
+#define TWL4030_MADC_ADCIN1	(1 << 1)
+#define TWL4030_MADC_ADCIN2	(1 << 2)
+#define TWL4030_MADC_ADCIN3	(1 << 3)
+#define TWL4030_MADC_ADCIN4	(1 << 4)
+#define TWL4030_MADC_ADCIN5	(1 << 5)
+#define TWL4030_MADC_ADCIN6	(1 << 6)
+#define TWL4030_MADC_ADCIN7	(1 << 7)
+#define TWL4030_MADC_ADCIN8	(1 << 8)
+#define TWL4030_MADC_ADCIN9	(1 << 9)
+#define TWL4030_MADC_ADCIN10	(1 << 10)
+#define TWL4030_MADC_ADCIN11	(1 << 11)
+#define TWL4030_MADC_ADCIN12	(1 << 12)
+#define TWL4030_MADC_ADCIN13	(1 << 13)
+#define TWL4030_MADC_ADCIN14	(1 << 14)
+#define TWL4030_MADC_ADCIN15	(1 << 15)
+
+/* Fixed channels */
+#define TWL4030_MADC_BTEMP	TWL4030_MADC_ADCIN1
+#define TWL4030_MADC_VBUS	TWL4030_MADC_ADCIN8
+#define TWL4030_MADC_VBKB	TWL4030_MADC_ADCIN9
+#define TWL4030_MADC_ICHG	TWL4030_MADC_ADCIN10
+#define TWL4030_MADC_VCHG	TWL4030_MADC_ADCIN11
+#define TWL4030_MADC_VBAT	TWL4030_MADC_ADCIN12
+
+/* Step size and prescaler ratio */
+#define TEMP_STEP_SIZE          147
+#define TEMP_PSR_R              100
+#define CURR_STEP_SIZE		147
+#define CURR_PSR_R1		44
+#define CURR_PSR_R2		88
+
+#define TWL4030_BCI_BCICTL1	0x23
+#define TWL4030_BCI_CGAIN	0x020
+#define TWL4030_BCI_MESBAT	(1 << 1)
+#define TWL4030_BCI_TYPEN	(1 << 4)
+#define TWL4030_BCI_ITHEN	(1 << 3)
+
+#define REG_BCICTL2             0x024
+#define TWL4030_BCI_ITHSENS	0x007
+
+/* Register and bits for GPBR1 register */
+#define TWL4030_REG_GPBR1		0x0c
+#define TWL4030_GPBR1_MADC_HFCLK_EN	(1 << 7)
+
 #define TWL4030_USB_SEL_MADC_MCPC	(1<<3)
 #define TWL4030_USB_CARKIT_ANA_CTRL	0xBB
 
+struct twl4030_madc_conversion_method {
+	u8 sel;
+	u8 avg;
+	u8 rbase;
+	u8 ctrl;
+};
+
+/**
+ * struct twl4030_madc_request - madc request packet for channel conversion
+ * @channels:	16 bit bitmap for individual channels
+ * @do_avg:	sample the input channel for 4 consecutive cycles
+ * @method:	RT, SW1, SW2
+ * @type:	Polling or interrupt based method
+ * @active:	Flag if request is active
+ * @result_pending: Flag from irq handler, that result is ready
+ * @raw:	Return raw value, do not convert it
+ * @rbuf:	Result buffer
+ */
+struct twl4030_madc_request {
+	unsigned long channels;
+	bool do_avg;
+	u16 method;
+	u16 type;
+	bool active;
+	bool result_pending;
+	bool raw;
+	int rbuf[TWL4030_MADC_MAX_CHANNELS];
+};
+
+enum conversion_methods {
+	TWL4030_MADC_RT,
+	TWL4030_MADC_SW1,
+	TWL4030_MADC_SW2,
+	TWL4030_MADC_NUM_METHODS
+};
+
+enum sample_type {
+	TWL4030_MADC_WAIT,
+	TWL4030_MADC_IRQ_ONESHOT,
+	TWL4030_MADC_IRQ_REARM
+};
+
 /**
  * struct twl4030_madc_data - a container for madc info
  * @dev:		Pointer to device structure for madc
diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h
deleted file mode 100644
index 34e94747b61e..000000000000
--- a/include/linux/i2c/twl4030-madc.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * twl4030_madc.h - Header for TWL4030 MADC
- *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- * J Keerthy <j-keerthy@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#ifndef _TWL4030_MADC_H
-#define _TWL4030_MADC_H
-
-struct twl4030_madc_conversion_method {
-	u8 sel;
-	u8 avg;
-	u8 rbase;
-	u8 ctrl;
-};
-
-#define TWL4030_MADC_MAX_CHANNELS 16
-
-
-/*
- * twl4030_madc_request- madc request packet for channel conversion
- * @channels:	16 bit bitmap for individual channels
- * @do_avgP:	sample the input channel for 4 consecutive cycles
- * @method:	RT, SW1, SW2
- * @type:	Polling or interrupt based method
- * @raw:	Return raw value, do not convert it
- */
-
-struct twl4030_madc_request {
-	unsigned long channels;
-	bool do_avg;
-	u16 method;
-	u16 type;
-	bool active;
-	bool result_pending;
-	bool raw;
-	int rbuf[TWL4030_MADC_MAX_CHANNELS];
-};
-
-enum conversion_methods {
-	TWL4030_MADC_RT,
-	TWL4030_MADC_SW1,
-	TWL4030_MADC_SW2,
-	TWL4030_MADC_NUM_METHODS
-};
-
-enum sample_type {
-	TWL4030_MADC_WAIT,
-	TWL4030_MADC_IRQ_ONESHOT,
-	TWL4030_MADC_IRQ_REARM
-};
-
-#define TWL4030_MADC_CTRL1		0x00
-#define TWL4030_MADC_CTRL2		0x01
-
-#define TWL4030_MADC_RTSELECT_LSB	0x02
-#define TWL4030_MADC_SW1SELECT_LSB	0x06
-#define TWL4030_MADC_SW2SELECT_LSB	0x0A
-
-#define TWL4030_MADC_RTAVERAGE_LSB	0x04
-#define TWL4030_MADC_SW1AVERAGE_LSB	0x08
-#define TWL4030_MADC_SW2AVERAGE_LSB	0x0C
-
-#define TWL4030_MADC_CTRL_SW1		0x12
-#define TWL4030_MADC_CTRL_SW2		0x13
-
-#define TWL4030_MADC_RTCH0_LSB		0x17
-#define TWL4030_MADC_GPCH0_LSB		0x37
-
-#define TWL4030_MADC_MADCON	(1 << 0)	/* MADC power on */
-#define TWL4030_MADC_BUSY	(1 << 0)	/* MADC busy */
-/* MADC conversion completion */
-#define TWL4030_MADC_EOC_SW	(1 << 1)
-/* MADC SWx start conversion */
-#define TWL4030_MADC_SW_START	(1 << 5)
-#define TWL4030_MADC_ADCIN0	(1 << 0)
-#define TWL4030_MADC_ADCIN1	(1 << 1)
-#define TWL4030_MADC_ADCIN2	(1 << 2)
-#define TWL4030_MADC_ADCIN3	(1 << 3)
-#define TWL4030_MADC_ADCIN4	(1 << 4)
-#define TWL4030_MADC_ADCIN5	(1 << 5)
-#define TWL4030_MADC_ADCIN6	(1 << 6)
-#define TWL4030_MADC_ADCIN7	(1 << 7)
-#define TWL4030_MADC_ADCIN8	(1 << 8)
-#define TWL4030_MADC_ADCIN9	(1 << 9)
-#define TWL4030_MADC_ADCIN10	(1 << 10)
-#define TWL4030_MADC_ADCIN11	(1 << 11)
-#define TWL4030_MADC_ADCIN12	(1 << 12)
-#define TWL4030_MADC_ADCIN13	(1 << 13)
-#define TWL4030_MADC_ADCIN14	(1 << 14)
-#define TWL4030_MADC_ADCIN15	(1 << 15)
-
-/* Fixed channels */
-#define TWL4030_MADC_BTEMP	TWL4030_MADC_ADCIN1
-#define TWL4030_MADC_VBUS	TWL4030_MADC_ADCIN8
-#define TWL4030_MADC_VBKB	TWL4030_MADC_ADCIN9
-#define TWL4030_MADC_ICHG	TWL4030_MADC_ADCIN10
-#define TWL4030_MADC_VCHG	TWL4030_MADC_ADCIN11
-#define TWL4030_MADC_VBAT	TWL4030_MADC_ADCIN12
-
-/* Step size and prescaler ratio */
-#define TEMP_STEP_SIZE          147
-#define TEMP_PSR_R              100
-#define CURR_STEP_SIZE		147
-#define CURR_PSR_R1		44
-#define CURR_PSR_R2		88
-
-#define TWL4030_BCI_BCICTL1	0x23
-#define TWL4030_BCI_CGAIN	0x020
-#define TWL4030_BCI_MESBAT	(1 << 1)
-#define TWL4030_BCI_TYPEN	(1 << 4)
-#define TWL4030_BCI_ITHEN	(1 << 3)
-
-#define REG_BCICTL2             0x024
-#define TWL4030_BCI_ITHSENS	0x007
-
-/* Register and bits for GPBR1 register */
-#define TWL4030_REG_GPBR1		0x0c
-#define TWL4030_GPBR1_MADC_HFCLK_EN	(1 << 7)
-
-#endif
-- 
cgit v1.2.3


From d89e119a088ec83881eda5645307ae252ecea33a Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Thu, 27 Apr 2017 15:29:15 +0200
Subject: iio: add hardware triggered operating mode

Devices, like stm32 timer, could be triggered by hardware events which
are not buffer or software events. However it could be necessary to
validate the triggers like it is done for buffer or event triggered modes.
This patch add a new INDIO_HARDWARE_TRIGGERED operating mode for this
kind of devices and allow this mode to register trigger consumer.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 4 ++--
 include/linux/iio/iio.h         | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 15c86a8cd704..17ec4cee51dc 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1423,7 +1423,7 @@ static void iio_device_unregister_sysfs(struct iio_dev *indio_dev)
 static void iio_dev_release(struct device *device)
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(device);
-	if (indio_dev->modes & (INDIO_BUFFER_TRIGGERED | INDIO_EVENT_TRIGGERED))
+	if (indio_dev->modes & INDIO_ALL_TRIGGERED_MODES)
 		iio_device_unregister_trigger_consumer(indio_dev);
 	iio_device_unregister_eventset(indio_dev);
 	iio_device_unregister_sysfs(indio_dev);
@@ -1705,7 +1705,7 @@ int iio_device_register(struct iio_dev *indio_dev)
 			"Failed to register event set\n");
 		goto error_free_sysfs;
 	}
-	if (indio_dev->modes & (INDIO_BUFFER_TRIGGERED | INDIO_EVENT_TRIGGERED))
+	if (indio_dev->modes & INDIO_ALL_TRIGGERED_MODES)
 		iio_device_register_trigger_consumer(indio_dev);
 
 	if ((indio_dev->modes & INDIO_ALL_BUFFER_MODES) &&
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 3f5ea2e9a39e..d68bec297a45 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -352,10 +352,16 @@ unsigned int iio_get_time_res(const struct iio_dev *indio_dev);
 #define INDIO_BUFFER_SOFTWARE		0x04
 #define INDIO_BUFFER_HARDWARE		0x08
 #define INDIO_EVENT_TRIGGERED		0x10
+#define INDIO_HARDWARE_TRIGGERED	0x20
 
 #define INDIO_ALL_BUFFER_MODES					\
 	(INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | INDIO_BUFFER_SOFTWARE)
 
+#define INDIO_ALL_TRIGGERED_MODES	\
+	(INDIO_BUFFER_TRIGGERED		\
+	 | INDIO_EVENT_TRIGGERED	\
+	 | INDIO_HARDWARE_TRIGGERED)
+
 #define INDIO_MAX_RAW_ELEMENTS		4
 
 struct iio_trigger; /* forward declaration */
-- 
cgit v1.2.3


From 6d53cefb18e4646fb4bf62ccb6098fb3808486df Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 11 Jun 2017 15:51:56 -0700
Subject: compiler, clang: properly override 'inline' for clang

Commit abb2ea7dfd82 ("compiler, clang: suppress warning for unused
static inline functions") just caused more warnings due to re-defining
the 'inline' macro.

So undef it before re-defining it, and also add the 'notrace' attribute
like the gcc version that this is overriding does.

Maybe this makes clang happier.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index ea9126006a69..d614c5ea1b5e 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,4 +21,5 @@
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well.
  */
-#define inline inline __attribute__((unused))
+#undef inline
+#define inline inline __attribute__((unused)) notrace
-- 
cgit v1.2.3


From 9010624cc5b446d3377dde4753eecbaf945f3a41 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:34:39 +0200
Subject: hwmon: (ads1015) move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/ads1015           |  2 +-
 MAINTAINERS                           |  2 +-
 drivers/hwmon/ads1015.c               |  2 +-
 drivers/iio/adc/ti-ads1015.c          |  2 +-
 include/linux/i2c/ads1015.h           | 36 -----------------------------------
 include/linux/platform_data/ads1015.h | 36 +++++++++++++++++++++++++++++++++++
 6 files changed, 40 insertions(+), 40 deletions(-)
 delete mode 100644 include/linux/i2c/ads1015.h
 create mode 100644 include/linux/platform_data/ads1015.h

(limited to 'include/linux')

diff --git a/Documentation/hwmon/ads1015 b/Documentation/hwmon/ads1015
index 063b80d857b1..02d2a459385f 100644
--- a/Documentation/hwmon/ads1015
+++ b/Documentation/hwmon/ads1015
@@ -40,7 +40,7 @@ By default all inputs are exported.
 Platform Data
 -------------
 
-In linux/i2c/ads1015.h platform data is defined, channel_data contains
+In linux/platform_data/ads1015.h platform data is defined, channel_data contains
 configuration data for the used input combinations:
 - pga is the programmable gain amplifier (values are full scale)
   0: +/- 6.144 V
diff --git a/MAINTAINERS b/MAINTAINERS
index 09b5ab6a8a5c..18b9472bc5be 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -478,7 +478,7 @@ L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ads1015
 F:	drivers/hwmon/ads1015.c
-F:	include/linux/i2c/ads1015.h
+F:	include/linux/platform_data/ads1015.h
 
 ADT746X FAN DRIVER
 M:	Colin Leroy <colin@colino.net>
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 5140c27d16dd..357b42607164 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -34,7 +34,7 @@
 #include <linux/of_device.h>
 #include <linux/of.h>
 
-#include <linux/i2c/ads1015.h>
+#include <linux/platform_data/ads1015.h>
 
 /* ADS1015 registers */
 enum {
diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c
index f76d979fb7e8..884b8e461b17 100644
--- a/drivers/iio/adc/ti-ads1015.c
+++ b/drivers/iio/adc/ti-ads1015.c
@@ -23,7 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 
-#include <linux/i2c/ads1015.h>
+#include <linux/platform_data/ads1015.h>
 
 #include <linux/iio/iio.h>
 #include <linux/iio/types.h>
diff --git a/include/linux/i2c/ads1015.h b/include/linux/i2c/ads1015.h
deleted file mode 100644
index d5aa2a045669..000000000000
--- a/include/linux/i2c/ads1015.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Platform Data for ADS1015 12-bit 4-input ADC
- * (C) Copyright 2010
- * Dirk Eibach, Guntermann & Drunck GmbH <eibach@gdsys.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef LINUX_ADS1015_H
-#define LINUX_ADS1015_H
-
-#define ADS1015_CHANNELS 8
-
-struct ads1015_channel_data {
-	bool enabled;
-	unsigned int pga;
-	unsigned int data_rate;
-};
-
-struct ads1015_platform_data {
-	struct ads1015_channel_data channel_data[ADS1015_CHANNELS];
-};
-
-#endif /* LINUX_ADS1015_H */
diff --git a/include/linux/platform_data/ads1015.h b/include/linux/platform_data/ads1015.h
new file mode 100644
index 000000000000..d5aa2a045669
--- /dev/null
+++ b/include/linux/platform_data/ads1015.h
@@ -0,0 +1,36 @@
+/*
+ * Platform Data for ADS1015 12-bit 4-input ADC
+ * (C) Copyright 2010
+ * Dirk Eibach, Guntermann & Drunck GmbH <eibach@gdsys.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef LINUX_ADS1015_H
+#define LINUX_ADS1015_H
+
+#define ADS1015_CHANNELS 8
+
+struct ads1015_channel_data {
+	bool enabled;
+	unsigned int pga;
+	unsigned int data_rate;
+};
+
+struct ads1015_platform_data {
+	struct ads1015_channel_data channel_data[ADS1015_CHANNELS];
+};
+
+#endif /* LINUX_ADS1015_H */
-- 
cgit v1.2.3


From 570999f306fc0375a533a1906ff35fffe289e36b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:34:40 +0200
Subject: hwmon: (ds620) move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ds620.c               |  2 +-
 include/linux/i2c/ds620.h           | 21 ---------------------
 include/linux/platform_data/ds620.h | 21 +++++++++++++++++++++
 3 files changed, 22 insertions(+), 22 deletions(-)
 delete mode 100644 include/linux/i2c/ds620.h
 create mode 100644 include/linux/platform_data/ds620.h

(limited to 'include/linux')

diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c
index 0043a4c02b85..57d6958c74b8 100644
--- a/drivers/hwmon/ds620.c
+++ b/drivers/hwmon/ds620.c
@@ -30,7 +30,7 @@
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/sysfs.h>
-#include <linux/i2c/ds620.h>
+#include <linux/platform_data/ds620.h>
 
 /*
  * Many DS620 constants specified below
diff --git a/include/linux/i2c/ds620.h b/include/linux/i2c/ds620.h
deleted file mode 100644
index 736bb87ac0fc..000000000000
--- a/include/linux/i2c/ds620.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _LINUX_DS620_H
-#define _LINUX_DS620_H
-
-#include <linux/types.h>
-#include <linux/i2c.h>
-
-/* platform data for the DS620 temperature sensor and thermostat */
-
-struct ds620_platform_data {
-	/*
-	 *  Thermostat output pin PO mode:
-	 *  0 = always low (default)
-	 *  1 = PO_LOW
-	 *  2 = PO_HIGH
-	 *
-	 * (see Documentation/hwmon/ds620)
-	 */
-	int pomode;
-};
-
-#endif /* _LINUX_DS620_H */
diff --git a/include/linux/platform_data/ds620.h b/include/linux/platform_data/ds620.h
new file mode 100644
index 000000000000..736bb87ac0fc
--- /dev/null
+++ b/include/linux/platform_data/ds620.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_DS620_H
+#define _LINUX_DS620_H
+
+#include <linux/types.h>
+#include <linux/i2c.h>
+
+/* platform data for the DS620 temperature sensor and thermostat */
+
+struct ds620_platform_data {
+	/*
+	 *  Thermostat output pin PO mode:
+	 *  0 = always low (default)
+	 *  1 = PO_LOW
+	 *  2 = PO_HIGH
+	 *
+	 * (see Documentation/hwmon/ds620)
+	 */
+	int pomode;
+};
+
+#endif /* _LINUX_DS620_H */
-- 
cgit v1.2.3


From 8116e8dd564fcbadf9f3697dcd0c3d37c049fb45 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:34:41 +0200
Subject: hwmon: (ltc4245) move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/ltc4245           |  2 +-
 drivers/hwmon/ltc4245.c               |  2 +-
 include/linux/i2c/ltc4245.h           | 21 ---------------------
 include/linux/platform_data/ltc4245.h | 21 +++++++++++++++++++++
 4 files changed, 23 insertions(+), 23 deletions(-)
 delete mode 100644 include/linux/i2c/ltc4245.h
 create mode 100644 include/linux/platform_data/ltc4245.h

(limited to 'include/linux')

diff --git a/Documentation/hwmon/ltc4245 b/Documentation/hwmon/ltc4245
index b478b0864965..4ca7a9da09f9 100644
--- a/Documentation/hwmon/ltc4245
+++ b/Documentation/hwmon/ltc4245
@@ -96,7 +96,7 @@ slowly, -EAGAIN will be returned when you read the sysfs attribute containing
 the sensor reading.
 
 The LTC4245 chip can be configured to sample all GPIO pins with two methods:
-1) platform data -- see include/linux/i2c/ltc4245.h
+1) platform data -- see include/linux/platform_data/ltc4245.h
 2) OF device tree -- add the "ltc4245,use-extra-gpios" property to each chip
 
 The default mode of operation is to sample a single GPIO pin.
diff --git a/drivers/hwmon/ltc4245.c b/drivers/hwmon/ltc4245.c
index 4680d89556ce..082f0a0bd8a0 100644
--- a/drivers/hwmon/ltc4245.c
+++ b/drivers/hwmon/ltc4245.c
@@ -23,7 +23,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/jiffies.h>
-#include <linux/i2c/ltc4245.h>
+#include <linux/platform_data/ltc4245.h>
 
 /* Here are names of the chip's registers (a.k.a. commands) */
 enum ltc4245_cmd {
diff --git a/include/linux/i2c/ltc4245.h b/include/linux/i2c/ltc4245.h
deleted file mode 100644
index 56bda4be0016..000000000000
--- a/include/linux/i2c/ltc4245.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Platform Data for LTC4245 hardware monitor chip
- *
- * Copyright (c) 2010 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef LINUX_LTC4245_H
-#define LINUX_LTC4245_H
-
-#include <linux/types.h>
-
-struct ltc4245_platform_data {
-	bool use_extra_gpios;
-};
-
-#endif /* LINUX_LTC4245_H */
diff --git a/include/linux/platform_data/ltc4245.h b/include/linux/platform_data/ltc4245.h
new file mode 100644
index 000000000000..56bda4be0016
--- /dev/null
+++ b/include/linux/platform_data/ltc4245.h
@@ -0,0 +1,21 @@
+/*
+ * Platform Data for LTC4245 hardware monitor chip
+ *
+ * Copyright (c) 2010 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef LINUX_LTC4245_H
+#define LINUX_LTC4245_H
+
+#include <linux/types.h>
+
+struct ltc4245_platform_data {
+	bool use_extra_gpios;
+};
+
+#endif /* LINUX_LTC4245_H */
-- 
cgit v1.2.3


From 0c9fe1614126171c70e0dc0b0635d45a327ac82a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:34:42 +0200
Subject: hwmon: (max6639) move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/max6639.c               |  2 +-
 include/linux/i2c/max6639.h           | 14 --------------
 include/linux/platform_data/max6639.h | 14 ++++++++++++++
 3 files changed, 15 insertions(+), 15 deletions(-)
 delete mode 100644 include/linux/i2c/max6639.h
 create mode 100644 include/linux/platform_data/max6639.h

(limited to 'include/linux')

diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c
index dac6d85f2fd9..f98a83c79ff1 100644
--- a/drivers/hwmon/max6639.c
+++ b/drivers/hwmon/max6639.c
@@ -32,7 +32,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
-#include <linux/i2c/max6639.h>
+#include <linux/platform_data/max6639.h>
 
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2e, 0x2f, I2C_CLIENT_END };
diff --git a/include/linux/i2c/max6639.h b/include/linux/i2c/max6639.h
deleted file mode 100644
index 6011c42034da..000000000000
--- a/include/linux/i2c/max6639.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LINUX_MAX6639_H
-#define _LINUX_MAX6639_H
-
-#include <linux/types.h>
-
-/* platform data for the MAX6639 temperature sensor and fan control */
-
-struct max6639_platform_data {
-	bool pwm_polarity;	/* Polarity low (0) or high (1, default) */
-	int ppr;		/* Pulses per rotation 1..4 (default == 2) */
-	int rpm_range;		/* 2000, 4000 (default), 8000 or 16000 */
-};
-
-#endif /* _LINUX_MAX6639_H */
diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h
new file mode 100644
index 000000000000..6011c42034da
--- /dev/null
+++ b/include/linux/platform_data/max6639.h
@@ -0,0 +1,14 @@
+#ifndef _LINUX_MAX6639_H
+#define _LINUX_MAX6639_H
+
+#include <linux/types.h>
+
+/* platform data for the MAX6639 temperature sensor and fan control */
+
+struct max6639_platform_data {
+	bool pwm_polarity;	/* Polarity low (0) or high (1, default) */
+	int ppr;		/* Pulses per rotation 1..4 (default == 2) */
+	int rpm_range;		/* 2000, 4000 (default), 8000 or 16000 */
+};
+
+#endif /* _LINUX_MAX6639_H */
-- 
cgit v1.2.3


From 4ba1bb12cf21f4ee4681aee939c4d9d82d6f49f2 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sun, 21 May 2017 22:34:43 +0200
Subject: hwmon: (pmbus) move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/pmbus-core   |  2 +-
 MAINTAINERS                      |  2 +-
 drivers/hwmon/pmbus/pmbus.c      |  2 +-
 drivers/hwmon/pmbus/pmbus_core.c |  2 +-
 drivers/hwmon/pmbus/ucd9000.c    |  2 +-
 drivers/hwmon/pmbus/ucd9200.c    |  2 +-
 include/linux/i2c/pmbus.h        | 49 ----------------------------------------
 include/linux/pmbus.h            | 49 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 55 insertions(+), 55 deletions(-)
 delete mode 100644 include/linux/i2c/pmbus.h
 create mode 100644 include/linux/pmbus.h

(limited to 'include/linux')

diff --git a/Documentation/hwmon/pmbus-core b/Documentation/hwmon/pmbus-core
index 31e4720fed18..8ed10e9ddfb5 100644
--- a/Documentation/hwmon/pmbus-core
+++ b/Documentation/hwmon/pmbus-core
@@ -253,7 +253,7 @@ Specifically, it provides the following information.
 PMBus driver platform data
 ==========================
 
-PMBus platform data is defined in include/linux/i2c/pmbus.h. Platform data
+PMBus platform data is defined in include/linux/pmbus.h. Platform data
 currently only provides a flag field with a single bit used.
 
 #define PMBUS_SKIP_STATUS_CHECK (1 << 0)
diff --git a/MAINTAINERS b/MAINTAINERS
index 18b9472bc5be..48e882da622e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10154,7 +10154,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
 F:	Documentation/hwmon/pmbus
 F:	drivers/hwmon/pmbus/
-F:	include/linux/i2c/pmbus.h
+F:	include/linux/pmbus.h
 
 PMC SIERRA MaxRAID DRIVER
 L:	linux-scsi@vger.kernel.org
diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c
index 44ca8a94873d..7718e58dbda5 100644
--- a/drivers/hwmon/pmbus/pmbus.c
+++ b/drivers/hwmon/pmbus/pmbus.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/i2c.h>
-#include <linux/i2c/pmbus.h>
+#include <linux/pmbus.h>
 #include "pmbus.h"
 
 /*
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index ba59eaef2e07..f1eff6b6c798 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -28,7 +28,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/jiffies.h>
-#include <linux/i2c/pmbus.h>
+#include <linux/pmbus.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include "pmbus.h"
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index 3518f0c08934..b74dbeca2e8d 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -26,7 +26,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
-#include <linux/i2c/pmbus.h>
+#include <linux/pmbus.h>
 #include "pmbus.h"
 
 enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c
index a8712c5ded4e..3ed94585837a 100644
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -25,7 +25,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
-#include <linux/i2c/pmbus.h>
+#include <linux/pmbus.h>
 #include "pmbus.h"
 
 #define UCD9200_PHASE_INFO	0xd2
diff --git a/include/linux/i2c/pmbus.h b/include/linux/i2c/pmbus.h
deleted file mode 100644
index ee3c2aba2a8e..000000000000
--- a/include/linux/i2c/pmbus.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Hardware monitoring driver for PMBus devices
- *
- * Copyright (c) 2010, 2011 Ericsson AB.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _PMBUS_H_
-#define _PMBUS_H_
-
-/* flags */
-
-/*
- * PMBUS_SKIP_STATUS_CHECK
- *
- * During register detection, skip checking the status register for
- * communication or command errors.
- *
- * Some PMBus chips respond with valid data when trying to read an unsupported
- * register. For such chips, checking the status register is mandatory when
- * trying to determine if a chip register exists or not.
- * Other PMBus chips don't support the STATUS_CML register, or report
- * communication errors for no explicable reason. For such chips, checking
- * the status register must be disabled.
- */
-#define PMBUS_SKIP_STATUS_CHECK	(1 << 0)
-
-struct pmbus_platform_data {
-	u32 flags;		/* Device specific flags */
-
-	/* regulator support */
-	int num_regulators;
-	struct regulator_init_data *reg_init_data;
-};
-
-#endif /* _PMBUS_H_ */
diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h
new file mode 100644
index 000000000000..ee3c2aba2a8e
--- /dev/null
+++ b/include/linux/pmbus.h
@@ -0,0 +1,49 @@
+/*
+ * Hardware monitoring driver for PMBus devices
+ *
+ * Copyright (c) 2010, 2011 Ericsson AB.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PMBUS_H_
+#define _PMBUS_H_
+
+/* flags */
+
+/*
+ * PMBUS_SKIP_STATUS_CHECK
+ *
+ * During register detection, skip checking the status register for
+ * communication or command errors.
+ *
+ * Some PMBus chips respond with valid data when trying to read an unsupported
+ * register. For such chips, checking the status register is mandatory when
+ * trying to determine if a chip register exists or not.
+ * Other PMBus chips don't support the STATUS_CML register, or report
+ * communication errors for no explicable reason. For such chips, checking
+ * the status register must be disabled.
+ */
+#define PMBUS_SKIP_STATUS_CHECK	(1 << 0)
+
+struct pmbus_platform_data {
+	u32 flags;		/* Device specific flags */
+
+	/* regulator support */
+	int num_regulators;
+	struct regulator_init_data *reg_init_data;
+};
+
+#endif /* _PMBUS_H_ */
-- 
cgit v1.2.3


From 8e0931022e12e45bab9afe01e830d697d9c8e73d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 26 May 2017 15:30:34 +0200
Subject: Revert "clockevents: Add a clkevt-of mechanism like clksrc-of"

After discussing it, this feature is dropped as it is not considered
adequate:

	https://patchwork.kernel.org/patch/9639317/

There is no user of this macro yet, so there is no impact on the drivers.

This reverts commit 376bc27150f180d9f5eddec6a14117780177589d.

Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/Kconfig        |  7 -----
 drivers/clocksource/Makefile       |  1 -
 drivers/clocksource/clkevt-probe.c | 56 --------------------------------------
 include/asm-generic/vmlinux.lds.h  |  2 --
 include/linux/clockchips.h         |  9 ------
 5 files changed, 75 deletions(-)
 delete mode 100644 drivers/clocksource/clkevt-probe.c

(limited to 'include/linux')

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 1b22ade4c8f1..623fcc69e9a1 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -5,10 +5,6 @@ config CLKSRC_OF
 	bool
 	select CLKSRC_PROBE
 
-config CLKEVT_OF
-	bool
-	select CLKEVT_PROBE
-
 config CLKSRC_ACPI
 	bool
 	select CLKSRC_PROBE
@@ -16,9 +12,6 @@ config CLKSRC_ACPI
 config CLKSRC_PROBE
 	bool
 
-config CLKEVT_PROBE
-	bool
-
 config CLKSRC_I8253
 	bool
 
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index cf0c30b6ec1f..cad713c53e7f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -1,5 +1,4 @@
 obj-$(CONFIG_CLKSRC_PROBE)	+= clksrc-probe.o
-obj-$(CONFIG_CLKEVT_PROBE)	+= clkevt-probe.o
 obj-$(CONFIG_ATMEL_PIT)		+= timer-atmel-pit.o
 obj-$(CONFIG_ATMEL_ST)		+= timer-atmel-st.o
 obj-$(CONFIG_ATMEL_TCB_CLKSRC)	+= tcb_clksrc.o
diff --git a/drivers/clocksource/clkevt-probe.c b/drivers/clocksource/clkevt-probe.c
deleted file mode 100644
index eb89b502acbd..000000000000
--- a/drivers/clocksource/clkevt-probe.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, Linaro Ltd.  All rights reserved.
- * Daniel Lezcano <daniel.lezcano@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/clockchips.h>
-
-extern struct of_device_id __clkevt_of_table[];
-
-static const struct of_device_id __clkevt_of_table_sentinel
-	__used __section(__clkevt_of_table_end);
-
-int __init clockevent_probe(void)
-{
-	struct device_node *np;
-	const struct of_device_id *match;
-	of_init_fn_1_ret init_func;
-	int ret, clockevents = 0;
-
-	for_each_matching_node_and_match(np, __clkevt_of_table, &match) {
-		if (!of_device_is_available(np))
-			continue;
-
-		init_func = match->data;
-
-		ret = init_func(np);
-		if (ret) {
-			pr_warn("Failed to initialize '%s' (%d)\n",
-				np->name, ret);
-			continue;
-		}
-
-		clockevents++;
-	}
-
-	if (!clockevents) {
-		pr_crit("%s: no matching clockevent found\n", __func__);
-		return -ENODEV;
-	}
-
-	return 0;
-}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 314a0b9219c6..401d324bcede 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -173,7 +173,6 @@
 	KEEP(*(__##name##_of_table_end))
 
 #define CLKSRC_OF_TABLES()	OF_TABLE(CONFIG_CLKSRC_OF, clksrc)
-#define CLKEVT_OF_TABLES()	OF_TABLE(CONFIG_CLKEVT_OF, clkevt)
 #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
 #define CLK_OF_TABLES()		OF_TABLE(CONFIG_COMMON_CLK, clk)
 #define IOMMU_OF_TABLES()	OF_TABLE(CONFIG_OF_IOMMU, iommu)
@@ -558,7 +557,6 @@
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
 	CLKSRC_OF_TABLES()						\
-	CLKEVT_OF_TABLES()						\
 	IOMMU_OF_TABLES()						\
 	CPU_METHOD_OF_TABLES()						\
 	CPUIDLE_METHOD_OF_TABLES()					\
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index acc9ce05e5f0..a116926598fd 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -223,13 +223,4 @@ static inline void tick_setup_hrtimer_broadcast(void) { }
 
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-#define CLOCKEVENT_OF_DECLARE(name, compat, fn) \
-	OF_DECLARE_1_RET(clkevt, name, compat, fn)
-
-#ifdef CONFIG_CLKEVT_PROBE
-extern int clockevent_probe(void);
-#else
-static inline int clockevent_probe(void) { return 0; }
-#endif
-
 #endif /* _LINUX_CLOCKCHIPS_H */
-- 
cgit v1.2.3


From 8be381a131c29c4737aed44e7e5f90cb77bb4a7e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 19 May 2017 10:35:10 +0200
Subject: soc: renesas: Rework Kconfig and Makefile logic

The goals are to:
  - Allow precise control over and automatic selection of which
    (sub)drivers are used for which SoC,
  - Allow adding support for new SoCs easily,
  - Allow compile-testing of all (sub)drivers,
  - Keep driver selection logic in the subsystem-specific Kconfig,
    independent from the architecture-specific Kconfig (i.e. no "select"
    from arch/arm64/Kconfig.platforms), to avoid dependencies.

This is implemented by:
  - Introducing Kconfig symbols for all drivers and sub-drivers,
  - Introducing the Kconfig symbol SOC_RENESAS, which is enabled
    automatically when building for a Renesas ARM platform, and which
    enables all required drivers without interaction of the user, based
    on SoC-specific ARCH_* symbols,
  - Allowing the user to enable any Kconfig symbol manually if
    COMPILE_TEST is enabled,
  - Using the new Kconfig symbols instead of the ARCH_* symbols to
    control compilation in the Makefile,
  - Always entering drivers/soc/renesas/ during the build.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/soc/Kconfig                  |  1 +
 drivers/soc/Makefile                 |  2 +-
 drivers/soc/renesas/Kconfig          | 63 ++++++++++++++++++++++++++++++++++++
 drivers/soc/renesas/Makefile         | 31 +++++++++---------
 drivers/soc/renesas/rcar-sysc.c      | 24 +++++++-------
 include/linux/soc/renesas/rcar-rst.h |  3 +-
 6 files changed, 92 insertions(+), 32 deletions(-)
 create mode 100644 drivers/soc/renesas/Kconfig

(limited to 'include/linux')

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index f09023f7ab11..de4fcdbae2a8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -4,6 +4,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
+source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
 source "drivers/soc/samsung/Kconfig"
 source "drivers/soc/sunxi/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 05eae52a30b4..59d8c937f4a8 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_MACH_DOVE)		+= dove/
 obj-y				+= fsl/
 obj-$(CONFIG_ARCH_MEDIATEK)	+= mediatek/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
-obj-$(CONFIG_ARCH_RENESAS)	+= renesas/
+obj-y				+= renesas/
 obj-$(CONFIG_ARCH_ROCKCHIP)	+= rockchip/
 obj-$(CONFIG_SOC_SAMSUNG)	+= samsung/
 obj-$(CONFIG_ARCH_SUNXI)	+= sunxi/
diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig
new file mode 100644
index 000000000000..87a4be46bd98
--- /dev/null
+++ b/drivers/soc/renesas/Kconfig
@@ -0,0 +1,63 @@
+config SOC_RENESAS
+	bool "Renesas SoC driver support" if COMPILE_TEST && !ARCH_RENESAS
+	default y if ARCH_RENESAS
+	select SOC_BUS
+	select RST_RCAR if ARCH_RCAR_GEN1 || ARCH_RCAR_GEN2 || \
+			   ARCH_R8A7795 || ARCH_R8A7796
+	select SYSC_R8A7743 if ARCH_R8A7743
+	select SYSC_R8A7745 if ARCH_R8A7745
+	select SYSC_R8A7779 if ARCH_R8A7779
+	select SYSC_R8A7790 if ARCH_R8A7790
+	select SYSC_R8A7791 if ARCH_R8A7791 || ARCH_R8A7793
+	select SYSC_R8A7792 if ARCH_R8A7792
+	select SYSC_R8A7794 if ARCH_R8A7794
+	select SYSC_R8A7795 if ARCH_R8A7795
+	select SYSC_R8A7796 if ARCH_R8A7796
+
+if SOC_RENESAS
+
+# SoC
+config SYSC_R8A7743
+	bool "RZ/G1M System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7745
+	bool "RZ/G1E System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7779
+	bool "R-Car H1 System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7790
+	bool "R-Car H2 System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7791
+	bool "R-Car M2-W/N System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7792
+	bool "R-Car V2H System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7794
+	bool "R-Car E2 System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7795
+	bool "R-Car H3 System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+config SYSC_R8A7796
+	bool "R-Car M3-W System Controller support" if COMPILE_TEST
+	select SYSC_RCAR
+
+# Family
+config RST_RCAR
+	bool "R-Car Reset Controller support" if COMPILE_TEST
+
+config SYSC_RCAR
+	bool "R-Car System Controller support" if COMPILE_TEST
+
+endif # SOC_RENESAS
diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile
index d9115cb5ed9d..1a1a297b26a7 100644
--- a/drivers/soc/renesas/Makefile
+++ b/drivers/soc/renesas/Makefile
@@ -1,18 +1,17 @@
-obj-$(CONFIG_SOC_BUS)		+= renesas-soc.o
+# Generic, must be first because of soc_device_register()
+obj-$(CONFIG_SOC_RENESAS)	+= renesas-soc.o
 
-obj-$(CONFIG_ARCH_RCAR_GEN1)	+= rcar-rst.o
-obj-$(CONFIG_ARCH_RCAR_GEN2)	+= rcar-rst.o
-obj-$(CONFIG_ARCH_R8A7795)	+= rcar-rst.o
-obj-$(CONFIG_ARCH_R8A7796)	+= rcar-rst.o
+# SoC
+obj-$(CONFIG_SYSC_R8A7743)	+= r8a7743-sysc.o
+obj-$(CONFIG_SYSC_R8A7745)	+= r8a7745-sysc.o
+obj-$(CONFIG_SYSC_R8A7779)	+= r8a7779-sysc.o
+obj-$(CONFIG_SYSC_R8A7790)	+= r8a7790-sysc.o
+obj-$(CONFIG_SYSC_R8A7791)	+= r8a7791-sysc.o
+obj-$(CONFIG_SYSC_R8A7792)	+= r8a7792-sysc.o
+obj-$(CONFIG_SYSC_R8A7794)	+= r8a7794-sysc.o
+obj-$(CONFIG_SYSC_R8A7795)	+= r8a7795-sysc.o
+obj-$(CONFIG_SYSC_R8A7796)	+= r8a7796-sysc.o
 
-obj-$(CONFIG_ARCH_R8A7743)	+= rcar-sysc.o r8a7743-sysc.o
-obj-$(CONFIG_ARCH_R8A7745)	+= rcar-sysc.o r8a7745-sysc.o
-obj-$(CONFIG_ARCH_R8A7779)	+= rcar-sysc.o r8a7779-sysc.o
-obj-$(CONFIG_ARCH_R8A7790)	+= rcar-sysc.o r8a7790-sysc.o
-obj-$(CONFIG_ARCH_R8A7791)	+= rcar-sysc.o r8a7791-sysc.o
-obj-$(CONFIG_ARCH_R8A7792)	+= rcar-sysc.o r8a7792-sysc.o
-# R-Car M2-N is identical to R-Car M2-W w.r.t. power domains.
-obj-$(CONFIG_ARCH_R8A7793)	+= rcar-sysc.o r8a7791-sysc.o
-obj-$(CONFIG_ARCH_R8A7794)	+= rcar-sysc.o r8a7794-sysc.o
-obj-$(CONFIG_ARCH_R8A7795)	+= rcar-sysc.o r8a7795-sysc.o
-obj-$(CONFIG_ARCH_R8A7796)	+= rcar-sysc.o r8a7796-sysc.o
+# Family
+obj-$(CONFIG_RST_RCAR)		+= rcar-rst.o
+obj-$(CONFIG_SYSC_RCAR)		+= rcar-sysc.o
diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c
index 225c35c79d9a..4d0125dff787 100644
--- a/drivers/soc/renesas/rcar-sysc.c
+++ b/drivers/soc/renesas/rcar-sysc.c
@@ -275,35 +275,33 @@ finalize:
 }
 
 static const struct of_device_id rcar_sysc_matches[] = {
-#ifdef CONFIG_ARCH_R8A7743
+#ifdef CONFIG_SYSC_R8A7743
 	{ .compatible = "renesas,r8a7743-sysc", .data = &r8a7743_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7745
+#ifdef CONFIG_SYSC_R8A7745
 	{ .compatible = "renesas,r8a7745-sysc", .data = &r8a7745_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7779
+#ifdef CONFIG_SYSC_R8A7779
 	{ .compatible = "renesas,r8a7779-sysc", .data = &r8a7779_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7790
+#ifdef CONFIG_SYSC_R8A7790
 	{ .compatible = "renesas,r8a7790-sysc", .data = &r8a7790_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7791
+#ifdef CONFIG_SYSC_R8A7791
 	{ .compatible = "renesas,r8a7791-sysc", .data = &r8a7791_sysc_info },
-#endif
-#ifdef CONFIG_ARCH_R8A7792
-	{ .compatible = "renesas,r8a7792-sysc", .data = &r8a7792_sysc_info },
-#endif
-#ifdef CONFIG_ARCH_R8A7793
 	/* R-Car M2-N is identical to R-Car M2-W w.r.t. power domains. */
 	{ .compatible = "renesas,r8a7793-sysc", .data = &r8a7791_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7794
+#ifdef CONFIG_SYSC_R8A7792
+	{ .compatible = "renesas,r8a7792-sysc", .data = &r8a7792_sysc_info },
+#endif
+#ifdef CONFIG_SYSC_R8A7794
 	{ .compatible = "renesas,r8a7794-sysc", .data = &r8a7794_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7795
+#ifdef CONFIG_SYSC_R8A7795
 	{ .compatible = "renesas,r8a7795-sysc", .data = &r8a7795_sysc_info },
 #endif
-#ifdef CONFIG_ARCH_R8A7796
+#ifdef CONFIG_SYSC_R8A7796
 	{ .compatible = "renesas,r8a7796-sysc", .data = &r8a7796_sysc_info },
 #endif
 	{ /* sentinel */ }
diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h
index 787e7ad53d45..2c231f2280a6 100644
--- a/include/linux/soc/renesas/rcar-rst.h
+++ b/include/linux/soc/renesas/rcar-rst.h
@@ -1,8 +1,7 @@
 #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__
 #define __LINUX_SOC_RENESAS_RCAR_RST_H__
 
-#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \
-    defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796)
+#ifdef CONFIG_RST_RCAR
 int rcar_rst_read_mode_pins(u32 *mode);
 #else
 static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; }
-- 
cgit v1.2.3


From 19e72d3abb63cb16d021a4066ce1a18880509e99 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 9 Feb 2017 17:28:50 -0800
Subject: configfs: Introduce config_item_get_unless_zero()

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
[hch: minor style tweak]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/configfs/item.c       | 8 ++++++++
 include/linux/configfs.h | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 8b2a994042dd..a66f6624d899 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item)
 }
 EXPORT_SYMBOL(config_item_get);
 
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+	if (item && kref_get_unless_zero(&item->ci_kref))
+		return item;
+	return NULL;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
 static void config_item_cleanup(struct config_item *item)
 {
 	struct config_item_type *t = item->ci_type;
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 2319b8c108e8..c96709049683 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -74,7 +74,8 @@ extern void config_item_init_type_name(struct config_item *item,
 				       const char *name,
 				       struct config_item_type *type);
 
-extern struct config_item * config_item_get(struct config_item *);
+extern struct config_item *config_item_get(struct config_item *);
+extern struct config_item *config_item_get_unless_zero(struct config_item *);
 extern void config_item_put(struct config_item *);
 
 struct config_item_type {
-- 
cgit v1.2.3


From 3889a803e1da9bd7cd10d6504bf281ee7e55dfd6 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 12 Jun 2017 11:23:41 +0200
Subject: net: factor out a helper to decrement the skb refcount

The same code is replicated in 3 different places; move it to a
common helper.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 13 +++++++++++++
 net/core/datagram.c    |  4 +---
 net/core/skbuff.c      | 14 ++++----------
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d460a4cbda1c..decce3655a48 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -867,6 +867,19 @@ static inline unsigned int skb_napi_id(const struct sk_buff *skb)
 #endif
 }
 
+/* decrement the reference count and return true if we can free the skb */
+static inline bool skb_unref(struct sk_buff *skb)
+{
+	if (unlikely(!skb))
+		return false;
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return false;
+
+	return true;
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index bc46118486fe..e5311a7c70da 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -330,9 +330,7 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
 {
 	bool slow;
 
-	if (likely(atomic_read(&skb->users) == 1))
-		smp_rmb();
-	else if (likely(!atomic_dec_and_test(&skb->users))) {
+	if (!skb_unref(skb)) {
 		sk_peek_offset_bwd(sk, len);
 		return;
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e508c1eae67f..747263c49838 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -694,12 +694,9 @@ EXPORT_SYMBOL(__kfree_skb);
  */
 void kfree_skb(struct sk_buff *skb)
 {
-	if (unlikely(!skb))
-		return;
-	if (likely(atomic_read(&skb->users) == 1))
-		smp_rmb();
-	else if (likely(!atomic_dec_and_test(&skb->users)))
+	if (!skb_unref(skb))
 		return;
+
 	trace_kfree_skb(skb, __builtin_return_address(0));
 	__kfree_skb(skb);
 }
@@ -746,12 +743,9 @@ EXPORT_SYMBOL(skb_tx_error);
  */
 void consume_skb(struct sk_buff *skb)
 {
-	if (unlikely(!skb))
-		return;
-	if (likely(atomic_read(&skb->users) == 1))
-		smp_rmb();
-	else if (likely(!atomic_dec_and_test(&skb->users)))
+	if (!skb_unref(skb))
 		return;
+
 	trace_consume_skb(skb);
 	__kfree_skb(skb);
 }
-- 
cgit v1.2.3


From 0a463c78d25b9464b77311d9dda297550a2d6aa5 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 12 Jun 2017 11:23:42 +0200
Subject: udp: avoid a cache miss on dequeue

Since UDP no more uses sk->destructor, we can clear completely
the skb head state before enqueuing. Amend and use
skb_release_head_state() for that.

All head states share a single cacheline, which is not
normally used/accesses on dequeue. We can avoid entirely accessing
such cacheline implementing and using in the UDP code a specialized
skb free helper which ignores the skb head state.

This saves a cacheline miss at skb deallocation time.

v1 -> v2:
  replaced secpath_reset() with skb_release_head_state()

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 24 ++++++++++++++++++++----
 net/ipv4/udp.c         |  6 +++++-
 3 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index decce3655a48..d66d4feaac86 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -880,10 +880,12 @@ static inline bool skb_unref(struct sk_buff *skb)
 	return true;
 }
 
+void skb_release_head_state(struct sk_buff *skb);
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
 void consume_skb(struct sk_buff *skb);
+void consume_stateless_skb(struct sk_buff *skb);
 void  __kfree_skb(struct sk_buff *skb);
 extern struct kmem_cache *skbuff_head_cache;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 747263c49838..304602784c3b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -643,12 +643,10 @@ fastpath:
 	kmem_cache_free(skbuff_fclone_cache, fclones);
 }
 
-static void skb_release_head_state(struct sk_buff *skb)
+void skb_release_head_state(struct sk_buff *skb)
 {
 	skb_dst_drop(skb);
-#ifdef CONFIG_XFRM
-	secpath_put(skb->sp);
-#endif
+	secpath_reset(skb);
 	if (skb->destructor) {
 		WARN_ON(in_irq());
 		skb->destructor(skb);
@@ -751,6 +749,24 @@ void consume_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(consume_skb);
 
+/**
+ *	consume_stateless_skb - free an skbuff, assuming it is stateless
+ *	@skb: buffer to free
+ *
+ *	Works like consume_skb(), but this variant assumes that all the head
+ *	states have been already dropped.
+ */
+void consume_stateless_skb(struct sk_buff *skb)
+{
+	if (!skb_unref(skb))
+		return;
+
+	trace_consume_skb(skb);
+	if (likely(skb->head))
+		skb_release_data(skb);
+	kfree_skbmem(skb);
+}
+
 void __kfree_skb_flush(void)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fdcb7437cc15..d8b265f1a33b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1359,7 +1359,8 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		sk_peek_offset_bwd(sk, len);
 		unlock_sock_fast(sk, slow);
 	}
-	consume_skb(skb);
+
+	consume_stateless_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
 
@@ -1739,6 +1740,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
+	/* clear all pending head states while they are hot in the cache */
+	skb_release_head_state(skb);
+
 	rc = __udp_enqueue_schedule_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
-- 
cgit v1.2.3


From b46c73378c8436c3cd3fa19cead57a645adb0ed0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 23 Aug 2013 14:12:09 -0700
Subject: driver-core: remove struct bus_type.dev_attrs

Now that all in-kernel users of bus_type.dev_attrs have been converted
to use dev_groups instead, the dev_attrs field, and logic surrounding
it, can be removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/bus.c     | 37 +------------------------------------
 include/linux/device.h |  2 --
 2 files changed, 1 insertion(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index f945f2f0ee06..e162c9a789ba 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -466,35 +466,6 @@ int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
 }
 EXPORT_SYMBOL_GPL(bus_for_each_drv);
 
-static int device_add_attrs(struct bus_type *bus, struct device *dev)
-{
-	int error = 0;
-	int i;
-
-	if (!bus->dev_attrs)
-		return 0;
-
-	for (i = 0; bus->dev_attrs[i].attr.name; i++) {
-		error = device_create_file(dev, &bus->dev_attrs[i]);
-		if (error) {
-			while (--i >= 0)
-				device_remove_file(dev, &bus->dev_attrs[i]);
-			break;
-		}
-	}
-	return error;
-}
-
-static void device_remove_attrs(struct bus_type *bus, struct device *dev)
-{
-	int i;
-
-	if (bus->dev_attrs) {
-		for (i = 0; bus->dev_attrs[i].attr.name; i++)
-			device_remove_file(dev, &bus->dev_attrs[i]);
-	}
-}
-
 /**
  * bus_add_device - add device to bus
  * @dev: device being added
@@ -510,12 +481,9 @@ int bus_add_device(struct device *dev)
 
 	if (bus) {
 		pr_debug("bus: '%s': add device %s\n", bus->name, dev_name(dev));
-		error = device_add_attrs(bus, dev);
-		if (error)
-			goto out_put;
 		error = device_add_groups(dev, bus->dev_groups);
 		if (error)
-			goto out_id;
+			goto out_put;
 		error = sysfs_create_link(&bus->p->devices_kset->kobj,
 						&dev->kobj, dev_name(dev));
 		if (error)
@@ -532,8 +500,6 @@ out_subsys:
 	sysfs_remove_link(&bus->p->devices_kset->kobj, dev_name(dev));
 out_groups:
 	device_remove_groups(dev, bus->dev_groups);
-out_id:
-	device_remove_attrs(bus, dev);
 out_put:
 	bus_put(dev->bus);
 	return error;
@@ -590,7 +556,6 @@ void bus_remove_device(struct device *dev)
 	sysfs_remove_link(&dev->kobj, "subsystem");
 	sysfs_remove_link(&dev->bus->p->devices_kset->kobj,
 			  dev_name(dev));
-	device_remove_attrs(dev->bus, dev);
 	device_remove_groups(dev, dev->bus->dev_groups);
 	if (klist_node_attached(&dev->p->knode_bus))
 		klist_del(&dev->p->knode_bus);
diff --git a/include/linux/device.h b/include/linux/device.h
index 5b725b943cf2..b63cbf2528ef 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -66,7 +66,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @name:	The name of the bus.
  * @dev_name:	Used for subsystems to enumerate devices like ("foo%u", dev->id).
  * @dev_root:	Default device to use as the parent.
- * @dev_attrs:	Default attributes of the devices on the bus.
  * @bus_groups:	Default attributes of the bus.
  * @dev_groups:	Default attributes of the devices on the bus.
  * @drv_groups: Default attributes of the device drivers on the bus.
@@ -112,7 +111,6 @@ struct bus_type {
 	const char		*name;
 	const char		*dev_name;
 	struct device		*dev_root;
-	struct device_attribute	*dev_attrs;	/* use dev_groups instead */
 	const struct attribute_group **bus_groups;
 	const struct attribute_group **dev_groups;
 	const struct attribute_group **drv_groups;
-- 
cgit v1.2.3


From 2371cd90abe3fa1b88e15111abf2cc0a26db6e52 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 18 May 2017 09:18:12 -0700
Subject: scsi: storvsc: remove unnecessary channel inbound lock

In storvsc driver, inbound messages do not go through inbound lock.  The
only effect of this lock was is to provide a barrier for connect and
remove logic.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/hv/channel_mgmt.c  | 1 -
 drivers/scsi/storvsc_drv.c | 8 +++-----
 include/linux/hyperv.h     | 1 -
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 735f9363f2e4..685572bae1f0 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -332,7 +332,6 @@ static struct vmbus_channel *alloc_channel(void)
 	if (!channel)
 		return NULL;
 
-	spin_lock_init(&channel->inbound_lock);
 	spin_lock_init(&channel->lock);
 
 	INIT_LIST_HEAD(&channel->sc_list);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index f8a1649e4c63..8d955db6424f 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1206,13 +1206,13 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
 static int storvsc_dev_remove(struct hv_device *device)
 {
 	struct storvsc_device *stor_device;
-	unsigned long flags;
 
 	stor_device = hv_get_drvdata(device);
 
-	spin_lock_irqsave(&device->channel->inbound_lock, flags);
 	stor_device->destroy = true;
-	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
+
+	/* Make sure flag is set before waiting */
+	wmb();
 
 	/*
 	 * At this point, all outbound traffic should be disable. We
@@ -1229,9 +1229,7 @@ static int storvsc_dev_remove(struct hv_device *device)
 	 * we have drained - to drain the outgoing packets, we need to
 	 * allow incoming packets.
 	 */
-	spin_lock_irqsave(&device->channel->inbound_lock, flags);
 	hv_set_drvdata(device, NULL);
-	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
 
 	/* Close the channel */
 	vmbus_close(device->channel);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e09fc8290c2f..b7d7bbec74e0 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -744,7 +744,6 @@ struct vmbus_channel {
 	u32 ringbuffer_pagecount;
 	struct hv_ring_buffer_info outbound;	/* send to parent */
 	struct hv_ring_buffer_info inbound;	/* receive from parent */
-	spinlock_t inbound_lock;
 
 	struct vmbus_close_msg close_msg;
 
-- 
cgit v1.2.3


From 91b5ab628929d97357108594610e7c07be93e2fd Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Fri, 9 Jun 2017 13:08:42 +0100
Subject: cfg80211: support 4-way handshake offloading for WPA/WPA2-PSK

Let drivers advertise support for station-mode 4-way handshake
offloading with a new NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag.

Extend use of NL80211_ATTR_PMK attribute indicating it might be passed
as part of NL80211_CMD_CONNECT command, and contain the PSK (which is
the PMK, hence the name.)

The driver/device is assumed to handle the 4-way handshake by
itself in this case (including key derivations, etc.), instead
of relying on the supplicant.

This patch is somewhat based on this one (by Vladimir Kondratiev):
https://patchwork.kernel.org/patch/1309561/.

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: Eliad Peller <eliadx.peller@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
[arend.vanspriel@broadcom.com rebase dealing with existing ATTR_PMK]
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
[reword NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK docs to indicate
that this offload might be required]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  1 +
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 22 ++++++++++++++++++++--
 net/wireless/nl80211.c       |  9 +++++++++
 4 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 69033353d0d1..e97ca3a9a67b 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2401,6 +2401,7 @@ enum ieee80211_sa_query_action {
 #define WLAN_MAX_KEY_LEN		32
 
 #define WLAN_PMKID_LEN			16
+#define WLAN_PMK_LEN			32
 
 #define WLAN_OUI_WFA			0x506f9a
 #define WLAN_OUI_TYPE_WFA_P2P		9
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fa25fbb67cb6..1b288bac5d1a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -649,6 +649,7 @@ struct survey_info {
  * @wep_keys: static WEP keys, if not NULL points to an array of
  *	CFG80211_MAX_WEP_KEYS WEP keys
  * @wep_tx_key: key index (0..3) of the default TX static WEP key
+ * @psk: PSK (for devices supporting 4-way-handshake offload)
  */
 struct cfg80211_crypto_settings {
 	u32 wpa_versions;
@@ -662,6 +663,7 @@ struct cfg80211_crypto_settings {
 	bool control_port_no_encrypt;
 	struct key_params *wep_keys;
 	int wep_tx_key;
+	const u8 *psk;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b8c44b98f12d..f1f7da25bca4 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -172,6 +172,18 @@
  * Multiple such rules can be created.
  */
 
+/**
+ * DOC: WPA/WPA2 EAPOL handshake offload
+ *
+ * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers
+ * can indicate they support offloading EAPOL handshakes for WPA/WPA2
+ * preshared key authentication. In %NL80211_CMD_CONNECT the preshared
+ * key should be specified using %NL80211_ATTR_PMK. Drivers supporting
+ * this offload may reject the %NL80211_CMD_CONNECT when no preshared
+ * key material is provided, for example when that driver does not
+ * support setting the temporal keys through %CMD_NEW_KEY.
+ */
+
 /**
  * DOC: FILS shared key authentication offload
  *
@@ -2080,8 +2092,10 @@ enum nl80211_commands {
  *	identifying the scope of PMKSAs. This is used with
  *	@NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA.
  *
- * @NL80211_ATTR_PMK: PMK for the PMKSA identified by %NL80211_ATTR_PMKID.
- *	This is used with @NL80211_CMD_SET_PMKSA.
+ * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with
+ *	%NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID.
+ *	For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way
+ *	handshake for WPA/WPA2-PSK networks.
  *
  * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to
  *	indicate that it supports multiple active scheduled scan requests.
@@ -4852,6 +4866,9 @@ enum nl80211_feature_flags {
  *	RSSI threshold values to monitor rather than exactly one threshold.
  * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key
  *	authentication with %NL80211_CMD_CONNECT.
+ * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way
+ *	handshake with PSK in station mode (PSK is passed as part of the connect
+ *	and associate commands), doing it in the host might not be supported.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4872,6 +4889,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI,
 	NL80211_EXT_FEATURE_CQM_RSSI_LIST,
 	NL80211_EXT_FEATURE_FILS_SK_OFFLOAD,
+	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9eb59196a378..2c6863aee4e4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8168,6 +8168,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 		memcpy(settings->akm_suites, data, len);
 	}
 
+	if (info->attrs[NL80211_ATTR_PMK]) {
+		if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN)
+			return -EINVAL;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK))
+			return -EINVAL;
+		settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 3a00df5707b6af715e78c26569800e0c2eb615fe Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 9 Jun 2017 13:08:43 +0100
Subject: cfg80211: support 4-way handshake offloading for 802.1X

Add API for setting the PMK to the driver. For FT support, allow
setting also the PMK-R0 Name.

This can be used by drivers that support 4-Way handshake offload
while IEEE802.1X authentication is managed by upper layers.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
[arend.vanspriel@broadcom.com: add WANT_1X_4WAY_HS attribute]
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
[reword NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X docs a bit to
say that the device may require it]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |   3 ++
 include/net/cfg80211.h       |  32 +++++++++++++
 include/uapi/linux/nl80211.h |  39 +++++++++++++++-
 net/wireless/core.c          |   5 +++
 net/wireless/nl80211.c       | 105 +++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      |  25 +++++++++++
 net/wireless/trace.h         |  60 +++++++++++++++++++++++++
 7 files changed, 268 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e97ca3a9a67b..34e1bcd2d7ff 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2400,8 +2400,11 @@ enum ieee80211_sa_query_action {
 
 #define WLAN_MAX_KEY_LEN		32
 
+#define WLAN_PMK_NAME_LEN		16
 #define WLAN_PMKID_LEN			16
+#define WLAN_PMK_LEN_EAP_LEAP		16
 #define WLAN_PMK_LEN			32
+#define WLAN_PMK_LEN_SUITE_B_192	48
 
 #define WLAN_OUI_WFA			0x506f9a
 #define WLAN_OUI_TYPE_WFA_P2P		9
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1b288bac5d1a..2174e51c6595 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2112,6 +2112,8 @@ struct cfg80211_bss_selection {
  * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional
  *	keys in FILS or %NULL if not specified.
  * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets.
+ * @want_1x: indicates user-space supports and wants to use 802.1X driver
+ *	offload of 4-way handshake.
  */
 struct cfg80211_connect_params {
 	struct ieee80211_channel *channel;
@@ -2144,6 +2146,7 @@ struct cfg80211_connect_params {
 	u16 fils_erp_next_seq_num;
 	const u8 *fils_erp_rrk;
 	size_t fils_erp_rrk_len;
+	bool want_1x;
 };
 
 /**
@@ -2565,6 +2568,23 @@ struct cfg80211_nan_func {
 	u64 cookie;
 };
 
+/**
+ * struct cfg80211_pmk_conf - PMK configuration
+ *
+ * @aa: authenticator address
+ * @pmk_len: PMK length in bytes.
+ * @pmk: the PMK material
+ * @pmk_r0_name: PMK-R0 Name. NULL if not applicable (i.e., the PMK
+ *	is not PMK-R0). When pmk_r0_name is not NULL, the pmk field
+ *	holds PMK-R0.
+ */
+struct cfg80211_pmk_conf {
+	const u8 *aa;
+	u8 pmk_len;
+	const u8 *pmk;
+	const u8 *pmk_r0_name;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -2881,6 +2901,13 @@ struct cfg80211_nan_func {
  *	All other parameters must be ignored.
  *
  * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS
+ *
+ * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake.
+ *	If not deleted through @del_pmk the PMK remains valid until disconnect
+ *	upon which the driver should clear it.
+ *	(invoked with the wireless_dev mutex held)
+ * @del_pmk: delete the previously configured PMK for the given authenticator.
+ *	(invoked with the wireless_dev mutex held)
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3169,6 +3196,11 @@ struct cfg80211_ops {
 	int	(*set_multicast_to_unicast)(struct wiphy *wiphy,
 					    struct net_device *dev,
 					    const bool enabled);
+
+	int	(*set_pmk)(struct wiphy *wiphy, struct net_device *dev,
+			   const struct cfg80211_pmk_conf *conf);
+	int	(*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
+			   const u8 *aa);
 };
 
 /*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f1f7da25bca4..073e26850195 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -182,6 +182,17 @@
  * this offload may reject the %NL80211_CMD_CONNECT when no preshared
  * key material is provided, for example when that driver does not
  * support setting the temporal keys through %CMD_NEW_KEY.
+ *
+ * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be
+ * set by drivers indicating offload support of the PTK/GTK EAPOL
+ * handshakes during 802.1X authentication. In order to use the offload
+ * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS
+ * attribute flag. Drivers supporting this offload may reject the
+ * %NL80211_CMD_CONNECT when the attribute flag is not present.
+ *
+ * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK
+ * using %NL80211_CMD_SET_PMK. For offloaded FT support also
+ * %NL80211_ATTR_PMKR0_NAME must be provided.
  */
 
 /**
@@ -959,6 +970,14 @@
  *	does not result in a change for the current association. Currently,
  *	only the %NL80211_ATTR_IE data is used and updated with this command.
  *
+ * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0
+ *	for the given authenticator address (specified with &NL80211_ATTR_MAC).
+ *	When &NL80211_ATTR_PMKR0_NAME is set, &NL80211_ATTR_PMK specifies the
+ *	PMK-R0, otherwise it specifies the PMK.
+ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
+ *	configured PMK for the authenticator address identified by
+ *	&NL80211_ATTR_MAC.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1158,6 +1177,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_UPDATE_CONNECT_PARAMS,
 
+	NL80211_CMD_SET_PMK,
+	NL80211_CMD_DEL_PMK,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2095,13 +2117,20 @@ enum nl80211_commands {
  * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with
  *	%NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID.
  *	For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way
- *	handshake for WPA/WPA2-PSK networks.
+ *	handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is
+ *	used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute
+ *	specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well.
  *
  * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to
  *	indicate that it supports multiple active scheduled scan requests.
  * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled
  *	scan request that may be active for the device (u32).
  *
+ * @NL80211_ATTR_WANT_1X_4WAY_HS: flag attribute which user-space can include
+ *	in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it
+ *	wants to use the supported offload of the 4-way handshake.
+ * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2524,6 +2553,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_SCHED_SCAN_MULTI,
 	NL80211_ATTR_SCHED_SCAN_MAX_REQS,
 
+	NL80211_ATTR_WANT_1X_4WAY_HS,
+	NL80211_ATTR_PMKR0_NAME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -4869,6 +4901,10 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way
  *	handshake with PSK in station mode (PSK is passed as part of the connect
  *	and associate commands), doing it in the host might not be supported.
+ * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X: Device wants to do doing 4-way
+ *	handshake with 802.1X in station mode (will pass EAP frames to the host
+ *	and accept the set_pmk/del_pmk commands), doing it in the host might not
+ *	be supported.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4890,6 +4926,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_CQM_RSSI_LIST,
 	NL80211_EXT_FEATURE_FILS_SK_OFFLOAD,
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK,
+	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 83ea164f16b3..7b33e8c366bc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -711,6 +711,11 @@ int wiphy_register(struct wiphy *wiphy)
 		    (wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2))))
 		return -EINVAL;
 
+	if (WARN_ON(wiphy_ext_feature_isset(&rdev->wiphy,
+					    NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X) &&
+		    (!rdev->ops->set_pmk || !rdev->ops->del_pmk)))
+		return -EINVAL;
+
 	if (wiphy->addresses)
 		memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2c6863aee4e4..8148b01bcdd2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8881,6 +8881,12 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 
 	connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
 
+	if (info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS] &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+		return -EINVAL;
+	connect.want_1x = info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS];
+
 	err = nl80211_crypto_settings(rdev, info, &connect.crypto,
 				      NL80211_MAX_NR_CIPHER_SUITES);
 	if (err)
@@ -12265,6 +12271,90 @@ static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
 	return rdev_set_multicast_to_unicast(rdev, dev, enabled);
 }
 
+static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_pmk_conf pmk_conf = {};
+	int ret;
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK])
+		return -EINVAL;
+
+	wdev_lock(wdev);
+	if (!wdev->current_bss) {
+		ret = -ENOTCONN;
+		goto out;
+	}
+
+	pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+	pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
+	if (pmk_conf.pmk_len != WLAN_PMK_LEN &&
+	    pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (info->attrs[NL80211_ATTR_PMKR0_NAME]) {
+		int r0_name_len = nla_len(info->attrs[NL80211_ATTR_PMKR0_NAME]);
+
+		if (r0_name_len != WLAN_PMK_NAME_LEN) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		pmk_conf.pmk_r0_name =
+			nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]);
+	}
+
+	ret = rdev_set_pmk(rdev, dev, &pmk_conf);
+out:
+	wdev_unlock(wdev);
+	return ret;
+}
+
+static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *aa;
+	int ret;
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	wdev_lock(wdev);
+	aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	ret = rdev_del_pmk(rdev, dev, aa);
+	wdev_unlock(wdev);
+
+	return ret;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -13140,6 +13230,21 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_SET_PMK,
+		.doit = nl80211_set_pmk,
+		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_PMK,
+		.doit = nl80211_del_pmk,
+		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+
 };
 
 static struct genl_family nl80211_fam __ro_after_init = {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0598c1e5d0ad..ce23d7d49960 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1164,4 +1164,29 @@ rdev_set_coalesce(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
+
+static inline int rdev_set_pmk(struct cfg80211_registered_device *rdev,
+			       struct net_device *dev,
+			       struct cfg80211_pmk_conf *pmk_conf)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_set_pmk(&rdev->wiphy, dev, pmk_conf);
+	if (rdev->ops->set_pmk)
+		ret = rdev->ops->set_pmk(&rdev->wiphy, dev, pmk_conf);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
+			       struct net_device *dev, const u8 *aa)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_del_pmk(&rdev->wiphy, dev, aa);
+	if (rdev->ops->del_pmk)
+		ret = rdev->ops->del_pmk(&rdev->wiphy, dev, aa);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ca8b2059f92c..0f8db41eaddb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2258,6 +2258,66 @@ TRACE_EVENT(rdev_tdls_cancel_channel_switch,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr))
 );
 
+TRACE_EVENT(rdev_set_pmk,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_pmk_conf *pmk_conf),
+
+	TP_ARGS(wiphy, netdev, pmk_conf),
+
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(aa)
+		__field(u8, pmk_len)
+		__field(u8, pmk_r0_name_len)
+		__dynamic_array(u8, pmk, pmk_conf->pmk_len)
+		__dynamic_array(u8, pmk_r0_name, WLAN_PMK_NAME_LEN)
+	),
+
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(aa, pmk_conf->aa);
+		__entry->pmk_len = pmk_conf->pmk_len;
+		__entry->pmk_r0_name_len =
+		pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0;
+		memcpy(__get_dynamic_array(pmk), pmk_conf->pmk,
+		       pmk_conf->pmk_len);
+		memcpy(__get_dynamic_array(pmk_r0_name), pmk_conf->pmk_r0_name,
+		       pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0);
+	),
+
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT
+		  "pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG,
+		  NETDEV_PR_ARG, MAC_PR_ARG(aa), __entry->pmk_len,
+		  __print_array(__get_dynamic_array(pmk),
+				__get_dynamic_array_len(pmk), 1),
+		  __entry->pmk_r0_name_len ?
+		  __print_array(__get_dynamic_array(pmk_r0_name),
+				__get_dynamic_array_len(pmk_r0_name), 1) : "")
+);
+
+TRACE_EVENT(rdev_del_pmk,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *aa),
+
+	TP_ARGS(wiphy, netdev, aa),
+
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(aa)
+	),
+
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(aa, aa);
+	),
+
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
-- 
cgit v1.2.3


From 7cf916bd639bd26db7214f2205bccdb4b9306256 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 13 Jun 2017 16:01:13 +1000
Subject: usb: Fix typo in the definition of Endpoint[out]Request

The current definition is wrong. This breaks my upcoming
Aspeed virtual hub driver.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/hcd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 50398b69ca44..a1f03ebfde47 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -565,9 +565,9 @@ extern void usb_ep0_reinit(struct usb_device *);
 	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
 
 #define EndpointRequest \
-	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
+	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)
 #define EndpointOutRequest \
-	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
+	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)
 
 /* class requests from the USB 2.0 hub spec, table 11-15 */
 #define HUB_CLASS_REQ(dir, type, request) ((((dir) | (type)) << 8) | (request))
-- 
cgit v1.2.3


From 4e75e1d7dac9d7c95c57eceb451d01f2afcc8626 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 6 Jun 2017 17:59:00 +0200
Subject: driver core: add helper to reuse a device-tree node

Add a helper function to be used when reusing the device-tree node of
another device.

It is fairly common for drivers to reuse the device-tree node of a
parent (or other ancestor) device when creating class or bus devices
(e.g. gpio chips, i2c adapters, iio chips, spi masters, serdev, phys,
usb root hubs). But reusing a device-tree node may cause problems if the
new device is later probed as for example driver core would currently
attempt to reinitialise an already active associated pinmux
configuration.

Other potential issues include the platform-bus code unconditionally
dropping the device-tree node reference in its device destructor,
reinitialisation of other bus-managed resources such as clocks, and the
recently added DMA-setup in driver core.

Note that for most examples above this is currently not an issue as the
devices are never probed, but this is a problem for the USB bus which
has recently gained device-tree support. This was discovered and
worked-around in a rather ad-hoc fashion by commit dc5878abf49c ("usb:
core: move root hub's device node assignment after it is added to bus")
by not setting the of_node pointer until after the root-hub device has
been registered.

Instead we can allow devices to reuse a device-tree node by setting a
flag in their struct device that can be used by core, bus and driver
code to avoid resources from being over-allocated.

Note that the helper also grabs an extra reference to the device node,
which specifically balances the unconditional put in the platform-device
destructor.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 16 ++++++++++++++++
 include/linux/device.h |  4 ++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index bbecaf9293be..c3064ff09af5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2884,3 +2884,19 @@ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
 	else
 		dev->fwnode = fwnode;
 }
+
+/**
+ * device_set_of_node_from_dev - reuse device-tree node of another device
+ * @dev: device whose device-tree node is being set
+ * @dev2: device whose device-tree node is being reused
+ *
+ * Takes another reference to the new device-tree node after first dropping
+ * any reference held to the old node.
+ */
+void device_set_of_node_from_dev(struct device *dev, const struct device *dev2)
+{
+	of_node_put(dev->of_node);
+	dev->of_node = of_node_get(dev2->of_node);
+	dev->of_node_reused = true;
+}
+EXPORT_SYMBOL_GPL(device_set_of_node_from_dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index 9ef518af5515..60ab00b13095 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -879,6 +879,8 @@ struct dev_links_info {
  *
  * @offline_disabled: If set, the device is permanently online.
  * @offline:	Set after successful invocation of bus type's .offline().
+ * @of_node_reused: Set if the device-tree node is shared with an ancestor
+ *              device.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
@@ -966,6 +968,7 @@ struct device {
 
 	bool			offline_disabled:1;
 	bool			offline:1;
+	bool			of_node_reused:1;
 };
 
 static inline struct device *kobj_to_dev(struct kobject *kobj)
@@ -1144,6 +1147,7 @@ extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
 extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
+void device_set_of_node_from_dev(struct device *dev, const struct device *dev2);
 
 static inline int dev_num_vf(struct device *dev)
 {
-- 
cgit v1.2.3


From 39673e1995381b09a63cc7e9d0aea7cf871cb359 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 15:36:28 +0100
Subject: nvme.h: add struct nvme_host_mem_buf_desc and HMB flags

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 include/linux/nvme.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index e400a69fa1d3..180a2fdbcaef 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -587,6 +587,11 @@ struct nvme_feat_auto_pst {
 	__le64 entries[32];
 };
 
+enum {
+	NVME_HOST_MEM_ENABLE	= (1 << 0),
+	NVME_HOST_MEM_RETURN	= (1 << 1),
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -671,6 +676,12 @@ struct nvme_features {
 	__u32			rsvd12[4];
 };
 
+struct nvme_host_mem_buf_desc {
+	__le64			addr;
+	__le32			size;
+	__u32			rsvd;
+};
+
 struct nvme_create_cq {
 	__u8			opcode;
 	__u8			flags;
-- 
cgit v1.2.3


From b85cf7348ab50e2042b732e19031b1d22eedc741 Mon Sep 17 00:00:00 2001
From: Arnav Dawn <a.dawn@samsung.com>
Date: Fri, 12 May 2017 17:12:03 +0200
Subject: nvme.h: add dword 12 - 15 fields to struct nvme_features

Signed-off-by: Arnav Dawn <a.dawn@samsung.com>
[hch: split from a larger patch, new changelog]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 include/linux/nvme.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 180a2fdbcaef..51ca4771be2c 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -673,7 +673,10 @@ struct nvme_features {
 	union nvme_data_ptr	dptr;
 	__le32			fid;
 	__le32			dword11;
-	__u32			rsvd12[4];
+	__le32                  dword12;
+	__le32                  dword13;
+	__le32                  dword14;
+	__le32                  dword15;
 };
 
 struct nvme_host_mem_buf_desc {
-- 
cgit v1.2.3


From 7b08579f5c2a5c0e94b4a9f298339c2fdf0fcc89 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 6 Jun 2017 12:54:41 +0200
Subject: tty: drop unused alt_speed from tty_struct

Drop the now unused alt_speed field from struct tty_struct.

Setting an alt_speed using the ASYNC_SPD flags has been deprecated since
v2.1.69, and has been broken for all tty drivers but serial-core since
v3.10 and commit 6865ff222cca ("TTY: do not warn about setting speed via
SPD_*") without anyone noticing.

Note that serial-core still supports changing speed using TIOCSSERIAL
and SPD flags (including "alt-speeds"), but also warns about it being
deprecated since pre-git.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index eccb4ec30a8a..585cf2b5ca94 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -316,7 +316,6 @@ struct tty_struct {
 
 	struct tty_struct *link;
 	struct fasync_struct *fasync;
-	int alt_speed;		/* For magic substitution of 38400 bps */
 	wait_queue_head_t write_wait;
 	wait_queue_head_t read_wait;
 	struct work_struct hangup_work;
-- 
cgit v1.2.3


From be36e000bc2d28512721c6e09c3df920b1bfad5e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Jun 2017 12:18:02 +0200
Subject: ARM: at91: fix at91_suspend_entering_slow_clock link error

When CONFIG_ARCH_AT91 is enabled, but none of the specific SoC support
is in use, some at91 specific drivers fail to link:

drivers/tty/serial/atmel_serial.o: In function `atmel_serial_suspend':
atmel_serial.c:(.text.atmel_serial_suspend+0x1e): undefined reference to `at91_suspend_entering_slow_clock'
drivers/usb/host/ohci-at91.o: In function `ohci_hcd_at91_drv_suspend':
ohci-at91.c:(.text.ohci_hcd_at91_drv_suspend+0x12): undefined reference to `at91_suspend_entering_slow_clock'
drivers/usb/gadget/udc/at91_udc.o: In function `at91udc_suspend':
at91_udc.c:(.text.at91udc_suspend+0x26): undefined reference to `at91_suspend_entering_slow_clock'

This changes the at91_suspend_entering_slow_clock hack once more, adding
an alternative inline implementation that is used exactly in those cases
that don't provide the normal implementation.

Fixes: c1892c2379d2 ("ARM: at91: handle CONFIG_PM for armv7m configurations")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 arch/arm/mach-at91/samv7.c          | 9 ---------
 include/linux/platform_data/atmel.h | 7 +++++++
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/samv7.c b/arch/arm/mach-at91/samv7.c
index 910f0c68db62..11386f190c83 100644
--- a/arch/arm/mach-at91/samv7.c
+++ b/arch/arm/mach-at91/samv7.c
@@ -15,15 +15,6 @@
 #include <asm/system_misc.h>
 #include "generic.h"
 
-#ifdef CONFIG_PM
-/* This function has to be defined for various drivers that are using it */
-int at91_suspend_entering_slow_clock(void)
-{
-	return 0;
-}
-EXPORT_SYMBOL(at91_suspend_entering_slow_clock);
-#endif
-
 static const char *const samv7_dt_board_compat[] __initconst = {
 	"atmel,samv7",
 	NULL
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index 3c8825b67298..7b6dce7d6d33 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -52,6 +52,13 @@ struct atmel_uart_data {
 };
 
 /* FIXME: this needs a better location, but gets stuff building again */
+#ifdef CONFIG_ATMEL_PM
 extern int at91_suspend_entering_slow_clock(void);
+#else
+static inline int at91_suspend_entering_slow_clock(void)
+{
+	return 0;
+}
+#endif
 
 #endif /* __ATMEL_H__ */
-- 
cgit v1.2.3


From 67fdfda4a99edea939a63bad1797d69dd8de00d6 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Tue, 6 Jun 2017 16:03:19 +0300
Subject: usb: gadget: core: introduce ->udc_set_speed() method

Sometimes, the gadget driver we want to run has max_speed lower than
what the UDC supports. In such situations, UDC might want to make sure
we don't try to connect on speeds not supported by the gadget
driver (e.g. super-speed capable dwc3 with high-speed capable g_midi)
because that will just fail.

In order to make sure this situation never happens, we introduce a new
optional ->udc_set_speed() method which can be implemented by
interested UDC drivers.

Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/core.c | 20 ++++++++++++++++++++
 include/linux/usb/gadget.h    |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 62d52fc0fcdc..eec388bccb25 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1055,6 +1055,23 @@ static inline void usb_gadget_udc_stop(struct usb_udc *udc)
 	udc->gadget->ops->udc_stop(udc->gadget);
 }
 
+/**
+ * usb_gadget_udc_set_speed - tells usb device controller speed supported by
+ *    current driver
+ * @udc: The device we want to set maximum speed
+ * @speed: The maximum speed to allowed to run
+ *
+ * This call is issued by the UDC Class driver before calling
+ * usb_gadget_udc_start() in order to make sure that we don't try to
+ * connect on speeds the gadget driver doesn't support.
+ */
+static inline void usb_gadget_udc_set_speed(struct usb_udc *udc,
+					    enum usb_device_speed speed)
+{
+	if (udc->gadget->ops->udc_set_speed)
+		udc->gadget->ops->udc_set_speed(udc->gadget, speed);
+}
+
 /**
  * usb_udc_release - release the usb_udc struct
  * @dev: the dev member within usb_udc
@@ -1288,6 +1305,9 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
 	udc->dev.driver = &driver->driver;
 	udc->gadget->dev.driver = &driver->driver;
 
+	if (driver->max_speed < udc->gadget->max_speed)
+		usb_gadget_udc_set_speed(udc, driver->max_speed);
+
 	ret = driver->bind(udc->gadget, driver);
 	if (ret)
 		goto err1;
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 3ee5f2a7c0b4..1a4a4bacfae6 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -304,6 +304,7 @@ struct usb_gadget_ops {
 	int	(*udc_start)(struct usb_gadget *,
 			struct usb_gadget_driver *);
 	int	(*udc_stop)(struct usb_gadget *);
+	void	(*udc_set_speed)(struct usb_gadget *, enum usb_device_speed);
 	struct usb_ep *(*match_ep)(struct usb_gadget *,
 			struct usb_endpoint_descriptor *,
 			struct usb_ss_ep_comp_descriptor *);
-- 
cgit v1.2.3


From 408455245a48a1ecabd90133bae0baec3ec4cfb8 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 29 Mar 2017 18:34:50 +0200
Subject: PM / Domains: Allow overriding the ->xlate() callback

Allow generic power domain providers to override the ->xlate() callback
in case the default genpd_xlate_onecell() translation callback is not
good enough.

One potential use-case for this is to allow generic power domains to be
specified by an ID rather than an index.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/base/power/domain.c | 8 ++++----
 include/linux/pm_domain.h   | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index da49a8383dc3..d3f1d96f75e9 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1584,9 +1584,6 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove);
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
 
-typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
-						   void *data);
-
 /*
  * Device Tree based PM domain providers.
  *
@@ -1742,6 +1739,9 @@ int of_genpd_add_provider_onecell(struct device_node *np,
 
 	mutex_lock(&gpd_list_lock);
 
+	if (!data->xlate)
+		data->xlate = genpd_xlate_onecell;
+
 	for (i = 0; i < data->num_domains; i++) {
 		if (!data->domains[i])
 			continue;
@@ -1752,7 +1752,7 @@ int of_genpd_add_provider_onecell(struct device_node *np,
 		data->domains[i]->has_provider = true;
 	}
 
-	ret = genpd_add_provider(np, genpd_xlate_onecell, data);
+	ret = genpd_add_provider(np, data->xlate, data);
 	if (ret < 0)
 		goto error;
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b7803a251044..41004d97cefa 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -206,9 +206,13 @@ static inline void pm_genpd_syscore_poweron(struct device *dev) {}
 /* OF PM domain providers */
 struct of_device_id;
 
+typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
+						   void *data);
+
 struct genpd_onecell_data {
 	struct generic_pm_domain **domains;
 	unsigned int num_domains;
+	genpd_xlate_t xlate;
 };
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
-- 
cgit v1.2.3


From 8a3d809373c6790b3958f74fa5640aedd4e804dd Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 13 Jun 2017 14:51:07 +0200
Subject: ARM: at91: remove atmel_nand_data

Since AVR32 is gone and the driver rework, struct atmel_nand_data is not
used anywhere.

Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/platform_data/atmel.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index 7b6dce7d6d33..70c5c766628e 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -7,8 +7,6 @@
 #ifndef __ATMEL_H__
 #define __ATMEL_H__
 
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
 #include <linux/serial.h>
 
  /* Compact Flash */
@@ -23,25 +21,6 @@ struct at91_cf_data {
 #define AT91_IDE_SWAP_A0_A2	0x02
 };
 
- /* NAND / SmartMedia */
-struct atmel_nand_data {
-	int		enable_pin;		/* chip enable */
-	int		det_pin;		/* card detect */
-	int		rdy_pin;		/* ready/busy */
-	u8		rdy_pin_active_low;	/* rdy_pin value is inverted */
-	u8		ale;			/* address line number connected to ALE */
-	u8		cle;			/* address line number connected to CLE */
-	u8		bus_width_16;		/* buswidth is 16 bit */
-	u8		ecc_mode;		/* ecc mode */
-	u8		on_flash_bbt;		/* bbt on flash */
-	struct mtd_partition *parts;
-	unsigned int	num_parts;
-	bool		has_dma;		/* support dma transfer */
-
-	/* default is false, only for at32ap7000 chip is true */
-	bool		need_reset_workaround;
-};
-
  /* Serial */
 struct atmel_uart_data {
 	int			num;		/* port num */
-- 
cgit v1.2.3


From d396e84c56047b303cac378dde4b2e5cc430b336 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Mon, 12 Jun 2017 23:55:38 +0300
Subject: mdio_bus: handle only single PHY reset GPIO

Commit 4c5e7a2c0501 ("dt-bindings: mdio: Clarify binding document")
declared that a MDIO reset GPIO property should have only a single GPIO
reference/specifier, however the supporting code was left intact, still
burdening the kernel with now apparently useless loops -- get rid of them.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 53 +++++++++++++++++-----------------------------
 drivers/of/of_mdio.c       |  1 -
 include/linux/phy.h        |  6 ++----
 3 files changed, 21 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 4c169dbf9138..5d37716e7796 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -353,33 +353,22 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 
 	mutex_init(&bus->mdio_lock);
 
-	/* de-assert bus level PHY GPIO resets */
-	if (bus->num_reset_gpios > 0) {
-		bus->reset_gpiod = devm_kcalloc(&bus->dev,
-						 bus->num_reset_gpios,
-						 sizeof(struct gpio_desc *),
-						 GFP_KERNEL);
-		if (!bus->reset_gpiod)
-			return -ENOMEM;
-	}
-
-	for (i = 0; i < bus->num_reset_gpios; i++) {
-		gpiod = devm_gpiod_get_index(&bus->dev, "reset", i,
-					     GPIOD_OUT_LOW);
-		if (IS_ERR(gpiod)) {
-			err = PTR_ERR(gpiod);
-			if (err != -ENOENT) {
-				dev_err(&bus->dev,
-					"mii_bus %s couldn't get reset GPIO\n",
-					bus->id);
-				return err;
-			}
-		} else {
-			bus->reset_gpiod[i] = gpiod;
-			gpiod_set_value_cansleep(gpiod, 1);
-			udelay(bus->reset_delay_us);
-			gpiod_set_value_cansleep(gpiod, 0);
+	/* de-assert bus level PHY GPIO reset */
+	gpiod = devm_gpiod_get(&bus->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(gpiod)) {
+		err = PTR_ERR(gpiod);
+		if (err != -ENOENT) {
+			dev_err(&bus->dev,
+				"mii_bus %s couldn't get reset GPIO\n",
+				bus->id);
+			return err;
 		}
+	} else	{
+		bus->reset_gpiod = gpiod;
+
+		gpiod_set_value_cansleep(gpiod, 1);
+		udelay(bus->reset_delay_us);
+		gpiod_set_value_cansleep(gpiod, 0);
 	}
 
 	if (bus->reset)
@@ -414,10 +403,8 @@ error:
 	}
 
 	/* Put PHYs in RESET to save power */
-	for (i = 0; i < bus->num_reset_gpios; i++) {
-		if (bus->reset_gpiod[i])
-			gpiod_set_value_cansleep(bus->reset_gpiod[i], 1);
-	}
+	if (bus->reset_gpiod)
+		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
 	device_del(&bus->dev);
 	return err;
@@ -442,10 +429,8 @@ void mdiobus_unregister(struct mii_bus *bus)
 	}
 
 	/* Put PHYs in RESET to save power */
-	for (i = 0; i < bus->num_reset_gpios; i++) {
-		if (bus->reset_gpiod[i])
-			gpiod_set_value_cansleep(bus->reset_gpiod[i], 1);
-	}
+	if (bus->reset_gpiod)
+		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
 	device_del(&bus->dev);
 }
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 7e4c80f9b6cd..9596be9a49d0 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -226,7 +226,6 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 	/* Get bus level PHY reset GPIO details */
 	mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY;
 	of_property_read_u32(np, "reset-delay-us", &mdio->reset_delay_us);
-	mdio->num_reset_gpios = of_gpio_named_count(np, "reset-gpios");
 
 	/* Register the MDIO bus */
 	rc = mdiobus_register(mdio);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 414242200a90..51bea6593409 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -226,10 +226,8 @@ struct mii_bus {
 
 	/* GPIO reset pulse width in microseconds */
 	int reset_delay_us;
-	/* Number of reset GPIOs */
-	int num_reset_gpios;
-	/* Array of RESET GPIO descriptors */
-	struct gpio_desc **reset_gpiod;
+	/* RESET GPIO descriptor pointer */
+	struct gpio_desc *reset_gpiod;
 };
 #define to_mii_bus(d) container_of(d, struct mii_bus, dev)
 
-- 
cgit v1.2.3


From 5514174fe9c61c83bd8781c1e048ea6b4bf16a14 Mon Sep 17 00:00:00 2001
From: "yuval.shaia@oracle.com" <yuval.shaia@oracle.com>
Date: Tue, 13 Jun 2017 10:09:46 +0300
Subject: net: phy: Make phy_ethtool_ksettings_get return void

Make return value void since function never return meaningfull value

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apm/xgene-v2/ethtool.c          |  4 +++-
 drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c  |  8 ++++++--
 drivers/net/ethernet/broadcom/b44.c                  |  4 +++-
 drivers/net/ethernet/broadcom/bcm63xx_enet.c         |  5 ++++-
 drivers/net/ethernet/broadcom/genet/bcmgenet.c       |  4 +++-
 drivers/net/ethernet/broadcom/tg3.c                  |  4 +++-
 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c   |  6 ++----
 drivers/net/ethernet/freescale/ucc_geth_ethtool.c    |  4 +++-
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c     |  2 +-
 drivers/net/ethernet/marvell/mv643xx_eth.c           |  5 ++---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c          |  4 +++-
 drivers/net/ethernet/renesas/ravb_main.c             | 14 +++++++-------
 drivers/net/ethernet/renesas/sh_eth.c                |  5 ++---
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c |  5 ++---
 drivers/net/ethernet/ti/cpsw.c                       |  8 ++++----
 drivers/net/ethernet/ti/netcp_ethss.c                |  8 +++-----
 drivers/net/phy/phy.c                                | 10 +++++-----
 drivers/net/usb/lan78xx.c                            |  2 +-
 drivers/staging/netlogic/xlr_net.c                   |  5 ++++-
 include/linux/phy.h                                  |  4 ++--
 net/dsa/slave.c                                      |  9 +++++----
 21 files changed, 68 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/apm/xgene-v2/ethtool.c b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
index b6666e418e79..d31ad8270d93 100644
--- a/drivers/net/ethernet/apm/xgene-v2/ethtool.c
+++ b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
@@ -157,7 +157,9 @@ static int xge_get_link_ksettings(struct net_device *ndev,
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_ethtool_ksettings_get(phydev, cmd);
+	phy_ethtool_ksettings_get(phydev, cmd);
+
+	return 0;
 }
 
 static int xge_set_link_ksettings(struct net_device *ndev,
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
index 559963b1aa32..4f50f11718f4 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -131,13 +131,17 @@ static int xgene_get_link_ksettings(struct net_device *ndev,
 		if (phydev == NULL)
 			return -ENODEV;
 
-		return phy_ethtool_ksettings_get(phydev, cmd);
+		phy_ethtool_ksettings_get(phydev, cmd);
+
+		return 0;
 	} else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
 		if (pdata->mdio_driver) {
 			if (!phydev)
 				return -ENODEV;
 
-			return phy_ethtool_ksettings_get(phydev, cmd);
+			phy_ethtool_ksettings_get(phydev, cmd);
+
+			return 0;
 		}
 
 		supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 5b95bb48ce97..f411936b744c 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1836,7 +1836,9 @@ static int b44_get_link_ksettings(struct net_device *dev,
 
 	if (bp->flags & B44_FLAG_EXTERNAL_PHY) {
 		BUG_ON(!dev->phydev);
-		return phy_ethtool_ksettings_get(dev->phydev, cmd);
+		phy_ethtool_ksettings_get(dev->phydev, cmd);
+
+		return 0;
 	}
 
 	supported = (SUPPORTED_Autoneg);
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 50d88d3e03b6..ea3c906fa0e4 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1453,7 +1453,10 @@ static int bcm_enet_get_link_ksettings(struct net_device *dev,
 	if (priv->has_phy) {
 		if (!dev->phydev)
 			return -ENODEV;
-		return phy_ethtool_ksettings_get(dev->phydev, cmd);
+
+		phy_ethtool_ksettings_get(dev->phydev, cmd);
+
+		return 0;
 	} else {
 		cmd->base.autoneg = 0;
 		cmd->base.speed = (priv->force_speed_100) ?
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index a205a9ff9e17..daca1c9d254b 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -477,7 +477,9 @@ static int bcmgenet_get_link_ksettings(struct net_device *dev,
 	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_ksettings_get(priv->phydev, cmd);
+	phy_ethtool_ksettings_get(priv->phydev, cmd);
+
+	return 0;
 }
 
 static int bcmgenet_set_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 537d571ee601..d600c41fb1dc 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12097,7 +12097,9 @@ static int tg3_get_link_ksettings(struct net_device *dev,
 		if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
 			return -EAGAIN;
 		phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
-		return phy_ethtool_ksettings_get(phydev, cmd);
+		phy_ethtool_ksettings_get(phydev, cmd);
+
+		return 0;
 	}
 
 	supported = (SUPPORTED_Autoneg);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 15571e251fb9..aad825088357 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -75,16 +75,14 @@ static char dpaa_stats_global[][ETH_GSTRING_LEN] = {
 static int dpaa_get_link_ksettings(struct net_device *net_dev,
 				   struct ethtool_link_ksettings *cmd)
 {
-	int err;
-
 	if (!net_dev->phydev) {
 		netdev_dbg(net_dev, "phy device not initialized\n");
 		return 0;
 	}
 
-	err = phy_ethtool_ksettings_get(net_dev->phydev, cmd);
+	phy_ethtool_ksettings_get(net_dev->phydev, cmd);
 
-	return err;
+	return 0;
 }
 
 static int dpaa_set_link_ksettings(struct net_device *net_dev,
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index b642990b549c..4df282ed22c7 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -113,7 +113,9 @@ uec_get_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_ethtool_ksettings_get(phydev, cmd);
+	phy_ethtool_ksettings_get(phydev, cmd);
+
+	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index b8fab149690f..af1b15cc6a7f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -150,7 +150,7 @@ static int hns_nic_get_link_ksettings(struct net_device *net_dev,
 	cmd->base.duplex = duplex;
 
 	if (net_dev->phydev)
-		(void)phy_ethtool_ksettings_get(net_dev->phydev, cmd);
+		phy_ethtool_ksettings_get(net_dev->phydev, cmd);
 
 	link_stat = hns_nic_get_link(net_dev);
 	if (!link_stat) {
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 25642dee49d3..5794d98d946f 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1501,10 +1501,9 @@ mv643xx_eth_get_link_ksettings_phy(struct mv643xx_eth_private *mp,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct net_device *dev = mp->dev;
-	int err;
 	u32 supported, advertising;
 
-	err = phy_ethtool_ksettings_get(dev->phydev, cmd);
+	phy_ethtool_ksettings_get(dev->phydev, cmd);
 
 	/*
 	 * The MAC does not support 1000baseT_Half.
@@ -1520,7 +1519,7 @@ mv643xx_eth_get_link_ksettings_phy(struct mv643xx_eth_private *mp,
 	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
 						advertising);
 
-	return err;
+	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 16f97552ae98..962975d192d1 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2056,7 +2056,9 @@ static int mtk_get_link_ksettings(struct net_device *ndev,
 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
 		return -EBUSY;
 
-	return phy_ethtool_ksettings_get(ndev->phydev, cmd);
+	phy_ethtool_ksettings_get(ndev->phydev, cmd);
+
+	return 0;
 }
 
 static int mtk_set_link_ksettings(struct net_device *ndev,
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 784782da3a85..5931e859876c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1076,16 +1076,16 @@ static int ravb_get_link_ksettings(struct net_device *ndev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
-	int error = -ENODEV;
 	unsigned long flags;
 
-	if (ndev->phydev) {
-		spin_lock_irqsave(&priv->lock, flags);
-		error = phy_ethtool_ksettings_get(ndev->phydev, cmd);
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
+	if (!ndev->phydev)
+		return -ENODEV;
 
-	return error;
+	spin_lock_irqsave(&priv->lock, flags);
+	phy_ethtool_ksettings_get(ndev->phydev, cmd);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
 }
 
 static int ravb_set_link_ksettings(struct net_device *ndev,
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 48b66df88294..d2dc0a8ef305 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1915,16 +1915,15 @@ static int sh_eth_get_link_ksettings(struct net_device *ndev,
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	unsigned long flags;
-	int ret;
 
 	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
-	ret = phy_ethtool_ksettings_get(ndev->phydev, cmd);
+	phy_ethtool_ksettings_get(ndev->phydev, cmd);
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
-	return ret;
+	return 0;
 }
 
 static int sh_eth_set_link_ksettings(struct net_device *ndev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 16808e48ca1c..743170d57f62 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -273,7 +273,6 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct phy_device *phy = dev->phydev;
-	int rc;
 
 	if (priv->hw->pcs & STMMAC_PCS_RGMII ||
 	    priv->hw->pcs & STMMAC_PCS_SGMII) {
@@ -364,8 +363,8 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
 		"link speed / duplex setting\n", dev->name);
 		return -EBUSY;
 	}
-	rc = phy_ethtool_ksettings_get(phy, cmd);
-	return rc;
+	phy_ethtool_ksettings_get(phy, cmd);
+	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b6a0d92dd637..b7a0f5eeab62 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2170,11 +2170,11 @@ static int cpsw_get_link_ksettings(struct net_device *ndev,
 	struct cpsw_common *cpsw = priv->cpsw;
 	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy,
-						 ecmd);
-	else
+	if (!cpsw->slaves[slave_no].phy)
 		return -EOPNOTSUPP;
+
+	phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy, ecmd);
+	return 0;
 }
 
 static int cpsw_set_link_ksettings(struct net_device *ndev,
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index dd92950a4615..0847a8f48cfe 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -1927,7 +1927,6 @@ static int keystone_get_link_ksettings(struct net_device *ndev,
 	struct netcp_intf *netcp = netdev_priv(ndev);
 	struct phy_device *phy = ndev->phydev;
 	struct gbe_intf *gbe_intf;
-	int ret;
 
 	if (!phy)
 		return -EINVAL;
@@ -1939,11 +1938,10 @@ static int keystone_get_link_ksettings(struct net_device *ndev,
 	if (!gbe_intf->slave)
 		return -EINVAL;
 
-	ret = phy_ethtool_ksettings_get(phy, cmd);
-	if (!ret)
-		cmd->base.port = gbe_intf->slave->phy_port_t;
+	phy_ethtool_ksettings_get(phy, cmd);
+	cmd->base.port = gbe_intf->slave->phy_port_t;
 
-	return ret;
+	return 0;
 }
 
 static int keystone_set_link_ksettings(struct net_device *ndev,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 14fc5bc75cd1..edcdf0d872ed 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -509,8 +509,8 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 }
 EXPORT_SYMBOL(phy_ethtool_ksettings_set);
 
-int phy_ethtool_ksettings_get(struct phy_device *phydev,
-			      struct ethtool_link_ksettings *cmd)
+void phy_ethtool_ksettings_get(struct phy_device *phydev,
+			       struct ethtool_link_ksettings *cmd)
 {
 	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
 						phydev->supported);
@@ -532,8 +532,6 @@ int phy_ethtool_ksettings_get(struct phy_device *phydev,
 	cmd->base.autoneg = phydev->autoneg;
 	cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl;
 	cmd->base.eth_tp_mdix = phydev->mdix;
-
-	return 0;
 }
 EXPORT_SYMBOL(phy_ethtool_ksettings_get);
 
@@ -1449,7 +1447,9 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev,
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_ethtool_ksettings_get(phydev, cmd);
+	phy_ethtool_ksettings_get(phydev, cmd);
+
+	return 0;
 }
 EXPORT_SYMBOL(phy_ethtool_get_link_ksettings);
 
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 9eff97a650ae..5833f7e2a127 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1490,7 +1490,7 @@ static int lan78xx_get_link_ksettings(struct net_device *net,
 	if (ret < 0)
 		return ret;
 
-	ret = phy_ethtool_ksettings_get(phydev, cmd);
+	phy_ethtool_ksettings_get(phydev, cmd);
 
 	usb_autopm_put_interface(dev->intf);
 
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index 781ef623233e..e05ae4645d91 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -179,7 +179,10 @@ static int xlr_get_link_ksettings(struct net_device *ndev,
 
 	if (!phydev)
 		return -ENODEV;
-	return phy_ethtool_ksettings_get(phydev, ecmd);
+
+	phy_ethtool_ksettings_get(phydev, ecmd);
+
+	return 0;
 }
 
 static int xlr_set_link_ksettings(struct net_device *ndev,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 51bea6593409..23d2e46dd322 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -872,8 +872,8 @@ void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
 void phy_trigger_machine(struct phy_device *phydev, bool sync);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
-int phy_ethtool_ksettings_get(struct phy_device *phydev,
-			      struct ethtool_link_ksettings *cmd);
+void phy_ethtool_ksettings_get(struct phy_device *phydev,
+			       struct ethtool_link_ksettings *cmd);
 int phy_ethtool_ksettings_set(struct phy_device *phydev,
 			      const struct ethtool_link_ksettings *cmd);
 int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5f3caee725ee..5e45ae5c3f71 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -387,12 +387,13 @@ dsa_slave_get_link_ksettings(struct net_device *dev,
 			     struct ethtool_link_ksettings *cmd)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	int err = -EOPNOTSUPP;
 
-	if (p->phy != NULL)
-		err = phy_ethtool_ksettings_get(p->phy, cmd);
+	if (!p->phy)
+		return -EOPNOTSUPP;
 
-	return err;
+	phy_ethtool_ksettings_get(p->phy, cmd);
+
+	return 0;
 }
 
 static int
-- 
cgit v1.2.3


From 8caab75fd2c2a92667cbb1cd315720bede3feaa9 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 13 Jun 2017 13:23:52 +0200
Subject: spi: Generalize SPI "master" to "controller"

Now struct spi_master is used for both SPI master and slave controllers,
it makes sense to rename it to struct spi_controller, and replace
"master" by "controller" where appropriate.

For now this conversion is done for SPI core infrastructure only.
Wrappers are provided for backwards compatibility, until all SPI drivers
have been converted.

Noteworthy details:
  - SPI_MASTER_GPIO_SS is retained, as it only makes sense for SPI
    master controllers,
  - spi_busnum_to_master() is retained, as it looks up masters only,
  - A new field spi_device.controller is added, but spi_device.master is
    retained for compatibility (both are always initialized by
    spi_alloc_device()),
  - spi_flash_read() is used by SPI masters only.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c          | 1081 ++++++++++++++++++++++----------------------
 include/linux/spi/spi.h    |  198 ++++----
 include/trace/events/spi.h |   26 +-
 3 files changed, 668 insertions(+), 637 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index c3f6b524b3ce..4fcbb0aa71d3 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -48,11 +48,11 @@ static void spidev_release(struct device *dev)
 {
 	struct spi_device	*spi = to_spi_device(dev);
 
-	/* spi masters may cleanup for released devices */
-	if (spi->master->cleanup)
-		spi->master->cleanup(spi);
+	/* spi controllers may cleanup for released devices */
+	if (spi->controller->cleanup)
+		spi->controller->cleanup(spi);
 
-	spi_master_put(spi->master);
+	spi_controller_put(spi->controller);
 	kfree(spi);
 }
 
@@ -71,17 +71,17 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf)
 static DEVICE_ATTR_RO(modalias);
 
 #define SPI_STATISTICS_ATTRS(field, file)				\
-static ssize_t spi_master_##field##_show(struct device *dev,		\
-					 struct device_attribute *attr,	\
-					 char *buf)			\
+static ssize_t spi_controller_##field##_show(struct device *dev,	\
+					     struct device_attribute *attr, \
+					     char *buf)			\
 {									\
-	struct spi_master *master = container_of(dev,			\
-						 struct spi_master, dev); \
-	return spi_statistics_##field##_show(&master->statistics, buf);	\
+	struct spi_controller *ctlr = container_of(dev,			\
+					 struct spi_controller, dev);	\
+	return spi_statistics_##field##_show(&ctlr->statistics, buf);	\
 }									\
-static struct device_attribute dev_attr_spi_master_##field = {		\
+static struct device_attribute dev_attr_spi_controller_##field = {	\
 	.attr = { .name = file, .mode = 0444 },				\
-	.show = spi_master_##field##_show,				\
+	.show = spi_controller_##field##_show,				\
 };									\
 static ssize_t spi_device_##field##_show(struct device *dev,		\
 					 struct device_attribute *attr,	\
@@ -201,51 +201,51 @@ static const struct attribute_group *spi_dev_groups[] = {
 	NULL,
 };
 
-static struct attribute *spi_master_statistics_attrs[] = {
-	&dev_attr_spi_master_messages.attr,
-	&dev_attr_spi_master_transfers.attr,
-	&dev_attr_spi_master_errors.attr,
-	&dev_attr_spi_master_timedout.attr,
-	&dev_attr_spi_master_spi_sync.attr,
-	&dev_attr_spi_master_spi_sync_immediate.attr,
-	&dev_attr_spi_master_spi_async.attr,
-	&dev_attr_spi_master_bytes.attr,
-	&dev_attr_spi_master_bytes_rx.attr,
-	&dev_attr_spi_master_bytes_tx.attr,
-	&dev_attr_spi_master_transfer_bytes_histo0.attr,
-	&dev_attr_spi_master_transfer_bytes_histo1.attr,
-	&dev_attr_spi_master_transfer_bytes_histo2.attr,
-	&dev_attr_spi_master_transfer_bytes_histo3.attr,
-	&dev_attr_spi_master_transfer_bytes_histo4.attr,
-	&dev_attr_spi_master_transfer_bytes_histo5.attr,
-	&dev_attr_spi_master_transfer_bytes_histo6.attr,
-	&dev_attr_spi_master_transfer_bytes_histo7.attr,
-	&dev_attr_spi_master_transfer_bytes_histo8.attr,
-	&dev_attr_spi_master_transfer_bytes_histo9.attr,
-	&dev_attr_spi_master_transfer_bytes_histo10.attr,
-	&dev_attr_spi_master_transfer_bytes_histo11.attr,
-	&dev_attr_spi_master_transfer_bytes_histo12.attr,
-	&dev_attr_spi_master_transfer_bytes_histo13.attr,
-	&dev_attr_spi_master_transfer_bytes_histo14.attr,
-	&dev_attr_spi_master_transfer_bytes_histo15.attr,
-	&dev_attr_spi_master_transfer_bytes_histo16.attr,
-	&dev_attr_spi_master_transfers_split_maxsize.attr,
+static struct attribute *spi_controller_statistics_attrs[] = {
+	&dev_attr_spi_controller_messages.attr,
+	&dev_attr_spi_controller_transfers.attr,
+	&dev_attr_spi_controller_errors.attr,
+	&dev_attr_spi_controller_timedout.attr,
+	&dev_attr_spi_controller_spi_sync.attr,
+	&dev_attr_spi_controller_spi_sync_immediate.attr,
+	&dev_attr_spi_controller_spi_async.attr,
+	&dev_attr_spi_controller_bytes.attr,
+	&dev_attr_spi_controller_bytes_rx.attr,
+	&dev_attr_spi_controller_bytes_tx.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo0.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo1.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo2.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo3.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo4.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo5.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo6.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo7.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo8.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo9.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo10.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo11.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo12.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo13.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo14.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo15.attr,
+	&dev_attr_spi_controller_transfer_bytes_histo16.attr,
+	&dev_attr_spi_controller_transfers_split_maxsize.attr,
 	NULL,
 };
 
-static const struct attribute_group spi_master_statistics_group = {
+static const struct attribute_group spi_controller_statistics_group = {
 	.name  = "statistics",
-	.attrs  = spi_master_statistics_attrs,
+	.attrs  = spi_controller_statistics_attrs,
 };
 
 static const struct attribute_group *spi_master_groups[] = {
-	&spi_master_statistics_group,
+	&spi_controller_statistics_group,
 	NULL,
 };
 
 void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
 				       struct spi_transfer *xfer,
-				       struct spi_master *master)
+				       struct spi_controller *ctlr)
 {
 	unsigned long flags;
 	int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1;
@@ -260,10 +260,10 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
 
 	stats->bytes += xfer->len;
 	if ((xfer->tx_buf) &&
-	    (xfer->tx_buf != master->dummy_tx))
+	    (xfer->tx_buf != ctlr->dummy_tx))
 		stats->bytes_tx += xfer->len;
 	if ((xfer->rx_buf) &&
-	    (xfer->rx_buf != master->dummy_rx))
+	    (xfer->rx_buf != ctlr->dummy_rx))
 		stats->bytes_rx += xfer->len;
 
 	spin_unlock_irqrestore(&stats->lock, flags);
@@ -405,7 +405,7 @@ EXPORT_SYMBOL_GPL(__spi_register_driver);
 /*-------------------------------------------------------------------------*/
 
 /* SPI devices should normally not be created by SPI device drivers; that
- * would make them board-specific.  Similarly with SPI master drivers.
+ * would make them board-specific.  Similarly with SPI controller drivers.
  * Device registration normally goes into like arch/.../mach.../board-YYY.c
  * with other readonly (flashable) information about mainboard devices.
  */
@@ -416,17 +416,17 @@ struct boardinfo {
 };
 
 static LIST_HEAD(board_list);
-static LIST_HEAD(spi_master_list);
+static LIST_HEAD(spi_controller_list);
 
 /*
  * Used to protect add/del opertion for board_info list and
- * spi_master list, and their matching process
+ * spi_controller list, and their matching process
  */
 static DEFINE_MUTEX(board_lock);
 
 /**
  * spi_alloc_device - Allocate a new SPI device
- * @master: Controller to which device is connected
+ * @ctlr: Controller to which device is connected
  * Context: can sleep
  *
  * Allows a driver to allocate and initialize a spi_device without
@@ -435,27 +435,27 @@ static DEFINE_MUTEX(board_lock);
  * spi_add_device() on it.
  *
  * Caller is responsible to call spi_add_device() on the returned
- * spi_device structure to add it to the SPI master.  If the caller
+ * spi_device structure to add it to the SPI controller.  If the caller
  * needs to discard the spi_device without adding it, then it should
  * call spi_dev_put() on it.
  *
  * Return: a pointer to the new device, or NULL.
  */
-struct spi_device *spi_alloc_device(struct spi_master *master)
+struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 {
 	struct spi_device	*spi;
 
-	if (!spi_master_get(master))
+	if (!spi_controller_get(ctlr))
 		return NULL;
 
 	spi = kzalloc(sizeof(*spi), GFP_KERNEL);
 	if (!spi) {
-		spi_master_put(master);
+		spi_controller_put(ctlr);
 		return NULL;
 	}
 
-	spi->master = master;
-	spi->dev.parent = &master->dev;
+	spi->master = spi->controller = ctlr;
+	spi->dev.parent = &ctlr->dev;
 	spi->dev.bus = &spi_bus_type;
 	spi->dev.release = spidev_release;
 	spi->cs_gpio = -ENOENT;
@@ -476,7 +476,7 @@ static void spi_dev_set_name(struct spi_device *spi)
 		return;
 	}
 
-	dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
+	dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->controller->dev),
 		     spi->chip_select);
 }
 
@@ -485,7 +485,7 @@ static int spi_dev_check(struct device *dev, void *data)
 	struct spi_device *spi = to_spi_device(dev);
 	struct spi_device *new_spi = data;
 
-	if (spi->master == new_spi->master &&
+	if (spi->controller == new_spi->controller &&
 	    spi->chip_select == new_spi->chip_select)
 		return -EBUSY;
 	return 0;
@@ -503,15 +503,14 @@ static int spi_dev_check(struct device *dev, void *data)
 int spi_add_device(struct spi_device *spi)
 {
 	static DEFINE_MUTEX(spi_add_lock);
-	struct spi_master *master = spi->master;
-	struct device *dev = master->dev.parent;
+	struct spi_controller *ctlr = spi->controller;
+	struct device *dev = ctlr->dev.parent;
 	int status;
 
 	/* Chipselects are numbered 0..max; validate. */
-	if (spi->chip_select >= master->num_chipselect) {
-		dev_err(dev, "cs%d >= max %d\n",
-			spi->chip_select,
-			master->num_chipselect);
+	if (spi->chip_select >= ctlr->num_chipselect) {
+		dev_err(dev, "cs%d >= max %d\n", spi->chip_select,
+			ctlr->num_chipselect);
 		return -EINVAL;
 	}
 
@@ -531,8 +530,8 @@ int spi_add_device(struct spi_device *spi)
 		goto done;
 	}
 
-	if (master->cs_gpios)
-		spi->cs_gpio = master->cs_gpios[spi->chip_select];
+	if (ctlr->cs_gpios)
+		spi->cs_gpio = ctlr->cs_gpios[spi->chip_select];
 
 	/* Drivers may modify this initial i/o setup, but will
 	 * normally rely on the device being setup.  Devices
@@ -561,7 +560,7 @@ EXPORT_SYMBOL_GPL(spi_add_device);
 
 /**
  * spi_new_device - instantiate one new SPI device
- * @master: Controller to which device is connected
+ * @ctlr: Controller to which device is connected
  * @chip: Describes the SPI device
  * Context: can sleep
  *
@@ -573,7 +572,7 @@ EXPORT_SYMBOL_GPL(spi_add_device);
  *
  * Return: the new device, or NULL.
  */
-struct spi_device *spi_new_device(struct spi_master *master,
+struct spi_device *spi_new_device(struct spi_controller *ctlr,
 				  struct spi_board_info *chip)
 {
 	struct spi_device	*proxy;
@@ -586,7 +585,7 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	 * suggests syslogged diagnostics are best here (ugh).
 	 */
 
-	proxy = spi_alloc_device(master);
+	proxy = spi_alloc_device(ctlr);
 	if (!proxy)
 		return NULL;
 
@@ -604,7 +603,7 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	if (chip->properties) {
 		status = device_add_properties(&proxy->dev, chip->properties);
 		if (status) {
-			dev_err(&master->dev,
+			dev_err(&ctlr->dev,
 				"failed to add properties to '%s': %d\n",
 				chip->modalias, status);
 			goto err_dev_put;
@@ -631,7 +630,7 @@ EXPORT_SYMBOL_GPL(spi_new_device);
  * @spi: spi_device to unregister
  *
  * Start making the passed SPI device vanish. Normally this would be handled
- * by spi_unregister_master().
+ * by spi_unregister_controller().
  */
 void spi_unregister_device(struct spi_device *spi)
 {
@@ -648,17 +647,17 @@ void spi_unregister_device(struct spi_device *spi)
 }
 EXPORT_SYMBOL_GPL(spi_unregister_device);
 
-static void spi_match_master_to_boardinfo(struct spi_master *master,
-				struct spi_board_info *bi)
+static void spi_match_controller_to_boardinfo(struct spi_controller *ctlr,
+					      struct spi_board_info *bi)
 {
 	struct spi_device *dev;
 
-	if (master->bus_num != bi->bus_num)
+	if (ctlr->bus_num != bi->bus_num)
 		return;
 
-	dev = spi_new_device(master, bi);
+	dev = spi_new_device(ctlr, bi);
 	if (!dev)
-		dev_err(master->dev.parent, "can't create new device for %s\n",
+		dev_err(ctlr->dev.parent, "can't create new device for %s\n",
 			bi->modalias);
 }
 
@@ -697,7 +696,7 @@ int spi_register_board_info(struct spi_board_info const *info, unsigned n)
 		return -ENOMEM;
 
 	for (i = 0; i < n; i++, bi++, info++) {
-		struct spi_master *master;
+		struct spi_controller *ctlr;
 
 		memcpy(&bi->board_info, info, sizeof(*info));
 		if (info->properties) {
@@ -709,8 +708,9 @@ int spi_register_board_info(struct spi_board_info const *info, unsigned n)
 
 		mutex_lock(&board_lock);
 		list_add_tail(&bi->list, &board_list);
-		list_for_each_entry(master, &spi_master_list, list)
-			spi_match_master_to_boardinfo(master, &bi->board_info);
+		list_for_each_entry(ctlr, &spi_controller_list, list)
+			spi_match_controller_to_boardinfo(ctlr,
+							  &bi->board_info);
 		mutex_unlock(&board_lock);
 	}
 
@@ -727,16 +727,16 @@ static void spi_set_cs(struct spi_device *spi, bool enable)
 	if (gpio_is_valid(spi->cs_gpio)) {
 		gpio_set_value(spi->cs_gpio, !enable);
 		/* Some SPI masters need both GPIO CS & slave_select */
-		if ((spi->master->flags & SPI_MASTER_GPIO_SS) &&
-		    spi->master->set_cs)
-			spi->master->set_cs(spi, !enable);
-	} else if (spi->master->set_cs) {
-		spi->master->set_cs(spi, !enable);
+		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
+		    spi->controller->set_cs)
+			spi->controller->set_cs(spi, !enable);
+	} else if (spi->controller->set_cs) {
+		spi->controller->set_cs(spi, !enable);
 	}
 }
 
 #ifdef CONFIG_HAS_DMA
-static int spi_map_buf(struct spi_master *master, struct device *dev,
+static int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
 		       struct sg_table *sgt, void *buf, size_t len,
 		       enum dma_data_direction dir)
 {
@@ -761,7 +761,7 @@ static int spi_map_buf(struct spi_master *master, struct device *dev,
 		desc_len = min_t(int, max_seg_size, PAGE_SIZE);
 		sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len);
 	} else if (virt_addr_valid(buf)) {
-		desc_len = min_t(int, max_seg_size, master->max_dma_len);
+		desc_len = min_t(int, max_seg_size, ctlr->max_dma_len);
 		sgs = DIV_ROUND_UP(len, desc_len);
 	} else {
 		return -EINVAL;
@@ -811,7 +811,7 @@ static int spi_map_buf(struct spi_master *master, struct device *dev,
 	return 0;
 }
 
-static void spi_unmap_buf(struct spi_master *master, struct device *dev,
+static void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
 			  struct sg_table *sgt, enum dma_data_direction dir)
 {
 	if (sgt->orig_nents) {
@@ -820,31 +820,31 @@ static void spi_unmap_buf(struct spi_master *master, struct device *dev,
 	}
 }
 
-static int __spi_map_msg(struct spi_master *master, struct spi_message *msg)
+static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 {
 	struct device *tx_dev, *rx_dev;
 	struct spi_transfer *xfer;
 	int ret;
 
-	if (!master->can_dma)
+	if (!ctlr->can_dma)
 		return 0;
 
-	if (master->dma_tx)
-		tx_dev = master->dma_tx->device->dev;
+	if (ctlr->dma_tx)
+		tx_dev = ctlr->dma_tx->device->dev;
 	else
-		tx_dev = master->dev.parent;
+		tx_dev = ctlr->dev.parent;
 
-	if (master->dma_rx)
-		rx_dev = master->dma_rx->device->dev;
+	if (ctlr->dma_rx)
+		rx_dev = ctlr->dma_rx->device->dev;
 	else
-		rx_dev = master->dev.parent;
+		rx_dev = ctlr->dev.parent;
 
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		if (!master->can_dma(master, msg->spi, xfer))
+		if (!ctlr->can_dma(ctlr, msg->spi, xfer))
 			continue;
 
 		if (xfer->tx_buf != NULL) {
-			ret = spi_map_buf(master, tx_dev, &xfer->tx_sg,
+			ret = spi_map_buf(ctlr, tx_dev, &xfer->tx_sg,
 					  (void *)xfer->tx_buf, xfer->len,
 					  DMA_TO_DEVICE);
 			if (ret != 0)
@@ -852,79 +852,78 @@ static int __spi_map_msg(struct spi_master *master, struct spi_message *msg)
 		}
 
 		if (xfer->rx_buf != NULL) {
-			ret = spi_map_buf(master, rx_dev, &xfer->rx_sg,
+			ret = spi_map_buf(ctlr, rx_dev, &xfer->rx_sg,
 					  xfer->rx_buf, xfer->len,
 					  DMA_FROM_DEVICE);
 			if (ret != 0) {
-				spi_unmap_buf(master, tx_dev, &xfer->tx_sg,
+				spi_unmap_buf(ctlr, tx_dev, &xfer->tx_sg,
 					      DMA_TO_DEVICE);
 				return ret;
 			}
 		}
 	}
 
-	master->cur_msg_mapped = true;
+	ctlr->cur_msg_mapped = true;
 
 	return 0;
 }
 
-static int __spi_unmap_msg(struct spi_master *master, struct spi_message *msg)
+static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg)
 {
 	struct spi_transfer *xfer;
 	struct device *tx_dev, *rx_dev;
 
-	if (!master->cur_msg_mapped || !master->can_dma)
+	if (!ctlr->cur_msg_mapped || !ctlr->can_dma)
 		return 0;
 
-	if (master->dma_tx)
-		tx_dev = master->dma_tx->device->dev;
+	if (ctlr->dma_tx)
+		tx_dev = ctlr->dma_tx->device->dev;
 	else
-		tx_dev = master->dev.parent;
+		tx_dev = ctlr->dev.parent;
 
-	if (master->dma_rx)
-		rx_dev = master->dma_rx->device->dev;
+	if (ctlr->dma_rx)
+		rx_dev = ctlr->dma_rx->device->dev;
 	else
-		rx_dev = master->dev.parent;
+		rx_dev = ctlr->dev.parent;
 
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		if (!master->can_dma(master, msg->spi, xfer))
+		if (!ctlr->can_dma(ctlr, msg->spi, xfer))
 			continue;
 
-		spi_unmap_buf(master, rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE);
-		spi_unmap_buf(master, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE);
+		spi_unmap_buf(ctlr, rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE);
+		spi_unmap_buf(ctlr, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE);
 	}
 
 	return 0;
 }
 #else /* !CONFIG_HAS_DMA */
-static inline int spi_map_buf(struct spi_master *master,
-			      struct device *dev, struct sg_table *sgt,
-			      void *buf, size_t len,
+static inline int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+			      struct sg_table *sgt, void *buf, size_t len,
 			      enum dma_data_direction dir)
 {
 	return -EINVAL;
 }
 
-static inline void spi_unmap_buf(struct spi_master *master,
+static inline void spi_unmap_buf(struct spi_controller *ctlr,
 				 struct device *dev, struct sg_table *sgt,
 				 enum dma_data_direction dir)
 {
 }
 
-static inline int __spi_map_msg(struct spi_master *master,
+static inline int __spi_map_msg(struct spi_controller *ctlr,
 				struct spi_message *msg)
 {
 	return 0;
 }
 
-static inline int __spi_unmap_msg(struct spi_master *master,
+static inline int __spi_unmap_msg(struct spi_controller *ctlr,
 				  struct spi_message *msg)
 {
 	return 0;
 }
 #endif /* !CONFIG_HAS_DMA */
 
-static inline int spi_unmap_msg(struct spi_master *master,
+static inline int spi_unmap_msg(struct spi_controller *ctlr,
 				struct spi_message *msg)
 {
 	struct spi_transfer *xfer;
@@ -934,63 +933,63 @@ static inline int spi_unmap_msg(struct spi_master *master,
 		 * Restore the original value of tx_buf or rx_buf if they are
 		 * NULL.
 		 */
-		if (xfer->tx_buf == master->dummy_tx)
+		if (xfer->tx_buf == ctlr->dummy_tx)
 			xfer->tx_buf = NULL;
-		if (xfer->rx_buf == master->dummy_rx)
+		if (xfer->rx_buf == ctlr->dummy_rx)
 			xfer->rx_buf = NULL;
 	}
 
-	return __spi_unmap_msg(master, msg);
+	return __spi_unmap_msg(ctlr, msg);
 }
 
-static int spi_map_msg(struct spi_master *master, struct spi_message *msg)
+static int spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 {
 	struct spi_transfer *xfer;
 	void *tmp;
 	unsigned int max_tx, max_rx;
 
-	if (master->flags & (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX)) {
+	if (ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX)) {
 		max_tx = 0;
 		max_rx = 0;
 
 		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-			if ((master->flags & SPI_MASTER_MUST_TX) &&
+			if ((ctlr->flags & SPI_CONTROLLER_MUST_TX) &&
 			    !xfer->tx_buf)
 				max_tx = max(xfer->len, max_tx);
-			if ((master->flags & SPI_MASTER_MUST_RX) &&
+			if ((ctlr->flags & SPI_CONTROLLER_MUST_RX) &&
 			    !xfer->rx_buf)
 				max_rx = max(xfer->len, max_rx);
 		}
 
 		if (max_tx) {
-			tmp = krealloc(master->dummy_tx, max_tx,
+			tmp = krealloc(ctlr->dummy_tx, max_tx,
 				       GFP_KERNEL | GFP_DMA);
 			if (!tmp)
 				return -ENOMEM;
-			master->dummy_tx = tmp;
+			ctlr->dummy_tx = tmp;
 			memset(tmp, 0, max_tx);
 		}
 
 		if (max_rx) {
-			tmp = krealloc(master->dummy_rx, max_rx,
+			tmp = krealloc(ctlr->dummy_rx, max_rx,
 				       GFP_KERNEL | GFP_DMA);
 			if (!tmp)
 				return -ENOMEM;
-			master->dummy_rx = tmp;
+			ctlr->dummy_rx = tmp;
 		}
 
 		if (max_tx || max_rx) {
 			list_for_each_entry(xfer, &msg->transfers,
 					    transfer_list) {
 				if (!xfer->tx_buf)
-					xfer->tx_buf = master->dummy_tx;
+					xfer->tx_buf = ctlr->dummy_tx;
 				if (!xfer->rx_buf)
-					xfer->rx_buf = master->dummy_rx;
+					xfer->rx_buf = ctlr->dummy_rx;
 			}
 		}
 	}
 
-	return __spi_map_msg(master, msg);
+	return __spi_map_msg(ctlr, msg);
 }
 
 /*
@@ -1000,14 +999,14 @@ static int spi_map_msg(struct spi_master *master, struct spi_message *msg)
  * drivers which implement a transfer_one() operation.  It provides
  * standard handling of delays and chip select management.
  */
-static int spi_transfer_one_message(struct spi_master *master,
+static int spi_transfer_one_message(struct spi_controller *ctlr,
 				    struct spi_message *msg)
 {
 	struct spi_transfer *xfer;
 	bool keep_cs = false;
 	int ret = 0;
 	unsigned long long ms = 1;
-	struct spi_statistics *statm = &master->statistics;
+	struct spi_statistics *statm = &ctlr->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
 
 	spi_set_cs(msg->spi, true);
@@ -1018,13 +1017,13 @@ static int spi_transfer_one_message(struct spi_master *master,
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
 		trace_spi_transfer_start(msg, xfer);
 
-		spi_statistics_add_transfer_stats(statm, xfer, master);
-		spi_statistics_add_transfer_stats(stats, xfer, master);
+		spi_statistics_add_transfer_stats(statm, xfer, ctlr);
+		spi_statistics_add_transfer_stats(stats, xfer, ctlr);
 
 		if (xfer->tx_buf || xfer->rx_buf) {
-			reinit_completion(&master->xfer_completion);
+			reinit_completion(&ctlr->xfer_completion);
 
-			ret = master->transfer_one(master, msg->spi, xfer);
+			ret = ctlr->transfer_one(ctlr, msg->spi, xfer);
 			if (ret < 0) {
 				SPI_STATISTICS_INCREMENT_FIELD(statm,
 							       errors);
@@ -1044,7 +1043,7 @@ static int spi_transfer_one_message(struct spi_master *master,
 				if (ms > UINT_MAX)
 					ms = UINT_MAX;
 
-				ms = wait_for_completion_timeout(&master->xfer_completion,
+				ms = wait_for_completion_timeout(&ctlr->xfer_completion,
 								 msecs_to_jiffies(ms));
 			}
 
@@ -1099,33 +1098,33 @@ out:
 	if (msg->status == -EINPROGRESS)
 		msg->status = ret;
 
-	if (msg->status && master->handle_err)
-		master->handle_err(master, msg);
+	if (msg->status && ctlr->handle_err)
+		ctlr->handle_err(ctlr, msg);
 
-	spi_res_release(master, msg);
+	spi_res_release(ctlr, msg);
 
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(ctlr);
 
 	return ret;
 }
 
 /**
  * spi_finalize_current_transfer - report completion of a transfer
- * @master: the master reporting completion
+ * @ctlr: the controller reporting completion
  *
  * Called by SPI drivers using the core transfer_one_message()
  * implementation to notify it that the current interrupt driven
  * transfer has finished and the next one may be scheduled.
  */
-void spi_finalize_current_transfer(struct spi_master *master)
+void spi_finalize_current_transfer(struct spi_controller *ctlr)
 {
-	complete(&master->xfer_completion);
+	complete(&ctlr->xfer_completion);
 }
 EXPORT_SYMBOL_GPL(spi_finalize_current_transfer);
 
 /**
  * __spi_pump_messages - function which processes spi message queue
- * @master: master to process queue for
+ * @ctlr: controller to process queue for
  * @in_kthread: true if we are in the context of the message pump thread
  *
  * This function checks if there is any spi message in the queue that
@@ -1136,136 +1135,136 @@ EXPORT_SYMBOL_GPL(spi_finalize_current_transfer);
  * inside spi_sync(); the queue extraction handling at the top of the
  * function should deal with this safely.
  */
-static void __spi_pump_messages(struct spi_master *master, bool in_kthread)
+static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 {
 	unsigned long flags;
 	bool was_busy = false;
 	int ret;
 
 	/* Lock queue */
-	spin_lock_irqsave(&master->queue_lock, flags);
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
 
 	/* Make sure we are not already running a message */
-	if (master->cur_msg) {
-		spin_unlock_irqrestore(&master->queue_lock, flags);
+	if (ctlr->cur_msg) {
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return;
 	}
 
 	/* If another context is idling the device then defer */
-	if (master->idling) {
-		kthread_queue_work(&master->kworker, &master->pump_messages);
-		spin_unlock_irqrestore(&master->queue_lock, flags);
+	if (ctlr->idling) {
+		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return;
 	}
 
 	/* Check if the queue is idle */
-	if (list_empty(&master->queue) || !master->running) {
-		if (!master->busy) {
-			spin_unlock_irqrestore(&master->queue_lock, flags);
+	if (list_empty(&ctlr->queue) || !ctlr->running) {
+		if (!ctlr->busy) {
+			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 			return;
 		}
 
 		/* Only do teardown in the thread */
 		if (!in_kthread) {
-			kthread_queue_work(&master->kworker,
-					   &master->pump_messages);
-			spin_unlock_irqrestore(&master->queue_lock, flags);
+			kthread_queue_work(&ctlr->kworker,
+					   &ctlr->pump_messages);
+			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 			return;
 		}
 
-		master->busy = false;
-		master->idling = true;
-		spin_unlock_irqrestore(&master->queue_lock, flags);
-
-		kfree(master->dummy_rx);
-		master->dummy_rx = NULL;
-		kfree(master->dummy_tx);
-		master->dummy_tx = NULL;
-		if (master->unprepare_transfer_hardware &&
-		    master->unprepare_transfer_hardware(master))
-			dev_err(&master->dev,
+		ctlr->busy = false;
+		ctlr->idling = true;
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+
+		kfree(ctlr->dummy_rx);
+		ctlr->dummy_rx = NULL;
+		kfree(ctlr->dummy_tx);
+		ctlr->dummy_tx = NULL;
+		if (ctlr->unprepare_transfer_hardware &&
+		    ctlr->unprepare_transfer_hardware(ctlr))
+			dev_err(&ctlr->dev,
 				"failed to unprepare transfer hardware\n");
-		if (master->auto_runtime_pm) {
-			pm_runtime_mark_last_busy(master->dev.parent);
-			pm_runtime_put_autosuspend(master->dev.parent);
+		if (ctlr->auto_runtime_pm) {
+			pm_runtime_mark_last_busy(ctlr->dev.parent);
+			pm_runtime_put_autosuspend(ctlr->dev.parent);
 		}
-		trace_spi_master_idle(master);
+		trace_spi_controller_idle(ctlr);
 
-		spin_lock_irqsave(&master->queue_lock, flags);
-		master->idling = false;
-		spin_unlock_irqrestore(&master->queue_lock, flags);
+		spin_lock_irqsave(&ctlr->queue_lock, flags);
+		ctlr->idling = false;
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return;
 	}
 
 	/* Extract head of queue */
-	master->cur_msg =
-		list_first_entry(&master->queue, struct spi_message, queue);
+	ctlr->cur_msg =
+		list_first_entry(&ctlr->queue, struct spi_message, queue);
 
-	list_del_init(&master->cur_msg->queue);
-	if (master->busy)
+	list_del_init(&ctlr->cur_msg->queue);
+	if (ctlr->busy)
 		was_busy = true;
 	else
-		master->busy = true;
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+		ctlr->busy = true;
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
-	mutex_lock(&master->io_mutex);
+	mutex_lock(&ctlr->io_mutex);
 
-	if (!was_busy && master->auto_runtime_pm) {
-		ret = pm_runtime_get_sync(master->dev.parent);
+	if (!was_busy && ctlr->auto_runtime_pm) {
+		ret = pm_runtime_get_sync(ctlr->dev.parent);
 		if (ret < 0) {
-			dev_err(&master->dev, "Failed to power device: %d\n",
+			dev_err(&ctlr->dev, "Failed to power device: %d\n",
 				ret);
-			mutex_unlock(&master->io_mutex);
+			mutex_unlock(&ctlr->io_mutex);
 			return;
 		}
 	}
 
 	if (!was_busy)
-		trace_spi_master_busy(master);
+		trace_spi_controller_busy(ctlr);
 
-	if (!was_busy && master->prepare_transfer_hardware) {
-		ret = master->prepare_transfer_hardware(master);
+	if (!was_busy && ctlr->prepare_transfer_hardware) {
+		ret = ctlr->prepare_transfer_hardware(ctlr);
 		if (ret) {
-			dev_err(&master->dev,
+			dev_err(&ctlr->dev,
 				"failed to prepare transfer hardware\n");
 
-			if (master->auto_runtime_pm)
-				pm_runtime_put(master->dev.parent);
-			mutex_unlock(&master->io_mutex);
+			if (ctlr->auto_runtime_pm)
+				pm_runtime_put(ctlr->dev.parent);
+			mutex_unlock(&ctlr->io_mutex);
 			return;
 		}
 	}
 
-	trace_spi_message_start(master->cur_msg);
+	trace_spi_message_start(ctlr->cur_msg);
 
-	if (master->prepare_message) {
-		ret = master->prepare_message(master, master->cur_msg);
+	if (ctlr->prepare_message) {
+		ret = ctlr->prepare_message(ctlr, ctlr->cur_msg);
 		if (ret) {
-			dev_err(&master->dev,
-				"failed to prepare message: %d\n", ret);
-			master->cur_msg->status = ret;
-			spi_finalize_current_message(master);
+			dev_err(&ctlr->dev, "failed to prepare message: %d\n",
+				ret);
+			ctlr->cur_msg->status = ret;
+			spi_finalize_current_message(ctlr);
 			goto out;
 		}
-		master->cur_msg_prepared = true;
+		ctlr->cur_msg_prepared = true;
 	}
 
-	ret = spi_map_msg(master, master->cur_msg);
+	ret = spi_map_msg(ctlr, ctlr->cur_msg);
 	if (ret) {
-		master->cur_msg->status = ret;
-		spi_finalize_current_message(master);
+		ctlr->cur_msg->status = ret;
+		spi_finalize_current_message(ctlr);
 		goto out;
 	}
 
-	ret = master->transfer_one_message(master, master->cur_msg);
+	ret = ctlr->transfer_one_message(ctlr, ctlr->cur_msg);
 	if (ret) {
-		dev_err(&master->dev,
+		dev_err(&ctlr->dev,
 			"failed to transfer one message from queue\n");
 		goto out;
 	}
 
 out:
-	mutex_unlock(&master->io_mutex);
+	mutex_unlock(&ctlr->io_mutex);
 
 	/* Prod the scheduler in case transfer_one() was busy waiting */
 	if (!ret)
@@ -1274,44 +1273,43 @@ out:
 
 /**
  * spi_pump_messages - kthread work function which processes spi message queue
- * @work: pointer to kthread work struct contained in the master struct
+ * @work: pointer to kthread work struct contained in the controller struct
  */
 static void spi_pump_messages(struct kthread_work *work)
 {
-	struct spi_master *master =
-		container_of(work, struct spi_master, pump_messages);
+	struct spi_controller *ctlr =
+		container_of(work, struct spi_controller, pump_messages);
 
-	__spi_pump_messages(master, true);
+	__spi_pump_messages(ctlr, true);
 }
 
-static int spi_init_queue(struct spi_master *master)
+static int spi_init_queue(struct spi_controller *ctlr)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
 
-	master->running = false;
-	master->busy = false;
+	ctlr->running = false;
+	ctlr->busy = false;
 
-	kthread_init_worker(&master->kworker);
-	master->kworker_task = kthread_run(kthread_worker_fn,
-					   &master->kworker, "%s",
-					   dev_name(&master->dev));
-	if (IS_ERR(master->kworker_task)) {
-		dev_err(&master->dev, "failed to create message pump task\n");
-		return PTR_ERR(master->kworker_task);
+	kthread_init_worker(&ctlr->kworker);
+	ctlr->kworker_task = kthread_run(kthread_worker_fn, &ctlr->kworker,
+					 "%s", dev_name(&ctlr->dev));
+	if (IS_ERR(ctlr->kworker_task)) {
+		dev_err(&ctlr->dev, "failed to create message pump task\n");
+		return PTR_ERR(ctlr->kworker_task);
 	}
-	kthread_init_work(&master->pump_messages, spi_pump_messages);
+	kthread_init_work(&ctlr->pump_messages, spi_pump_messages);
 
 	/*
-	 * Master config will indicate if this controller should run the
+	 * Controller config will indicate if this controller should run the
 	 * message pump with high (realtime) priority to reduce the transfer
 	 * latency on the bus by minimising the delay between a transfer
 	 * request and the scheduling of the message pump thread. Without this
 	 * setting the message pump thread will remain at default priority.
 	 */
-	if (master->rt) {
-		dev_info(&master->dev,
+	if (ctlr->rt) {
+		dev_info(&ctlr->dev,
 			"will run message pump with realtime priority\n");
-		sched_setscheduler(master->kworker_task, SCHED_FIFO, &param);
+		sched_setscheduler(ctlr->kworker_task, SCHED_FIFO, &param);
 	}
 
 	return 0;
@@ -1320,23 +1318,23 @@ static int spi_init_queue(struct spi_master *master)
 /**
  * spi_get_next_queued_message() - called by driver to check for queued
  * messages
- * @master: the master to check for queued messages
+ * @ctlr: the controller to check for queued messages
  *
  * If there are more messages in the queue, the next message is returned from
  * this call.
  *
  * Return: the next message in the queue, else NULL if the queue is empty.
  */
-struct spi_message *spi_get_next_queued_message(struct spi_master *master)
+struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr)
 {
 	struct spi_message *next;
 	unsigned long flags;
 
 	/* get a pointer to the next message, if any */
-	spin_lock_irqsave(&master->queue_lock, flags);
-	next = list_first_entry_or_null(&master->queue, struct spi_message,
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
+	next = list_first_entry_or_null(&ctlr->queue, struct spi_message,
 					queue);
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	return next;
 }
@@ -1344,36 +1342,36 @@ EXPORT_SYMBOL_GPL(spi_get_next_queued_message);
 
 /**
  * spi_finalize_current_message() - the current message is complete
- * @master: the master to return the message to
+ * @ctlr: the controller to return the message to
  *
  * Called by the driver to notify the core that the message in the front of the
  * queue is complete and can be removed from the queue.
  */
-void spi_finalize_current_message(struct spi_master *master)
+void spi_finalize_current_message(struct spi_controller *ctlr)
 {
 	struct spi_message *mesg;
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&master->queue_lock, flags);
-	mesg = master->cur_msg;
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
+	mesg = ctlr->cur_msg;
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
-	spi_unmap_msg(master, mesg);
+	spi_unmap_msg(ctlr, mesg);
 
-	if (master->cur_msg_prepared && master->unprepare_message) {
-		ret = master->unprepare_message(master, mesg);
+	if (ctlr->cur_msg_prepared && ctlr->unprepare_message) {
+		ret = ctlr->unprepare_message(ctlr, mesg);
 		if (ret) {
-			dev_err(&master->dev,
-				"failed to unprepare message: %d\n", ret);
+			dev_err(&ctlr->dev, "failed to unprepare message: %d\n",
+				ret);
 		}
 	}
 
-	spin_lock_irqsave(&master->queue_lock, flags);
-	master->cur_msg = NULL;
-	master->cur_msg_prepared = false;
-	kthread_queue_work(&master->kworker, &master->pump_messages);
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
+	ctlr->cur_msg = NULL;
+	ctlr->cur_msg_prepared = false;
+	kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	trace_spi_message_done(mesg);
 
@@ -1383,66 +1381,65 @@ void spi_finalize_current_message(struct spi_master *master)
 }
 EXPORT_SYMBOL_GPL(spi_finalize_current_message);
 
-static int spi_start_queue(struct spi_master *master)
+static int spi_start_queue(struct spi_controller *ctlr)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&master->queue_lock, flags);
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
 
-	if (master->running || master->busy) {
-		spin_unlock_irqrestore(&master->queue_lock, flags);
+	if (ctlr->running || ctlr->busy) {
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return -EBUSY;
 	}
 
-	master->running = true;
-	master->cur_msg = NULL;
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+	ctlr->running = true;
+	ctlr->cur_msg = NULL;
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
-	kthread_queue_work(&master->kworker, &master->pump_messages);
+	kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 
 	return 0;
 }
 
-static int spi_stop_queue(struct spi_master *master)
+static int spi_stop_queue(struct spi_controller *ctlr)
 {
 	unsigned long flags;
 	unsigned limit = 500;
 	int ret = 0;
 
-	spin_lock_irqsave(&master->queue_lock, flags);
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
 
 	/*
 	 * This is a bit lame, but is optimized for the common execution path.
-	 * A wait_queue on the master->busy could be used, but then the common
+	 * A wait_queue on the ctlr->busy could be used, but then the common
 	 * execution path (pump_messages) would be required to call wake_up or
 	 * friends on every SPI message. Do this instead.
 	 */
-	while ((!list_empty(&master->queue) || master->busy) && limit--) {
-		spin_unlock_irqrestore(&master->queue_lock, flags);
+	while ((!list_empty(&ctlr->queue) || ctlr->busy) && limit--) {
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		usleep_range(10000, 11000);
-		spin_lock_irqsave(&master->queue_lock, flags);
+		spin_lock_irqsave(&ctlr->queue_lock, flags);
 	}
 
-	if (!list_empty(&master->queue) || master->busy)
+	if (!list_empty(&ctlr->queue) || ctlr->busy)
 		ret = -EBUSY;
 	else
-		master->running = false;
+		ctlr->running = false;
 
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	if (ret) {
-		dev_warn(&master->dev,
-			 "could not stop message queue\n");
+		dev_warn(&ctlr->dev, "could not stop message queue\n");
 		return ret;
 	}
 	return ret;
 }
 
-static int spi_destroy_queue(struct spi_master *master)
+static int spi_destroy_queue(struct spi_controller *ctlr)
 {
 	int ret;
 
-	ret = spi_stop_queue(master);
+	ret = spi_stop_queue(ctlr);
 
 	/*
 	 * kthread_flush_worker will block until all work is done.
@@ -1451,12 +1448,12 @@ static int spi_destroy_queue(struct spi_master *master)
 	 * return anyway.
 	 */
 	if (ret) {
-		dev_err(&master->dev, "problem destroying queue\n");
+		dev_err(&ctlr->dev, "problem destroying queue\n");
 		return ret;
 	}
 
-	kthread_flush_worker(&master->kworker);
-	kthread_stop(master->kworker_task);
+	kthread_flush_worker(&ctlr->kworker);
+	kthread_stop(ctlr->kworker_task);
 
 	return 0;
 }
@@ -1465,23 +1462,23 @@ static int __spi_queued_transfer(struct spi_device *spi,
 				 struct spi_message *msg,
 				 bool need_pump)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 	unsigned long flags;
 
-	spin_lock_irqsave(&master->queue_lock, flags);
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
 
-	if (!master->running) {
-		spin_unlock_irqrestore(&master->queue_lock, flags);
+	if (!ctlr->running) {
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return -ESHUTDOWN;
 	}
 	msg->actual_length = 0;
 	msg->status = -EINPROGRESS;
 
-	list_add_tail(&msg->queue, &master->queue);
-	if (!master->busy && need_pump)
-		kthread_queue_work(&master->kworker, &master->pump_messages);
+	list_add_tail(&msg->queue, &ctlr->queue);
+	if (!ctlr->busy && need_pump)
+		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 
-	spin_unlock_irqrestore(&master->queue_lock, flags);
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 	return 0;
 }
 
@@ -1497,31 +1494,31 @@ static int spi_queued_transfer(struct spi_device *spi, struct spi_message *msg)
 	return __spi_queued_transfer(spi, msg, true);
 }
 
-static int spi_master_initialize_queue(struct spi_master *master)
+static int spi_controller_initialize_queue(struct spi_controller *ctlr)
 {
 	int ret;
 
-	master->transfer = spi_queued_transfer;
-	if (!master->transfer_one_message)
-		master->transfer_one_message = spi_transfer_one_message;
+	ctlr->transfer = spi_queued_transfer;
+	if (!ctlr->transfer_one_message)
+		ctlr->transfer_one_message = spi_transfer_one_message;
 
 	/* Initialize and start queue */
-	ret = spi_init_queue(master);
+	ret = spi_init_queue(ctlr);
 	if (ret) {
-		dev_err(&master->dev, "problem initializing queue\n");
+		dev_err(&ctlr->dev, "problem initializing queue\n");
 		goto err_init_queue;
 	}
-	master->queued = true;
-	ret = spi_start_queue(master);
+	ctlr->queued = true;
+	ret = spi_start_queue(ctlr);
 	if (ret) {
-		dev_err(&master->dev, "problem starting queue\n");
+		dev_err(&ctlr->dev, "problem starting queue\n");
 		goto err_start_queue;
 	}
 
 	return 0;
 
 err_start_queue:
-	spi_destroy_queue(master);
+	spi_destroy_queue(ctlr);
 err_init_queue:
 	return ret;
 }
@@ -1529,7 +1526,7 @@ err_init_queue:
 /*-------------------------------------------------------------------------*/
 
 #if defined(CONFIG_OF)
-static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
+static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
 			   struct device_node *nc)
 {
 	u32 value;
@@ -1559,7 +1556,7 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 			spi->mode |= SPI_TX_QUAD;
 			break;
 		default:
-			dev_warn(&master->dev,
+			dev_warn(&ctlr->dev,
 				"spi-tx-bus-width %d not supported\n",
 				value);
 			break;
@@ -1577,16 +1574,16 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 			spi->mode |= SPI_RX_QUAD;
 			break;
 		default:
-			dev_warn(&master->dev,
+			dev_warn(&ctlr->dev,
 				"spi-rx-bus-width %d not supported\n",
 				value);
 			break;
 		}
 	}
 
-	if (spi_controller_is_slave(master)) {
+	if (spi_controller_is_slave(ctlr)) {
 		if (strcmp(nc->name, "slave")) {
-			dev_err(&master->dev, "%s is not called 'slave'\n",
+			dev_err(&ctlr->dev, "%s is not called 'slave'\n",
 				nc->full_name);
 			return -EINVAL;
 		}
@@ -1596,7 +1593,7 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 	/* Device address */
 	rc = of_property_read_u32(nc, "reg", &value);
 	if (rc) {
-		dev_err(&master->dev, "%s has no valid 'reg' property (%d)\n",
+		dev_err(&ctlr->dev, "%s has no valid 'reg' property (%d)\n",
 			nc->full_name, rc);
 		return rc;
 	}
@@ -1605,7 +1602,8 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 	/* Device speed */
 	rc = of_property_read_u32(nc, "spi-max-frequency", &value);
 	if (rc) {
-		dev_err(&master->dev, "%s has no valid 'spi-max-frequency' property (%d)\n",
+		dev_err(&ctlr->dev,
+			"%s has no valid 'spi-max-frequency' property (%d)\n",
 			nc->full_name, rc);
 		return rc;
 	}
@@ -1615,15 +1613,15 @@ static int of_spi_parse_dt(struct spi_master *master, struct spi_device *spi,
 }
 
 static struct spi_device *
-of_register_spi_device(struct spi_master *master, struct device_node *nc)
+of_register_spi_device(struct spi_controller *ctlr, struct device_node *nc)
 {
 	struct spi_device *spi;
 	int rc;
 
 	/* Alloc an spi_device */
-	spi = spi_alloc_device(master);
+	spi = spi_alloc_device(ctlr);
 	if (!spi) {
-		dev_err(&master->dev, "spi_device alloc error for %s\n",
+		dev_err(&ctlr->dev, "spi_device alloc error for %s\n",
 			nc->full_name);
 		rc = -ENOMEM;
 		goto err_out;
@@ -1633,12 +1631,12 @@ of_register_spi_device(struct spi_master *master, struct device_node *nc)
 	rc = of_modalias_node(nc, spi->modalias,
 				sizeof(spi->modalias));
 	if (rc < 0) {
-		dev_err(&master->dev, "cannot find modalias for %s\n",
+		dev_err(&ctlr->dev, "cannot find modalias for %s\n",
 			nc->full_name);
 		goto err_out;
 	}
 
-	rc = of_spi_parse_dt(master, spi, nc);
+	rc = of_spi_parse_dt(ctlr, spi, nc);
 	if (rc)
 		goto err_out;
 
@@ -1649,7 +1647,7 @@ of_register_spi_device(struct spi_master *master, struct device_node *nc)
 	/* Register the new device */
 	rc = spi_add_device(spi);
 	if (rc) {
-		dev_err(&master->dev, "spi_device register error %s\n",
+		dev_err(&ctlr->dev, "spi_device register error %s\n",
 			nc->full_name);
 		goto err_of_node_put;
 	}
@@ -1665,39 +1663,40 @@ err_out:
 
 /**
  * of_register_spi_devices() - Register child devices onto the SPI bus
- * @master:	Pointer to spi_master device
+ * @ctlr:	Pointer to spi_controller device
  *
  * Registers an spi_device for each child node of controller node which
  * represents a valid SPI slave.
  */
-static void of_register_spi_devices(struct spi_master *master)
+static void of_register_spi_devices(struct spi_controller *ctlr)
 {
 	struct spi_device *spi;
 	struct device_node *nc;
 
-	if (!master->dev.of_node)
+	if (!ctlr->dev.of_node)
 		return;
 
-	for_each_available_child_of_node(master->dev.of_node, nc) {
+	for_each_available_child_of_node(ctlr->dev.of_node, nc) {
 		if (of_node_test_and_set_flag(nc, OF_POPULATED))
 			continue;
-		spi = of_register_spi_device(master, nc);
+		spi = of_register_spi_device(ctlr, nc);
 		if (IS_ERR(spi)) {
-			dev_warn(&master->dev, "Failed to create SPI device for %s\n",
-				nc->full_name);
+			dev_warn(&ctlr->dev,
+				 "Failed to create SPI device for %s\n",
+				 nc->full_name);
 			of_node_clear_flag(nc, OF_POPULATED);
 		}
 	}
 }
 #else
-static void of_register_spi_devices(struct spi_master *master) { }
+static void of_register_spi_devices(struct spi_controller *ctlr) { }
 #endif
 
 #ifdef CONFIG_ACPI
 static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 {
 	struct spi_device *spi = data;
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 
 	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
 		struct acpi_resource_spi_serialbus *sb;
@@ -1711,8 +1710,8 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 			 * 0 .. max - 1 so we need to ask the driver to
 			 * translate between the two schemes.
 			 */
-			if (master->fw_translate_cs) {
-				int cs = master->fw_translate_cs(master,
+			if (ctlr->fw_translate_cs) {
+				int cs = ctlr->fw_translate_cs(ctlr,
 						sb->device_selection);
 				if (cs < 0)
 					return cs;
@@ -1741,7 +1740,7 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 	return 1;
 }
 
-static acpi_status acpi_register_spi_device(struct spi_master *master,
+static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 					    struct acpi_device *adev)
 {
 	struct list_head resource_list;
@@ -1752,9 +1751,9 @@ static acpi_status acpi_register_spi_device(struct spi_master *master,
 	    acpi_device_enumerated(adev))
 		return AE_OK;
 
-	spi = spi_alloc_device(master);
+	spi = spi_alloc_device(ctlr);
 	if (!spi) {
-		dev_err(&master->dev, "failed to allocate SPI device for %s\n",
+		dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
 			dev_name(&adev->dev));
 		return AE_NO_MEMORY;
 	}
@@ -1783,7 +1782,7 @@ static acpi_status acpi_register_spi_device(struct spi_master *master,
 	adev->power.flags.ignore_parent = true;
 	if (spi_add_device(spi)) {
 		adev->power.flags.ignore_parent = false;
-		dev_err(&master->dev, "failed to add SPI device %s from ACPI\n",
+		dev_err(&ctlr->dev, "failed to add SPI device %s from ACPI\n",
 			dev_name(&adev->dev));
 		spi_dev_put(spi);
 	}
@@ -1794,46 +1793,45 @@ static acpi_status acpi_register_spi_device(struct spi_master *master,
 static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
 				       void *data, void **return_value)
 {
-	struct spi_master *master = data;
+	struct spi_controller *ctlr = data;
 	struct acpi_device *adev;
 
 	if (acpi_bus_get_device(handle, &adev))
 		return AE_OK;
 
-	return acpi_register_spi_device(master, adev);
+	return acpi_register_spi_device(ctlr, adev);
 }
 
-static void acpi_register_spi_devices(struct spi_master *master)
+static void acpi_register_spi_devices(struct spi_controller *ctlr)
 {
 	acpi_status status;
 	acpi_handle handle;
 
-	handle = ACPI_HANDLE(master->dev.parent);
+	handle = ACPI_HANDLE(ctlr->dev.parent);
 	if (!handle)
 		return;
 
 	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
-				     acpi_spi_add_device, NULL,
-				     master, NULL);
+				     acpi_spi_add_device, NULL, ctlr, NULL);
 	if (ACPI_FAILURE(status))
-		dev_warn(&master->dev, "failed to enumerate SPI slaves\n");
+		dev_warn(&ctlr->dev, "failed to enumerate SPI slaves\n");
 }
 #else
-static inline void acpi_register_spi_devices(struct spi_master *master) {}
+static inline void acpi_register_spi_devices(struct spi_controller *ctlr) {}
 #endif /* CONFIG_ACPI */
 
-static void spi_master_release(struct device *dev)
+static void spi_controller_release(struct device *dev)
 {
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 
-	master = container_of(dev, struct spi_master, dev);
-	kfree(master);
+	ctlr = container_of(dev, struct spi_controller, dev);
+	kfree(ctlr);
 }
 
 static struct class spi_master_class = {
 	.name		= "spi_master",
 	.owner		= THIS_MODULE,
-	.dev_release	= spi_master_release,
+	.dev_release	= spi_controller_release,
 	.dev_groups	= spi_master_groups,
 };
 
@@ -1845,10 +1843,10 @@ static struct class spi_master_class = {
  */
 int spi_slave_abort(struct spi_device *spi)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 
-	if (spi_controller_is_slave(master) && master->slave_abort)
-		return master->slave_abort(master);
+	if (spi_controller_is_slave(ctlr) && ctlr->slave_abort)
+		return ctlr->slave_abort(ctlr);
 
 	return -ENOTSUPP;
 }
@@ -1862,7 +1860,8 @@ static int match_true(struct device *dev, void *data)
 static ssize_t spi_slave_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
-	struct spi_master *ctlr = container_of(dev, struct spi_master, dev);
+	struct spi_controller *ctlr = container_of(dev, struct spi_controller,
+						   dev);
 	struct device *child;
 
 	child = device_find_child(&ctlr->dev, NULL, match_true);
@@ -1874,7 +1873,8 @@ static ssize_t spi_slave_store(struct device *dev,
 			       struct device_attribute *attr, const char *buf,
 			       size_t count)
 {
-	struct spi_master *ctlr = container_of(dev, struct spi_master, dev);
+	struct spi_controller *ctlr = container_of(dev, struct spi_controller,
+						   dev);
 	struct spi_device *spi;
 	struct device *child;
 	char name[32];
@@ -1921,7 +1921,7 @@ static const struct attribute_group spi_slave_group = {
 };
 
 static const struct attribute_group *spi_slave_groups[] = {
-	&spi_master_statistics_group,
+	&spi_controller_statistics_group,
 	&spi_slave_group,
 	NULL,
 };
@@ -1929,7 +1929,7 @@ static const struct attribute_group *spi_slave_groups[] = {
 static struct class spi_slave_class = {
 	.name		= "spi_slave",
 	.owner		= THIS_MODULE,
-	.dev_release	= spi_master_release,
+	.dev_release	= spi_controller_release,
 	.dev_groups	= spi_slave_groups,
 };
 #else
@@ -1941,62 +1941,63 @@ extern struct class spi_slave_class;	/* dummy */
  * @dev: the controller, possibly using the platform_bus
  * @size: how much zeroed driver-private data to allocate; the pointer to this
  *	memory is in the driver_data field of the returned device,
- *	accessible with spi_master_get_devdata().
+ *	accessible with spi_controller_get_devdata().
  * @slave: flag indicating whether to allocate an SPI master (false) or SPI
  *	slave (true) controller
  * Context: can sleep
  *
  * This call is used only by SPI controller drivers, which are the
  * only ones directly touching chip registers.  It's how they allocate
- * an spi_master structure, prior to calling spi_register_master().
+ * an spi_controller structure, prior to calling spi_register_controller().
  *
  * This must be called from context that can sleep.
  *
  * The caller is responsible for assigning the bus number and initializing the
- * controller's methods before calling spi_register_master(); and (after errors
- * adding the device) calling spi_master_put() to prevent a memory leak.
+ * controller's methods before calling spi_register_controller(); and (after
+ * errors adding the device) calling spi_controller_put() to prevent a memory
+ * leak.
  *
  * Return: the SPI controller structure on success, else NULL.
  */
-struct spi_master *__spi_alloc_controller(struct device *dev,
-					  unsigned int size, bool slave)
+struct spi_controller *__spi_alloc_controller(struct device *dev,
+					      unsigned int size, bool slave)
 {
-	struct spi_master	*master;
+	struct spi_controller	*ctlr;
 
 	if (!dev)
 		return NULL;
 
-	master = kzalloc(size + sizeof(*master), GFP_KERNEL);
-	if (!master)
+	ctlr = kzalloc(size + sizeof(*ctlr), GFP_KERNEL);
+	if (!ctlr)
 		return NULL;
 
-	device_initialize(&master->dev);
-	master->bus_num = -1;
-	master->num_chipselect = 1;
-	master->slave = slave;
+	device_initialize(&ctlr->dev);
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = 1;
+	ctlr->slave = slave;
 	if (IS_ENABLED(CONFIG_SPI_SLAVE) && slave)
-		master->dev.class = &spi_slave_class;
+		ctlr->dev.class = &spi_slave_class;
 	else
-		master->dev.class = &spi_master_class;
-	master->dev.parent = dev;
-	pm_suspend_ignore_children(&master->dev, true);
-	spi_master_set_devdata(master, &master[1]);
+		ctlr->dev.class = &spi_master_class;
+	ctlr->dev.parent = dev;
+	pm_suspend_ignore_children(&ctlr->dev, true);
+	spi_controller_set_devdata(ctlr, &ctlr[1]);
 
-	return master;
+	return ctlr;
 }
 EXPORT_SYMBOL_GPL(__spi_alloc_controller);
 
 #ifdef CONFIG_OF
-static int of_spi_register_master(struct spi_master *master)
+static int of_spi_register_master(struct spi_controller *ctlr)
 {
 	int nb, i, *cs;
-	struct device_node *np = master->dev.of_node;
+	struct device_node *np = ctlr->dev.of_node;
 
 	if (!np)
 		return 0;
 
 	nb = of_gpio_named_count(np, "cs-gpios");
-	master->num_chipselect = max_t(int, nb, master->num_chipselect);
+	ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect);
 
 	/* Return error only for an incorrectly formed cs-gpios property */
 	if (nb == 0 || nb == -ENOENT)
@@ -2004,15 +2005,14 @@ static int of_spi_register_master(struct spi_master *master)
 	else if (nb < 0)
 		return nb;
 
-	cs = devm_kzalloc(&master->dev,
-			  sizeof(int) * master->num_chipselect,
+	cs = devm_kzalloc(&ctlr->dev, sizeof(int) * ctlr->num_chipselect,
 			  GFP_KERNEL);
-	master->cs_gpios = cs;
+	ctlr->cs_gpios = cs;
 
-	if (!master->cs_gpios)
+	if (!ctlr->cs_gpios)
 		return -ENOMEM;
 
-	for (i = 0; i < master->num_chipselect; i++)
+	for (i = 0; i < ctlr->num_chipselect; i++)
 		cs[i] = -ENOENT;
 
 	for (i = 0; i < nb; i++)
@@ -2021,20 +2021,21 @@ static int of_spi_register_master(struct spi_master *master)
 	return 0;
 }
 #else
-static int of_spi_register_master(struct spi_master *master)
+static int of_spi_register_master(struct spi_controller *ctlr)
 {
 	return 0;
 }
 #endif
 
 /**
- * spi_register_master - register SPI master controller
- * @master: initialized master, originally from spi_alloc_master()
+ * spi_register_controller - register SPI master or slave controller
+ * @ctlr: initialized master, originally from spi_alloc_master() or
+ *	spi_alloc_slave()
  * Context: can sleep
  *
- * SPI master controllers connect to their drivers using some non-SPI bus,
+ * SPI controllers connect to their drivers using some non-SPI bus,
  * such as the platform bus.  The final stage of probe() in that code
- * includes calling spi_register_master() to hook up to this SPI bus glue.
+ * includes calling spi_register_controller() to hook up to this SPI bus glue.
  *
  * SPI controllers use board specific (often SOC specific) bus numbers,
  * and board-specific addressing for SPI devices combines those numbers
@@ -2043,16 +2044,16 @@ static int of_spi_register_master(struct spi_master *master)
  * chip is at which address.
  *
  * This must be called from context that can sleep.  It returns zero on
- * success, else a negative error code (dropping the master's refcount).
+ * success, else a negative error code (dropping the controller's refcount).
  * After a successful return, the caller is responsible for calling
- * spi_unregister_master().
+ * spi_unregister_controller().
  *
  * Return: zero on success, else a negative error code.
  */
-int spi_register_master(struct spi_master *master)
+int spi_register_controller(struct spi_controller *ctlr)
 {
 	static atomic_t		dyn_bus_id = ATOMIC_INIT((1<<15) - 1);
-	struct device		*dev = master->dev.parent;
+	struct device		*dev = ctlr->dev.parent;
 	struct boardinfo	*bi;
 	int			status = -ENODEV;
 	int			dynamic = 0;
@@ -2060,8 +2061,8 @@ int spi_register_master(struct spi_master *master)
 	if (!dev)
 		return -ENODEV;
 
-	if (!spi_controller_is_slave(master)) {
-		status = of_spi_register_master(master);
+	if (!spi_controller_is_slave(ctlr)) {
+		status = of_spi_register_master(ctlr);
 		if (status)
 			return status;
 	}
@@ -2069,97 +2070,100 @@ int spi_register_master(struct spi_master *master)
 	/* even if it's just one always-selected device, there must
 	 * be at least one chipselect
 	 */
-	if (master->num_chipselect == 0)
+	if (ctlr->num_chipselect == 0)
 		return -EINVAL;
 
-	if ((master->bus_num < 0) && master->dev.of_node)
-		master->bus_num = of_alias_get_id(master->dev.of_node, "spi");
+	if ((ctlr->bus_num < 0) && ctlr->dev.of_node)
+		ctlr->bus_num = of_alias_get_id(ctlr->dev.of_node, "spi");
 
 	/* convention:  dynamically assigned bus IDs count down from the max */
-	if (master->bus_num < 0) {
+	if (ctlr->bus_num < 0) {
 		/* FIXME switch to an IDR based scheme, something like
 		 * I2C now uses, so we can't run out of "dynamic" IDs
 		 */
-		master->bus_num = atomic_dec_return(&dyn_bus_id);
+		ctlr->bus_num = atomic_dec_return(&dyn_bus_id);
 		dynamic = 1;
 	}
 
-	INIT_LIST_HEAD(&master->queue);
-	spin_lock_init(&master->queue_lock);
-	spin_lock_init(&master->bus_lock_spinlock);
-	mutex_init(&master->bus_lock_mutex);
-	mutex_init(&master->io_mutex);
-	master->bus_lock_flag = 0;
-	init_completion(&master->xfer_completion);
-	if (!master->max_dma_len)
-		master->max_dma_len = INT_MAX;
+	INIT_LIST_HEAD(&ctlr->queue);
+	spin_lock_init(&ctlr->queue_lock);
+	spin_lock_init(&ctlr->bus_lock_spinlock);
+	mutex_init(&ctlr->bus_lock_mutex);
+	mutex_init(&ctlr->io_mutex);
+	ctlr->bus_lock_flag = 0;
+	init_completion(&ctlr->xfer_completion);
+	if (!ctlr->max_dma_len)
+		ctlr->max_dma_len = INT_MAX;
 
 	/* register the device, then userspace will see it.
 	 * registration fails if the bus ID is in use.
 	 */
-	dev_set_name(&master->dev, "spi%u", master->bus_num);
-	status = device_add(&master->dev);
+	dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num);
+	status = device_add(&ctlr->dev);
 	if (status < 0)
 		goto done;
 	dev_dbg(dev, "registered %s %s%s\n",
-			spi_controller_is_slave(master) ? "slave" : "master",
-			dev_name(&master->dev), dynamic ? " (dynamic)" : "");
+			spi_controller_is_slave(ctlr) ? "slave" : "master",
+			dev_name(&ctlr->dev), dynamic ? " (dynamic)" : "");
 
 	/* If we're using a queued driver, start the queue */
-	if (master->transfer)
-		dev_info(dev, "master is unqueued, this is deprecated\n");
+	if (ctlr->transfer)
+		dev_info(dev, "controller is unqueued, this is deprecated\n");
 	else {
-		status = spi_master_initialize_queue(master);
+		status = spi_controller_initialize_queue(ctlr);
 		if (status) {
-			device_del(&master->dev);
+			device_del(&ctlr->dev);
 			goto done;
 		}
 	}
 	/* add statistics */
-	spin_lock_init(&master->statistics.lock);
+	spin_lock_init(&ctlr->statistics.lock);
 
 	mutex_lock(&board_lock);
-	list_add_tail(&master->list, &spi_master_list);
+	list_add_tail(&ctlr->list, &spi_controller_list);
 	list_for_each_entry(bi, &board_list, list)
-		spi_match_master_to_boardinfo(master, &bi->board_info);
+		spi_match_controller_to_boardinfo(ctlr, &bi->board_info);
 	mutex_unlock(&board_lock);
 
 	/* Register devices from the device tree and ACPI */
-	of_register_spi_devices(master);
-	acpi_register_spi_devices(master);
+	of_register_spi_devices(ctlr);
+	acpi_register_spi_devices(ctlr);
 done:
 	return status;
 }
-EXPORT_SYMBOL_GPL(spi_register_master);
+EXPORT_SYMBOL_GPL(spi_register_controller);
 
 static void devm_spi_unregister(struct device *dev, void *res)
 {
-	spi_unregister_master(*(struct spi_master **)res);
+	spi_unregister_controller(*(struct spi_controller **)res);
 }
 
 /**
- * devm_spi_register_master - register managed SPI master controller
- * @dev:    device managing SPI master
- * @master: initialized master, originally from spi_alloc_master()
+ * devm_spi_register_controller - register managed SPI master or slave
+ *	controller
+ * @dev:    device managing SPI controller
+ * @ctlr: initialized controller, originally from spi_alloc_master() or
+ *	spi_alloc_slave()
  * Context: can sleep
  *
- * Register a SPI device as with spi_register_master() which will
+ * Register a SPI device as with spi_register_controller() which will
  * automatically be unregister
  *
  * Return: zero on success, else a negative error code.
  */
-int devm_spi_register_master(struct device *dev, struct spi_master *master)
+int devm_spi_register_controller(struct device *dev,
+				 struct spi_controller *ctlr)
 {
-	struct spi_master **ptr;
+	struct spi_controller **ptr;
 	int ret;
 
 	ptr = devres_alloc(devm_spi_unregister, sizeof(*ptr), GFP_KERNEL);
 	if (!ptr)
 		return -ENOMEM;
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(ctlr);
 	if (!ret) {
-		*ptr = master;
+		*ptr = ctlr;
 		devres_add(dev, ptr);
 	} else {
 		devres_free(ptr);
@@ -2167,7 +2171,7 @@ int devm_spi_register_master(struct device *dev, struct spi_master *master)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(devm_spi_register_master);
+EXPORT_SYMBOL_GPL(devm_spi_register_controller);
 
 static int __unregister(struct device *dev, void *null)
 {
@@ -2176,71 +2180,71 @@ static int __unregister(struct device *dev, void *null)
 }
 
 /**
- * spi_unregister_master - unregister SPI master controller
- * @master: the master being unregistered
+ * spi_unregister_controller - unregister SPI master or slave controller
+ * @ctlr: the controller being unregistered
  * Context: can sleep
  *
- * This call is used only by SPI master controller drivers, which are the
+ * This call is used only by SPI controller drivers, which are the
  * only ones directly touching chip registers.
  *
  * This must be called from context that can sleep.
  */
-void spi_unregister_master(struct spi_master *master)
+void spi_unregister_controller(struct spi_controller *ctlr)
 {
 	int dummy;
 
-	if (master->queued) {
-		if (spi_destroy_queue(master))
-			dev_err(&master->dev, "queue remove failed\n");
+	if (ctlr->queued) {
+		if (spi_destroy_queue(ctlr))
+			dev_err(&ctlr->dev, "queue remove failed\n");
 	}
 
 	mutex_lock(&board_lock);
-	list_del(&master->list);
+	list_del(&ctlr->list);
 	mutex_unlock(&board_lock);
 
-	dummy = device_for_each_child(&master->dev, NULL, __unregister);
-	device_unregister(&master->dev);
+	dummy = device_for_each_child(&ctlr->dev, NULL, __unregister);
+	device_unregister(&ctlr->dev);
 }
-EXPORT_SYMBOL_GPL(spi_unregister_master);
+EXPORT_SYMBOL_GPL(spi_unregister_controller);
 
-int spi_master_suspend(struct spi_master *master)
+int spi_controller_suspend(struct spi_controller *ctlr)
 {
 	int ret;
 
-	/* Basically no-ops for non-queued masters */
-	if (!master->queued)
+	/* Basically no-ops for non-queued controllers */
+	if (!ctlr->queued)
 		return 0;
 
-	ret = spi_stop_queue(master);
+	ret = spi_stop_queue(ctlr);
 	if (ret)
-		dev_err(&master->dev, "queue stop failed\n");
+		dev_err(&ctlr->dev, "queue stop failed\n");
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(spi_master_suspend);
+EXPORT_SYMBOL_GPL(spi_controller_suspend);
 
-int spi_master_resume(struct spi_master *master)
+int spi_controller_resume(struct spi_controller *ctlr)
 {
 	int ret;
 
-	if (!master->queued)
+	if (!ctlr->queued)
 		return 0;
 
-	ret = spi_start_queue(master);
+	ret = spi_start_queue(ctlr);
 	if (ret)
-		dev_err(&master->dev, "queue restart failed\n");
+		dev_err(&ctlr->dev, "queue restart failed\n");
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(spi_master_resume);
+EXPORT_SYMBOL_GPL(spi_controller_resume);
 
-static int __spi_master_match(struct device *dev, const void *data)
+static int __spi_controller_match(struct device *dev, const void *data)
 {
-	struct spi_master *m;
+	struct spi_controller *ctlr;
 	const u16 *bus_num = data;
 
-	m = container_of(dev, struct spi_master, dev);
-	return m->bus_num == *bus_num;
+	ctlr = container_of(dev, struct spi_controller, dev);
+	return ctlr->bus_num == *bus_num;
 }
 
 /**
@@ -2250,22 +2254,22 @@ static int __spi_master_match(struct device *dev, const void *data)
  *
  * This call may be used with devices that are registered after
  * arch init time.  It returns a refcounted pointer to the relevant
- * spi_master (which the caller must release), or NULL if there is
+ * spi_controller (which the caller must release), or NULL if there is
  * no such master registered.
  *
  * Return: the SPI master structure on success, else NULL.
  */
-struct spi_master *spi_busnum_to_master(u16 bus_num)
+struct spi_controller *spi_busnum_to_master(u16 bus_num)
 {
 	struct device		*dev;
-	struct spi_master	*master = NULL;
+	struct spi_controller	*ctlr = NULL;
 
 	dev = class_find_device(&spi_master_class, NULL, &bus_num,
-				__spi_master_match);
+				__spi_controller_match);
 	if (dev)
-		master = container_of(dev, struct spi_master, dev);
+		ctlr = container_of(dev, struct spi_controller, dev);
 	/* reference got in class_find_device */
-	return master;
+	return ctlr;
 }
 EXPORT_SYMBOL_GPL(spi_busnum_to_master);
 
@@ -2285,7 +2289,7 @@ EXPORT_SYMBOL_GPL(spi_busnum_to_master);
  * Return: the pointer to the allocated data
  *
  * This may get enhanced in the future to allocate from a memory pool
- * of the @spi_device or @spi_master to avoid repeated allocations.
+ * of the @spi_device or @spi_controller to avoid repeated allocations.
  */
 void *spi_res_alloc(struct spi_device *spi,
 		    spi_res_release_t release,
@@ -2337,11 +2341,10 @@ EXPORT_SYMBOL_GPL(spi_res_add);
 
 /**
  * spi_res_release - release all spi resources for this message
- * @master:  the @spi_master
+ * @ctlr:  the @spi_controller
  * @message: the @spi_message
  */
-void spi_res_release(struct spi_master *master,
-		     struct spi_message *message)
+void spi_res_release(struct spi_controller *ctlr, struct spi_message *message)
 {
 	struct spi_res *res;
 
@@ -2350,7 +2353,7 @@ void spi_res_release(struct spi_master *master,
 				      struct spi_res, entry);
 
 		if (res->release)
-			res->release(master, message, res->data);
+			res->release(ctlr, message, res->data);
 
 		list_del(&res->entry);
 
@@ -2363,7 +2366,7 @@ EXPORT_SYMBOL_GPL(spi_res_release);
 
 /* Core methods for spi_message alterations */
 
-static void __spi_replace_transfers_release(struct spi_master *master,
+static void __spi_replace_transfers_release(struct spi_controller *ctlr,
 					    struct spi_message *msg,
 					    void *res)
 {
@@ -2372,7 +2375,7 @@ static void __spi_replace_transfers_release(struct spi_master *master,
 
 	/* call extra callback if requested */
 	if (rxfer->release)
-		rxfer->release(master, msg, res);
+		rxfer->release(ctlr, msg, res);
 
 	/* insert replaced transfers back into the message */
 	list_splice(&rxfer->replaced_transfers, rxfer->replaced_after);
@@ -2492,7 +2495,7 @@ struct spi_replaced_transfers *spi_replace_transfers(
 }
 EXPORT_SYMBOL_GPL(spi_replace_transfers);
 
-static int __spi_split_transfer_maxsize(struct spi_master *master,
+static int __spi_split_transfer_maxsize(struct spi_controller *ctlr,
 					struct spi_message *msg,
 					struct spi_transfer **xferp,
 					size_t maxsize,
@@ -2554,7 +2557,7 @@ static int __spi_split_transfer_maxsize(struct spi_master *master,
 	*xferp = &xfers[count - 1];
 
 	/* increment statistics counters */
-	SPI_STATISTICS_INCREMENT_FIELD(&master->statistics,
+	SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics,
 				       transfers_split_maxsize);
 	SPI_STATISTICS_INCREMENT_FIELD(&msg->spi->statistics,
 				       transfers_split_maxsize);
@@ -2566,14 +2569,14 @@ static int __spi_split_transfer_maxsize(struct spi_master *master,
  * spi_split_tranfers_maxsize - split spi transfers into multiple transfers
  *                              when an individual transfer exceeds a
  *                              certain size
- * @master:    the @spi_master for this transfer
+ * @ctlr:    the @spi_controller for this transfer
  * @msg:   the @spi_message to transform
  * @maxsize:  the maximum when to apply this
  * @gfp: GFP allocation flags
  *
  * Return: status of transformation
  */
-int spi_split_transfers_maxsize(struct spi_master *master,
+int spi_split_transfers_maxsize(struct spi_controller *ctlr,
 				struct spi_message *msg,
 				size_t maxsize,
 				gfp_t gfp)
@@ -2589,8 +2592,8 @@ int spi_split_transfers_maxsize(struct spi_master *master,
 	 */
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
 		if (xfer->len > maxsize) {
-			ret = __spi_split_transfer_maxsize(
-				master, msg, &xfer, maxsize, gfp);
+			ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer,
+							   maxsize, gfp);
 			if (ret)
 				return ret;
 		}
@@ -2602,18 +2605,18 @@ EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize);
 
 /*-------------------------------------------------------------------------*/
 
-/* Core methods for SPI master protocol drivers.  Some of the
+/* Core methods for SPI controller protocol drivers.  Some of the
  * other core methods are currently defined as inline functions.
  */
 
-static int __spi_validate_bits_per_word(struct spi_master *master, u8 bits_per_word)
+static int __spi_validate_bits_per_word(struct spi_controller *ctlr,
+					u8 bits_per_word)
 {
-	if (master->bits_per_word_mask) {
+	if (ctlr->bits_per_word_mask) {
 		/* Only 32 bits fit in the mask */
 		if (bits_per_word > 32)
 			return -EINVAL;
-		if (!(master->bits_per_word_mask &
-				SPI_BPW_MASK(bits_per_word)))
+		if (!(ctlr->bits_per_word_mask & SPI_BPW_MASK(bits_per_word)))
 			return -EINVAL;
 	}
 
@@ -2659,9 +2662,9 @@ int spi_setup(struct spi_device *spi)
 		(SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)))
 		return -EINVAL;
 	/* help drivers fail *cleanly* when they need options
-	 * that aren't supported with their current master
+	 * that aren't supported with their current controller
 	 */
-	bad_bits = spi->mode & ~spi->master->mode_bits;
+	bad_bits = spi->mode & ~spi->controller->mode_bits;
 	ugly_bits = bad_bits &
 		    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD);
 	if (ugly_bits) {
@@ -2680,15 +2683,16 @@ int spi_setup(struct spi_device *spi)
 	if (!spi->bits_per_word)
 		spi->bits_per_word = 8;
 
-	status = __spi_validate_bits_per_word(spi->master, spi->bits_per_word);
+	status = __spi_validate_bits_per_word(spi->controller,
+					      spi->bits_per_word);
 	if (status)
 		return status;
 
 	if (!spi->max_speed_hz)
-		spi->max_speed_hz = spi->master->max_speed_hz;
+		spi->max_speed_hz = spi->controller->max_speed_hz;
 
-	if (spi->master->setup)
-		status = spi->master->setup(spi);
+	if (spi->controller->setup)
+		status = spi->controller->setup(spi);
 
 	spi_set_cs(spi, false);
 
@@ -2707,7 +2711,7 @@ EXPORT_SYMBOL_GPL(spi_setup);
 
 static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 	struct spi_transfer *xfer;
 	int w_size;
 
@@ -2719,16 +2723,16 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 	 * either MOSI or MISO is missing.  They can also be caused by
 	 * software limitations.
 	 */
-	if ((master->flags & SPI_MASTER_HALF_DUPLEX)
-			|| (spi->mode & SPI_3WIRE)) {
-		unsigned flags = master->flags;
+	if ((ctlr->flags & SPI_CONTROLLER_HALF_DUPLEX) ||
+	    (spi->mode & SPI_3WIRE)) {
+		unsigned flags = ctlr->flags;
 
 		list_for_each_entry(xfer, &message->transfers, transfer_list) {
 			if (xfer->rx_buf && xfer->tx_buf)
 				return -EINVAL;
-			if ((flags & SPI_MASTER_NO_TX) && xfer->tx_buf)
+			if ((flags & SPI_CONTROLLER_NO_TX) && xfer->tx_buf)
 				return -EINVAL;
-			if ((flags & SPI_MASTER_NO_RX) && xfer->rx_buf)
+			if ((flags & SPI_CONTROLLER_NO_RX) && xfer->rx_buf)
 				return -EINVAL;
 		}
 	}
@@ -2748,13 +2752,12 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 		if (!xfer->speed_hz)
 			xfer->speed_hz = spi->max_speed_hz;
 		if (!xfer->speed_hz)
-			xfer->speed_hz = master->max_speed_hz;
+			xfer->speed_hz = ctlr->max_speed_hz;
 
-		if (master->max_speed_hz &&
-		    xfer->speed_hz > master->max_speed_hz)
-			xfer->speed_hz = master->max_speed_hz;
+		if (ctlr->max_speed_hz && xfer->speed_hz > ctlr->max_speed_hz)
+			xfer->speed_hz = ctlr->max_speed_hz;
 
-		if (__spi_validate_bits_per_word(master, xfer->bits_per_word))
+		if (__spi_validate_bits_per_word(ctlr, xfer->bits_per_word))
 			return -EINVAL;
 
 		/*
@@ -2772,8 +2775,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 		if (xfer->len % w_size)
 			return -EINVAL;
 
-		if (xfer->speed_hz && master->min_speed_hz &&
-		    xfer->speed_hz < master->min_speed_hz)
+		if (xfer->speed_hz && ctlr->min_speed_hz &&
+		    xfer->speed_hz < ctlr->min_speed_hz)
 			return -EINVAL;
 
 		if (xfer->tx_buf && !xfer->tx_nbits)
@@ -2818,16 +2821,16 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 
 static int __spi_async(struct spi_device *spi, struct spi_message *message)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 
 	message->spi = spi;
 
-	SPI_STATISTICS_INCREMENT_FIELD(&master->statistics, spi_async);
+	SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_async);
 	SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_async);
 
 	trace_spi_message_submit(message);
 
-	return master->transfer(spi, message);
+	return ctlr->transfer(spi, message);
 }
 
 /**
@@ -2863,7 +2866,7 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message)
  */
 int spi_async(struct spi_device *spi, struct spi_message *message)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 	int ret;
 	unsigned long flags;
 
@@ -2871,14 +2874,14 @@ int spi_async(struct spi_device *spi, struct spi_message *message)
 	if (ret != 0)
 		return ret;
 
-	spin_lock_irqsave(&master->bus_lock_spinlock, flags);
+	spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
 
-	if (master->bus_lock_flag)
+	if (ctlr->bus_lock_flag)
 		ret = -EBUSY;
 	else
 		ret = __spi_async(spi, message);
 
-	spin_unlock_irqrestore(&master->bus_lock_spinlock, flags);
+	spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
 
 	return ret;
 }
@@ -2917,7 +2920,7 @@ EXPORT_SYMBOL_GPL(spi_async);
  */
 int spi_async_locked(struct spi_device *spi, struct spi_message *message)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 	int ret;
 	unsigned long flags;
 
@@ -2925,11 +2928,11 @@ int spi_async_locked(struct spi_device *spi, struct spi_message *message)
 	if (ret != 0)
 		return ret;
 
-	spin_lock_irqsave(&master->bus_lock_spinlock, flags);
+	spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
 
 	ret = __spi_async(spi, message);
 
-	spin_unlock_irqrestore(&master->bus_lock_spinlock, flags);
+	spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
 
 	return ret;
 
@@ -2941,7 +2944,7 @@ int spi_flash_read(struct spi_device *spi,
 		   struct spi_flash_read_message *msg)
 
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *master = spi->controller;
 	struct device *rx_dev = NULL;
 	int ret;
 
@@ -2995,7 +2998,7 @@ EXPORT_SYMBOL_GPL(spi_flash_read);
 
 /*-------------------------------------------------------------------------*/
 
-/* Utility methods for SPI master protocol drivers, layered on
+/* Utility methods for SPI protocol drivers, layered on
  * top of the core.  Some other utility methods are defined as
  * inline functions.
  */
@@ -3009,7 +3012,7 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	int status;
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 	unsigned long flags;
 
 	status = __spi_validate(spi, message);
@@ -3020,7 +3023,7 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
 	message->context = &done;
 	message->spi = spi;
 
-	SPI_STATISTICS_INCREMENT_FIELD(&master->statistics, spi_sync);
+	SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_sync);
 	SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_sync);
 
 	/* If we're not using the legacy transfer method then we will
@@ -3028,14 +3031,14 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
 	 * This code would be less tricky if we could remove the
 	 * support for driver implemented message queues.
 	 */
-	if (master->transfer == spi_queued_transfer) {
-		spin_lock_irqsave(&master->bus_lock_spinlock, flags);
+	if (ctlr->transfer == spi_queued_transfer) {
+		spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
 
 		trace_spi_message_submit(message);
 
 		status = __spi_queued_transfer(spi, message, false);
 
-		spin_unlock_irqrestore(&master->bus_lock_spinlock, flags);
+		spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
 	} else {
 		status = spi_async_locked(spi, message);
 	}
@@ -3044,12 +3047,12 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
 		/* Push out the messages in the calling context if we
 		 * can.
 		 */
-		if (master->transfer == spi_queued_transfer) {
-			SPI_STATISTICS_INCREMENT_FIELD(&master->statistics,
+		if (ctlr->transfer == spi_queued_transfer) {
+			SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics,
 						       spi_sync_immediate);
 			SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics,
 						       spi_sync_immediate);
-			__spi_pump_messages(master, false);
+			__spi_pump_messages(ctlr, false);
 		}
 
 		wait_for_completion(&done);
@@ -3084,9 +3087,9 @@ int spi_sync(struct spi_device *spi, struct spi_message *message)
 {
 	int ret;
 
-	mutex_lock(&spi->master->bus_lock_mutex);
+	mutex_lock(&spi->controller->bus_lock_mutex);
 	ret = __spi_sync(spi, message);
-	mutex_unlock(&spi->master->bus_lock_mutex);
+	mutex_unlock(&spi->controller->bus_lock_mutex);
 
 	return ret;
 }
@@ -3116,7 +3119,7 @@ EXPORT_SYMBOL_GPL(spi_sync_locked);
 
 /**
  * spi_bus_lock - obtain a lock for exclusive SPI bus usage
- * @master: SPI bus master that should be locked for exclusive bus access
+ * @ctlr: SPI bus master that should be locked for exclusive bus access
  * Context: can sleep
  *
  * This call may only be used from a context that may sleep.  The sleep
@@ -3129,15 +3132,15 @@ EXPORT_SYMBOL_GPL(spi_sync_locked);
  *
  * Return: always zero.
  */
-int spi_bus_lock(struct spi_master *master)
+int spi_bus_lock(struct spi_controller *ctlr)
 {
 	unsigned long flags;
 
-	mutex_lock(&master->bus_lock_mutex);
+	mutex_lock(&ctlr->bus_lock_mutex);
 
-	spin_lock_irqsave(&master->bus_lock_spinlock, flags);
-	master->bus_lock_flag = 1;
-	spin_unlock_irqrestore(&master->bus_lock_spinlock, flags);
+	spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
+	ctlr->bus_lock_flag = 1;
+	spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
 
 	/* mutex remains locked until spi_bus_unlock is called */
 
@@ -3147,7 +3150,7 @@ EXPORT_SYMBOL_GPL(spi_bus_lock);
 
 /**
  * spi_bus_unlock - release the lock for exclusive SPI bus usage
- * @master: SPI bus master that was locked for exclusive bus access
+ * @ctlr: SPI bus master that was locked for exclusive bus access
  * Context: can sleep
  *
  * This call may only be used from a context that may sleep.  The sleep
@@ -3158,11 +3161,11 @@ EXPORT_SYMBOL_GPL(spi_bus_lock);
  *
  * Return: always zero.
  */
-int spi_bus_unlock(struct spi_master *master)
+int spi_bus_unlock(struct spi_controller *ctlr)
 {
-	master->bus_lock_flag = 0;
+	ctlr->bus_lock_flag = 0;
 
-	mutex_unlock(&master->bus_lock_mutex);
+	mutex_unlock(&ctlr->bus_lock_mutex);
 
 	return 0;
 }
@@ -3264,48 +3267,48 @@ static struct spi_device *of_find_spi_device_by_node(struct device_node *node)
 	return dev ? to_spi_device(dev) : NULL;
 }
 
-static int __spi_of_master_match(struct device *dev, const void *data)
+static int __spi_of_controller_match(struct device *dev, const void *data)
 {
 	return dev->of_node == data;
 }
 
-/* the spi masters are not using spi_bus, so we find it with another way */
-static struct spi_master *of_find_spi_master_by_node(struct device_node *node)
+/* the spi controllers are not using spi_bus, so we find it with another way */
+static struct spi_controller *of_find_spi_controller_by_node(struct device_node *node)
 {
 	struct device *dev;
 
 	dev = class_find_device(&spi_master_class, NULL, node,
-				__spi_of_master_match);
+				__spi_of_controller_match);
 	if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE))
 		dev = class_find_device(&spi_slave_class, NULL, node,
-					__spi_of_master_match);
+					__spi_of_controller_match);
 	if (!dev)
 		return NULL;
 
 	/* reference got in class_find_device */
-	return container_of(dev, struct spi_master, dev);
+	return container_of(dev, struct spi_controller, dev);
 }
 
 static int of_spi_notify(struct notifier_block *nb, unsigned long action,
 			 void *arg)
 {
 	struct of_reconfig_data *rd = arg;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct spi_device *spi;
 
 	switch (of_reconfig_get_state_change(action, arg)) {
 	case OF_RECONFIG_CHANGE_ADD:
-		master = of_find_spi_master_by_node(rd->dn->parent);
-		if (master == NULL)
+		ctlr = of_find_spi_controller_by_node(rd->dn->parent);
+		if (ctlr == NULL)
 			return NOTIFY_OK;	/* not for us */
 
 		if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) {
-			put_device(&master->dev);
+			put_device(&ctlr->dev);
 			return NOTIFY_OK;
 		}
 
-		spi = of_register_spi_device(master, rd->dn);
-		put_device(&master->dev);
+		spi = of_register_spi_device(ctlr, rd->dn);
+		put_device(&ctlr->dev);
 
 		if (IS_ERR(spi)) {
 			pr_err("%s: failed to create for '%s'\n",
@@ -3344,7 +3347,7 @@ extern struct notifier_block spi_of_notifier;
 #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */
 
 #if IS_ENABLED(CONFIG_ACPI)
-static int spi_acpi_master_match(struct device *dev, const void *data)
+static int spi_acpi_controller_match(struct device *dev, const void *data)
 {
 	return ACPI_COMPANION(dev->parent) == data;
 }
@@ -3354,19 +3357,19 @@ static int spi_acpi_device_match(struct device *dev, void *data)
 	return ACPI_COMPANION(dev) == data;
 }
 
-static struct spi_master *acpi_spi_find_master_by_adev(struct acpi_device *adev)
+static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev)
 {
 	struct device *dev;
 
 	dev = class_find_device(&spi_master_class, NULL, adev,
-				spi_acpi_master_match);
+				spi_acpi_controller_match);
 	if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE))
 		dev = class_find_device(&spi_slave_class, NULL, adev,
-					spi_acpi_master_match);
+					spi_acpi_controller_match);
 	if (!dev)
 		return NULL;
 
-	return container_of(dev, struct spi_master, dev);
+	return container_of(dev, struct spi_controller, dev);
 }
 
 static struct spi_device *acpi_spi_find_device_by_adev(struct acpi_device *adev)
@@ -3382,17 +3385,17 @@ static int acpi_spi_notify(struct notifier_block *nb, unsigned long value,
 			   void *arg)
 {
 	struct acpi_device *adev = arg;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct spi_device *spi;
 
 	switch (value) {
 	case ACPI_RECONFIG_DEVICE_ADD:
-		master = acpi_spi_find_master_by_adev(adev->parent);
-		if (!master)
+		ctlr = acpi_spi_find_controller_by_adev(adev->parent);
+		if (!ctlr)
 			break;
 
-		acpi_register_spi_device(master, adev);
-		put_device(&master->dev);
+		acpi_register_spi_device(ctlr, adev);
+		put_device(&ctlr->dev);
 		break;
 	case ACPI_RECONFIG_DEVICE_REMOVE:
 		if (!acpi_device_enumerated(adev))
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0a78745e5766..7b2170bfd6e7 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -24,7 +24,7 @@
 
 struct dma_chan;
 struct property_entry;
-struct spi_master;
+struct spi_controller;
 struct spi_transfer;
 struct spi_flash_read_message;
 
@@ -84,7 +84,7 @@ struct spi_statistics {
 
 void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
 				       struct spi_transfer *xfer,
-				       struct spi_master *master);
+				       struct spi_controller *ctlr);
 
 #define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count)	\
 	do {							\
@@ -98,13 +98,14 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
 	SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1)
 
 /**
- * struct spi_device - Master side proxy for an SPI slave device
+ * struct spi_device - Controller side proxy for an SPI slave device
  * @dev: Driver model representation of the device.
- * @master: SPI controller used with the device.
+ * @controller: SPI controller used with the device.
+ * @master: Copy of controller, for backwards compatibility.
  * @max_speed_hz: Maximum clock rate to be used with this chip
  *	(on this board); may be changed by the device's driver.
  *	The spi_transfer.speed_hz can override this for each transfer.
- * @chip_select: Chipselect, distinguishing chips handled by @master.
+ * @chip_select: Chipselect, distinguishing chips handled by @controller.
  * @mode: The spi mode defines how data is clocked out and in.
  *	This may be changed by the device's driver.
  *	The "active low" default for chipselect mode can be overridden
@@ -140,7 +141,8 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  */
 struct spi_device {
 	struct device		dev;
-	struct spi_master	*master;
+	struct spi_controller	*controller;
+	struct spi_controller	*master;	/* compatibility layer */
 	u32			max_speed_hz;
 	u8			chip_select;
 	u8			bits_per_word;
@@ -198,7 +200,7 @@ static inline void spi_dev_put(struct spi_device *spi)
 		put_device(&spi->dev);
 }
 
-/* ctldata is for the bus_master driver's runtime state */
+/* ctldata is for the bus_controller driver's runtime state */
 static inline void *spi_get_ctldata(struct spi_device *spi)
 {
 	return spi->controller_state;
@@ -292,9 +294,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
 			spi_unregister_driver)
 
 /**
- * struct spi_master - interface to SPI master controller
+ * struct spi_controller - interface to SPI master or slave controller
  * @dev: device interface to this driver
- * @list: link with the global spi_master list
+ * @list: link with the global spi_controller list
  * @bus_num: board-specific (and often SOC-specific) identifier for a
  *	given SPI controller.
  * @num_chipselect: chipselects are used to distinguish individual
@@ -327,8 +329,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *	the device whose settings are being modified.
  * @transfer: adds a message to the controller's transfer queue.
  * @cleanup: frees controller-specific state
- * @can_dma: determine whether this master supports DMA
- * @queued: whether this master is providing an internal message queue
+ * @can_dma: determine whether this controller supports DMA
+ * @queued: whether this controller is providing an internal message queue
  * @kworker: thread struct for message pump
  * @kworker_task: pointer to task for message pump kworker thread
  * @pump_messages: work struct for scheduling work to the message pump
@@ -384,7 +386,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
- * @statistics: statistics for the spi_master
+ * @statistics: statistics for the spi_controller
  * @dma_tx: DMA transmit channel
  * @dma_rx: DMA receive channel
  * @dummy_rx: dummy receive buffer for full-duplex devices
@@ -393,7 +395,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *	what Linux expects, this optional hook can be used to translate
  *	between the two.
  *
- * Each SPI master controller can communicate with one or more @spi_device
+ * Each SPI controller can communicate with one or more @spi_device
  * children.  These make a small bus, sharing MOSI, MISO and SCK signals
  * but not chip select signals.  Each device may be configured to use a
  * different clock rate, since those shared signals are ignored unless
@@ -404,7 +406,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * an SPI slave device.  For each such message it queues, it calls the
  * message's completion function when the transaction completes.
  */
-struct spi_master {
+struct spi_controller {
 	struct device	dev;
 
 	struct list_head list;
@@ -442,12 +444,13 @@ struct spi_master {
 
 	/* other constraints relevant to this driver */
 	u16			flags;
-#define SPI_MASTER_HALF_DUPLEX	BIT(0)		/* can't do full duplex */
-#define SPI_MASTER_NO_RX	BIT(1)		/* can't do buffer read */
-#define SPI_MASTER_NO_TX	BIT(2)		/* can't do buffer write */
-#define SPI_MASTER_MUST_RX      BIT(3)		/* requires rx */
-#define SPI_MASTER_MUST_TX      BIT(4)		/* requires tx */
-#define SPI_MASTER_GPIO_SS      BIT(5)		/* GPIO CS must select slave */
+#define SPI_CONTROLLER_HALF_DUPLEX	BIT(0)	/* can't do full duplex */
+#define SPI_CONTROLLER_NO_RX		BIT(1)	/* can't do buffer read */
+#define SPI_CONTROLLER_NO_TX		BIT(2)	/* can't do buffer write */
+#define SPI_CONTROLLER_MUST_RX		BIT(3)	/* requires rx */
+#define SPI_CONTROLLER_MUST_TX		BIT(4)	/* requires tx */
+
+#define SPI_MASTER_GPIO_SS		BIT(5)	/* GPIO CS must select slave */
 
 	/* flag indicating this is an SPI slave controller */
 	bool			slave;
@@ -485,8 +488,8 @@ struct spi_master {
 	 *   any other request management
 	 * + To a given spi_device, message queueing is pure fifo
 	 *
-	 * + The master's main job is to process its message queue,
-	 *   selecting a chip then transferring data
+	 * + The controller's main job is to process its message queue,
+	 *   selecting a chip (for masters), then transferring data
 	 * + If there are multiple spi_device children, the i/o queue
 	 *   arbitration algorithm is unspecified (round robin, fifo,
 	 *   priority, reservations, preemption, etc)
@@ -499,7 +502,7 @@ struct spi_master {
 	int			(*transfer)(struct spi_device *spi,
 						struct spi_message *mesg);
 
-	/* called on release() to free memory provided by spi_master */
+	/* called on release() to free memory provided by spi_controller */
 	void			(*cleanup)(struct spi_device *spi);
 
 	/*
@@ -509,13 +512,13 @@ struct spi_master {
 	 * not modify or store xfer and dma_tx and dma_rx must be set
 	 * while the device is prepared.
 	 */
-	bool			(*can_dma)(struct spi_master *master,
+	bool			(*can_dma)(struct spi_controller *ctlr,
 					   struct spi_device *spi,
 					   struct spi_transfer *xfer);
 
 	/*
 	 * These hooks are for drivers that want to use the generic
-	 * master transfer queueing mechanism. If these are used, the
+	 * controller transfer queueing mechanism. If these are used, the
 	 * transfer() function above must NOT be specified by the driver.
 	 * Over time we expect SPI drivers to be phased over to this API.
 	 */
@@ -536,15 +539,15 @@ struct spi_master {
 	struct completion               xfer_completion;
 	size_t				max_dma_len;
 
-	int (*prepare_transfer_hardware)(struct spi_master *master);
-	int (*transfer_one_message)(struct spi_master *master,
+	int (*prepare_transfer_hardware)(struct spi_controller *ctlr);
+	int (*transfer_one_message)(struct spi_controller *ctlr,
 				    struct spi_message *mesg);
-	int (*unprepare_transfer_hardware)(struct spi_master *master);
-	int (*prepare_message)(struct spi_master *master,
+	int (*unprepare_transfer_hardware)(struct spi_controller *ctlr);
+	int (*prepare_message)(struct spi_controller *ctlr,
 			       struct spi_message *message);
-	int (*unprepare_message)(struct spi_master *master,
+	int (*unprepare_message)(struct spi_controller *ctlr,
 				 struct spi_message *message);
-	int (*slave_abort)(struct spi_master *spi);
+	int (*slave_abort)(struct spi_controller *ctlr);
 	int (*spi_flash_read)(struct  spi_device *spi,
 			      struct spi_flash_read_message *msg);
 	bool (*spi_flash_can_dma)(struct spi_device *spi,
@@ -556,9 +559,9 @@ struct spi_master {
 	 * of transfer_one_message() provied by the core.
 	 */
 	void (*set_cs)(struct spi_device *spi, bool enable);
-	int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
+	int (*transfer_one)(struct spi_controller *ctlr, struct spi_device *spi,
 			    struct spi_transfer *transfer);
-	void (*handle_err)(struct spi_master *master,
+	void (*handle_err)(struct spi_controller *ctlr,
 			   struct spi_message *message);
 
 	/* gpio chip select */
@@ -575,58 +578,59 @@ struct spi_master {
 	void			*dummy_rx;
 	void			*dummy_tx;
 
-	int (*fw_translate_cs)(struct spi_master *master, unsigned cs);
+	int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs);
 };
 
-static inline void *spi_master_get_devdata(struct spi_master *master)
+static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
 {
-	return dev_get_drvdata(&master->dev);
+	return dev_get_drvdata(&ctlr->dev);
 }
 
-static inline void spi_master_set_devdata(struct spi_master *master, void *data)
+static inline void spi_controller_set_devdata(struct spi_controller *ctlr,
+					      void *data)
 {
-	dev_set_drvdata(&master->dev, data);
+	dev_set_drvdata(&ctlr->dev, data);
 }
 
-static inline struct spi_master *spi_master_get(struct spi_master *master)
+static inline struct spi_controller *spi_controller_get(struct spi_controller *ctlr)
 {
-	if (!master || !get_device(&master->dev))
+	if (!ctlr || !get_device(&ctlr->dev))
 		return NULL;
-	return master;
+	return ctlr;
 }
 
-static inline void spi_master_put(struct spi_master *master)
+static inline void spi_controller_put(struct spi_controller *ctlr)
 {
-	if (master)
-		put_device(&master->dev);
+	if (ctlr)
+		put_device(&ctlr->dev);
 }
 
-static inline bool spi_controller_is_slave(struct spi_master *ctlr)
+static inline bool spi_controller_is_slave(struct spi_controller *ctlr)
 {
 	return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave;
 }
 
 /* PM calls that need to be issued by the driver */
-extern int spi_master_suspend(struct spi_master *master);
-extern int spi_master_resume(struct spi_master *master);
+extern int spi_controller_suspend(struct spi_controller *ctlr);
+extern int spi_controller_resume(struct spi_controller *ctlr);
 
 /* Calls the driver make to interact with the message queue */
-extern struct spi_message *spi_get_next_queued_message(struct spi_master *master);
-extern void spi_finalize_current_message(struct spi_master *master);
-extern void spi_finalize_current_transfer(struct spi_master *master);
+extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr);
+extern void spi_finalize_current_message(struct spi_controller *ctlr);
+extern void spi_finalize_current_transfer(struct spi_controller *ctlr);
 
-/* the spi driver core manages memory for the spi_master classdev */
-extern struct spi_master *__spi_alloc_controller(struct device *host,
-						 unsigned int size, bool slave);
+/* the spi driver core manages memory for the spi_controller classdev */
+extern struct spi_controller *__spi_alloc_controller(struct device *host,
+						unsigned int size, bool slave);
 
-static inline struct spi_master *spi_alloc_master(struct device *host,
-						  unsigned int size)
+static inline struct spi_controller *spi_alloc_master(struct device *host,
+						      unsigned int size)
 {
 	return __spi_alloc_controller(host, size, false);
 }
 
-static inline struct spi_master *spi_alloc_slave(struct device *host,
-						 unsigned int size)
+static inline struct spi_controller *spi_alloc_slave(struct device *host,
+						     unsigned int size)
 {
 	if (!IS_ENABLED(CONFIG_SPI_SLAVE))
 		return NULL;
@@ -634,18 +638,18 @@ static inline struct spi_master *spi_alloc_slave(struct device *host,
 	return __spi_alloc_controller(host, size, true);
 }
 
-extern int spi_register_master(struct spi_master *master);
-extern int devm_spi_register_master(struct device *dev,
-				    struct spi_master *master);
-extern void spi_unregister_master(struct spi_master *master);
+extern int spi_register_controller(struct spi_controller *ctlr);
+extern int devm_spi_register_controller(struct device *dev,
+					struct spi_controller *ctlr);
+extern void spi_unregister_controller(struct spi_controller *ctlr);
 
-extern struct spi_master *spi_busnum_to_master(u16 busnum);
+extern struct spi_controller *spi_busnum_to_master(u16 busnum);
 
 /*
  * SPI resource management while processing a SPI message
  */
 
-typedef void (*spi_res_release_t)(struct spi_master *master,
+typedef void (*spi_res_release_t)(struct spi_controller *ctlr,
 				  struct spi_message *msg,
 				  void *res);
 
@@ -670,7 +674,7 @@ extern void *spi_res_alloc(struct spi_device *spi,
 extern void spi_res_add(struct spi_message *message, void *res);
 extern void spi_res_free(void *res);
 
-extern void spi_res_release(struct spi_master *master,
+extern void spi_res_release(struct spi_controller *ctlr,
 			    struct spi_message *message);
 
 /*---------------------------------------------------------------------------*/
@@ -854,7 +858,7 @@ struct spi_message {
 
 	/* for optional use by whatever driver currently owns the
 	 * spi_message ...  between calls to spi_async and then later
-	 * complete(), that's the spi_master controller driver.
+	 * complete(), that's the spi_controller controller driver.
 	 */
 	struct list_head	queue;
 	void			*state;
@@ -943,21 +947,22 @@ extern int spi_slave_abort(struct spi_device *spi);
 static inline size_t
 spi_max_message_size(struct spi_device *spi)
 {
-	struct spi_master *master = spi->master;
-	if (!master->max_message_size)
+	struct spi_controller *ctlr = spi->controller;
+
+	if (!ctlr->max_message_size)
 		return SIZE_MAX;
-	return master->max_message_size(spi);
+	return ctlr->max_message_size(spi);
 }
 
 static inline size_t
 spi_max_transfer_size(struct spi_device *spi)
 {
-	struct spi_master *master = spi->master;
+	struct spi_controller *ctlr = spi->controller;
 	size_t tr_max = SIZE_MAX;
 	size_t msg_max = spi_max_message_size(spi);
 
-	if (master->max_transfer_size)
-		tr_max = master->max_transfer_size(spi);
+	if (ctlr->max_transfer_size)
+		tr_max = ctlr->max_transfer_size(spi);
 
 	/* transfer size limit must not be greater than messsage size limit */
 	return min(tr_max, msg_max);
@@ -968,7 +973,7 @@ spi_max_transfer_size(struct spi_device *spi)
 /* SPI transfer replacement methods which make use of spi_res */
 
 struct spi_replaced_transfers;
-typedef void (*spi_replaced_release_t)(struct spi_master *master,
+typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr,
 				       struct spi_message *msg,
 				       struct spi_replaced_transfers *res);
 /**
@@ -1012,7 +1017,7 @@ extern struct spi_replaced_transfers *spi_replace_transfers(
 
 /* SPI transfer transformation methods */
 
-extern int spi_split_transfers_maxsize(struct spi_master *master,
+extern int spi_split_transfers_maxsize(struct spi_controller *ctlr,
 				       struct spi_message *msg,
 				       size_t maxsize,
 				       gfp_t gfp);
@@ -1026,8 +1031,8 @@ extern int spi_split_transfers_maxsize(struct spi_master *master,
 
 extern int spi_sync(struct spi_device *spi, struct spi_message *message);
 extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message);
-extern int spi_bus_lock(struct spi_master *master);
-extern int spi_bus_unlock(struct spi_master *master);
+extern int spi_bus_lock(struct spi_controller *ctlr);
+extern int spi_bus_unlock(struct spi_controller *ctlr);
 
 /**
  * spi_sync_transfer - synchronous SPI data transfer
@@ -1212,9 +1217,9 @@ struct spi_flash_read_message {
 /* SPI core interface for flash read support */
 static inline bool spi_flash_read_supported(struct spi_device *spi)
 {
-	return spi->master->spi_flash_read &&
-	       (!spi->master->flash_read_supported ||
-	       spi->master->flash_read_supported(spi));
+	return spi->controller->spi_flash_read &&
+	       (!spi->controller->flash_read_supported ||
+	       spi->controller->flash_read_supported(spi));
 }
 
 int spi_flash_read(struct spi_device *spi,
@@ -1247,7 +1252,7 @@ int spi_flash_read(struct spi_device *spi,
  * @irq: Initializes spi_device.irq; depends on how the board is wired.
  * @max_speed_hz: Initializes spi_device.max_speed_hz; based on limits
  *	from the chip datasheet and board-specific signal quality issues.
- * @bus_num: Identifies which spi_master parents the spi_device; unused
+ * @bus_num: Identifies which spi_controller parents the spi_device; unused
  *	by spi_new_device(), and otherwise depends on board wiring.
  * @chip_select: Initializes spi_device.chip_select; depends on how
  *	the board is wired.
@@ -1288,7 +1293,7 @@ struct spi_board_info {
 
 
 	/* bus_num is board specific and matches the bus_num of some
-	 * spi_master that will probably be registered later.
+	 * spi_controller that will probably be registered later.
 	 *
 	 * chip_select reflects how this chip is wired to that master;
 	 * it's less than num_chipselect.
@@ -1322,7 +1327,7 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
 /* If you're hotplugging an adapter with devices (parport, usb, etc)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
- * normally that would be handled by spi_unregister_master().
+ * normally that would be handled by spi_unregister_controller().
  *
  * You can also use spi_alloc_device() and spi_add_device() to use a two
  * stage registration sequence for each spi_device.  This gives the caller
@@ -1331,13 +1336,13 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * be defined using the board info.
  */
 extern struct spi_device *
-spi_alloc_device(struct spi_master *master);
+spi_alloc_device(struct spi_controller *ctlr);
 
 extern int
 spi_add_device(struct spi_device *spi);
 
 extern struct spi_device *
-spi_new_device(struct spi_master *, struct spi_board_info *);
+spi_new_device(struct spi_controller *, struct spi_board_info *);
 
 extern void spi_unregister_device(struct spi_device *spi);
 
@@ -1345,9 +1350,32 @@ extern const struct spi_device_id *
 spi_get_device_id(const struct spi_device *sdev);
 
 static inline bool
-spi_transfer_is_last(struct spi_master *master, struct spi_transfer *xfer)
+spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
 {
-	return list_is_last(&xfer->transfer_list, &master->cur_msg->transfers);
+	return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers);
 }
 
+
+/* Compatibility layer */
+#define spi_master			spi_controller
+
+#define SPI_MASTER_HALF_DUPLEX		SPI_CONTROLLER_HALF_DUPLEX
+#define SPI_MASTER_NO_RX		SPI_CONTROLLER_NO_RX
+#define SPI_MASTER_NO_TX		SPI_CONTROLLER_NO_TX
+#define SPI_MASTER_MUST_RX		SPI_CONTROLLER_MUST_RX
+#define SPI_MASTER_MUST_TX		SPI_CONTROLLER_MUST_TX
+
+#define spi_master_get_devdata(_ctlr)	spi_controller_get_devdata(_ctlr)
+#define spi_master_set_devdata(_ctlr, _data)	\
+	spi_controller_set_devdata(_ctlr, _data)
+#define spi_master_get(_ctlr)		spi_controller_get(_ctlr)
+#define spi_master_put(_ctlr)		spi_controller_put(_ctlr)
+#define spi_master_suspend(_ctlr)	spi_controller_suspend(_ctlr)
+#define spi_master_resume(_ctlr)	spi_controller_resume(_ctlr)
+
+#define spi_register_master(_ctlr)	spi_register_controller(_ctlr)
+#define devm_spi_register_master(_dev, _ctlr) \
+	devm_spi_register_controller(_dev, _ctlr)
+#define spi_unregister_master(_ctlr)	spi_unregister_controller(_ctlr)
+
 #endif /* __LINUX_SPI_H */
diff --git a/include/trace/events/spi.h b/include/trace/events/spi.h
index 7e02c983bbe2..f9f702b6ae2e 100644
--- a/include/trace/events/spi.h
+++ b/include/trace/events/spi.h
@@ -7,37 +7,37 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
-DECLARE_EVENT_CLASS(spi_master,
+DECLARE_EVENT_CLASS(spi_controller,
 
-	TP_PROTO(struct spi_master *master),
+	TP_PROTO(struct spi_controller *controller),
 
-	TP_ARGS(master),
+	TP_ARGS(controller),
 
 	TP_STRUCT__entry(
 		__field(        int,           bus_num             )
 	),
 
 	TP_fast_assign(
-		__entry->bus_num = master->bus_num;
+		__entry->bus_num = controller->bus_num;
 	),
 
 	TP_printk("spi%d", (int)__entry->bus_num)
 
 );
 
-DEFINE_EVENT(spi_master, spi_master_idle,
+DEFINE_EVENT(spi_controller, spi_controller_idle,
 
-	TP_PROTO(struct spi_master *master),
+	TP_PROTO(struct spi_controller *controller),
 
-	TP_ARGS(master)
+	TP_ARGS(controller)
 
 );
 
-DEFINE_EVENT(spi_master, spi_master_busy,
+DEFINE_EVENT(spi_controller, spi_controller_busy,
 
-	TP_PROTO(struct spi_master *master),
+	TP_PROTO(struct spi_controller *controller),
 
-	TP_ARGS(master)
+	TP_ARGS(controller)
 
 );
 
@@ -54,7 +54,7 @@ DECLARE_EVENT_CLASS(spi_message,
 	),
 
 	TP_fast_assign(
-		__entry->bus_num = msg->spi->master->bus_num;
+		__entry->bus_num = msg->spi->controller->bus_num;
 		__entry->chip_select = msg->spi->chip_select;
 		__entry->msg = msg;
 	),
@@ -95,7 +95,7 @@ TRACE_EVENT(spi_message_done,
 	),
 
 	TP_fast_assign(
-		__entry->bus_num = msg->spi->master->bus_num;
+		__entry->bus_num = msg->spi->controller->bus_num;
 		__entry->chip_select = msg->spi->chip_select;
 		__entry->msg = msg;
 		__entry->frame = msg->frame_length;
@@ -122,7 +122,7 @@ DECLARE_EVENT_CLASS(spi_transfer,
 	),
 
 	TP_fast_assign(
-		__entry->bus_num = msg->spi->master->bus_num;
+		__entry->bus_num = msg->spi->controller->bus_num;
 		__entry->chip_select = msg->spi->chip_select;
 		__entry->xfer = xfer;
 		__entry->len = xfer->len;
-- 
cgit v1.2.3


From 4798a714d6a78171d7df48c921dddd0dc004f0a0 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Tue, 13 Jun 2017 10:56:08 -0400
Subject: of_mdio: move of_mdio_parse_addr to header file

The of_mdio_parse_addr() helper function is useful to other code, but
the module dependency chain causes issues.  To work around this, we can
move of_mdio_parse_addr() to be an inline function in the header file.
This gets rid of the dependencies and still allows for the reuse of
code.

Reported-by: Liviu Dudau <liviu@dudau.co.uk>
Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose")
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_mdio.c    | 22 ----------------------
 include/linux/of_mdio.h | 24 +++++++++++++++++++++++-
 2 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 9596be9a49d0..e0dbd6e48a98 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -119,28 +119,6 @@ static void of_mdiobus_register_device(struct mii_bus *mdio,
 		child->name, addr);
 }
 
-int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
-{
-	u32 addr;
-	int ret;
-
-	ret = of_property_read_u32(np, "reg", &addr);
-	if (ret < 0) {
-		dev_err(dev, "%s has invalid PHY address\n", np->full_name);
-		return ret;
-	}
-
-	/* A PHY must have a reg property in the range [0-31] */
-	if (addr >= PHY_MAX_ADDR) {
-		dev_err(dev, "%s PHY address %i is too large\n",
-			np->full_name, addr);
-		return -EINVAL;
-	}
-
-	return addr;
-}
-EXPORT_SYMBOL(of_mdio_parse_addr);
-
 /* The following is a list of PHY compatible strings which appear in
  * some DTBs. The compatible string is never matched against a PHY
  * driver, so is pointless. We only expect devices which are not PHYs
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index ba35ba520487..f5db93bcd069 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -27,11 +27,33 @@ struct phy_device *of_phy_attach(struct net_device *dev,
 				 phy_interface_t iface);
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
-extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np);
 extern int of_phy_register_fixed_link(struct device_node *np);
 extern void of_phy_deregister_fixed_link(struct device_node *np);
 extern bool of_phy_is_fixed_link(struct device_node *np);
 
+
+static inline int of_mdio_parse_addr(struct device *dev,
+				     const struct device_node *np)
+{
+	u32 addr;
+	int ret;
+
+	ret = of_property_read_u32(np, "reg", &addr);
+	if (ret < 0) {
+		dev_err(dev, "%s has invalid PHY address\n", np->full_name);
+		return ret;
+	}
+
+	/* A PHY must have a reg property in the range [0-31] */
+	if (addr >= PHY_MAX_ADDR) {
+		dev_err(dev, "%s PHY address %i is too large\n",
+			np->full_name, addr);
+		return -EINVAL;
+	}
+
+	return addr;
+}
+
 #else /* CONFIG_OF_MDIO */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 {
-- 
cgit v1.2.3


From 058fe49da3b6ab71b57effd49dcc5d007071eea5 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Mon, 12 Jun 2017 09:24:39 +0800
Subject: spi: mediatek: adjust register to enhance time accuracy

this patch adjust register to enhance time accuracy.

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c                 | 45 ++++++++++++++++++++++++++++----
 include/linux/platform_data/spi-mt65xx.h |  2 ++
 2 files changed, 42 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 278867a31950..eae73b58248b 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -35,11 +35,15 @@
 #define SPI_CMD_REG                       0x0018
 #define SPI_STATUS0_REG                   0x001c
 #define SPI_PAD_SEL_REG                   0x0024
+#define SPI_CFG2_REG                      0x0028
 
 #define SPI_CFG0_SCK_HIGH_OFFSET          0
 #define SPI_CFG0_SCK_LOW_OFFSET           8
 #define SPI_CFG0_CS_HOLD_OFFSET           16
 #define SPI_CFG0_CS_SETUP_OFFSET          24
+#define SPI_ADJUST_CFG0_SCK_LOW_OFFSET    16
+#define SPI_ADJUST_CFG0_CS_HOLD_OFFSET    0
+#define SPI_ADJUST_CFG0_CS_SETUP_OFFSET   16
 
 #define SPI_CFG1_CS_IDLE_OFFSET           0
 #define SPI_CFG1_PACKET_LOOP_OFFSET       8
@@ -55,6 +59,8 @@
 #define SPI_CMD_RST                  BIT(2)
 #define SPI_CMD_PAUSE_EN             BIT(4)
 #define SPI_CMD_DEASSERT             BIT(5)
+#define SPI_CMD_SAMPLE_SEL           BIT(6)
+#define SPI_CMD_CS_POL               BIT(7)
 #define SPI_CMD_CPHA                 BIT(8)
 #define SPI_CMD_CPOL                 BIT(9)
 #define SPI_CMD_RX_DMA               BIT(10)
@@ -80,6 +86,8 @@ struct mtk_spi_compatible {
 	bool need_pad_sel;
 	/* Must explicitly send dummy Tx bytes to do Rx only transfer */
 	bool must_tx;
+	/* some IC design adjust cfg register to enhance time accuracy */
+	bool enhance_timing;
 };
 
 struct mtk_spi {
@@ -108,6 +116,8 @@ static const struct mtk_spi_compatible mt8173_compat = {
 static const struct mtk_chip_config mtk_default_chip_info = {
 	.rx_mlsb = 1,
 	.tx_mlsb = 1,
+	.cs_pol = 0,
+	.sample_sel = 0,
 };
 
 static const struct of_device_id mtk_spi_of_match[] = {
@@ -182,6 +192,17 @@ static int mtk_spi_prepare_message(struct spi_master *master,
 	reg_val |= SPI_CMD_RX_ENDIAN;
 #endif
 
+	if (mdata->dev_comp->enhance_timing) {
+		if (chip_config->cs_pol)
+			reg_val |= SPI_CMD_CS_POL;
+		else
+			reg_val &= ~SPI_CMD_CS_POL;
+		if (chip_config->sample_sel)
+			reg_val |= SPI_CMD_SAMPLE_SEL;
+		else
+			reg_val &= ~SPI_CMD_SAMPLE_SEL;
+	}
+
 	/* set finish and pause interrupt always enable */
 	reg_val |= SPI_CMD_FINISH_IE | SPI_CMD_PAUSE_IE;
 
@@ -233,11 +254,25 @@ static void mtk_spi_prepare_transfer(struct spi_master *master,
 	sck_time = (div + 1) / 2;
 	cs_time = sck_time * 2;
 
-	reg_val |= (((sck_time - 1) & 0xff) << SPI_CFG0_SCK_HIGH_OFFSET);
-	reg_val |= (((sck_time - 1) & 0xff) << SPI_CFG0_SCK_LOW_OFFSET);
-	reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG0_CS_HOLD_OFFSET);
-	reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG0_CS_SETUP_OFFSET);
-	writel(reg_val, mdata->base + SPI_CFG0_REG);
+	if (mdata->dev_comp->enhance_timing) {
+		reg_val |= (((sck_time - 1) & 0xffff)
+			   << SPI_CFG0_SCK_HIGH_OFFSET);
+		reg_val |= (((sck_time - 1) & 0xffff)
+			   << SPI_ADJUST_CFG0_SCK_LOW_OFFSET);
+		writel(reg_val, mdata->base + SPI_CFG2_REG);
+		reg_val |= (((cs_time - 1) & 0xffff)
+			   << SPI_ADJUST_CFG0_CS_HOLD_OFFSET);
+		reg_val |= (((cs_time - 1) & 0xffff)
+			   << SPI_ADJUST_CFG0_CS_SETUP_OFFSET);
+		writel(reg_val, mdata->base + SPI_CFG0_REG);
+	} else {
+		reg_val |= (((sck_time - 1) & 0xff)
+			   << SPI_CFG0_SCK_HIGH_OFFSET);
+		reg_val |= (((sck_time - 1) & 0xff) << SPI_CFG0_SCK_LOW_OFFSET);
+		reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG0_CS_HOLD_OFFSET);
+		reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG0_CS_SETUP_OFFSET);
+		writel(reg_val, mdata->base + SPI_CFG0_REG);
+	}
 
 	reg_val = readl(mdata->base + SPI_CFG1_REG);
 	reg_val &= ~SPI_CFG1_CS_IDLE_MASK;
diff --git a/include/linux/platform_data/spi-mt65xx.h b/include/linux/platform_data/spi-mt65xx.h
index 54b04483976c..ba4e4bb70262 100644
--- a/include/linux/platform_data/spi-mt65xx.h
+++ b/include/linux/platform_data/spi-mt65xx.h
@@ -16,5 +16,7 @@
 struct mtk_chip_config {
 	u32 tx_mlsb;
 	u32 rx_mlsb;
+	u32 cs_pol;
+	u32 sample_sel;
 };
 #endif
-- 
cgit v1.2.3


From 00f4b652b6f1dbfd4e1d5419d7f1cc23b1374da8 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 31 May 2017 16:56:43 -0500
Subject: trace: rename trace_enum_map to trace_eval_map

Each enum is loaded into the trace_enum_map, as we
are now using this for more than enums rename it.

Link: http://lkml.kernel.org/r/20170531215653.3240-3-jeremy.linton@arm.com

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/module.h       |  2 +-
 include/linux/tracepoint.h   |  6 +++---
 include/trace/trace_events.h |  8 ++++----
 kernel/trace/trace.c         | 24 ++++++++++++------------
 kernel/trace/trace.h         |  4 ++--
 kernel/trace/trace_events.c  | 14 +++++++-------
 6 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 21f56393602f..46b48043d741 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -442,7 +442,7 @@ struct module {
 #ifdef CONFIG_EVENT_TRACING
 	struct trace_event_call **trace_events;
 	unsigned int num_trace_events;
-	struct trace_enum_map **trace_enums;
+	struct trace_eval_map **trace_enums;
 	unsigned int num_trace_enums;
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index cc48cb2ce209..f7b0f5525e46 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -25,10 +25,10 @@ struct module;
 struct tracepoint;
 struct notifier_block;
 
-struct trace_enum_map {
+struct trace_eval_map {
 	const char		*system;
-	const char		*enum_string;
-	unsigned long		enum_value;
+	const char		*eval_string;
+	unsigned long		eval_value;
 };
 
 #define TRACEPOINT_DEFAULT_PRIO	10
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 4bdd84023f5b..49cce5fb54ee 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -35,14 +35,14 @@ TRACE_MAKE_SYSTEM_STR();
 
 #undef TRACE_DEFINE_ENUM
 #define TRACE_DEFINE_ENUM(a)				\
-	static struct trace_enum_map __used __initdata	\
+	static struct trace_eval_map __used __initdata	\
 	__##TRACE_SYSTEM##_##a =			\
 	{						\
 		.system = TRACE_SYSTEM_STRING,		\
-		.enum_string = #a,			\
-		.enum_value = a				\
+		.eval_string = #a,			\
+		.eval_value = a				\
 	};						\
-	static struct trace_enum_map __used		\
+	static struct trace_eval_map __used		\
 	__attribute__((section("_ftrace_eval_map")))	\
 	*TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index acd3eb4d56a0..46fac3f63af1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -132,7 +132,7 @@ union trace_enum_map_item;
 struct trace_enum_map_tail {
 	/*
 	 * "end" is first and points to NULL as it must be different
-	 * than "mod" or "enum_string"
+	 * than "mod" or "eval_string"
 	 */
 	union trace_enum_map_item	*next;
 	const char			*end;	/* points to NULL */
@@ -148,7 +148,7 @@ static DEFINE_MUTEX(trace_enum_mutex);
  * pointer to the next array of saved enum_map items.
  */
 union trace_enum_map_item {
-	struct trace_enum_map		map;
+	struct trace_eval_map		map;
 	struct trace_enum_map_head	head;
 	struct trace_enum_map_tail	tail;
 };
@@ -4748,7 +4748,7 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = {
 static union trace_enum_map_item *
 update_enum_map(union trace_enum_map_item *ptr)
 {
-	if (!ptr->map.enum_string) {
+	if (!ptr->map.eval_string) {
 		if (ptr->tail.next) {
 			ptr = ptr->tail.next;
 			/* Set ptr to the next real item (skip head) */
@@ -4808,7 +4808,7 @@ static int enum_map_show(struct seq_file *m, void *v)
 	union trace_enum_map_item *ptr = v;
 
 	seq_printf(m, "%s %ld (%s)\n",
-		   ptr->map.enum_string, ptr->map.enum_value,
+		   ptr->map.eval_string, ptr->map.eval_value,
 		   ptr->map.system);
 
 	return 0;
@@ -4844,11 +4844,11 @@ trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
 }
 
 static void
-trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
+trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start,
 			   int len)
 {
-	struct trace_enum_map **stop;
-	struct trace_enum_map **map;
+	struct trace_eval_map **stop;
+	struct trace_eval_map **map;
 	union trace_enum_map_item *map_array;
 	union trace_enum_map_item *ptr;
 
@@ -4902,13 +4902,13 @@ static void trace_create_enum_file(struct dentry *d_tracer)
 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
 static inline void trace_insert_enum_map_file(struct module *mod,
-			      struct trace_enum_map **start, int len) { }
+			      struct trace_eval_map **start, int len) { }
 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
 
 static void trace_insert_enum_map(struct module *mod,
-				  struct trace_enum_map **start, int len)
+				  struct trace_eval_map **start, int len)
 {
-	struct trace_enum_map **map;
+	struct trace_eval_map **map;
 
 	if (len <= 0)
 		return;
@@ -7732,8 +7732,8 @@ struct dentry *tracing_init_dentry(void)
 	return NULL;
 }
 
-extern struct trace_enum_map *__start_ftrace_eval_maps[];
-extern struct trace_enum_map *__stop_ftrace_eval_maps[];
+extern struct trace_eval_map *__start_ftrace_eval_maps[];
+extern struct trace_eval_map *__stop_ftrace_eval_maps[];
 
 static void __init trace_enum_init(void)
 {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 39fd77330aab..a9667297ae49 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1773,10 +1773,10 @@ static inline const char *get_syscall_name(int syscall)
 
 #ifdef CONFIG_EVENT_TRACING
 void trace_event_init(void);
-void trace_event_enum_update(struct trace_enum_map **map, int len);
+void trace_event_enum_update(struct trace_eval_map **map, int len);
 #else
 static inline void __init trace_event_init(void) { }
-static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { }
+static inline void trace_event_enum_update(struct trace_eval_map **map, int len) { }
 #endif
 
 extern struct trace_iterator *tracepoint_print_iter;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e7973e10398c..cf5b9aa4d732 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2067,18 +2067,18 @@ __register_event(struct trace_event_call *call, struct module *mod)
 	return 0;
 }
 
-static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
+static char *enum_replace(char *ptr, struct trace_eval_map *map, int len)
 {
 	int rlen;
 	int elen;
 
 	/* Find the length of the enum value as a string */
-	elen = snprintf(ptr, 0, "%ld", map->enum_value);
+	elen = snprintf(ptr, 0, "%ld", map->eval_value);
 	/* Make sure there's enough room to replace the string with the value */
 	if (len < elen)
 		return NULL;
 
-	snprintf(ptr, elen + 1, "%ld", map->enum_value);
+	snprintf(ptr, elen + 1, "%ld", map->eval_value);
 
 	/* Get the rest of the string of ptr */
 	rlen = strlen(ptr + len);
@@ -2090,11 +2090,11 @@ static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
 }
 
 static void update_event_printk(struct trace_event_call *call,
-				struct trace_enum_map *map)
+				struct trace_eval_map *map)
 {
 	char *ptr;
 	int quote = 0;
-	int len = strlen(map->enum_string);
+	int len = strlen(map->eval_string);
 
 	for (ptr = call->print_fmt; *ptr; ptr++) {
 		if (*ptr == '\\') {
@@ -2125,7 +2125,7 @@ static void update_event_printk(struct trace_event_call *call,
 			continue;
 		}
 		if (isalpha(*ptr) || *ptr == '_') {
-			if (strncmp(map->enum_string, ptr, len) == 0 &&
+			if (strncmp(map->eval_string, ptr, len) == 0 &&
 			    !isalnum(ptr[len]) && ptr[len] != '_') {
 				ptr = enum_replace(ptr, map, len);
 				/* Hmm, enum string smaller than value */
@@ -2165,7 +2165,7 @@ static void update_event_printk(struct trace_event_call *call,
 	}
 }
 
-void trace_event_enum_update(struct trace_enum_map **map, int len)
+void trace_event_enum_update(struct trace_eval_map **map, int len)
 {
 	struct trace_event_call *call, *p;
 	const char *last_system = NULL;
-- 
cgit v1.2.3


From 99be647c5841d570a23b5dfa65bfecada8b6e6b5 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 31 May 2017 16:56:44 -0500
Subject: trace: rename struct module entry for trace enums

Each module has a list of enum's its contributing to the
enum map, rename that entry to reflect its use by more than
enums.

Link: http://lkml.kernel.org/r/20170531215653.3240-4-jeremy.linton@arm.com

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/module.h | 4 ++--
 kernel/module.c        | 6 +++---
 kernel/trace/trace.c   | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 46b48043d741..8eb9a1e693e5 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -442,8 +442,8 @@ struct module {
 #ifdef CONFIG_EVENT_TRACING
 	struct trace_event_call **trace_events;
 	unsigned int num_trace_events;
-	struct trace_eval_map **trace_enums;
-	unsigned int num_trace_enums;
+	struct trace_eval_map **trace_evals;
+	unsigned int num_trace_evals;
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	unsigned int num_ftrace_callsites;
diff --git a/kernel/module.c b/kernel/module.c
index 9ec4713c5eee..df1c4a9e7abb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3077,9 +3077,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 	mod->trace_events = section_objs(info, "_ftrace_events",
 					 sizeof(*mod->trace_events),
 					 &mod->num_trace_events);
-	mod->trace_enums = section_objs(info, "_ftrace_eval_map",
-					sizeof(*mod->trace_enums),
-					&mod->num_trace_enums);
+	mod->trace_evals = section_objs(info, "_ftrace_eval_map",
+					sizeof(*mod->trace_evals),
+					&mod->num_trace_evals);
 #endif
 #ifdef CONFIG_TRACING
 	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 46fac3f63af1..061abd8ba101 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7746,7 +7746,7 @@ static void __init trace_enum_init(void)
 #ifdef CONFIG_MODULES
 static void trace_module_add_enums(struct module *mod)
 {
-	if (!mod->num_trace_enums)
+	if (!mod->num_trace_evals)
 		return;
 
 	/*
@@ -7756,7 +7756,7 @@ static void trace_module_add_enums(struct module *mod)
 	if (trace_module_has_bad_taint(mod))
 		return;
 
-	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
+	trace_insert_enum_map(mod, mod->trace_evals, mod->num_trace_evals);
 }
 
 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
@@ -7765,7 +7765,7 @@ static void trace_module_remove_enums(struct module *mod)
 	union trace_enum_map_item *map;
 	union trace_enum_map_item **last = &trace_enum_maps;
 
-	if (!mod->num_trace_enums)
+	if (!mod->num_trace_evals)
 		return;
 
 	mutex_lock(&trace_enum_mutex);
-- 
cgit v1.2.3


From 4f0dfd76e9cc9296d74d6d5f579a5c7ca3bed869 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 31 May 2017 16:56:50 -0500
Subject: tracing: define TRACE_DEFINE_SIZEOF() macro to map sizeof's to their
 values

Perf has a problem that if sizeof() macros are used within TRACE_EVENT()
macro's they end up in userspace as "sizeof(kernel structure)" which
cannot properly be parsed. Add a macro which can forward this data
through the eval_map for userspace utilization.

Link: http://lkml.kernel.org/r/20170531215653.3240-10-jeremy.linton@arm.com

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   |  1 +
 include/trace/trace_events.h | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index f7b0f5525e46..a26ffbe09e71 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -88,6 +88,7 @@ extern void syscall_unregfunc(void);
 #define PARAMS(args...) args
 
 #define TRACE_DEFINE_ENUM(x)
+#define TRACE_DEFINE_SIZEOF(x)
 
 #endif /* _LINUX_TRACEPOINT_H */
 
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 49cce5fb54ee..3976fa1f6e42 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -46,6 +46,19 @@ TRACE_MAKE_SYSTEM_STR();
 	__attribute__((section("_ftrace_eval_map")))	\
 	*TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a
 
+#undef TRACE_DEFINE_SIZEOF
+#define TRACE_DEFINE_SIZEOF(a)				\
+	static struct trace_eval_map __used __initdata	\
+	__##TRACE_SYSTEM##_##a =			\
+	{						\
+		.system = TRACE_SYSTEM_STRING,		\
+		.eval_string = "sizeof(" #a ")",	\
+		.eval_value = sizeof(a)			\
+	};						\
+	static struct trace_eval_map __used		\
+	__attribute__((section("_ftrace_eval_map")))	\
+	*TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a
+
 /*
  * DECLARE_EVENT_CLASS can be used to add a generic function
  * handlers for events. That is, if all events have the same
@@ -158,6 +171,9 @@ TRACE_MAKE_SYSTEM_STR();
 #undef TRACE_DEFINE_ENUM
 #define TRACE_DEFINE_ENUM(a)
 
+#undef TRACE_DEFINE_SIZEOF
+#define TRACE_DEFINE_SIZEOF(a)
+
 #undef __field
 #define __field(type, item)
 
-- 
cgit v1.2.3


From 192a82f9003fe8fabd6088aa646e829225a94c55 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 7 Jun 2017 09:42:28 +0100
Subject: hrtimer_nanosleep(): Pass rmtp in restart_block

Store the pointer to the timespec which gets updated with the remaining
time in the restart block and remove the function argument.

[ tglx: Added changelog ]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170607084241.28657-3-viro@ZenIV.linux.org.uk
---
 include/linux/hrtimer.h    |  1 -
 kernel/compat.c            |  6 +++---
 kernel/time/hrtimer.c      | 11 ++++++-----
 kernel/time/posix-stubs.c  |  5 ++++-
 kernel/time/posix-timers.c |  5 ++++-
 5 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 8c5b10eb7265..b80c34f6fd4b 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -453,7 +453,6 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer,
 
 /* Precise sleep: */
 extern long hrtimer_nanosleep(struct timespec64 *rqtp,
-			      struct timespec __user *rmtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
diff --git a/kernel/compat.c b/kernel/compat.c
index 933bcb31ae10..cc9ba9d29b47 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -253,9 +253,9 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
 
 	oldfs = get_fs();
 	set_fs(KERNEL_DS);
-	ret = hrtimer_nanosleep(&tu64,
-				rmtp ? (struct timespec __user *)&rmt : NULL,
-				HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+	current->restart_block.nanosleep.rmtp =
+				rmtp ? (struct timespec __user *)&rmt : NULL;
+	ret = hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 	set_fs(oldfs);
 
 	/*
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index ac053bb5296e..4ae777f159de 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1503,10 +1503,11 @@ out:
 	return ret;
 }
 
-long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp,
+long hrtimer_nanosleep(struct timespec64 *rqtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
-	struct restart_block *restart;
+	struct restart_block *restart = &current->restart_block;
+	struct timespec __user *rmtp;
 	struct hrtimer_sleeper t;
 	int ret = 0;
 	u64 slack;
@@ -1526,16 +1527,15 @@ long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp,
 		goto out;
 	}
 
+	rmtp = restart->nanosleep.rmtp;
 	if (rmtp) {
 		ret = update_rmtp(&t.timer, rmtp);
 		if (ret <= 0)
 			goto out;
 	}
 
-	restart = &current->restart_block;
 	restart->fn = hrtimer_nanosleep_restart;
 	restart->nanosleep.clockid = t.timer.base->clockid;
-	restart->nanosleep.rmtp = rmtp;
 	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
 
 	ret = -ERESTART_RESTARTBLOCK;
@@ -1557,7 +1557,8 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 	if (!timespec64_valid(&tu64))
 		return -EINVAL;
 
-	return hrtimer_nanosleep(&tu64, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+	current->restart_block.nanosleep.rmtp = rmtp;
+	return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 
 /*
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index c0cd53eb018a..156a5e6f3bd2 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -115,7 +115,10 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 		t64 = timespec_to_timespec64(t);
 		if (!timespec64_valid(&t64))
 			return -EINVAL;
-		return hrtimer_nanosleep(&t64, rmtp, flags & TIMER_ABSTIME ?
+		if (flags & TIMER_ABSTIME)
+			rmtp = NULL;
+		current->restart_block.nanosleep.rmtp = rmtp;
+		return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
 					 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
 					 which_clock);
 	default:
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 58c0f60b132f..1a9f59f8afc2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1043,7 +1043,10 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 static int common_nsleep(const clockid_t which_clock, int flags,
 			 struct timespec64 *tsave, struct timespec __user *rmtp)
 {
-	return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
+	if (flags & TIMER_ABSTIME)
+		rmtp = NULL;
+	current->restart_block.nanosleep.rmtp = rmtp;
+	return hrtimer_nanosleep(tsave, flags & TIMER_ABSTIME ?
 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
 				 which_clock);
 }
-- 
cgit v1.2.3


From edbeda46322fbcb15af2d2d0f2daffb0cd349a5a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Jun 2017 09:42:31 +0100
Subject: time/posix-timers: Move the compat copyouts to the nanosleep
 implementations

Turn restart_block.nanosleep.{rmtp,compat_rmtp} into a tagged union (kind =
1 -> native, kind = 2 -> compat, kind = 0 -> nothing) and make the places
doing actual copyout handle compat as well as native (that will become a
helper in the next commit).  Result: compat wrappers, messing with
reassignments, etc. are gone.

[ tglx: Folded in a variant of Peter Zijlstras enum patch ]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170607084241.28657-6-viro@ZenIV.linux.org.uk
---
 include/linux/posix-timers.h   |   2 -
 include/linux/restart_block.h  |  15 ++++-
 kernel/compat.c                | 131 -----------------------------------------
 kernel/time/alarmtimer.c       |  16 +++--
 kernel/time/hrtimer.c          |  42 +++++++++++--
 kernel/time/posix-cpu-timers.c |  20 +++++--
 kernel/time/posix-stubs.c      |  55 +++++++++++++----
 kernel/time/posix-timers.c     |  32 +++++++---
 8 files changed, 142 insertions(+), 171 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 667095dbcd37..29f1b7f09ced 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -110,8 +110,6 @@ void posix_cpu_timers_exit_group(struct task_struct *task);
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   u64 *newval, u64 *oldval);
 
-long clock_nanosleep_restart(struct restart_block *restart_block);
-
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
 void posixtimer_rearm(struct siginfo *info);
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 0d905d8ec553..19df8422606c 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -11,6 +11,14 @@ struct timespec;
 struct compat_timespec;
 struct pollfd;
 
+enum timespec_type {
+	TT_NONE		= 0,
+	TT_NATIVE	= 1,
+#ifdef CONFIG_COMPAT
+	TT_COMPAT	= 2,
+#endif
+};
+
 /*
  * System call restart block.
  */
@@ -29,10 +37,13 @@ struct restart_block {
 		/* For nanosleep */
 		struct {
 			clockid_t clockid;
-			struct timespec __user *rmtp;
+			enum timespec_type type;
+			union {
+				struct timespec __user *rmtp;
 #ifdef CONFIG_COMPAT
-			struct compat_timespec __user *compat_rmtp;
+				struct compat_timespec __user *compat_rmtp;
 #endif
+			};
 			u64 expires;
 		} nanosleep;
 		/* For poll */
diff --git a/kernel/compat.c b/kernel/compat.c
index cc9ba9d29b47..23afa26f574b 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -213,82 +213,6 @@ int compat_convert_timespec(struct timespec __user **kts,
 	return 0;
 }
 
-static long compat_nanosleep_restart(struct restart_block *restart)
-{
-	struct compat_timespec __user *rmtp;
-	struct timespec rmt;
-	mm_segment_t oldfs;
-	long ret;
-
-	restart->nanosleep.rmtp = (struct timespec __user *) &rmt;
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = hrtimer_nanosleep_restart(restart);
-	set_fs(oldfs);
-
-	if (ret == -ERESTART_RESTARTBLOCK) {
-		rmtp = restart->nanosleep.compat_rmtp;
-
-		if (rmtp && compat_put_timespec(&rmt, rmtp))
-			return -EFAULT;
-	}
-
-	return ret;
-}
-
-COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
-{
-	struct timespec tu, rmt;
-	struct timespec64 tu64;
-	mm_segment_t oldfs;
-	long ret;
-
-	if (compat_get_timespec(&tu, rqtp))
-		return -EFAULT;
-
-	tu64 = timespec_to_timespec64(tu);
-	if (!timespec64_valid(&tu64))
-		return -EINVAL;
-
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	current->restart_block.nanosleep.rmtp =
-				rmtp ? (struct timespec __user *)&rmt : NULL;
-	ret = hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
-	set_fs(oldfs);
-
-	/*
-	 * hrtimer_nanosleep() can only return 0 or
-	 * -ERESTART_RESTARTBLOCK here because:
-	 *
-	 * - we call it with HRTIMER_MODE_REL and therefor exclude the
-	 *   -ERESTARTNOHAND return path.
-	 *
-	 * - we supply the rmtp argument from the task stack (due to
-	 *   the necessary compat conversion. So the update cannot
-	 *   fail, which excludes the -EFAULT return path as well. If
-	 *   it fails nevertheless we have a bigger problem and wont
-	 *   reach this place anymore.
-	 *
-	 * - if the return value is 0, we do not have to update rmtp
-	 *    because there is no remaining time.
-	 *
-	 * We check for -ERESTART_RESTARTBLOCK nevertheless if the
-	 * core implementation decides to return random nonsense.
-	 */
-	if (ret == -ERESTART_RESTARTBLOCK) {
-		struct restart_block *restart = &current->restart_block;
-
-		restart->fn = compat_nanosleep_restart;
-		restart->nanosleep.compat_rmtp = rmtp;
-
-		if (rmtp && compat_put_timespec(&rmt, rmtp))
-			return -EFAULT;
-	}
-	return ret;
-}
-
 static inline long get_compat_itimerval(struct itimerval *o,
 		struct compat_itimerval __user *i)
 {
@@ -821,61 +745,6 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 	return err;
 }
 
-static long compat_clock_nanosleep_restart(struct restart_block *restart)
-{
-	long err;
-	mm_segment_t oldfs;
-	struct timespec tu;
-	struct compat_timespec __user *rmtp = restart->nanosleep.compat_rmtp;
-
-	restart->nanosleep.rmtp = (struct timespec __user *) &tu;
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	err = clock_nanosleep_restart(restart);
-	set_fs(oldfs);
-
-	if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
-	    compat_put_timespec(&tu, rmtp))
-		return -EFAULT;
-
-	if (err == -ERESTART_RESTARTBLOCK) {
-		restart->fn = compat_clock_nanosleep_restart;
-		restart->nanosleep.compat_rmtp = rmtp;
-	}
-	return err;
-}
-
-COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
-{
-	long err;
-	mm_segment_t oldfs;
-	struct timespec in, out;
-	struct restart_block *restart;
-
-	if (compat_get_timespec(&in, rqtp))
-		return -EFAULT;
-
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	err = sys_clock_nanosleep(which_clock, flags,
-				  (struct timespec __user *) &in,
-				  (struct timespec __user *) &out);
-	set_fs(oldfs);
-
-	if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
-	    compat_put_timespec(&out, rmtp))
-		return -EFAULT;
-
-	if (err == -ERESTART_RESTARTBLOCK) {
-		restart = &current->restart_block;
-		restart->fn = compat_clock_nanosleep_restart;
-		restart->nanosleep.compat_rmtp = rmtp;
-	}
-	return err;
-}
-
 /*
  * We currently only need the following fields from the sigevent
  * structure: sigev_value, sigev_signo, sig_notify and (sometimes
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index d859a3601ddd..57bcf94ee132 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -27,6 +27,7 @@
 #include <linux/posix-timers.h>
 #include <linux/workqueue.h>
 #include <linux/freezer.h>
+#include <linux/compat.h>
 
 #include "posix-timers.h"
 
@@ -691,7 +692,7 @@ static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm,
 static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 				enum alarmtimer_type type)
 {
-	struct timespec __user *rmtp;
+	struct restart_block *restart;
 	alarm->data = (void *)current;
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -709,8 +710,8 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 
 	if (freezing(current))
 		alarmtimer_freezerset(absexp, type);
-	rmtp = current->restart_block.nanosleep.rmtp;
-	if (rmtp) {
+	restart = &current->restart_block;
+	if (restart->nanosleep.type != TT_NONE) {
 		struct timespec rmt;
 		ktime_t rem;
 
@@ -720,7 +721,14 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 			return 0;
 		rmt = ktime_to_timespec(rem);
 
-		if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
+#ifdef CONFIG_COMPAT
+		if (restart->nanosleep.type == TT_COMPAT) {
+			if (compat_put_timespec(&rmt,
+						restart->nanosleep.compat_rmtp))
+				return -EFAULT;
+		} else
+#endif
+		if (copy_to_user(restart->nanosleep.rmtp, &rmt, sizeof(rmt)))
 			return -EFAULT;
 	}
 	return -ERESTART_RESTARTBLOCK;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index baa7b846b6e3..5370da8fc0a4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -51,6 +51,7 @@
 #include <linux/sched/debug.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -1441,7 +1442,8 @@ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
 {
-	struct timespec __user *rmtp;
+	struct restart_block *restart;
+
 	hrtimer_init_sleeper(t, current);
 
 	do {
@@ -1461,15 +1463,23 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 	if (!t->task)
 		return 0;
 
-	rmtp = current->restart_block.nanosleep.rmtp;
-	if (rmtp) {
-		struct timespec rmt;
+	restart = &current->restart_block;
+	if (restart->nanosleep.type != TT_NONE) {
 		ktime_t rem = hrtimer_expires_remaining(&t->timer);
+		struct timespec rmt;
+
 		if (rem <= 0)
 			return 0;
 		rmt = ktime_to_timespec(rem);
 
-		if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
+#ifdef CONFIG_COMPAT
+		if (restart->nanosleep.type == TT_COMPAT) {
+			if (compat_put_timespec(&rmt,
+						restart->nanosleep.compat_rmtp))
+				return -EFAULT;
+		} else
+#endif
+		if (copy_to_user(restart->nanosleep.rmtp, &rmt, sizeof(rmt)))
 			return -EFAULT;
 	}
 	return -ERESTART_RESTARTBLOCK;
@@ -1535,10 +1545,32 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 	if (!timespec64_valid(&tu64))
 		return -EINVAL;
 
+	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
 	current->restart_block.nanosleep.rmtp = rmtp;
 	return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 
+#ifdef CONFIG_COMPAT
+
+COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
+		       struct compat_timespec __user *, rmtp)
+{
+	struct timespec64 tu64;
+	struct timespec tu;
+
+	if (compat_get_timespec(&tu, rqtp))
+		return -EFAULT;
+
+	tu64 = timespec_to_timespec64(tu);
+	if (!timespec64_valid(&tu64))
+		return -EINVAL;
+
+	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
+	current->restart_block.nanosleep.compat_rmtp = rmtp;
+	return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+}
+#endif
+
 /*
  * Functions related to boot-time initialization:
  */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index ec6258c9cde5..1563ca22cf1f 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -12,6 +12,7 @@
 #include <trace/events/timer.h>
 #include <linux/tick.h>
 #include <linux/workqueue.h>
+#include <linux/compat.h>
 
 #include "posix-timers.h"
 
@@ -1243,10 +1244,9 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 	timer.it_process = current;
 	if (!error) {
 		static struct itimerspec64 zero_it;
-		struct restart_block *restart = &current->restart_block;
-		struct timespec __user *rmtp;
+		struct restart_block *restart;
 
-		memset(&it, 0, sizeof it);
+		memset(&it, 0, sizeof(it));
 		it.it_value = *rqtp;
 
 		spin_lock_irq(&timer.it_lock);
@@ -1311,12 +1311,20 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		/*
 		 * Report back to the user the time still remaining.
 		 */
-		rmtp = restart->nanosleep.rmtp;
-		if (rmtp) {
+		restart = &current->restart_block;
+		if (restart->nanosleep.type != TT_NONE) {
 			struct timespec ts;
 
 			ts = timespec64_to_timespec(it.it_value);
-			if (copy_to_user(rmtp, &ts, sizeof(*rmtp)))
+#ifdef CONFIG_COMPAT
+			if (restart->nanosleep.type == TT_COMPAT) {
+				if (compat_put_timespec(&ts,
+						restart->nanosleep.compat_rmtp))
+					return -EFAULT;
+			} else
+#endif
+			if (copy_to_user(restart->nanosleep.rmtp, &ts,
+					sizeof(ts)))
 				return -EFAULT;
 		}
 		restart->nanosleep.expires = timespec64_to_ns(rqtp);
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 156a5e6f3bd2..749b76f2d757 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -17,6 +17,7 @@
 #include <linux/ktime.h>
 #include <linux/timekeeping.h>
 #include <linux/posix-timers.h>
+#include <linux/compat.h>
 
 asmlinkage long sys_ni_posix_timers(void)
 {
@@ -110,25 +111,53 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 	case CLOCK_REALTIME:
 	case CLOCK_MONOTONIC:
 	case CLOCK_BOOTTIME:
-		if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
-			return -EFAULT;
-		t64 = timespec_to_timespec64(t);
-		if (!timespec64_valid(&t64))
-			return -EINVAL;
-		if (flags & TIMER_ABSTIME)
-			rmtp = NULL;
-		current->restart_block.nanosleep.rmtp = rmtp;
-		return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
-					 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
-					 which_clock);
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+		return -EFAULT;
+	t64 = timespec_to_timespec64(t);
+	if (!timespec64_valid(&t64))
+		return -EINVAL;
+	if (flags & TIMER_ABSTIME)
+		rmtp = NULL;
+	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
+	current->restart_block.nanosleep.rmtp = rmtp;
+	return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
+				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
+				 which_clock);
 }
 
 #ifdef CONFIG_COMPAT
-long clock_nanosleep_restart(struct restart_block *restart_block)
+COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
+		       struct compat_timespec __user *, rqtp,
+		       struct compat_timespec __user *, rmtp)
 {
-	return hrtimer_nanosleep_restart(restart_block);
+	struct timespec64 t64;
+	struct timespec t;
+
+	switch (which_clock) {
+	case CLOCK_REALTIME:
+	case CLOCK_MONOTONIC:
+	case CLOCK_BOOTTIME:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (compat_get_timespec(&t, rqtp))
+		return -EFAULT;
+	t64 = timespec_to_timespec64(t);
+	if (!timespec64_valid(&t64))
+		return -EINVAL;
+	if (flags & TIMER_ABSTIME)
+		rmtp = NULL;
+	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
+	current->restart_block.nanosleep.compat_rmtp = rmtp;
+	return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
+				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
+				 which_clock);
 }
 #endif
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index a3e5c01b430e..bec86b6b9814 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -49,6 +49,7 @@
 #include <linux/workqueue.h>
 #include <linux/export.h>
 #include <linux/hashtable.h>
+#include <linux/compat.h>
 
 #include "timekeeping.h"
 #include "posix-timers.h"
@@ -1069,25 +1070,40 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 		return -EINVAL;
 	if (flags & TIMER_ABSTIME)
 		rmtp = NULL;
+	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
 	current->restart_block.nanosleep.rmtp = rmtp;
 
 	return kc->nsleep(which_clock, flags, &t64);
 }
 
-/*
- * This will restart clock_nanosleep. This is required only by
- * compat_clock_nanosleep_restart for now.
- */
-long clock_nanosleep_restart(struct restart_block *restart_block)
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
+		       struct compat_timespec __user *, rqtp,
+		       struct compat_timespec __user *, rmtp)
 {
-	clockid_t which_clock = restart_block->nanosleep.clockid;
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
+	struct timespec64 t64;
+	struct timespec t;
 
-	if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
+	if (!kc)
 		return -EINVAL;
+	if (!kc->nsleep)
+		return -ENANOSLEEP_NOTSUP;
+
+	if (compat_get_timespec(&t, rqtp))
+		return -EFAULT;
 
-	return kc->nsleep_restart(restart_block);
+	t64 = timespec_to_timespec64(t);
+	if (!timespec64_valid(&t64))
+		return -EINVAL;
+	if (flags & TIMER_ABSTIME)
+		rmtp = NULL;
+	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
+	current->restart_block.nanosleep.compat_rmtp = rmtp;
+
+	return kc->nsleep(which_clock, flags, &t64);
 }
+#endif
 
 static const struct k_clock clock_realtime = {
 	.clock_getres		= posix_get_hrtimer_res,
-- 
cgit v1.2.3


From ce41aaf47af3d28c4c958e07675a3e0a51f09bd3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Jun 2017 09:42:32 +0100
Subject: hrtimers/posix-timers: Merge nanosleep timespec copyout logics into a
 new helper

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170607084241.28657-7-viro@ZenIV.linux.org.uk
---
 include/linux/hrtimer.h        |  2 ++
 kernel/time/alarmtimer.c       | 10 +---------
 kernel/time/hrtimer.c          | 29 ++++++++++++++++++++---------
 kernel/time/posix-cpu-timers.c | 13 ++-----------
 4 files changed, 25 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index b80c34f6fd4b..38b968f3df4e 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -452,6 +452,8 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer,
 }
 
 /* Precise sleep: */
+
+extern int nanosleep_copyout(struct restart_block *, struct timespec *);
 extern long hrtimer_nanosleep(struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 57bcf94ee132..7bed4e44f9bd 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -721,15 +721,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 			return 0;
 		rmt = ktime_to_timespec(rem);
 
-#ifdef CONFIG_COMPAT
-		if (restart->nanosleep.type == TT_COMPAT) {
-			if (compat_put_timespec(&rmt,
-						restart->nanosleep.compat_rmtp))
-				return -EFAULT;
-		} else
-#endif
-		if (copy_to_user(restart->nanosleep.rmtp, &rmt, sizeof(rmt)))
-			return -EFAULT;
+		return nanosleep_copyout(restart, &rmt);
 	}
 	return -ERESTART_RESTARTBLOCK;
 }
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5370da8fc0a4..db2f5f7b4ba5 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1440,6 +1440,25 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
+int nanosleep_copyout(struct restart_block *restart, struct timespec *ts)
+{
+	switch(restart->nanosleep.type) {
+#ifdef CONFIG_COMPAT
+	case TT_COMPAT:
+		if (compat_put_timespec(ts, restart->nanosleep.compat_rmtp))
+			return -EFAULT;
+		break;
+#endif
+	case TT_NATIVE:
+		if (copy_to_user(restart->nanosleep.rmtp, ts, sizeof(struct timespec)))
+			return -EFAULT;
+		break;
+	default:
+		BUG();
+	}
+	return -ERESTART_RESTARTBLOCK;
+}
+
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
 {
 	struct restart_block *restart;
@@ -1472,15 +1491,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 			return 0;
 		rmt = ktime_to_timespec(rem);
 
-#ifdef CONFIG_COMPAT
-		if (restart->nanosleep.type == TT_COMPAT) {
-			if (compat_put_timespec(&rmt,
-						restart->nanosleep.compat_rmtp))
-				return -EFAULT;
-		} else
-#endif
-		if (copy_to_user(restart->nanosleep.rmtp, &rmt, sizeof(rmt)))
-			return -EFAULT;
+		return nanosleep_copyout(restart, &rmt);
 	}
 	return -ERESTART_RESTARTBLOCK;
 }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 1563ca22cf1f..993a924d1399 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1312,22 +1312,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		 * Report back to the user the time still remaining.
 		 */
 		restart = &current->restart_block;
+		restart->nanosleep.expires = timespec64_to_ns(rqtp);
 		if (restart->nanosleep.type != TT_NONE) {
 			struct timespec ts;
 
 			ts = timespec64_to_timespec(it.it_value);
-#ifdef CONFIG_COMPAT
-			if (restart->nanosleep.type == TT_COMPAT) {
-				if (compat_put_timespec(&ts,
-						restart->nanosleep.compat_rmtp))
-					return -EFAULT;
-			} else
-#endif
-			if (copy_to_user(restart->nanosleep.rmtp, &ts,
-					sizeof(ts)))
-				return -EFAULT;
+			error = nanosleep_copyout(restart, &ts);
 		}
-		restart->nanosleep.expires = timespec64_to_ns(rqtp);
 	}
 
 	return error;
-- 
cgit v1.2.3


From fb923c4a3c2ee735755d4a93522150fc35d0ecbd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Jun 2017 09:42:33 +0100
Subject: posix-timers: Kill ->nsleep_restart()

No more users.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170607084241.28657-8-viro@ZenIV.linux.org.uk
---
 include/linux/hrtimer.h        | 1 -
 kernel/time/hrtimer.c          | 2 +-
 kernel/time/posix-cpu-timers.c | 6 ------
 kernel/time/posix-timers.c     | 4 ----
 kernel/time/posix-timers.h     | 1 -
 5 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 38b968f3df4e..d83b7ed1cb0e 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -457,7 +457,6 @@ extern int nanosleep_copyout(struct restart_block *, struct timespec *);
 extern long hrtimer_nanosleep(struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
-extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
 
 extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				 struct task_struct *tsk);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index db2f5f7b4ba5..45f83cc7c0c7 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1496,7 +1496,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 	return -ERESTART_RESTARTBLOCK;
 }
 
-long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
+static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 {
 	struct hrtimer_sleeper t;
 	int ret;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 993a924d1399..515148d4eeb1 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1386,10 +1386,6 @@ static int process_cpu_nsleep(const clockid_t which_clock, int flags,
 {
 	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
 }
-static long process_cpu_nsleep_restart(struct restart_block *restart_block)
-{
-	return -EINVAL;
-}
 static int thread_cpu_clock_getres(const clockid_t which_clock,
 				   struct timespec64 *tp)
 {
@@ -1412,7 +1408,6 @@ const struct k_clock clock_posix_cpu = {
 	.clock_get	= posix_cpu_clock_get,
 	.timer_create	= posix_cpu_timer_create,
 	.nsleep		= posix_cpu_nsleep,
-	.nsleep_restart	= posix_cpu_nsleep_restart,
 	.timer_set	= posix_cpu_timer_set,
 	.timer_del	= posix_cpu_timer_del,
 	.timer_get	= posix_cpu_timer_get,
@@ -1424,7 +1419,6 @@ const struct k_clock clock_process = {
 	.clock_get	= process_cpu_clock_get,
 	.timer_create	= process_cpu_timer_create,
 	.nsleep		= process_cpu_nsleep,
-	.nsleep_restart	= process_cpu_nsleep_restart,
 };
 
 const struct k_clock clock_thread = {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index bec86b6b9814..ea4a463436bf 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1111,7 +1111,6 @@ static const struct k_clock clock_realtime = {
 	.clock_set		= posix_clock_realtime_set,
 	.clock_adj		= posix_clock_realtime_adj,
 	.nsleep			= common_nsleep,
-	.nsleep_restart		= hrtimer_nanosleep_restart,
 	.timer_create		= common_timer_create,
 	.timer_set		= common_timer_set,
 	.timer_get		= common_timer_get,
@@ -1127,7 +1126,6 @@ static const struct k_clock clock_monotonic = {
 	.clock_getres		= posix_get_hrtimer_res,
 	.clock_get		= posix_ktime_get_ts,
 	.nsleep			= common_nsleep,
-	.nsleep_restart		= hrtimer_nanosleep_restart,
 	.timer_create		= common_timer_create,
 	.timer_set		= common_timer_set,
 	.timer_get		= common_timer_get,
@@ -1158,7 +1156,6 @@ static const struct k_clock clock_tai = {
 	.clock_getres		= posix_get_hrtimer_res,
 	.clock_get		= posix_get_tai,
 	.nsleep			= common_nsleep,
-	.nsleep_restart		= hrtimer_nanosleep_restart,
 	.timer_create		= common_timer_create,
 	.timer_set		= common_timer_set,
 	.timer_get		= common_timer_get,
@@ -1174,7 +1171,6 @@ static const struct k_clock clock_boottime = {
 	.clock_getres		= posix_get_hrtimer_res,
 	.clock_get		= posix_get_boottime,
 	.nsleep			= common_nsleep,
-	.nsleep_restart		= hrtimer_nanosleep_restart,
 	.timer_create		= common_timer_create,
 	.timer_set		= common_timer_set,
 	.timer_get		= common_timer_get,
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index bfd9e15c6ce0..5e69bb85629f 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -11,7 +11,6 @@ struct k_clock {
 	int	(*timer_create)(struct k_itimer *timer);
 	int	(*nsleep)(const clockid_t which_clock, int flags,
 			  struct timespec64 *);
-	long	(*nsleep_restart)(struct restart_block *restart_block);
 	int	(*timer_set)(struct k_itimer *timr, int flags,
 			     struct itimerspec64 *new_setting,
 			     struct itimerspec64 *old_setting);
-- 
cgit v1.2.3


From 3a4d44b6162555070194e486ff6b3799a8d323a2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Jun 2017 09:42:34 +0100
Subject: ntp: Move adjtimex related compat syscalls to native counterparts

Get rid of set_fs() mess and sanitize compat_{get,put}_timex(),
while we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170607084241.28657-9-viro@ZenIV.linux.org.uk
---
 include/linux/compat.h     |   4 ++
 kernel/compat.c            | 141 +++++++++++++++++----------------------------
 kernel/time/posix-stubs.c  |   2 +
 kernel/time/posix-timers.c |  27 +++++++++
 kernel/time/time.c         |  24 +++++++-
 5 files changed, 108 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 1c5f3152cbb5..ecb8dd261d36 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -128,6 +128,10 @@ struct compat_timex {
 	compat_int_t:32; compat_int_t:32; compat_int_t:32;
 };
 
+struct timex;
+int compat_get_timex(struct timex *, const struct compat_timex __user *);
+int compat_put_timex(struct compat_timex __user *, const struct timex *);
+
 #define _COMPAT_NSIG_WORDS	(_COMPAT_NSIG / _COMPAT_NSIG_BPW)
 
 typedef struct {
diff --git a/kernel/compat.c b/kernel/compat.c
index 23afa26f574b..97087b333543 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -30,60 +30,64 @@
 
 #include <linux/uaccess.h>
 
-static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
+int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp)
 {
-	memset(txc, 0, sizeof(struct timex));
-
-	if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
-			__get_user(txc->modes, &utp->modes) ||
-			__get_user(txc->offset, &utp->offset) ||
-			__get_user(txc->freq, &utp->freq) ||
-			__get_user(txc->maxerror, &utp->maxerror) ||
-			__get_user(txc->esterror, &utp->esterror) ||
-			__get_user(txc->status, &utp->status) ||
-			__get_user(txc->constant, &utp->constant) ||
-			__get_user(txc->precision, &utp->precision) ||
-			__get_user(txc->tolerance, &utp->tolerance) ||
-			__get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
-			__get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
-			__get_user(txc->tick, &utp->tick) ||
-			__get_user(txc->ppsfreq, &utp->ppsfreq) ||
-			__get_user(txc->jitter, &utp->jitter) ||
-			__get_user(txc->shift, &utp->shift) ||
-			__get_user(txc->stabil, &utp->stabil) ||
-			__get_user(txc->jitcnt, &utp->jitcnt) ||
-			__get_user(txc->calcnt, &utp->calcnt) ||
-			__get_user(txc->errcnt, &utp->errcnt) ||
-			__get_user(txc->stbcnt, &utp->stbcnt))
+	struct compat_timex tx32;
+
+	if (copy_from_user(&tx32, utp, sizeof(struct compat_timex)))
 		return -EFAULT;
 
+	txc->modes = tx32.modes;
+	txc->offset = tx32.offset;
+	txc->freq = tx32.freq;
+	txc->maxerror = tx32.maxerror;
+	txc->esterror = tx32.esterror;
+	txc->status = tx32.status;
+	txc->constant = tx32.constant;
+	txc->precision = tx32.precision;
+	txc->tolerance = tx32.tolerance;
+	txc->time.tv_sec = tx32.time.tv_sec;
+	txc->time.tv_usec = tx32.time.tv_usec;
+	txc->tick = tx32.tick;
+	txc->ppsfreq = tx32.ppsfreq;
+	txc->jitter = tx32.jitter;
+	txc->shift = tx32.shift;
+	txc->stabil = tx32.stabil;
+	txc->jitcnt = tx32.jitcnt;
+	txc->calcnt = tx32.calcnt;
+	txc->errcnt = tx32.errcnt;
+	txc->stbcnt = tx32.stbcnt;
+
 	return 0;
 }
 
-static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
-{
-	if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
-			__put_user(txc->modes, &utp->modes) ||
-			__put_user(txc->offset, &utp->offset) ||
-			__put_user(txc->freq, &utp->freq) ||
-			__put_user(txc->maxerror, &utp->maxerror) ||
-			__put_user(txc->esterror, &utp->esterror) ||
-			__put_user(txc->status, &utp->status) ||
-			__put_user(txc->constant, &utp->constant) ||
-			__put_user(txc->precision, &utp->precision) ||
-			__put_user(txc->tolerance, &utp->tolerance) ||
-			__put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
-			__put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
-			__put_user(txc->tick, &utp->tick) ||
-			__put_user(txc->ppsfreq, &utp->ppsfreq) ||
-			__put_user(txc->jitter, &utp->jitter) ||
-			__put_user(txc->shift, &utp->shift) ||
-			__put_user(txc->stabil, &utp->stabil) ||
-			__put_user(txc->jitcnt, &utp->jitcnt) ||
-			__put_user(txc->calcnt, &utp->calcnt) ||
-			__put_user(txc->errcnt, &utp->errcnt) ||
-			__put_user(txc->stbcnt, &utp->stbcnt) ||
-			__put_user(txc->tai, &utp->tai))
+int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc)
+{
+	struct compat_timex tx32;
+
+	memset(&tx32, 0, sizeof(struct compat_timex));
+	tx32.modes = txc->modes;
+	tx32.offset = txc->offset;
+	tx32.freq = txc->freq;
+	tx32.maxerror = txc->maxerror;
+	tx32.esterror = txc->esterror;
+	tx32.status = txc->status;
+	tx32.constant = txc->constant;
+	tx32.precision = txc->precision;
+	tx32.tolerance = txc->tolerance;
+	tx32.time.tv_sec = txc->time.tv_sec;
+	tx32.time.tv_usec = txc->time.tv_usec;
+	tx32.tick = txc->tick;
+	tx32.ppsfreq = txc->ppsfreq;
+	tx32.jitter = txc->jitter;
+	tx32.shift = txc->shift;
+	tx32.stabil = txc->stabil;
+	tx32.jitcnt = txc->jitcnt;
+	tx32.calcnt = txc->calcnt;
+	tx32.errcnt = txc->errcnt;
+	tx32.stbcnt = txc->stbcnt;
+	tx32.tai = txc->tai;
+	if (copy_to_user(utp, &tx32, sizeof(struct compat_timex)))
 		return -EFAULT;
 	return 0;
 }
@@ -705,29 +709,6 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 	return err;
 }
 
-COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
-		       struct compat_timex __user *, utp)
-{
-	struct timex txc;
-	mm_segment_t oldfs;
-	int err, ret;
-
-	err = compat_get_timex(&txc, utp);
-	if (err)
-		return err;
-
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
-	set_fs(oldfs);
-
-	err = compat_put_timex(utp, &txc);
-	if (err)
-		return err;
-
-	return ret;
-}
-
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 		       struct compat_timespec __user *, tp)
 {
@@ -944,24 +925,6 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
 
 #endif /* __ARCH_WANT_COMPAT_SYS_TIME */
 
-COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp)
-{
-	struct timex txc;
-	int err, ret;
-
-	err = compat_get_timex(&txc, utp);
-	if (err)
-		return err;
-
-	ret = do_adjtimex(&txc);
-
-	err = compat_put_timex(utp, &txc);
-	if (err)
-		return err;
-
-	return ret;
-}
-
 #ifdef CONFIG_NUMA
 COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
 		       compat_uptr_t __user *, pages32,
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 749b76f2d757..954d1d8ff9e6 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -28,6 +28,7 @@ asmlinkage long sys_ni_posix_timers(void)
 }
 
 #define SYS_NI(name)  SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers)
+#define COMPAT_SYS_NI(name)  SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers)
 
 SYS_NI(timer_create);
 SYS_NI(timer_gettime);
@@ -40,6 +41,7 @@ SYS_NI(setitimer);
 #ifdef __ARCH_WANT_SYS_ALARM
 SYS_NI(alarm);
 #endif
+COMPAT_SYS_NI(clock_adjtime);
 
 /*
  * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index ea4a463436bf..b1b6d52d6425 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1018,6 +1018,33 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 	return err;
 }
 
+#ifdef CONFIG_COMPAT
+
+COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
+		       struct compat_timex __user *, utp)
+{
+	const struct k_clock *kc = clockid_to_kclock(which_clock);
+	struct timex ktx;
+	int err;
+
+	if (!kc)
+		return -EINVAL;
+	if (!kc->clock_adj)
+		return -EOPNOTSUPP;
+
+	err = compat_get_timex(&ktx, utp);
+	if (err)
+		return err;
+
+	err = kc->clock_adj(which_clock, &ktx);
+
+	if (err >= 0)
+		err = compat_put_timex(utp, &ktx);
+
+	return err;
+}
+#endif
+
 SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 		struct timespec __user *, tp)
 {
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 49c73c6ed648..400662f16c5a 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -39,6 +39,7 @@
 #include <linux/ptrace.h>
 
 #include <linux/uaccess.h>
+#include <linux/compat.h>
 #include <asm/unistd.h>
 
 #include <generated/timeconst.h>
@@ -224,12 +225,33 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
 	 * structure. But bear in mind that the structures
 	 * may change
 	 */
-	if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
 		return -EFAULT;
 	ret = do_adjtimex(&txc);
 	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
 }
 
+#ifdef CONFIG_COMPAT
+
+COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp)
+{
+	struct timex txc;
+	int err, ret;
+
+	err = compat_get_timex(&txc, utp);
+	if (err)
+		return err;
+
+	ret = do_adjtimex(&txc);
+
+	err = compat_put_timex(utp, &txc);
+	if (err)
+		return err;
+
+	return ret;
+}
+#endif
+
 /*
  * Convert jiffies to milliseconds and back.
  *
-- 
cgit v1.2.3


From 54ad9c46c262ce4a603dc7887e37956896a0211d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Jun 2017 09:42:37 +0100
Subject: itimers: Move compat itimer syscalls to native ones

get rid of set_fs(), sanitize compat copyin/copyout.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170607084241.28657-12-viro@ZenIV.linux.org.uk
---
 include/linux/compat.h    |  4 +++
 kernel/compat.c           | 69 +++++++++++------------------------------------
 kernel/time/itimer.c      | 38 ++++++++++++++++++++++++++
 kernel/time/posix-stubs.c |  2 ++
 4 files changed, 60 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index ecb8dd261d36..425563c7647b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -94,6 +94,10 @@ struct compat_itimerval {
 	struct compat_timeval	it_value;
 };
 
+struct itimerval;
+int get_compat_itimerval(struct itimerval *, const struct compat_itimerval __user *);
+int put_compat_itimerval(struct compat_itimerval __user *, const struct itimerval *);
+
 struct compat_tms {
 	compat_clock_t		tms_utime;
 	compat_clock_t		tms_stime;
diff --git a/kernel/compat.c b/kernel/compat.c
index 1fb8cf7e691e..c349417d2c40 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -217,65 +217,28 @@ int compat_convert_timespec(struct timespec __user **kts,
 	return 0;
 }
 
-static inline long get_compat_itimerval(struct itimerval *o,
-		struct compat_itimerval __user *i)
+int get_compat_itimerval(struct itimerval *o, const struct compat_itimerval __user *i)
 {
-	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
-		(__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) |
-		 __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) |
-		 __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) |
-		 __get_user(o->it_value.tv_usec, &i->it_value.tv_usec)));
-}
-
-static inline long put_compat_itimerval(struct compat_itimerval __user *o,
-		struct itimerval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) |
-		 __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) |
-		 __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) |
-		 __put_user(i->it_value.tv_usec, &o->it_value.tv_usec)));
-}
-
-asmlinkage long sys_ni_posix_timers(void);
-
-COMPAT_SYSCALL_DEFINE2(getitimer, int, which,
-		struct compat_itimerval __user *, it)
-{
-	struct itimerval kit;
-	int error;
-
-	if (!IS_ENABLED(CONFIG_POSIX_TIMERS))
-		return sys_ni_posix_timers();
+	struct compat_itimerval v32;
 
-	error = do_getitimer(which, &kit);
-	if (!error && put_compat_itimerval(it, &kit))
-		error = -EFAULT;
-	return error;
+	if (copy_from_user(&v32, i, sizeof(struct compat_itimerval)))
+		return -EFAULT;
+	o->it_interval.tv_sec = v32.it_interval.tv_sec;
+	o->it_interval.tv_usec = v32.it_interval.tv_usec;
+	o->it_value.tv_sec = v32.it_value.tv_sec;
+	o->it_value.tv_usec = v32.it_value.tv_usec;
+	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE3(setitimer, int, which,
-		struct compat_itimerval __user *, in,
-		struct compat_itimerval __user *, out)
+int put_compat_itimerval(struct compat_itimerval __user *o, const struct itimerval *i)
 {
-	struct itimerval kin, kout;
-	int error;
+	struct compat_itimerval v32;
 
-	if (!IS_ENABLED(CONFIG_POSIX_TIMERS))
-		return sys_ni_posix_timers();
-
-	if (in) {
-		if (get_compat_itimerval(&kin, in))
-			return -EFAULT;
-	} else
-		memset(&kin, 0, sizeof(kin));
-
-	error = do_setitimer(which, &kin, out ? &kout : NULL);
-	if (error || !out)
-		return error;
-	if (put_compat_itimerval(out, &kout))
-		return -EFAULT;
-	return 0;
+	v32.it_interval.tv_sec = i->it_interval.tv_sec;
+	v32.it_interval.tv_usec = i->it_interval.tv_usec;
+	v32.it_value.tv_sec = i->it_value.tv_sec;
+	v32.it_value.tv_usec = i->it_value.tv_usec;
+	return copy_to_user(o, &v32, sizeof(struct compat_itimerval)) ? -EFAULT : 0;
 }
 
 static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 087d6a1279b8..9dd7ff5e445a 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -15,6 +15,7 @@
 #include <linux/posix-timers.h>
 #include <linux/hrtimer.h>
 #include <trace/events/timer.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -116,6 +117,19 @@ SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
 	return error;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(getitimer, int, which,
+		       struct compat_itimerval __user *, it)
+{
+	struct itimerval kit;
+	int error = do_getitimer(which, &kit);
+
+	if (!error && put_compat_itimerval(it, &kit))
+		error = -EFAULT;
+	return error;
+}
+#endif
+
 
 /*
  * The timer is automagically restarted, when interval != 0
@@ -294,3 +308,27 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
 		return -EFAULT;
 	return 0;
 }
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE3(setitimer, int, which,
+		       struct compat_itimerval __user *, in,
+		       struct compat_itimerval __user *, out)
+{
+	struct itimerval kin, kout;
+	int error;
+
+	if (in) {
+		if (get_compat_itimerval(&kin, in))
+			return -EFAULT;
+	} else {
+		memset(&kin, 0, sizeof(kin));
+	}
+
+	error = do_setitimer(which, &kin, out ? &kout : NULL);
+	if (error || !out)
+		return error;
+	if (put_compat_itimerval(out, &kout))
+		return -EFAULT;
+	return 0;
+}
+#endif
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index f4a1962d1729..7f88517461e8 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -44,6 +44,8 @@ SYS_NI(alarm);
 COMPAT_SYS_NI(clock_adjtime);
 COMPAT_SYS_NI(timer_settime);
 COMPAT_SYS_NI(timer_gettime);
+COMPAT_SYS_NI(getitimer);
+COMPAT_SYS_NI(setitimer);
 
 /*
  * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC
-- 
cgit v1.2.3


From 938e7cf2d569833a5acf689a8926faf507826253 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 13 Jun 2017 23:34:33 +0200
Subject: posix-timers: Make nanosleep timespec argument const

No nanosleep implementation modifies the rqtp argument. Mark is const.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/hrtimer.h        | 2 +-
 kernel/time/alarmtimer.c       | 2 +-
 kernel/time/hrtimer.c          | 2 +-
 kernel/time/posix-cpu-timers.c | 4 ++--
 kernel/time/posix-timers.c     | 4 ++--
 kernel/time/posix-timers.h     | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index d83b7ed1cb0e..255edd5e7a74 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -454,7 +454,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer,
 /* Precise sleep: */
 
 extern int nanosleep_copyout(struct restart_block *, struct timespec *);
-extern long hrtimer_nanosleep(struct timespec64 *rqtp,
+extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7bed4e44f9bd..c991cf212c6d 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -753,7 +753,7 @@ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
  * Handles clock_nanosleep calls against _ALARM clockids
  */
 static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
-			      struct timespec64 *tsreq)
+			      const struct timespec64 *tsreq)
 {
 	enum  alarmtimer_type type = clock2alarm(which_clock);
 	struct restart_block *restart = &current->restart_block;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 45f83cc7c0c7..81da124f1115 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1510,7 +1510,7 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	return ret;
 }
 
-long hrtimer_nanosleep(struct timespec64 *rqtp,
+long hrtimer_nanosleep(const struct timespec64 *rqtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
 	struct restart_block *restart;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 3adfa42ca24c..9df618ee64cf 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1328,7 +1328,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
 
 static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
-			    struct timespec64 *rqtp)
+			    const struct timespec64 *rqtp)
 {
 	struct restart_block *restart_block = &current->restart_block;
 	int error;
@@ -1383,7 +1383,7 @@ static int process_cpu_timer_create(struct k_itimer *timer)
 	return posix_cpu_timer_create(timer);
 }
 static int process_cpu_nsleep(const clockid_t which_clock, int flags,
-			      struct timespec64 *rqtp)
+			      const struct timespec64 *rqtp)
 {
 	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
 }
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index c9f45a84fb8b..82d67be7d9d1 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1214,9 +1214,9 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
  * nanosleep for monotonic and realtime clocks
  */
 static int common_nsleep(const clockid_t which_clock, int flags,
-			 struct timespec64 *tsave)
+			 const struct timespec64 *rqtp)
 {
-	return hrtimer_nanosleep(tsave, flags & TIMER_ABSTIME ?
+	return hrtimer_nanosleep(rqtp, flags & TIMER_ABSTIME ?
 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
 				 which_clock);
 }
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index 5e69bb85629f..fb303c3be4d3 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -10,7 +10,7 @@ struct k_clock {
 	int	(*clock_adj)(const clockid_t which_clock, struct timex *tx);
 	int	(*timer_create)(struct k_itimer *timer);
 	int	(*nsleep)(const clockid_t which_clock, int flags,
-			  struct timespec64 *);
+			  const struct timespec64 *);
 	int	(*timer_set)(struct k_itimer *timr, int flags,
 			     struct itimerspec64 *new_setting,
 			     struct itimerspec64 *old_setting);
-- 
cgit v1.2.3


From 1727339590fdb5a1ded881b540cd32121278d414 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 26 May 2017 16:56:11 +0200
Subject: clocksource/drivers: Rename CLOCKSOURCE_OF_DECLARE to
 TIMER_OF_DECLARE

The CLOCKSOURCE_OF_DECLARE macro is used widely for the timers to declare the
clocksource at early stage. However, this macro is also used to initialize
the clockevent if any, or the clockevent only.

It was originally suggested to declare another macro to initialize a
clockevent, so in order to separate the two entities even they belong to the
same IP. This was not accepted because of the impact on the DT where splitting
a clocksource/clockevent definition does not make sense as it is a Linux
concept not a hardware description.

On the other side, the clocksource has not interrupt declared while the
clockevent has, so it is easy from the driver to know if the description is
for a clockevent or a clocksource, IOW it could be implemented at the driver
level.

So instead of dealing with a named clocksource macro, let's use a more generic
one: TIMER_OF_DECLARE.

The patch has not functional changes.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Matthias Brugger <matthias.bgg@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/kernel/smp_twd.c                 |  6 +++---
 arch/microblaze/kernel/timer.c            |  2 +-
 arch/mips/ralink/cevt-rt3352.c            |  2 +-
 arch/nios2/kernel/time.c                  |  2 +-
 drivers/clocksource/arc_timer.c           |  6 +++---
 drivers/clocksource/arm_arch_timer.c      |  6 +++---
 drivers/clocksource/arm_global_timer.c    |  2 +-
 drivers/clocksource/armv7m_systick.c      |  2 +-
 drivers/clocksource/asm9260_timer.c       |  2 +-
 drivers/clocksource/bcm2835_timer.c       |  2 +-
 drivers/clocksource/bcm_kona_timer.c      |  4 ++--
 drivers/clocksource/cadence_ttc_timer.c   |  2 +-
 drivers/clocksource/clksrc-dbx500-prcmu.c |  2 +-
 drivers/clocksource/clksrc_st_lpc.c       |  2 +-
 drivers/clocksource/clps711x-timer.c      |  2 +-
 drivers/clocksource/dw_apb_timer_of.c     |  8 ++++----
 drivers/clocksource/exynos_mct.c          |  4 ++--
 drivers/clocksource/fsl_ftm_timer.c       |  2 +-
 drivers/clocksource/h8300_timer16.c       |  2 +-
 drivers/clocksource/h8300_timer8.c        |  2 +-
 drivers/clocksource/h8300_tpu.c           |  2 +-
 drivers/clocksource/jcore-pit.c           |  2 +-
 drivers/clocksource/meson6_timer.c        |  2 +-
 drivers/clocksource/mips-gic-timer.c      |  2 +-
 drivers/clocksource/mps2-timer.c          |  2 +-
 drivers/clocksource/mtk_timer.c           |  2 +-
 drivers/clocksource/mxs_timer.c           |  2 +-
 drivers/clocksource/nomadik-mtu.c         |  2 +-
 drivers/clocksource/pxa_timer.c           |  2 +-
 drivers/clocksource/qcom-timer.c          |  4 ++--
 drivers/clocksource/renesas-ostm.c        |  2 +-
 drivers/clocksource/rockchip_timer.c      |  4 ++--
 drivers/clocksource/samsung_pwm_timer.c   |  8 ++++----
 drivers/clocksource/sun4i_timer.c         |  2 +-
 drivers/clocksource/tango_xtal.c          |  2 +-
 drivers/clocksource/tegra20_timer.c       |  4 ++--
 drivers/clocksource/time-armada-370-xp.c  |  6 +++---
 drivers/clocksource/time-efm32.c          |  4 ++--
 drivers/clocksource/time-lpc32xx.c        |  2 +-
 drivers/clocksource/time-orion.c          |  2 +-
 drivers/clocksource/time-pistachio.c      |  2 +-
 drivers/clocksource/timer-atlas7.c        |  2 +-
 drivers/clocksource/timer-atmel-pit.c     |  2 +-
 drivers/clocksource/timer-atmel-st.c      |  2 +-
 drivers/clocksource/timer-digicolor.c     |  2 +-
 drivers/clocksource/timer-fttmr010.c      | 10 +++++-----
 drivers/clocksource/timer-imx-gpt.c       | 24 ++++++++++++------------
 drivers/clocksource/timer-integrator-ap.c |  2 +-
 drivers/clocksource/timer-keystone.c      |  2 +-
 drivers/clocksource/timer-nps.c           |  6 +++---
 drivers/clocksource/timer-oxnas-rps.c     |  4 ++--
 drivers/clocksource/timer-prima2.c        |  2 +-
 drivers/clocksource/timer-sp804.c         |  4 ++--
 drivers/clocksource/timer-stm32.c         |  2 +-
 drivers/clocksource/timer-sun5i.c         |  4 ++--
 drivers/clocksource/timer-ti-32k.c        |  2 +-
 drivers/clocksource/timer-u300.c          |  2 +-
 drivers/clocksource/versatile.c           |  4 ++--
 drivers/clocksource/vf_pit_timer.c        |  2 +-
 drivers/clocksource/vt8500_timer.c        |  2 +-
 drivers/clocksource/zevio-timer.c         |  2 +-
 include/linux/clocksource.h               |  2 +-
 62 files changed, 103 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 895ae5197159..b30eafeef096 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -403,7 +403,7 @@ out:
 	WARN(err, "twd_local_timer_of_register failed (%d)\n", err);
 	return err;
 }
-CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register);
-CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register);
-CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register);
+TIMER_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register);
+TIMER_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register);
+TIMER_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register);
 #endif
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index 999066192715..873a1ccd9040 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -333,5 +333,5 @@ static int __init xilinx_timer_init(struct device_node *timer)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a",
+TIMER_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a",
 		       xilinx_timer_init);
diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c
index b8a1376165b0..92f284d2b802 100644
--- a/arch/mips/ralink/cevt-rt3352.c
+++ b/arch/mips/ralink/cevt-rt3352.c
@@ -152,4 +152,4 @@ static int __init ralink_systick_init(struct device_node *np)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init);
+TIMER_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init);
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 6e2bdc9b8530..2954b6617378 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -353,4 +353,4 @@ void __init time_init(void)
 	clocksource_probe();
 }
 
-CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init);
+TIMER_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init);
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index 21649733827d..4927355f9cbe 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -99,7 +99,7 @@ static int __init arc_cs_setup_gfrc(struct device_node *node)
 
 	return clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq);
 }
-CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc);
+TIMER_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc);
 
 #define AUX_RTC_CTRL	0x103
 #define AUX_RTC_LOW	0x104
@@ -158,7 +158,7 @@ static int __init arc_cs_setup_rtc(struct device_node *node)
 
 	return clocksource_register_hz(&arc_counter_rtc, arc_timer_freq);
 }
-CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc);
+TIMER_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc);
 
 #endif
 
@@ -333,4 +333,4 @@ static int __init arc_of_timer_init(struct device_node *np)
 
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init);
+TIMER_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init);
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 4bed671e490e..bc60cd78698e 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1194,8 +1194,8 @@ static int __init arch_timer_of_init(struct device_node *np)
 
 	return arch_timer_common_init();
 }
-CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
-CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
+TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
+TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
 
 static u32 __init
 arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
@@ -1382,7 +1382,7 @@ out:
 	kfree(timer_mem);
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
+TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
 		       arch_timer_mem_of_init);
 
 #ifdef CONFIG_ACPI_GTDT
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 123ed20ac2ff..095bb965f621 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -339,5 +339,5 @@ out_unmap:
 }
 
 /* Only tested on r2p2 and r3p0  */
-CLOCKSOURCE_OF_DECLARE(arm_gt, "arm,cortex-a9-global-timer",
+TIMER_OF_DECLARE(arm_gt, "arm,cortex-a9-global-timer",
 			global_timer_of_register);
diff --git a/drivers/clocksource/armv7m_systick.c b/drivers/clocksource/armv7m_systick.c
index a315491b7047..ac046d6fb0bf 100644
--- a/drivers/clocksource/armv7m_systick.c
+++ b/drivers/clocksource/armv7m_systick.c
@@ -82,5 +82,5 @@ out_unmap:
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick",
+TIMER_OF_DECLARE(arm_systick, "arm,armv7m-systick",
 			system_timer_of_register);
diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c
index c6780830b8ac..38cd2feb87c4 100644
--- a/drivers/clocksource/asm9260_timer.c
+++ b/drivers/clocksource/asm9260_timer.c
@@ -238,5 +238,5 @@ static int __init asm9260_timer_init(struct device_node *np)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(asm9260_timer, "alphascale,asm9260-timer",
+TIMER_OF_DECLARE(asm9260_timer, "alphascale,asm9260-timer",
 		asm9260_timer_init);
diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c
index dce44307469e..82828d3a4739 100644
--- a/drivers/clocksource/bcm2835_timer.c
+++ b/drivers/clocksource/bcm2835_timer.c
@@ -148,5 +148,5 @@ err_iounmap:
 	iounmap(base);
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(bcm2835, "brcm,bcm2835-system-timer",
+TIMER_OF_DECLARE(bcm2835, "brcm,bcm2835-system-timer",
 			bcm2835_timer_init);
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
index fda5e1476638..5c40be9880f5 100644
--- a/drivers/clocksource/bcm_kona_timer.c
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -198,9 +198,9 @@ static int __init kona_timer_init(struct device_node *node)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(brcm_kona, "brcm,kona-timer", kona_timer_init);
+TIMER_OF_DECLARE(brcm_kona, "brcm,kona-timer", kona_timer_init);
 /*
  * bcm,kona-timer is deprecated by brcm,kona-timer
  * being kept here for driver compatibility
  */
-CLOCKSOURCE_OF_DECLARE(bcm_kona, "bcm,kona-timer", kona_timer_init);
+TIMER_OF_DECLARE(bcm_kona, "bcm,kona-timer", kona_timer_init);
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index 44e5e951583b..a144dfca6499 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -539,4 +539,4 @@ static int __init ttc_timer_init(struct device_node *timer)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(ttc, "cdns,ttc", ttc_timer_init);
+TIMER_OF_DECLARE(ttc, "cdns,ttc", ttc_timer_init);
diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c
index c69e2772658d..c1b96dc5f444 100644
--- a/drivers/clocksource/clksrc-dbx500-prcmu.c
+++ b/drivers/clocksource/clksrc-dbx500-prcmu.c
@@ -86,5 +86,5 @@ static int __init clksrc_dbx500_prcmu_init(struct device_node *node)
 #endif
 	return clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K);
 }
-CLOCKSOURCE_OF_DECLARE(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4",
+TIMER_OF_DECLARE(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4",
 		       clksrc_dbx500_prcmu_init);
diff --git a/drivers/clocksource/clksrc_st_lpc.c b/drivers/clocksource/clksrc_st_lpc.c
index 03cc49217bb4..a1d01ebb81f5 100644
--- a/drivers/clocksource/clksrc_st_lpc.c
+++ b/drivers/clocksource/clksrc_st_lpc.c
@@ -132,4 +132,4 @@ static int __init st_clksrc_of_register(struct device_node *np)
 
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(ddata, "st,stih407-lpc", st_clksrc_of_register);
+TIMER_OF_DECLARE(ddata, "st,stih407-lpc", st_clksrc_of_register);
diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c
index 24db6d605549..fc9e025cc395 100644
--- a/drivers/clocksource/clps711x-timer.c
+++ b/drivers/clocksource/clps711x-timer.c
@@ -119,5 +119,5 @@ static int __init clps711x_timer_init(struct device_node *np)
 		return -EINVAL;
 	}
 }
-CLOCKSOURCE_OF_DECLARE(clps711x, "cirrus,ep7209-timer", clps711x_timer_init);
+TIMER_OF_DECLARE(clps711x, "cirrus,ep7209-timer", clps711x_timer_init);
 #endif
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index aee6c0d39a7c..69866cd8f4bb 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -167,7 +167,7 @@ static int __init dw_apb_timer_init(struct device_node *timer)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init);
-CLOCKSOURCE_OF_DECLARE(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init);
-CLOCKSOURCE_OF_DECLARE(apb_timer_sp, "snps,dw-apb-timer-sp", dw_apb_timer_init);
-CLOCKSOURCE_OF_DECLARE(apb_timer, "snps,dw-apb-timer", dw_apb_timer_init);
+TIMER_OF_DECLARE(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init);
+TIMER_OF_DECLARE(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init);
+TIMER_OF_DECLARE(apb_timer_sp, "snps,dw-apb-timer-sp", dw_apb_timer_init);
+TIMER_OF_DECLARE(apb_timer, "snps,dw-apb-timer", dw_apb_timer_init);
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 670ff0f25b67..7a244b681876 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -610,5 +610,5 @@ static int __init mct_init_ppi(struct device_node *np)
 {
 	return mct_init_dt(np, MCT_INT_PPI);
 }
-CLOCKSOURCE_OF_DECLARE(exynos4210, "samsung,exynos4210-mct", mct_init_spi);
-CLOCKSOURCE_OF_DECLARE(exynos4412, "samsung,exynos4412-mct", mct_init_ppi);
+TIMER_OF_DECLARE(exynos4210, "samsung,exynos4210-mct", mct_init_spi);
+TIMER_OF_DECLARE(exynos4412, "samsung,exynos4412-mct", mct_init_ppi);
diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c
index 738515b89073..3121e2d96c91 100644
--- a/drivers/clocksource/fsl_ftm_timer.c
+++ b/drivers/clocksource/fsl_ftm_timer.c
@@ -369,4 +369,4 @@ err:
 	kfree(priv);
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(flextimer, "fsl,ftm-timer", ftm_timer_init);
+TIMER_OF_DECLARE(flextimer, "fsl,ftm-timer", ftm_timer_init);
diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c
index 5b27fb9997c2..dfbd4f8051cb 100644
--- a/drivers/clocksource/h8300_timer16.c
+++ b/drivers/clocksource/h8300_timer16.c
@@ -187,5 +187,5 @@ free_clk:
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer",
+TIMER_OF_DECLARE(h8300_16bit, "renesas,16bit-timer",
 			   h8300_16timer_init);
diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c
index 804c489531d6..f6ffb0cef091 100644
--- a/drivers/clocksource/h8300_timer8.c
+++ b/drivers/clocksource/h8300_timer8.c
@@ -207,4 +207,4 @@ free_clk:
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init);
+TIMER_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init);
diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c
index 72e1cf2b3096..45a8d17dac1e 100644
--- a/drivers/clocksource/h8300_tpu.c
+++ b/drivers/clocksource/h8300_tpu.c
@@ -154,4 +154,4 @@ free_clk:
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init);
+TIMER_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init);
diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c
index 7c61226f4359..5d3d88e0fc8c 100644
--- a/drivers/clocksource/jcore-pit.c
+++ b/drivers/clocksource/jcore-pit.c
@@ -246,4 +246,4 @@ static int __init jcore_pit_init(struct device_node *node)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(jcore_pit, "jcore,pit", jcore_pit_init);
+TIMER_OF_DECLARE(jcore_pit, "jcore,pit", jcore_pit_init);
diff --git a/drivers/clocksource/meson6_timer.c b/drivers/clocksource/meson6_timer.c
index 39d21f693a33..92f20991a937 100644
--- a/drivers/clocksource/meson6_timer.c
+++ b/drivers/clocksource/meson6_timer.c
@@ -174,5 +174,5 @@ static int __init meson6_timer_init(struct device_node *node)
 					1, 0xfffe);
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(meson6, "amlogic,meson6-timer",
+TIMER_OF_DECLARE(meson6, "amlogic,meson6-timer",
 		       meson6_timer_init);
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index 3f52ee219923..e31e08326024 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -200,5 +200,5 @@ static int __init gic_clocksource_of_init(struct device_node *node)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(mips_gic_timer, "mti,gic-timer",
+TIMER_OF_DECLARE(mips_gic_timer, "mti,gic-timer",
 		       gic_clocksource_of_init);
diff --git a/drivers/clocksource/mps2-timer.c b/drivers/clocksource/mps2-timer.c
index 3e4431ed9aa9..aa4d63af8706 100644
--- a/drivers/clocksource/mps2-timer.c
+++ b/drivers/clocksource/mps2-timer.c
@@ -274,4 +274,4 @@ static int __init mps2_timer_init(struct device_node *np)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(mps2_timer, "arm,mps2-timer", mps2_timer_init);
+TIMER_OF_DECLARE(mps2_timer, "arm,mps2-timer", mps2_timer_init);
diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c
index 90659493c59c..f9b724fd9950 100644
--- a/drivers/clocksource/mtk_timer.c
+++ b/drivers/clocksource/mtk_timer.c
@@ -265,4 +265,4 @@ err_kzalloc:
 
 	return -EINVAL;
 }
-CLOCKSOURCE_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init);
+TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init);
diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c
index 99b77aff0839..a03434e9fe8f 100644
--- a/drivers/clocksource/mxs_timer.c
+++ b/drivers/clocksource/mxs_timer.c
@@ -293,4 +293,4 @@ static int __init mxs_timer_init(struct device_node *np)
 
 	return setup_irq(irq, &mxs_timer_irq);
 }
-CLOCKSOURCE_OF_DECLARE(mxs, "fsl,timrot", mxs_timer_init);
+TIMER_OF_DECLARE(mxs, "fsl,timrot", mxs_timer_init);
diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c
index 7d44de304f37..8e4ddb9420c6 100644
--- a/drivers/clocksource/nomadik-mtu.c
+++ b/drivers/clocksource/nomadik-mtu.c
@@ -284,5 +284,5 @@ static int __init nmdk_timer_of_init(struct device_node *node)
 
 	return nmdk_timer_init(base, irq, pclk, clk);
 }
-CLOCKSOURCE_OF_DECLARE(nomadik_mtu, "st,nomadik-mtu",
+TIMER_OF_DECLARE(nomadik_mtu, "st,nomadik-mtu",
 		       nmdk_timer_of_init);
diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c
index a10fa667325f..08cd6eaf3795 100644
--- a/drivers/clocksource/pxa_timer.c
+++ b/drivers/clocksource/pxa_timer.c
@@ -216,7 +216,7 @@ static int __init pxa_timer_dt_init(struct device_node *np)
 
 	return pxa_timer_common_init(irq, clk_get_rate(clk));
 }
-CLOCKSOURCE_OF_DECLARE(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init);
+TIMER_OF_DECLARE(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init);
 
 /*
  * Legacy timer init for non device-tree boards.
diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c
index ee358cdf4a07..89816f89ff3f 100644
--- a/drivers/clocksource/qcom-timer.c
+++ b/drivers/clocksource/qcom-timer.c
@@ -254,5 +254,5 @@ static int __init msm_dt_timer_init(struct device_node *np)
 
 	return msm_timer_init(freq, 32, irq, !!percpu_offset);
 }
-CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init);
-CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init);
+TIMER_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init);
+TIMER_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init);
diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c
index c76f57668fb2..6cffd7c6001a 100644
--- a/drivers/clocksource/renesas-ostm.c
+++ b/drivers/clocksource/renesas-ostm.c
@@ -262,4 +262,4 @@ err:
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(ostm, "renesas,ostm", ostm_init);
+TIMER_OF_DECLARE(ostm, "renesas,ostm", ostm_init);
diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c
index 49c02be50eca..c27f4c850d83 100644
--- a/drivers/clocksource/rockchip_timer.c
+++ b/drivers/clocksource/rockchip_timer.c
@@ -303,5 +303,5 @@ static int __init rk_timer_init(struct device_node *np)
 	return -EINVAL;
 }
 
-CLOCKSOURCE_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", rk_timer_init);
-CLOCKSOURCE_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", rk_timer_init);
+TIMER_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", rk_timer_init);
+TIMER_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", rk_timer_init);
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index a68e6538c809..21cd72c55a2d 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -466,7 +466,7 @@ static int __init s3c2410_pwm_clocksource_init(struct device_node *np)
 {
 	return samsung_pwm_alloc(np, &s3c24xx_variant);
 }
-CLOCKSOURCE_OF_DECLARE(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init);
+TIMER_OF_DECLARE(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init);
 
 static const struct samsung_pwm_variant s3c64xx_variant = {
 	.bits		= 32,
@@ -479,7 +479,7 @@ static int __init s3c64xx_pwm_clocksource_init(struct device_node *np)
 {
 	return samsung_pwm_alloc(np, &s3c64xx_variant);
 }
-CLOCKSOURCE_OF_DECLARE(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init);
+TIMER_OF_DECLARE(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init);
 
 static const struct samsung_pwm_variant s5p64x0_variant = {
 	.bits		= 32,
@@ -492,7 +492,7 @@ static int __init s5p64x0_pwm_clocksource_init(struct device_node *np)
 {
 	return samsung_pwm_alloc(np, &s5p64x0_variant);
 }
-CLOCKSOURCE_OF_DECLARE(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init);
+TIMER_OF_DECLARE(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init);
 
 static const struct samsung_pwm_variant s5p_variant = {
 	.bits		= 32,
@@ -505,5 +505,5 @@ static int __init s5p_pwm_clocksource_init(struct device_node *np)
 {
 	return samsung_pwm_alloc(np, &s5p_variant);
 }
-CLOCKSOURCE_OF_DECLARE(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init);
+TIMER_OF_DECLARE(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init);
 #endif
diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index 4452d5c8f304..3e4bc64ff176 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -233,5 +233,5 @@ static int __init sun4i_timer_init(struct device_node *node)
 
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer",
+TIMER_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer",
 		       sun4i_timer_init);
diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c
index 12fcef8cf2d3..c4e1c2e6046f 100644
--- a/drivers/clocksource/tango_xtal.c
+++ b/drivers/clocksource/tango_xtal.c
@@ -53,4 +53,4 @@ static int __init tango_clocksource_init(struct device_node *np)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(tango, "sigma,tick-counter", tango_clocksource_init);
+TIMER_OF_DECLARE(tango, "sigma,tick-counter", tango_clocksource_init);
diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index b9990b9c98c5..c337a8100a7b 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -237,7 +237,7 @@ static int __init tegra20_init_timer(struct device_node *np)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer);
+TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer);
 
 static int __init tegra20_init_rtc(struct device_node *np)
 {
@@ -261,4 +261,4 @@ static int __init tegra20_init_rtc(struct device_node *np)
 
 	return register_persistent_clock(NULL, tegra_read_persistent_clock64);
 }
-CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
+TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c
index aea4380129ea..edf1a46269f1 100644
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/time-armada-370-xp.c
@@ -351,7 +351,7 @@ static int __init armada_xp_timer_init(struct device_node *np)
 
 	return armada_370_xp_timer_common_init(np);
 }
-CLOCKSOURCE_OF_DECLARE(armada_xp, "marvell,armada-xp-timer",
+TIMER_OF_DECLARE(armada_xp, "marvell,armada-xp-timer",
 		       armada_xp_timer_init);
 
 static int __init armada_375_timer_init(struct device_node *np)
@@ -389,7 +389,7 @@ static int __init armada_375_timer_init(struct device_node *np)
 
 	return armada_370_xp_timer_common_init(np);
 }
-CLOCKSOURCE_OF_DECLARE(armada_375, "marvell,armada-375-timer",
+TIMER_OF_DECLARE(armada_375, "marvell,armada-375-timer",
 		       armada_375_timer_init);
 
 static int __init armada_370_timer_init(struct device_node *np)
@@ -412,5 +412,5 @@ static int __init armada_370_timer_init(struct device_node *np)
 
 	return armada_370_xp_timer_common_init(np);
 }
-CLOCKSOURCE_OF_DECLARE(armada_370, "marvell,armada-370-timer",
+TIMER_OF_DECLARE(armada_370, "marvell,armada-370-timer",
 		       armada_370_timer_init);
diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c
index ce0f97b4e5db..257e810ec1ad 100644
--- a/drivers/clocksource/time-efm32.c
+++ b/drivers/clocksource/time-efm32.c
@@ -283,5 +283,5 @@ static int __init efm32_timer_init(struct device_node *np)
 
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(efm32compat, "efm32,timer", efm32_timer_init);
-CLOCKSOURCE_OF_DECLARE(efm32, "energymicro,efm32-timer", efm32_timer_init);
+TIMER_OF_DECLARE(efm32compat, "efm32,timer", efm32_timer_init);
+TIMER_OF_DECLARE(efm32, "energymicro,efm32-timer", efm32_timer_init);
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c
index 9649cfdb9213..d51a62a79ef7 100644
--- a/drivers/clocksource/time-lpc32xx.c
+++ b/drivers/clocksource/time-lpc32xx.c
@@ -311,4 +311,4 @@ static int __init lpc32xx_timer_init(struct device_node *np)
 
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init);
+TIMER_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init);
diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c
index b9b97f630c4d..12202067fe4b 100644
--- a/drivers/clocksource/time-orion.c
+++ b/drivers/clocksource/time-orion.c
@@ -189,4 +189,4 @@ static int __init orion_timer_init(struct device_node *np)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init);
+TIMER_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init);
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index 3710e4d9dcba..a2dd85d0c1d7 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c
@@ -214,5 +214,5 @@ static int __init pistachio_clksrc_of_init(struct device_node *node)
 	sched_clock_register(pistachio_read_sched_clock, 32, rate);
 	return clocksource_register_hz(&pcs_gpt.cs, rate);
 }
-CLOCKSOURCE_OF_DECLARE(pistachio_gptimer, "img,pistachio-gptimer",
+TIMER_OF_DECLARE(pistachio_gptimer, "img,pistachio-gptimer",
 		       pistachio_clksrc_of_init);
diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c
index 50300eec4a39..62c4bbc55a7e 100644
--- a/drivers/clocksource/timer-atlas7.c
+++ b/drivers/clocksource/timer-atlas7.c
@@ -283,4 +283,4 @@ static int __init sirfsoc_of_timer_init(struct device_node *np)
 
 	return sirfsoc_atlas7_timer_init(np);
 }
-CLOCKSOURCE_OF_DECLARE(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init);
+TIMER_OF_DECLARE(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init);
diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
index cc112351dc70..ec8a4376f74f 100644
--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -255,5 +255,5 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit",
+TIMER_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit",
 		       at91sam926x_pit_dt_init);
diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
index be4ac7604136..d2e660f475af 100644
--- a/drivers/clocksource/timer-atmel-st.c
+++ b/drivers/clocksource/timer-atmel-st.c
@@ -260,5 +260,5 @@ static int __init atmel_st_timer_init(struct device_node *node)
 	/* register clocksource */
 	return clocksource_register_hz(&clk32k, sclk_rate);
 }
-CLOCKSOURCE_OF_DECLARE(atmel_st_timer, "atmel,at91rm9200-st",
+TIMER_OF_DECLARE(atmel_st_timer, "atmel,at91rm9200-st",
 		       atmel_st_timer_init);
diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c
index 94a161eb9cce..1e984a4d8ad0 100644
--- a/drivers/clocksource/timer-digicolor.c
+++ b/drivers/clocksource/timer-digicolor.c
@@ -203,5 +203,5 @@ static int __init digicolor_timer_init(struct device_node *node)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(conexant_digicolor, "cnxt,cx92755-timer",
+TIMER_OF_DECLARE(conexant_digicolor, "cnxt,cx92755-timer",
 		       digicolor_timer_init);
diff --git a/drivers/clocksource/timer-fttmr010.c b/drivers/clocksource/timer-fttmr010.c
index d96190e85c66..a21020c57df6 100644
--- a/drivers/clocksource/timer-fttmr010.c
+++ b/drivers/clocksource/timer-fttmr010.c
@@ -364,8 +364,8 @@ static __init int fttmr010_timer_init(struct device_node *np)
 	return fttmr010_common_init(np, false);
 }
 
-CLOCKSOURCE_OF_DECLARE(fttmr010, "faraday,fttmr010", fttmr010_timer_init);
-CLOCKSOURCE_OF_DECLARE(gemini, "cortina,gemini-timer", fttmr010_timer_init);
-CLOCKSOURCE_OF_DECLARE(moxart, "moxa,moxart-timer", fttmr010_timer_init);
-CLOCKSOURCE_OF_DECLARE(ast2400, "aspeed,ast2400-timer", aspeed_timer_init);
-CLOCKSOURCE_OF_DECLARE(ast2500, "aspeed,ast2500-timer", aspeed_timer_init);
+TIMER_OF_DECLARE(fttmr010, "faraday,fttmr010", fttmr010_timer_init);
+TIMER_OF_DECLARE(gemini, "cortina,gemini-timer", fttmr010_timer_init);
+TIMER_OF_DECLARE(moxart, "moxa,moxart-timer", fttmr010_timer_init);
+TIMER_OF_DECLARE(ast2400, "aspeed,ast2400-timer", aspeed_timer_init);
+TIMER_OF_DECLARE(ast2500, "aspeed,ast2500-timer", aspeed_timer_init);
diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c
index f595460bfc58..6ec6d79b237c 100644
--- a/drivers/clocksource/timer-imx-gpt.c
+++ b/drivers/clocksource/timer-imx-gpt.c
@@ -545,15 +545,15 @@ static int __init imx6dl_timer_init_dt(struct device_node *np)
 	return mxc_timer_init_dt(np, GPT_TYPE_IMX6DL);
 }
 
-CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt);
-CLOCKSOURCE_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt);
+TIMER_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt);
+TIMER_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt);
+TIMER_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt);
+TIMER_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt);
+TIMER_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt);
+TIMER_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt);
+TIMER_OF_DECLARE(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt);
+TIMER_OF_DECLARE(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt);
+TIMER_OF_DECLARE(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt);
+TIMER_OF_DECLARE(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt);
+TIMER_OF_DECLARE(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt);
+TIMER_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt);
diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index 04ad3066e190..2ff64d9d4fb3 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -232,5 +232,5 @@ static int __init integrator_ap_timer_init_of(struct device_node *node)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(integrator_ap_timer, "arm,integrator-timer",
+TIMER_OF_DECLARE(integrator_ap_timer, "arm,integrator-timer",
 		       integrator_ap_timer_init_of);
diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c
index ab68a47ab3b4..0eee03250cfc 100644
--- a/drivers/clocksource/timer-keystone.c
+++ b/drivers/clocksource/timer-keystone.c
@@ -226,5 +226,5 @@ err:
 	return error;
 }
 
-CLOCKSOURCE_OF_DECLARE(keystone_timer, "ti,keystone-timer",
+TIMER_OF_DECLARE(keystone_timer, "ti,keystone-timer",
 			   keystone_timer_init);
diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c
index e74ea1722ad3..7b6bb0df96ae 100644
--- a/drivers/clocksource/timer-nps.c
+++ b/drivers/clocksource/timer-nps.c
@@ -110,9 +110,9 @@ static int __init nps_setup_clocksource(struct device_node *node)
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clksrc, "ezchip,nps400-timer",
+TIMER_OF_DECLARE(ezchip_nps400_clksrc, "ezchip,nps400-timer",
 		       nps_setup_clocksource);
-CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clk_src, "ezchip,nps400-timer1",
+TIMER_OF_DECLARE(ezchip_nps400_clk_src, "ezchip,nps400-timer1",
 		       nps_setup_clocksource);
 
 #ifdef CONFIG_EZNPS_MTM_EXT
@@ -279,6 +279,6 @@ static int __init nps_setup_clockevent(struct device_node *node)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clk_evt, "ezchip,nps400-timer0",
+TIMER_OF_DECLARE(ezchip_nps400_clk_evt, "ezchip,nps400-timer0",
 		       nps_setup_clockevent);
 #endif /* CONFIG_EZNPS_MTM_EXT */
diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c
index d630bf417773..eed6feff8b5f 100644
--- a/drivers/clocksource/timer-oxnas-rps.c
+++ b/drivers/clocksource/timer-oxnas-rps.c
@@ -293,7 +293,7 @@ err_alloc:
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(ox810se_rps,
+TIMER_OF_DECLARE(ox810se_rps,
 		       "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init);
-CLOCKSOURCE_OF_DECLARE(ox820_rps,
+TIMER_OF_DECLARE(ox820_rps,
 		       "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init);
diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c
index b4122ed1accb..20ff33b698df 100644
--- a/drivers/clocksource/timer-prima2.c
+++ b/drivers/clocksource/timer-prima2.c
@@ -245,5 +245,5 @@ static int __init sirfsoc_prima2_timer_init(struct device_node *np)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(sirfsoc_prima2_timer,
+TIMER_OF_DECLARE(sirfsoc_prima2_timer,
 	"sirf,prima2-tick", sirfsoc_prima2_timer_init);
diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c
index 2d575a8c0939..3ac9dec9a038 100644
--- a/drivers/clocksource/timer-sp804.c
+++ b/drivers/clocksource/timer-sp804.c
@@ -287,7 +287,7 @@ err:
 	iounmap(base);
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init);
+TIMER_OF_DECLARE(sp804, "arm,sp804", sp804_of_init);
 
 static int __init integrator_cp_of_init(struct device_node *np)
 {
@@ -335,4 +335,4 @@ err:
 	iounmap(base);
 	return ret;
 }
-CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init);
+TIMER_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init);
diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c
index 1b2574c4fb97..174d1243ea93 100644
--- a/drivers/clocksource/timer-stm32.c
+++ b/drivers/clocksource/timer-stm32.c
@@ -187,4 +187,4 @@ err_clk_get:
 	return ret;
 }
 
-CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init);
+TIMER_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init);
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 2e9c830ae1cd..a4ebc8ff8f91 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -358,7 +358,7 @@ static int __init sun5i_timer_init(struct device_node *node)
 
 	return sun5i_setup_clockevent(node, timer_base, clk, irq);
 }
-CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
+TIMER_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
 			   sun5i_timer_init);
-CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer",
+TIMER_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer",
 			   sun5i_timer_init);
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index 624067712ef0..880a861ab3c8 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -124,5 +124,5 @@ static int __init ti_32k_timer_init(struct device_node *np)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(ti_32k_timer, "ti,omap-counter32k",
+TIMER_OF_DECLARE(ti_32k_timer, "ti,omap-counter32k",
 		ti_32k_timer_init);
diff --git a/drivers/clocksource/timer-u300.c b/drivers/clocksource/timer-u300.c
index 704e40c6f151..be34b116d4d2 100644
--- a/drivers/clocksource/timer-u300.c
+++ b/drivers/clocksource/timer-u300.c
@@ -458,5 +458,5 @@ static int __init u300_timer_init_of(struct device_node *np)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(u300_timer, "stericsson,u300-apptimer",
+TIMER_OF_DECLARE(u300_timer, "stericsson,u300-apptimer",
 		       u300_timer_init_of);
diff --git a/drivers/clocksource/versatile.c b/drivers/clocksource/versatile.c
index 220b490a8142..39725d38aede 100644
--- a/drivers/clocksource/versatile.c
+++ b/drivers/clocksource/versatile.c
@@ -38,7 +38,7 @@ static int __init versatile_sched_clock_init(struct device_node *node)
 
 	return 0;
 }
-CLOCKSOURCE_OF_DECLARE(vexpress, "arm,vexpress-sysreg",
+TIMER_OF_DECLARE(vexpress, "arm,vexpress-sysreg",
 		       versatile_sched_clock_init);
-CLOCKSOURCE_OF_DECLARE(versatile, "arm,versatile-sysreg",
+TIMER_OF_DECLARE(versatile, "arm,versatile-sysreg",
 		       versatile_sched_clock_init);
diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c
index e0849e20a307..0f92089ec08c 100644
--- a/drivers/clocksource/vf_pit_timer.c
+++ b/drivers/clocksource/vf_pit_timer.c
@@ -201,4 +201,4 @@ static int __init pit_timer_init(struct device_node *np)
 
 	return pit_clockevent_init(clk_rate, irq);
 }
-CLOCKSOURCE_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init);
+TIMER_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init);
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
index d02b51075ad1..e0f7489cfc8e 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/vt8500_timer.c
@@ -165,4 +165,4 @@ static int __init vt8500_timer_init(struct device_node *np)
 	return 0;
 }
 
-CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init);
+TIMER_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init);
diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c
index 9a53f5ef6157..a6a0338eea77 100644
--- a/drivers/clocksource/zevio-timer.c
+++ b/drivers/clocksource/zevio-timer.c
@@ -215,4 +215,4 @@ static int __init zevio_timer_init(struct device_node *node)
 	return zevio_timer_add(node);
 }
 
-CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_init);
+TIMER_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_init);
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f2b10d9ebd04..a86b65f0a246 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -249,7 +249,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *,
 
 extern int clocksource_i8253_init(void);
 
-#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \
+#define TIMER_OF_DECLARE(name, compat, fn) \
 	OF_DECLARE_1_RET(clksrc, name, compat, fn)
 
 #ifdef CONFIG_CLKSRC_PROBE
-- 
cgit v1.2.3


From ba5d08c0ea785d5710c5a1e7dc3182b7124d63c0 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 26 May 2017 17:40:46 +0200
Subject: clocksource/drivers: Rename clocksource_probe to timer_probe

The function name is now renamed to 'timer_probe' for consistency with
the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arc/kernel/setup.c                  |  2 +-
 arch/arm/kernel/time.c                   |  2 +-
 arch/arm/mach-mediatek/mediatek.c        |  2 +-
 arch/arm/mach-omap2/timer.c              | 10 +++++-----
 arch/arm/mach-rockchip/rockchip.c        |  2 +-
 arch/arm/mach-shmobile/setup-rcar-gen2.c |  2 +-
 arch/arm/mach-spear/spear13xx.c          |  2 +-
 arch/arm/mach-sunxi/sunxi.c              |  2 +-
 arch/arm/mach-u300/core.c                |  2 +-
 arch/arm/mach-zynq/common.c              |  2 +-
 arch/arm64/kernel/time.c                 |  2 +-
 arch/h8300/kernel/setup.c                |  2 +-
 arch/microblaze/kernel/setup.c           |  2 +-
 arch/mips/generic/init.c                 |  2 +-
 arch/mips/mti-malta/malta-time.c         |  2 +-
 arch/mips/pic32/pic32mzda/time.c         |  2 +-
 arch/mips/pistachio/time.c               |  2 +-
 arch/mips/ralink/clk.c                   |  2 +-
 arch/mips/ralink/timer-gic.c             |  2 +-
 arch/mips/xilfpga/time.c                 |  2 +-
 arch/nios2/kernel/time.c                 |  2 +-
 arch/sh/boards/of-generic.c              |  2 +-
 arch/xtensa/kernel/time.c                |  2 +-
 drivers/clocksource/clksrc-probe.c       |  2 +-
 include/linux/clocksource.h              |  4 ++--
 25 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index fc8211f338ad..666613fde91d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -470,7 +470,7 @@ void __init setup_arch(char **cmdline_p)
 void __init time_init(void)
 {
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 }
 
 static int __init customize_machine(void)
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 97b22fa7cb3a..629f8e9981f1 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -120,6 +120,6 @@ void __init time_init(void)
 #ifdef CONFIG_COMMON_CLK
 		of_clk_init(NULL);
 #endif
-		clocksource_probe();
+		timer_probe();
 	}
 }
diff --git a/arch/arm/mach-mediatek/mediatek.c b/arch/arm/mach-mediatek/mediatek.c
index a6e3c98b95ed..c3cf215773b2 100644
--- a/arch/arm/mach-mediatek/mediatek.c
+++ b/arch/arm/mach-mediatek/mediatek.c
@@ -41,7 +41,7 @@ static void __init mediatek_timer_init(void)
 	}
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 };
 
 static const char * const mediatek_board_dt_compat[] = {
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 07dd692c4737..ae4bb9fdc483 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -497,7 +497,7 @@ void __init omap_init_time(void)
 	__omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon",
 			2, "timer_sys_ck", NULL, false);
 
-	clocksource_probe();
+	timer_probe();
 }
 
 #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX)
@@ -506,7 +506,7 @@ void __init omap3_secure_sync32k_timer_init(void)
 	__omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure",
 			2, "timer_sys_ck", NULL, false);
 
-	clocksource_probe();
+	timer_probe();
 }
 #endif /* CONFIG_ARCH_OMAP3 */
 
@@ -517,7 +517,7 @@ void __init omap3_gptimer_timer_init(void)
 	__omap_sync32k_timer_init(2, "timer_sys_ck", NULL,
 			1, "timer_sys_ck", "ti,timer-alwon", true);
 	if (of_have_populated_dt())
-		clocksource_probe();
+		timer_probe();
 }
 #endif
 
@@ -532,7 +532,7 @@ static void __init omap4_sync32k_timer_init(void)
 void __init omap4_local_timer_init(void)
 {
 	omap4_sync32k_timer_init();
-	clocksource_probe();
+	timer_probe();
 }
 #endif
 
@@ -656,7 +656,7 @@ void __init omap5_realtime_timer_init(void)
 	omap4_sync32k_timer_init();
 	realtime_counter_init();
 
-	clocksource_probe();
+	timer_probe();
 }
 #endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */
 
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index ef0500a4c8ad..5ab834ebcb49 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -55,7 +55,7 @@ static void __init rockchip_timer_init(void)
 	}
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 }
 
 static void __init rockchip_dt_init(void)
diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c
index 52d466b75973..a6e74f481dea 100644
--- a/arch/arm/mach-shmobile/setup-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c
@@ -113,7 +113,7 @@ void __init rcar_gen2_timer_init(void)
 #endif /* CONFIG_ARM_ARCH_TIMER */
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 }
 
 struct memory_reserve_config {
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index ca2f6a82a414..31c43cabf362 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -124,5 +124,5 @@ void __init spear13xx_timer_init(void)
 	clk_put(pclk);
 
 	spear_setup_of_timer();
-	clocksource_probe();
+	timer_probe();
 }
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index f44e3acb5c90..7ab353fb25f2 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -42,7 +42,7 @@ static void __init sun6i_timer_init(void)
 	of_clk_init(NULL);
 	if (IS_ENABLED(CONFIG_RESET_CONTROLLER))
 		sun6i_reset_init();
-	clocksource_probe();
+	timer_probe();
 }
 
 DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family")
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index a4910ea6811a..048f15e8c669 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -407,7 +407,7 @@ static const char * u300_board_compat[] = {
 DT_MACHINE_START(U300_DT, "U300 S335/B335 (Device Tree)")
 	.map_io		= u300_map_io,
 	.init_irq	= u300_init_irq_dt,
-	.init_time	= clocksource_probe,
+	.init_time	= timer_probe,
 	.init_machine	= u300_init_machine_dt,
 	.restart	= u300_restart,
 	.dt_compat      = u300_board_compat,
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index ed118648313f..6aba9ebf8041 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -150,7 +150,7 @@ static void __init zynq_timer_init(void)
 {
 	zynq_clock_init();
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 }
 
 static struct map_desc zynq_cortex_a9_scu_map __initdata = {
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 59779699a1a4..da33c90248e9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -70,7 +70,7 @@ void __init time_init(void)
 	u32 arch_timer_rate;
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 
 	tick_setup_hrtimer_broadcast();
 
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index c8c25a4e9e48..6be15d634650 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -246,5 +246,5 @@ void __init calibrate_delay(void)
 void __init time_init(void)
 {
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 }
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index f31ebb5dc26c..be98ffe28ca8 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -192,7 +192,7 @@ void __init time_init(void)
 {
 	of_clk_init(NULL);
 	setup_cpuinfo_clk();
-	clocksource_probe();
+	timer_probe();
 }
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index 4af619215410..1231b5a17b37 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -161,7 +161,7 @@ void __init plat_time_init(void)
 		}
 	}
 
-	clocksource_probe();
+	timer_probe();
 }
 
 void __init arch_init_irq(void)
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 289edcfadd7c..cea4ec909806 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -265,7 +265,7 @@ void __init plat_time_init(void)
 		       (freq%1000000)*100/1000000);
 #ifdef CONFIG_CLKSRC_MIPS_GIC
 		update_gic_frequency_dt();
-		clocksource_probe();
+		timer_probe();
 #endif
 	}
 #endif
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
index 62a0a78b6c64..1894e50939b5 100644
--- a/arch/mips/pic32/pic32mzda/time.c
+++ b/arch/mips/pic32/pic32mzda/time.c
@@ -64,5 +64,5 @@ void __init plat_time_init(void)
 	pr_info("CPU Clock: %ldMHz\n", rate / 1000000);
 	mips_hpt_frequency = rate / 2;
 
-	clocksource_probe();
+	timer_probe();
 }
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
index 1022201b2beb..17a0f1dec05b 100644
--- a/arch/mips/pistachio/time.c
+++ b/arch/mips/pistachio/time.c
@@ -39,7 +39,7 @@ void __init plat_time_init(void)
 	struct clk *clk;
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 
 	np = of_get_cpu_node(0, NULL);
 	if (!np) {
diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c
index df795885eace..eb1c61917eb7 100644
--- a/arch/mips/ralink/clk.c
+++ b/arch/mips/ralink/clk.c
@@ -82,5 +82,5 @@ void __init plat_time_init(void)
 	pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
 	mips_hpt_frequency = clk_get_rate(clk) / 2;
 	clk_put(clk);
-	clocksource_probe();
+	timer_probe();
 }
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
index 069771dbec42..b5f07d21fcf2 100644
--- a/arch/mips/ralink/timer-gic.c
+++ b/arch/mips/ralink/timer-gic.c
@@ -20,5 +20,5 @@ void __init plat_time_init(void)
 	ralink_of_remap();
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 }
diff --git a/arch/mips/xilfpga/time.c b/arch/mips/xilfpga/time.c
index cbb3fca7b6fa..36f3f1870ee2 100644
--- a/arch/mips/xilfpga/time.c
+++ b/arch/mips/xilfpga/time.c
@@ -22,7 +22,7 @@ void __init plat_time_init(void)
 	struct clk *clk;
 
 	of_clk_init(NULL);
-	clocksource_probe();
+	timer_probe();
 
 	np = of_get_cpu_node(0, NULL);
 	if (!np) {
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 2954b6617378..645129aaa9a0 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -350,7 +350,7 @@ void __init time_init(void)
 	if (count < 2)
 		panic("%d timer is found, it needs 2 timers in system\n", count);
 
-	clocksource_probe();
+	timer_probe();
 }
 
 TIMER_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init);
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 1fb6d5714bae..4feb7c86f4ac 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -119,7 +119,7 @@ static void __init sh_of_mem_reserve(void)
 static void __init sh_of_time_init(void)
 {
 	pr_info("SH generic board support: scanning for clocksource devices\n");
-	clocksource_probe();
+	timer_probe();
 }
 
 static void __init sh_of_setup(char **cmdline_p)
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 668c1056f9e4..fd524a54d2ab 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -187,7 +187,7 @@ void __init time_init(void)
 	local_timer_setup(0);
 	setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction);
 	sched_clock_register(ccount_sched_clock_read, 32, ccount_freq);
-	clocksource_probe();
+	timer_probe();
 }
 
 /*
diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c
index ac701ffb8d59..5d549c2a65fe 100644
--- a/drivers/clocksource/clksrc-probe.c
+++ b/drivers/clocksource/clksrc-probe.c
@@ -24,7 +24,7 @@ extern struct of_device_id __clksrc_of_table[];
 static const struct of_device_id __clksrc_of_table_sentinel
 	__used __section(__clksrc_of_table_end);
 
-void __init clocksource_probe(void)
+void __init timer_probe(void)
 {
 	struct device_node *np;
 	const struct of_device_id *match;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index a86b65f0a246..010bb9f60db2 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -253,9 +253,9 @@ extern int clocksource_i8253_init(void);
 	OF_DECLARE_1_RET(clksrc, name, compat, fn)
 
 #ifdef CONFIG_CLKSRC_PROBE
-extern void clocksource_probe(void);
+extern void timer_probe(void);
 #else
-static inline void clocksource_probe(void) {}
+static inline void timer_probe(void) {}
 #endif
 
 #define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn)		\
-- 
cgit v1.2.3


From 77d62f532282010d5859a99819d6a0c233bfcfff Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 26 May 2017 17:42:25 +0200
Subject: clocksource/drivers: Rename CLOCKSOURCE_ACPI_DECLARE to
 TIMER_ACPI_DECLARE

The macro name is now renamed to 'TIMER_ACPI_DECLARE' for consistency
with the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/clocksource/arm_arch_timer.c | 2 +-
 include/linux/clocksource.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index bc60cd78698e..a89d41cff915 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1516,5 +1516,5 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
 
 	return arch_timer_common_init();
 }
-CLOCKSOURCE_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
+TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
 #endif
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 010bb9f60db2..e43f37f9a1b6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -258,7 +258,7 @@ extern void timer_probe(void);
 static inline void timer_probe(void) {}
 #endif
 
-#define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn)		\
+#define TIMER_ACPI_DECLARE(name, table_id, fn)		\
 	ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn)
 
 #endif /* _LINUX_CLOCKSOURCE_H */
-- 
cgit v1.2.3


From 2fcc112af37fa88f8da077d6dd3bb8e38e75adb1 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 26 May 2017 18:33:27 +0200
Subject: clocksource/drivers: Rename clksrc table to timer

The table name is now renamed to 'timer' for consistency with
the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/clocksource/clksrc-probe.c | 18 +++++++++---------
 include/asm-generic/vmlinux.lds.h  |  7 ++++---
 include/linux/clocksource.h        |  4 ++--
 3 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c
index 5d549c2a65fe..da81e5de74fe 100644
--- a/drivers/clocksource/clksrc-probe.c
+++ b/drivers/clocksource/clksrc-probe.c
@@ -19,20 +19,20 @@
 #include <linux/of.h>
 #include <linux/clocksource.h>
 
-extern struct of_device_id __clksrc_of_table[];
+extern struct of_device_id __timer_of_table[];
 
-static const struct of_device_id __clksrc_of_table_sentinel
-	__used __section(__clksrc_of_table_end);
+static const struct of_device_id __timer_of_table_sentinel
+	__used __section(__timer_of_table_end);
 
 void __init timer_probe(void)
 {
 	struct device_node *np;
 	const struct of_device_id *match;
 	of_init_fn_1_ret init_func_ret;
-	unsigned clocksources = 0;
+	unsigned timers = 0;
 	int ret;
 
-	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
+	for_each_matching_node_and_match(np, __timer_of_table, &match) {
 		if (!of_device_is_available(np))
 			continue;
 
@@ -45,11 +45,11 @@ void __init timer_probe(void)
 			continue;
 		}
 
-		clocksources++;
+		timers++;
 	}
 
-	clocksources += acpi_probe_device_table(clksrc);
+	timers += acpi_probe_device_table(timer);
 
-	if (!clocksources)
-		pr_crit("%s: no matching clocksources found\n", __func__);
+	if (!timers)
+		pr_crit("%s: no matching timers found\n", __func__);
 }
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 401d324bcede..c6a4ef50bbe6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -172,7 +172,7 @@
 	KEEP(*(__##name##_of_table))					\
 	KEEP(*(__##name##_of_table_end))
 
-#define CLKSRC_OF_TABLES()	OF_TABLE(CONFIG_CLKSRC_OF, clksrc)
+#define TIMER_OF_TABLES()	OF_TABLE(CONFIG_CLKSRC_OF, timer)
 #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
 #define CLK_OF_TABLES()		OF_TABLE(CONFIG_COMMON_CLK, clk)
 #define IOMMU_OF_TABLES()	OF_TABLE(CONFIG_OF_IOMMU, iommu)
@@ -556,14 +556,15 @@
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
-	CLKSRC_OF_TABLES()						\
+	TIMER_OF_TABLES()						\
 	IOMMU_OF_TABLES()						\
 	CPU_METHOD_OF_TABLES()						\
 	CPUIDLE_METHOD_OF_TABLES()					\
 	KERNEL_DTB()							\
 	IRQCHIP_OF_MATCH_TABLE()					\
 	ACPI_PROBE_TABLE(irqchip)					\
-	ACPI_PROBE_TABLE(clksrc)					\
+	ACPI_PROBE_TABLE(timer)						\
+	ACPI_PROBE_TABLE(iort)						\
 	EARLYCON_TABLE()
 
 #define INIT_TEXT							\
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index e43f37f9a1b6..7cd38b21cbd3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -250,7 +250,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *,
 extern int clocksource_i8253_init(void);
 
 #define TIMER_OF_DECLARE(name, compat, fn) \
-	OF_DECLARE_1_RET(clksrc, name, compat, fn)
+	OF_DECLARE_1_RET(timer, name, compat, fn)
 
 #ifdef CONFIG_CLKSRC_PROBE
 extern void timer_probe(void);
@@ -259,6 +259,6 @@ static inline void timer_probe(void) {}
 #endif
 
 #define TIMER_ACPI_DECLARE(name, table_id, fn)		\
-	ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn)
+	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
 
 #endif /* _LINUX_CLOCKSOURCE_H */
-- 
cgit v1.2.3


From bb0eb050a577a866cb47c2dc37596f1207f4c2d9 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 26 May 2017 19:34:11 +0200
Subject: clocksource/drivers: Rename CLKSRC_OF to TIMER_OF

The config option name is now renamed to 'TIMER_OF' for consistency with
the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/Kconfig                        | 10 +++---
 arch/arm/mach-bcm/Kconfig               |  2 +-
 arch/arm/mach-clps711x/Kconfig          |  2 +-
 arch/arm/mach-s3c24xx/Kconfig           |  2 +-
 arch/arm/mach-s3c64xx/Kconfig           |  2 +-
 arch/arm64/Kconfig.platforms            |  4 +--
 arch/h8300/Kconfig                      |  2 +-
 arch/microblaze/Kconfig                 |  2 +-
 arch/mips/ralink/Kconfig                |  2 +-
 arch/nios2/Kconfig                      |  2 +-
 arch/sh/boards/Kconfig                  |  2 +-
 drivers/clocksource/Kconfig             | 60 ++++++++++++++++-----------------
 drivers/clocksource/Makefile            |  2 +-
 drivers/clocksource/clksrc-probe.c      | 55 ------------------------------
 drivers/clocksource/clps711x-timer.c    |  2 +-
 drivers/clocksource/samsung_pwm_timer.c |  2 +-
 drivers/clocksource/timer-probe.c       | 55 ++++++++++++++++++++++++++++++
 include/asm-generic/vmlinux.lds.h       |  2 +-
 include/linux/clocksource.h             |  2 +-
 19 files changed, 106 insertions(+), 106 deletions(-)
 delete mode 100644 drivers/clocksource/clksrc-probe.c
 create mode 100644 drivers/clocksource/timer-probe.c

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f15838..1c7e165b0a93 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -337,7 +337,7 @@ config ARCH_MULTIPLATFORM
 	select ARM_HAS_SG_CHAIN
 	select ARM_PATCH_PHYS_VIRT
 	select AUTO_ZRELADDR
-	select CLKSRC_OF
+	select TIMER_OF
 	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
 	select MIGHT_HAVE_PCI
@@ -351,7 +351,7 @@ config ARM_SINGLE_ARMV7M
 	depends on !MMU
 	select ARM_NVIC
 	select AUTO_ZRELADDR
-	select CLKSRC_OF
+	select TIMER_OF
 	select COMMON_CLK
 	select CPU_V7M
 	select GENERIC_CLOCKEVENTS
@@ -532,7 +532,7 @@ config ARCH_PXA
 	select CLKDEV_LOOKUP
 	select CLKSRC_PXA
 	select CLKSRC_MMIO
-	select CLKSRC_OF
+	select TIMER_OF
 	select CPU_XSCALE if !CPU_XSC3
 	select GENERIC_CLOCKEVENTS
 	select GPIO_PXA
@@ -571,7 +571,7 @@ config ARCH_SA1100
 	select CLKDEV_LOOKUP
 	select CLKSRC_MMIO
 	select CLKSRC_PXA
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	select CPU_FREQ
 	select CPU_SA1100
 	select GENERIC_CLOCKEVENTS
@@ -1357,7 +1357,7 @@ config HAVE_ARM_ARCH_TIMER
 
 config HAVE_ARM_TWD
 	bool
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	help
 	  This options enables support for the ARM timer and watchdog unit
 
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index f9389c5910e7..f23a23934162 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -150,7 +150,7 @@ config ARCH_BCM2835
 	select ARM_ERRATA_411920 if ARCH_MULTI_V6
 	select ARM_TIMER_SP804
 	select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7
-	select CLKSRC_OF
+	select TIMER_OF
 	select BCM2835_TIMER
 	select PINCTRL
 	select PINCTRL_BCM2835
diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig
index 61284b9389cf..f385b1fcafef 100644
--- a/arch/arm/mach-clps711x/Kconfig
+++ b/arch/arm/mach-clps711x/Kconfig
@@ -2,7 +2,7 @@ menuconfig ARCH_CLPS711X
 	bool "Cirrus Logic EP721x/EP731x-based"
 	depends on ARCH_MULTI_V4T
 	select AUTO_ZRELADDR
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLPS711X_TIMER
 	select COMMON_CLK
 	select CPU_ARM720T
diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig
index 4b1690acb6a5..f07da82ebfea 100644
--- a/arch/arm/mach-s3c24xx/Kconfig
+++ b/arch/arm/mach-s3c24xx/Kconfig
@@ -394,7 +394,7 @@ config MACH_SMDK2416
 
 config MACH_S3C2416_DT
 	bool "Samsung S3C2416 machine using devicetree"
-	select CLKSRC_OF
+	select TIMER_OF
 	select USE_OF
 	select PINCTRL
 	select PINCTRL_S3C24XX
diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig
index 459214fa20b4..71a49343d711 100644
--- a/arch/arm/mach-s3c64xx/Kconfig
+++ b/arch/arm/mach-s3c64xx/Kconfig
@@ -336,7 +336,7 @@ config MACH_WLF_CRAGG_6410
 
 config MACH_S3C64XX_DT
 	bool "Samsung S3C6400/S3C6410 machine using Device Tree"
-	select CLKSRC_OF
+	select TIMER_OF
 	select CPU_S3C6400
 	select CPU_S3C6410
 	select PINCTRL
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 73272f43ca01..9ed0a659046b 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -18,7 +18,7 @@ config ARCH_ALPINE
 
 config ARCH_BCM2835
 	bool "Broadcom BCM2835 family"
-	select CLKSRC_OF
+	select TIMER_OF
 	select GPIOLIB
 	select PINCTRL
 	select PINCTRL_BCM2835
@@ -178,7 +178,7 @@ config ARCH_TEGRA
 	select ARCH_HAS_RESET_CONTROLLER
 	select CLKDEV_LOOKUP
 	select CLKSRC_MMIO
-	select CLKSRC_OF
+	select TIMER_OF
 	select GENERIC_CLOCKEVENTS
 	select GPIOLIB
 	select PINCTRL
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 3ae852507e57..6e3d36f37a02 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -15,7 +15,7 @@ config H8300
 	select OF_IRQ
 	select OF_EARLY_FLATTREE
 	select HAVE_MEMBLOCK
-	select CLKSRC_OF
+	select TIMER_OF
 	select H8300_TMR8
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 85885a501dce..8e47121b8b8b 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -4,7 +4,7 @@ config MICROBLAZE
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLONE_BACKWARDS3
 	select COMMON_CLK
 	select GENERIC_ATOMIC64
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index 9825dee10bc1..710b04cf4851 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -4,7 +4,7 @@ config CLKEVT_RT3352
 	bool
 	depends on SOC_RT305X || SOC_MT7620
 	default y
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 
 config RALINK_ILL_ACC
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index a72d5f0de692..c587764b9c5a 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -1,6 +1,6 @@
 config NIOS2
 	def_bool y
-	select CLKSRC_OF
+	select TIMER_OF
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CPU_DEVICES
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 4e21949593cf..3554fcaa023b 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -10,7 +10,7 @@ config SH_DEVICE_TREE
 	bool "Board Described by Device Tree"
 	select OF
 	select OF_EARLY_FLATTREE
-	select CLKSRC_OF
+	select TIMER_OF
 	select COMMON_CLK
 	select GENERIC_CALIBRATE_DELAY
 	help
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 623fcc69e9a1..90f062ec1779 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -1,15 +1,15 @@
 menu "Clock Source drivers"
 	depends on !ARCH_USES_GETTIMEOFFSET
 
-config CLKSRC_OF
+config TIMER_OF
 	bool
-	select CLKSRC_PROBE
+	select TIMER_PROBE
 
 config CLKSRC_ACPI
 	bool
-	select CLKSRC_PROBE
+	select TIMER_PROBE
 
-config CLKSRC_PROBE
+config TIMER_PROBE
 	bool
 
 config CLKSRC_I8253
@@ -58,14 +58,14 @@ config DW_APB_TIMER
 config DW_APB_TIMER_OF
 	bool
 	select DW_APB_TIMER
-	select CLKSRC_OF
+	select TIMER_OF
 
 config FTTMR010_TIMER
 	bool "Faraday Technology timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
 	depends on HAS_IOMEM
 	select CLKSRC_MMIO
-	select CLKSRC_OF
+	select TIMER_OF
 	select MFD_SYSCON
 	help
 	  Enables support for the Faraday Technology timer block
@@ -74,7 +74,7 @@ config FTTMR010_TIMER
 config ROCKCHIP_TIMER
 	bool "Rockchip timer driver" if COMPILE_TEST
 	depends on ARM || ARM64
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 	help
 	  Enables the support for the rockchip timer driver.
@@ -82,7 +82,7 @@ config ROCKCHIP_TIMER
 config ARMADA_370_XP_TIMER
 	bool "Armada 370 and XP timer driver" if COMPILE_TEST
 	depends on ARM
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 	help
 	  Enables the support for the Armada 370 and XP timer driver.
@@ -97,7 +97,7 @@ config MESON6_TIMER
 config ORION_TIMER
 	bool "Orion timer driver" if COMPILE_TEST
 	depends on ARM
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 	help
 	  Enables the support for the Orion timer driver
@@ -141,7 +141,7 @@ config ASM9260_TIMER
 	bool "ASM9260 timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
 	select CLKSRC_MMIO
-	select CLKSRC_OF
+	select TIMER_OF
 	help
 	  Enables support for the ASM9260 timer.
 
@@ -247,21 +247,21 @@ config CLKSRC_LPC32XX
 	depends on GENERIC_CLOCKEVENTS && HAS_IOMEM
 	depends on ARM
 	select CLKSRC_MMIO
-	select CLKSRC_OF
+	select TIMER_OF
 	help
 	  Support for the LPC32XX clocksource.
 
 config CLKSRC_PISTACHIO
 	bool "Clocksource for Pistachio SoC" if COMPILE_TEST
 	depends on HAS_IOMEM
-	select CLKSRC_OF
+	select TIMER_OF
 	help
 	  Enables the clocksource for the Pistachio SoC.
 
 config CLKSRC_TI_32K
 	bool "Texas Instruments 32.768 Hz Clocksource" if COMPILE_TEST
 	depends on GENERIC_SCHED_CLOCK
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	help
 	  This option enables support for Texas Instruments 32.768 Hz clocksource
 	  available on many OMAP-like platforms.
@@ -270,7 +270,7 @@ config CLKSRC_NPS
 	bool "NPS400 clocksource driver" if COMPILE_TEST
 	depends on !PHYS_ADDR_T_64BIT
 	select CLKSRC_MMIO
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	help
 	  NPS400 clocksource support.
 	  Got 64 bit counter with update rate up to 1000MHz.
@@ -285,12 +285,12 @@ config CLKSRC_MPS2
 	bool "Clocksource for MPS2 SoCs" if COMPILE_TEST
 	depends on GENERIC_SCHED_CLOCK
 	select CLKSRC_MMIO
-	select CLKSRC_OF
+	select TIMER_OF
 
 config ARC_TIMERS
 	bool "Support for 32-bit TIMERn counters in ARC Cores" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
-	select CLKSRC_OF
+	select TIMER_OF
 	help
 	  These are legacy 32-bit TIMER0 and TIMER1 counters found on all ARC cores
 	  (ARC700 as well as ARC HS38).
@@ -300,7 +300,7 @@ config ARC_TIMERS_64BIT
 	bool "Support for 64-bit counters in ARC HS38 cores" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
 	depends on ARC_TIMERS
-	select CLKSRC_OF
+	select TIMER_OF
 	help
 	  This enables 2 different 64-bit timers: RTC (for UP) and GFRC (for SMP)
 	  RTC is implemented inside the core, while GFRC sits outside the core in
@@ -309,7 +309,7 @@ config ARC_TIMERS_64BIT
 
 config ARM_ARCH_TIMER
 	bool
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	select CLKSRC_ACPI if ACPI
 
 config ARM_ARCH_TIMER_EVTSTREAM
@@ -367,7 +367,7 @@ config ARM64_ERRATUM_858921
 
 config ARM_GLOBAL_TIMER
 	bool "Support for the ARM global timer" if COMPILE_TEST
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	depends on ARM
 	help
 	  This options enables support for the ARM global timer unit
@@ -376,7 +376,7 @@ config ARM_TIMER_SP804
 	bool "Support for Dual Timer SP804 module"
 	depends on GENERIC_SCHED_CLOCK && CLKDEV_LOOKUP
 	select CLKSRC_MMIO
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 
 config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
 	bool
@@ -387,19 +387,19 @@ config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
 
 config ARMV7M_SYSTICK
 	bool "Support for the ARMv7M system time" if COMPILE_TEST
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	select CLKSRC_MMIO
 	help
 	  This options enables support for the ARMv7M system timer unit
 
 config ATMEL_PIT
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	def_bool SOC_AT91SAM9 || SOC_SAMA5
 
 config ATMEL_ST
 	bool "Atmel ST timer support" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
-	select CLKSRC_OF
+	select TIMER_OF
 	select MFD_SYSCON
 	help
 	  Support for the Atmel ST timer.
@@ -442,7 +442,7 @@ config VF_PIT_TIMER
 config OXNAS_RPS_TIMER
 	bool "Oxford Semiconductor OXNAS RPS Timers driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 	help
 	  This enables support for the Oxford Semiconductor OXNAS RPS timers.
@@ -453,7 +453,7 @@ config SYS_SUPPORTS_SH_CMT
 config MTK_TIMER
 	bool "Mediatek timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS && HAS_IOMEM
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 	help
 	  Support for Mediatek timer driver.
@@ -526,7 +526,7 @@ config EM_TIMER_STI
 config CLKSRC_QCOM
 	bool "Qualcomm MSM timer" if COMPILE_TEST
 	depends on ARM
-	select CLKSRC_OF
+	select TIMER_OF
 	help
 	  This enables the clocksource and the per CPU clockevent driver for the
 	  Qualcomm SoCs.
@@ -534,7 +534,7 @@ config CLKSRC_QCOM
 config CLKSRC_VERSATILE
 	bool "ARM Versatile (Express) reference platforms clock source" if COMPILE_TEST
 	depends on GENERIC_SCHED_CLOCK && !ARCH_USES_GETTIMEOFFSET
-	select CLKSRC_OF
+	select TIMER_OF
 	default y if MFD_VEXPRESS_SYSREG
 	help
 	  This option enables clock source based on free running
@@ -545,12 +545,12 @@ config CLKSRC_VERSATILE
 config CLKSRC_MIPS_GIC
 	bool
 	depends on MIPS_GIC
-	select CLKSRC_OF
+	select TIMER_OF
 
 config CLKSRC_TANGO_XTAL
 	bool "Clocksource for Tango SoC" if COMPILE_TEST
 	depends on ARM
-	select CLKSRC_OF
+	select TIMER_OF
 	select CLKSRC_MMIO
 	help
 	  This enables the clocksource for Tango SoC
@@ -591,7 +591,7 @@ config CLKSRC_IMX_GPT
 
 config CLKSRC_ST_LPC
 	bool "Low power clocksource found in the LPC" if COMPILE_TEST
-	select CLKSRC_OF if OF
+	select TIMER_OF if OF
 	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index cad713c53e7f..ec559212edf6 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_CLKSRC_PROBE)	+= clksrc-probe.o
+obj-$(CONFIG_TIMER_PROBE)	+= timer-probe.o
 obj-$(CONFIG_ATMEL_PIT)		+= timer-atmel-pit.o
 obj-$(CONFIG_ATMEL_ST)		+= timer-atmel-st.o
 obj-$(CONFIG_ATMEL_TCB_CLKSRC)	+= tcb_clksrc.o
diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c
deleted file mode 100644
index da81e5de74fe..000000000000
--- a/drivers/clocksource/clksrc-probe.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/clocksource.h>
-
-extern struct of_device_id __timer_of_table[];
-
-static const struct of_device_id __timer_of_table_sentinel
-	__used __section(__timer_of_table_end);
-
-void __init timer_probe(void)
-{
-	struct device_node *np;
-	const struct of_device_id *match;
-	of_init_fn_1_ret init_func_ret;
-	unsigned timers = 0;
-	int ret;
-
-	for_each_matching_node_and_match(np, __timer_of_table, &match) {
-		if (!of_device_is_available(np))
-			continue;
-
-		init_func_ret = match->data;
-
-		ret = init_func_ret(np);
-		if (ret) {
-			pr_err("Failed to initialize '%s': %d\n",
-			       of_node_full_name(np), ret);
-			continue;
-		}
-
-		timers++;
-	}
-
-	timers += acpi_probe_device_table(timer);
-
-	if (!timers)
-		pr_crit("%s: no matching timers found\n", __func__);
-}
diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c
index fc9e025cc395..a8dd80576c95 100644
--- a/drivers/clocksource/clps711x-timer.c
+++ b/drivers/clocksource/clps711x-timer.c
@@ -103,7 +103,7 @@ void __init clps711x_clksrc_init(void __iomem *tc1_base, void __iomem *tc2_base,
 	BUG_ON(_clps711x_clkevt_init(tc2, tc2_base, irq));
 }
 
-#ifdef CONFIG_CLKSRC_OF
+#ifdef CONFIG_TIMER_OF
 static int __init clps711x_timer_init(struct device_node *np)
 {
 	unsigned int irq = irq_of_parse_and_map(np, 0);
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 21cd72c55a2d..6d5d126357c2 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -418,7 +418,7 @@ void __init samsung_pwm_clocksource_init(void __iomem *base,
 	_samsung_pwm_clocksource_init();
 }
 
-#ifdef CONFIG_CLKSRC_OF
+#ifdef CONFIG_TIMER_OF
 static int __init samsung_pwm_alloc(struct device_node *np,
 				    const struct samsung_pwm_variant *variant)
 {
diff --git a/drivers/clocksource/timer-probe.c b/drivers/clocksource/timer-probe.c
new file mode 100644
index 000000000000..da81e5de74fe
--- /dev/null
+++ b/drivers/clocksource/timer-probe.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/clocksource.h>
+
+extern struct of_device_id __timer_of_table[];
+
+static const struct of_device_id __timer_of_table_sentinel
+	__used __section(__timer_of_table_end);
+
+void __init timer_probe(void)
+{
+	struct device_node *np;
+	const struct of_device_id *match;
+	of_init_fn_1_ret init_func_ret;
+	unsigned timers = 0;
+	int ret;
+
+	for_each_matching_node_and_match(np, __timer_of_table, &match) {
+		if (!of_device_is_available(np))
+			continue;
+
+		init_func_ret = match->data;
+
+		ret = init_func_ret(np);
+		if (ret) {
+			pr_err("Failed to initialize '%s': %d\n",
+			       of_node_full_name(np), ret);
+			continue;
+		}
+
+		timers++;
+	}
+
+	timers += acpi_probe_device_table(timer);
+
+	if (!timers)
+		pr_crit("%s: no matching timers found\n", __func__);
+}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c6a4ef50bbe6..0d64658a224f 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -172,7 +172,7 @@
 	KEEP(*(__##name##_of_table))					\
 	KEEP(*(__##name##_of_table_end))
 
-#define TIMER_OF_TABLES()	OF_TABLE(CONFIG_CLKSRC_OF, timer)
+#define TIMER_OF_TABLES()	OF_TABLE(CONFIG_TIMER_OF, timer)
 #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
 #define CLK_OF_TABLES()		OF_TABLE(CONFIG_COMMON_CLK, clk)
 #define IOMMU_OF_TABLES()	OF_TABLE(CONFIG_OF_IOMMU, iommu)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7cd38b21cbd3..c48ceefe0e6e 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -252,7 +252,7 @@ extern int clocksource_i8253_init(void);
 #define TIMER_OF_DECLARE(name, compat, fn) \
 	OF_DECLARE_1_RET(timer, name, compat, fn)
 
-#ifdef CONFIG_CLKSRC_PROBE
+#ifdef CONFIG_TIMER_PROBE
 extern void timer_probe(void);
 #else
 static inline void timer_probe(void) {}
-- 
cgit v1.2.3


From 8b7a3b568814a8e36d2910dd74465b0215aa0a31 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 30 May 2017 08:35:40 +0200
Subject: clocksource/drivers: Add an alias macro CLOCKSOURCE_OF_DECLARE

The macro CLOCKSOURCE_OF_DECLARE has been rename to TIMER_OF_DECLARE.

In order to prevent conflicts for the next merge window, a temporary
alias has been added which will be removed later.

Cc: Arnd Bergman <arnd@arndb.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/clocksource.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c48ceefe0e6e..d92bd83eed9f 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -252,6 +252,9 @@ extern int clocksource_i8253_init(void);
 #define TIMER_OF_DECLARE(name, compat, fn) \
 	OF_DECLARE_1_RET(timer, name, compat, fn)
 
+#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \
+	TIMER_OF_DECLARE(name, compat, fn)
+
 #ifdef CONFIG_TIMER_PROBE
 extern void timer_probe(void);
 #else
-- 
cgit v1.2.3


From 92c8f7c0e109d2fcff607a13dd7c1437d6c9f87a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 13 Jun 2017 22:24:39 +0200
Subject: tty/serial: atmel: make the driver DT only

Now that AVR32 is gone, platform_data are not used to initialize the driver
anymore, remove that path from the driver. Also remove the now unused
struct atmel_uart_data.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c   | 96 +++++++++++++------------------------
 include/linux/platform_data/atmel.h | 10 ----
 2 files changed, 33 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index b8d9f8f06b85..7551cab438ff 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1638,72 +1638,56 @@ static void atmel_init_property(struct atmel_uart_port *atmel_port,
 				struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
-	struct atmel_uart_data *pdata = dev_get_platdata(&pdev->dev);
-
-	if (np) {
-		/* DMA/PDC usage specification */
-		if (of_property_read_bool(np, "atmel,use-dma-rx")) {
-			if (of_property_read_bool(np, "dmas")) {
-				atmel_port->use_dma_rx  = true;
-				atmel_port->use_pdc_rx  = false;
-			} else {
-				atmel_port->use_dma_rx  = false;
-				atmel_port->use_pdc_rx  = true;
-			}
+
+	/* DMA/PDC usage specification */
+	if (of_property_read_bool(np, "atmel,use-dma-rx")) {
+		if (of_property_read_bool(np, "dmas")) {
+			atmel_port->use_dma_rx  = true;
+			atmel_port->use_pdc_rx  = false;
 		} else {
 			atmel_port->use_dma_rx  = false;
-			atmel_port->use_pdc_rx  = false;
+			atmel_port->use_pdc_rx  = true;
 		}
+	} else {
+		atmel_port->use_dma_rx  = false;
+		atmel_port->use_pdc_rx  = false;
+	}
 
-		if (of_property_read_bool(np, "atmel,use-dma-tx")) {
-			if (of_property_read_bool(np, "dmas")) {
-				atmel_port->use_dma_tx  = true;
-				atmel_port->use_pdc_tx  = false;
-			} else {
-				atmel_port->use_dma_tx  = false;
-				atmel_port->use_pdc_tx  = true;
-			}
+	if (of_property_read_bool(np, "atmel,use-dma-tx")) {
+		if (of_property_read_bool(np, "dmas")) {
+			atmel_port->use_dma_tx  = true;
+			atmel_port->use_pdc_tx  = false;
 		} else {
 			atmel_port->use_dma_tx  = false;
-			atmel_port->use_pdc_tx  = false;
+			atmel_port->use_pdc_tx  = true;
 		}
-
 	} else {
-		atmel_port->use_pdc_rx  = pdata->use_dma_rx;
-		atmel_port->use_pdc_tx  = pdata->use_dma_tx;
-		atmel_port->use_dma_rx  = false;
 		atmel_port->use_dma_tx  = false;
+		atmel_port->use_pdc_tx  = false;
 	}
-
 }
 
 static void atmel_init_rs485(struct uart_port *port,
 				struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
-	struct atmel_uart_data *pdata = dev_get_platdata(&pdev->dev);
-
-	if (np) {
-		struct serial_rs485 *rs485conf = &port->rs485;
-		u32 rs485_delay[2];
-		/* rs485 properties */
-		if (of_property_read_u32_array(np, "rs485-rts-delay",
-					rs485_delay, 2) == 0) {
-			rs485conf->delay_rts_before_send = rs485_delay[0];
-			rs485conf->delay_rts_after_send = rs485_delay[1];
-			rs485conf->flags = 0;
-		}
 
-		if (of_get_property(np, "rs485-rx-during-tx", NULL))
-			rs485conf->flags |= SER_RS485_RX_DURING_TX;
+	struct serial_rs485 *rs485conf = &port->rs485;
+	u32 rs485_delay[2];
 
-		if (of_get_property(np, "linux,rs485-enabled-at-boot-time",
-								NULL))
-			rs485conf->flags |= SER_RS485_ENABLED;
-	} else {
-		port->rs485       = pdata->rs485;
+	/* rs485 properties */
+	if (of_property_read_u32_array(np, "rs485-rts-delay",
+				       rs485_delay, 2) == 0) {
+		rs485conf->delay_rts_before_send = rs485_delay[0];
+		rs485conf->delay_rts_after_send = rs485_delay[1];
+		rs485conf->flags = 0;
 	}
 
+	if (of_get_property(np, "rs485-rx-during-tx", NULL))
+		rs485conf->flags |= SER_RS485_RX_DURING_TX;
+
+	if (of_get_property(np, "linux,rs485-enabled-at-boot-time", NULL))
+		rs485conf->flags |= SER_RS485_ENABLED;
 }
 
 static void atmel_set_ops(struct uart_port *port)
@@ -2385,7 +2369,6 @@ static int atmel_init_port(struct atmel_uart_port *atmel_port,
 {
 	int ret;
 	struct uart_port *port = &atmel_port->uart;
-	struct atmel_uart_data *pdata = dev_get_platdata(&pdev->dev);
 
 	atmel_init_property(atmel_port, pdev);
 	atmel_set_ops(port);
@@ -2393,24 +2376,17 @@ static int atmel_init_port(struct atmel_uart_port *atmel_port,
 	atmel_init_rs485(port, pdev);
 
 	port->iotype		= UPIO_MEM;
-	port->flags		= UPF_BOOT_AUTOCONF;
+	port->flags		= UPF_BOOT_AUTOCONF | UPF_IOREMAP;
 	port->ops		= &atmel_pops;
 	port->fifosize		= 1;
 	port->dev		= &pdev->dev;
 	port->mapbase	= pdev->resource[0].start;
 	port->irq	= pdev->resource[1].start;
 	port->rs485_config	= atmel_config_rs485;
+	port->membase	= NULL;
 
 	memset(&atmel_port->rx_ring, 0, sizeof(atmel_port->rx_ring));
 
-	if (pdata && pdata->regs) {
-		/* Already mapped by setup code */
-		port->membase = pdata->regs;
-	} else {
-		port->flags	|= UPF_IOREMAP;
-		port->membase	= NULL;
-	}
-
 	/* for console, the clock could already be configured */
 	if (!atmel_port->clk) {
 		atmel_port->clk = clk_get(&pdev->dev, "usart");
@@ -2744,19 +2720,13 @@ static int atmel_serial_probe(struct platform_device *pdev)
 {
 	struct atmel_uart_port *atmel_port;
 	struct device_node *np = pdev->dev.of_node;
-	struct atmel_uart_data *pdata = dev_get_platdata(&pdev->dev);
 	void *data;
 	int ret = -ENODEV;
 	bool rs485_enabled;
 
 	BUILD_BUG_ON(ATMEL_SERIAL_RINGSIZE & (ATMEL_SERIAL_RINGSIZE - 1));
 
-	if (np)
-		ret = of_alias_get_id(np, "serial");
-	else
-		if (pdata)
-			ret = pdata->num;
-
+	ret = of_alias_get_id(np, "serial");
 	if (ret < 0)
 		/* port id not found in platform data nor device-tree aliases:
 		 * auto-enumerate it */
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index 3c8825b67298..d36bc8d17e97 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -9,7 +9,6 @@
 
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
-#include <linux/serial.h>
 
  /* Compact Flash */
 struct at91_cf_data {
@@ -42,15 +41,6 @@ struct atmel_nand_data {
 	bool		need_reset_workaround;
 };
 
- /* Serial */
-struct atmel_uart_data {
-	int			num;		/* port num */
-	short			use_dma_tx;	/* use transmit DMA? */
-	short			use_dma_rx;	/* use receive DMA? */
-	void __iomem		*regs;		/* virt. base address, if any */
-	struct serial_rs485	rs485;		/* rs485 settings */
-};
-
 /* FIXME: this needs a better location, but gets stuff building again */
 extern int at91_suspend_entering_slow_clock(void);
 
-- 
cgit v1.2.3


From 393cc3f51135ea2520521f776ef3afdf3395c797 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 13 Jun 2017 13:35:50 +0200
Subject: fs/fcntl: f_setown, allow returning error

Allow f_setown to return an error value. We will fail in the next patch
with EINVAL for bad input to f_setown, so tile the path for the later
patch.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c         | 7 ++++---
 include/linux/fs.h | 2 +-
 net/socket.c       | 3 +--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index bbf80344c125..313eba860346 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -109,7 +109,7 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 }
 EXPORT_SYMBOL(__f_setown);
 
-void f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, unsigned long arg, int force)
 {
 	enum pid_type type;
 	struct pid *pid;
@@ -123,6 +123,8 @@ void f_setown(struct file *filp, unsigned long arg, int force)
 	pid = find_vpid(who);
 	__f_setown(filp, pid, type, force);
 	rcu_read_unlock();
+
+	return 0;
 }
 EXPORT_SYMBOL(f_setown);
 
@@ -305,8 +307,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
-		f_setown(filp, arg, 1);
-		err = 0;
+		err = f_setown(filp, arg, 1);
 		break;
 	case F_GETOWN_EX:
 		err = f_getown_ex(filp, arg);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa4affb38c39..25ee1ff6d45b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1249,7 +1249,7 @@ extern void fasync_free(struct fasync_struct *);
 extern void kill_fasync(struct fasync_struct **, int, int);
 
 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern void f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b..a30a1e324390 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -950,8 +950,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			err = -EFAULT;
 			if (get_user(pid, (int __user *)argp))
 				break;
-			f_setown(sock->file, pid, 1);
-			err = 0;
+			err = f_setown(sock->file, pid, 1);
 			break;
 		case FIOGETOWN:
 		case SIOCGPGRP:
-- 
cgit v1.2.3


From 3bc1630774bc9f202308ae04608a32c366b41caf Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 25 Apr 2017 19:38:48 +0200
Subject: of: Provide dummy of_device_compatible_match() for compile-testing

Most of_device_*() functions have dummy versions for CONFIG_OF=n,
but of_device_compatible_match() hasn't.  Fix that to improve the
ability to do compile-testing.

Fixes: b9c13fe32faaa71c ("dt: Add of_device_compatible_match()")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 50fcdb54087f..c72ba9437a43 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -627,6 +627,12 @@ static inline int of_device_is_compatible(const struct device_node *device,
 	return 0;
 }
 
+static inline  int of_device_compatible_match(struct device_node *device,
+					      const char *const *compat)
+{
+	return 0;
+}
+
 static inline bool of_device_is_available(const struct device_node *device)
 {
 	return false;
-- 
cgit v1.2.3


From 31fd85816dbe3a714bcc3f67c17c3dd87011f79e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Jun 2017 15:52:13 -0700
Subject: bpf: permits narrower load from bpf program context fields

Currently, verifier will reject a program if it contains an
narrower load from the bpf context structure. For example,
        __u8 h = __sk_buff->hash, or
        __u16 p = __sk_buff->protocol
        __u32 sample_period = bpf_perf_event_data->sample_period
which are narrower loads of 4-byte or 8-byte field.

This patch solves the issue by:
  . Introduce a new parameter ctx_field_size to carry the
    field size of narrower load from prog type
    specific *__is_valid_access validator back to verifier.
  . The non-zero ctx_field_size for a memory access indicates
    (1). underlying prog type specific convert_ctx_accesses
         supporting non-whole-field access
    (2). the current insn is a narrower or whole field access.
  . In verifier, for such loads where load memory size is
    less than ctx_field_size, verifier transforms it
    to a full field load followed by proper masking.
  . Currently, __sk_buff and bpf_perf_event_data->sample_period
    are supporting narrowing loads.
  . Narrower stores are still not allowed as typical ctx stores
    are just normal stores.

Because of this change, some tests in verifier will fail and
these tests are removed. As a bonus, rename some out of bound
__sk_buff->cb access to proper field name and remove two
redundant "skb cb oob" tests.

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h                         |  2 +-
 include/linux/bpf_verifier.h                |  1 +
 kernel/bpf/verifier.c                       | 71 ++++++++++++++++------
 kernel/trace/bpf_trace.c                    | 21 +++++--
 net/core/filter.c                           | 56 +++++++++++++-----
 tools/testing/selftests/bpf/test_verifier.c | 92 ++++-------------------------
 6 files changed, 124 insertions(+), 119 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c32bace66d3d..1bcbf0a71f75 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -157,7 +157,7 @@ struct bpf_verifier_ops {
 	 * with 'type' (read or write) is allowed
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
-				enum bpf_reg_type *reg_type);
+				enum bpf_reg_type *reg_type, int *ctx_field_size);
 	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
 			    const struct bpf_prog *prog);
 	u32 (*convert_ctx_access)(enum bpf_access_type type,
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d5093b52b485..189741c0da85 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -73,6 +73,7 @@ struct bpf_insn_aux_data {
 		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
 		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
 	};
+	int ctx_field_size; /* the ctx field size for load/store insns, maybe 0 */
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 519a6144d3d3..44b97d958fb7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -758,15 +758,26 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 }
 
 /* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
+	int ctx_field_size = 0;
+
 	/* for analyzer ctx accesses are already validated and converted */
 	if (env->analyzer_ops)
 		return 0;
 
 	if (env->prog->aux->ops->is_valid_access &&
-	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
+	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type, &ctx_field_size)) {
+		/* a non zero ctx_field_size indicates:
+		 * . For this field, the prog type specific ctx conversion algorithm
+		 *   only supports whole field access.
+		 * . This ctx access is a candiate for later verifier transformation
+		 *   to load the whole field and then apply a mask to get correct result.
+		 */
+		if (ctx_field_size)
+			env->insn_aux_data[insn_idx].ctx_field_size = ctx_field_size;
+
 		/* remember the offset of last byte accessed in ctx */
 		if (env->prog->aux->max_ctx_offset < off + size)
 			env->prog->aux->max_ctx_offset = off + size;
@@ -868,7 +879,7 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
 			    int bpf_size, enum bpf_access_type t,
 			    int value_regno)
 {
@@ -911,7 +922,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			verbose("R%d leaks addr into ctx\n", value_regno);
 			return -EACCES;
 		}
-		err = check_ctx_access(env, off, size, t, &reg_type);
+		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value_and_range(state->regs,
 							 value_regno);
@@ -972,7 +983,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 	return err;
 }
 
-static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
@@ -994,13 +1005,13 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return err;
 
 	/* check whether atomic_add can read the memory */
-	err = check_mem_access(env, insn->dst_reg, insn->off,
+	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
 	if (err)
 		return err;
 
 	/* check whether atomic_add can write into the same memory */
-	return check_mem_access(env, insn->dst_reg, insn->off,
+	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 				BPF_SIZE(insn->code), BPF_WRITE, -1);
 }
 
@@ -1416,7 +1427,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	 * is inferred from register state.
 	 */
 	for (i = 0; i < meta.access_size; i++) {
-		err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1);
+		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
 		if (err)
 			return err;
 	}
@@ -2993,18 +3004,12 @@ static int do_check(struct bpf_verifier_env *env)
 			/* check that memory (src_reg + off) is readable,
 			 * the state of dst_reg will be updated by this func
 			 */
-			err = check_mem_access(env, insn->src_reg, insn->off,
+			err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_READ,
 					       insn->dst_reg);
 			if (err)
 				return err;
 
-			if (BPF_SIZE(insn->code) != BPF_W &&
-			    BPF_SIZE(insn->code) != BPF_DW) {
-				insn_idx++;
-				continue;
-			}
-
 			prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
 
 			if (*prev_src_type == NOT_INIT) {
@@ -3032,7 +3037,7 @@ static int do_check(struct bpf_verifier_env *env)
 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
 			if (BPF_MODE(insn->code) == BPF_XADD) {
-				err = check_xadd(env, insn);
+				err = check_xadd(env, insn_idx, insn);
 				if (err)
 					return err;
 				insn_idx++;
@@ -3051,7 +3056,7 @@ static int do_check(struct bpf_verifier_env *env)
 			dst_reg_type = regs[insn->dst_reg].type;
 
 			/* check that memory (dst_reg + off) is writeable */
-			err = check_mem_access(env, insn->dst_reg, insn->off,
+			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
 					       insn->src_reg);
 			if (err)
@@ -3080,7 +3085,7 @@ static int do_check(struct bpf_verifier_env *env)
 				return err;
 
 			/* check that memory (dst_reg + off) is writeable */
-			err = check_mem_access(env, insn->dst_reg, insn->off,
+			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
 					       -1);
 			if (err)
@@ -3383,7 +3388,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i, cnt, delta = 0;
+	int i, cnt, off, size, ctx_field_size, is_narrower_load, delta = 0;
 
 	if (ops->gen_prologue) {
 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -3423,11 +3428,39 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
 			continue;
 
+		off = insn->off;
+		size = bpf_size_to_bytes(BPF_SIZE(insn->code));
+		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
+		is_narrower_load = (type == BPF_READ && size < ctx_field_size);
+
+		/* If the read access is a narrower load of the field,
+		 * convert to a 4/8-byte load, to minimum program type specific
+		 * convert_ctx_access changes. If conversion is successful,
+		 * we will apply proper mask to the result.
+		 */
+		if (is_narrower_load) {
+			int size_code = BPF_H;
+
+			if (ctx_field_size == 4)
+				size_code = BPF_W;
+			else if (ctx_field_size == 8)
+				size_code = BPF_DW;
+			insn->off = off & ~(ctx_field_size - 1);
+			insn->code = BPF_LDX | BPF_MEM | size_code;
+		}
 		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
 		}
+		if (is_narrower_load) {
+			if (ctx_field_size <= 4)
+				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+							(1 << size * 8) - 1);
+			else
+				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+							(1 << size * 8) - 1);
+		}
 
 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 		if (!new_prog)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 051d7fca0c09..9d3ec8253131 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -479,7 +479,7 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 
 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-					enum bpf_reg_type *reg_type)
+					enum bpf_reg_type *reg_type, int *ctx_field_size)
 {
 	if (off < 0 || off >= sizeof(struct pt_regs))
 		return false;
@@ -562,7 +562,7 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
 }
 
 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-				    enum bpf_reg_type *reg_type)
+				    enum bpf_reg_type *reg_type, int *ctx_field_size)
 {
 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
 		return false;
@@ -581,17 +581,26 @@ const struct bpf_verifier_ops tracepoint_prog_ops = {
 };
 
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-				    enum bpf_reg_type *reg_type)
+				    enum bpf_reg_type *reg_type, int *ctx_field_size)
 {
+	int sample_period_off;
+
 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
 		return false;
 	if (type != BPF_READ)
 		return false;
 	if (off % size != 0)
 		return false;
-	if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
-		if (size != sizeof(u64))
-			return false;
+
+	/* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */
+	sample_period_off = offsetof(struct bpf_perf_event_data, sample_period);
+	if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) {
+		*ctx_field_size = 8;
+#ifdef __LITTLE_ENDIAN
+		return (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
+#else
+		return ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
+#endif
 	} else {
 		if (size != sizeof(long))
 			return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index a65a3b25e104..60ed6f343a63 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2856,7 +2856,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static bool __is_valid_access(int off, int size)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type,
+			      int *ctx_field_size)
 {
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
@@ -2872,9 +2873,27 @@ static bool __is_valid_access(int off, int size)
 		    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
 			return false;
 		break;
-	default:
+	case offsetof(struct __sk_buff, data) ...
+	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, data_end) ...
+	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
 		if (size != sizeof(__u32))
 			return false;
+		break;
+	default:
+		/* permit narrower load for not cb/data/data_end fields */
+		*ctx_field_size = 4;
+		if (type == BPF_WRITE) {
+			if (size != sizeof(__u32))
+				return false;
+		} else {
+			if (size != sizeof(__u32))
+#ifdef __LITTLE_ENDIAN
+				return (off & 0x3) == 0 && (size == 1 || size == 2);
+#else
+				return (off & 0x3) + size == 4 && (size == 1 || size == 2);
+#endif
+		}
 	}
 
 	return true;
@@ -2882,12 +2901,16 @@ static bool __is_valid_access(int off, int size)
 
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type,
-				      enum bpf_reg_type *reg_type)
+				      enum bpf_reg_type *reg_type,
+				      int *ctx_field_size)
 {
 	switch (off) {
-	case offsetof(struct __sk_buff, tc_classid):
-	case offsetof(struct __sk_buff, data):
-	case offsetof(struct __sk_buff, data_end):
+	case offsetof(struct __sk_buff, tc_classid) ...
+	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, data) ...
+	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, data_end) ...
+	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
 		return false;
 	}
 
@@ -2901,15 +2924,17 @@ static bool sk_filter_is_valid_access(int off, int size,
 		}
 	}
 
-	return __is_valid_access(off, size);
+	return __is_valid_access(off, size, type, ctx_field_size);
 }
 
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
-				enum bpf_reg_type *reg_type)
+				enum bpf_reg_type *reg_type,
+				int *ctx_field_size)
 {
 	switch (off) {
-	case offsetof(struct __sk_buff, tc_classid):
+	case offsetof(struct __sk_buff, tc_classid) ...
+	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
 		return false;
 	}
 
@@ -2934,12 +2959,13 @@ static bool lwt_is_valid_access(int off, int size,
 		break;
 	}
 
-	return __is_valid_access(off, size);
+	return __is_valid_access(off, size, type, ctx_field_size);
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
 					enum bpf_access_type type,
-					enum bpf_reg_type *reg_type)
+					enum bpf_reg_type *reg_type,
+					int *ctx_field_size)
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
@@ -3002,7 +3028,8 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
-				       enum bpf_reg_type *reg_type)
+				       enum bpf_reg_type *reg_type,
+				       int *ctx_field_size)
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
@@ -3027,7 +3054,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		break;
 	}
 
-	return __is_valid_access(off, size);
+	return __is_valid_access(off, size, type, ctx_field_size);
 }
 
 static bool __is_valid_xdp_access(int off, int size)
@@ -3044,7 +3071,8 @@ static bool __is_valid_xdp_access(int off, int size)
 
 static bool xdp_is_valid_access(int off, int size,
 				enum bpf_access_type type,
-				enum bpf_reg_type *reg_type)
+				enum bpf_reg_type *reg_type,
+				int *ctx_field_size)
 {
 	if (type == BPF_WRITE)
 		return false;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4ee4708b0d60..13341700930c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1073,44 +1073,22 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 	},
 	{
-		"check cb access: byte, oob 1",
+		"__sk_buff->hash, offset 0, byte store not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
-				    offsetof(struct __sk_buff, cb[4]) + 4),
+				    offsetof(struct __sk_buff, hash)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
 		.result = REJECT,
 	},
 	{
-		"check cb access: byte, oob 2",
+		"__sk_buff->tc_index, offset 3, byte store not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
-				    offsetof(struct __sk_buff, cb[0]) - 1),
-			BPF_EXIT_INSN(),
-		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
-	},
-	{
-		"check cb access: byte, oob 3",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
-				    offsetof(struct __sk_buff, cb[4]) + 4),
-			BPF_EXIT_INSN(),
-		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
-	},
-	{
-		"check cb access: byte, oob 4",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
-				    offsetof(struct __sk_buff, cb[0]) - 1),
+				    offsetof(struct __sk_buff, tc_index) + 3),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
@@ -1188,44 +1166,22 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 	},
 	{
-		"check cb access: half, oob 1",
+		"check __sk_buff->hash, offset 0, half store not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
-				    offsetof(struct __sk_buff, cb[4]) + 4),
+				    offsetof(struct __sk_buff, hash)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
 		.result = REJECT,
 	},
 	{
-		"check cb access: half, oob 2",
+		"check __sk_buff->tc_index, offset 2, half store not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
-				    offsetof(struct __sk_buff, cb[0]) - 2),
-			BPF_EXIT_INSN(),
-		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
-	},
-	{
-		"check cb access: half, oob 3",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
-				    offsetof(struct __sk_buff, cb[4]) + 4),
-			BPF_EXIT_INSN(),
-		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
-	},
-	{
-		"check cb access: half, oob 4",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
-				    offsetof(struct __sk_buff, cb[0]) - 2),
+				    offsetof(struct __sk_buff, tc_index) + 2),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
@@ -1366,28 +1322,6 @@ static struct bpf_test tests[] = {
 	},
 	{
 		"check cb access: double, oob 2",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
-				    offsetof(struct __sk_buff, cb[4]) + 8),
-			BPF_EXIT_INSN(),
-		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
-	},
-	{
-		"check cb access: double, oob 3",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
-				    offsetof(struct __sk_buff, cb[0]) - 8),
-			BPF_EXIT_INSN(),
-		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
-	},
-	{
-		"check cb access: double, oob 4",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
@@ -1398,22 +1332,22 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 	},
 	{
-		"check cb access: double, oob 5",
+		"check __sk_buff->ifindex dw store not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
-				    offsetof(struct __sk_buff, cb[4]) + 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, ifindex)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
 		.result = REJECT,
 	},
 	{
-		"check cb access: double, oob 6",
+		"check __sk_buff->ifindex dw load not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
-				    offsetof(struct __sk_buff, cb[0]) - 8),
+				    offsetof(struct __sk_buff, ifindex)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
-- 
cgit v1.2.3


From db46a0e1be7eac45d0bb1bdcd438b8d47c920451 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 14 Jun 2017 16:15:24 +0900
Subject: net: update undefined ->ndo_change_mtu() comment

Update ->ndo_change_mtu() callback comment to remove text
about returning error in case of undefined callback. This
change makes the comment match the existing code behavior.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 846193dfb0ac..4ed952c17fc7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -914,8 +914,7 @@ struct xfrmdev_ops {
  *
  * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
  *	Called when a user wants to change the Maximum Transfer Unit
- *	of a device. If not defined, any request to change MTU will
- *	will return an error.
+ *	of a device.
  *
  * void (*ndo_tx_timeout)(struct net_device *dev);
  *	Callback used when the transmitter has not made any progress
-- 
cgit v1.2.3


From dc9edc44de6cd7cc8cc7f5b36c1adb221eda3207 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Wed, 14 Jun 2017 13:27:50 -0600
Subject: block: Fix a blk_exit_rl() regression

Avoid that the following complaint is reported:

 BUG: sleeping function called from invalid context at kernel/workqueue.c:2790
 in_atomic(): 1, irqs_disabled(): 0, pid: 41, name: rcuop/3
 1 lock held by rcuop/3/41:
  #0:  (rcu_callback){......}, at: [<ffffffff8111f9a2>] rcu_nocb_kthread+0x282/0x500
 Call Trace:
  dump_stack+0x86/0xcf
  ___might_sleep+0x174/0x260
  __might_sleep+0x4a/0x80
  flush_work+0x7e/0x2e0
  __cancel_work_timer+0x143/0x1c0
  cancel_work_sync+0x10/0x20
  blk_throtl_exit+0x25/0x60
  blkcg_exit_queue+0x35/0x40
  blk_release_queue+0x42/0x130
  kobject_put+0xa9/0x190

This happens since we invoke callbacks that need to block from the
queue release handler. Fix this by pushing the final release to
a workqueue.

Reported-by: Ross Zwisler <zwisler@gmail.com>
Fixes: commit b425e5049258 ("block: Avoid that blk_exit_rl() triggers a use-after-free")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>

Updated changelog
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-sysfs.c      | 34 ++++++++++++++++++++++------------
 include/linux/blkdev.h |  2 ++
 2 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 283da7fbe034..27aceab1cc31 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -777,24 +777,25 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
 }
 
 /**
- * blk_release_queue: - release a &struct request_queue when it is no longer needed
- * @kobj:    the kobj belonging to the request queue to be released
+ * __blk_release_queue - release a request queue when it is no longer needed
+ * @work: pointer to the release_work member of the request queue to be released
  *
  * Description:
- *     blk_release_queue is the pair to blk_init_queue() or
- *     blk_queue_make_request().  It should be called when a request queue is
- *     being released; typically when a block device is being de-registered.
- *     Currently, its primary task it to free all the &struct request
- *     structures that were allocated to the queue and the queue itself.
+ *     blk_release_queue is the counterpart of blk_init_queue(). It should be
+ *     called when a request queue is being released; typically when a block
+ *     device is being de-registered. Its primary task it to free the queue
+ *     itself.
  *
- * Note:
+ * Notes:
  *     The low level driver must have finished any outstanding requests first
  *     via blk_cleanup_queue().
- **/
-static void blk_release_queue(struct kobject *kobj)
+ *
+ *     Although blk_release_queue() may be called with preemption disabled,
+ *     __blk_release_queue() may sleep.
+ */
+static void __blk_release_queue(struct work_struct *work)
 {
-	struct request_queue *q =
-		container_of(kobj, struct request_queue, kobj);
+	struct request_queue *q = container_of(work, typeof(*q), release_work);
 
 	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
 		blk_stat_remove_callback(q, q->poll_cb);
@@ -834,6 +835,15 @@ static void blk_release_queue(struct kobject *kobj)
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
+static void blk_release_queue(struct kobject *kobj)
+{
+	struct request_queue *q =
+		container_of(kobj, struct request_queue, kobj);
+
+	INIT_WORK(&q->release_work, __blk_release_queue);
+	schedule_work(&q->release_work);
+}
+
 static const struct sysfs_ops queue_sysfs_ops = {
 	.show	= queue_attr_show,
 	.store	= queue_attr_store,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ab92c4ea138b..b74a3edcb3da 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -586,6 +586,8 @@ struct request_queue {
 
 	size_t			cmd_size;
 	void			*rq_alloc_data;
+
+	struct work_struct	release_work;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
cgit v1.2.3


From 73a7242a06ff995d771fbe243e72b516feaa6e3d Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 13 Jun 2017 17:18:01 -0400
Subject: cgroup: Keep accurate count of tasks in each css_set

The reference count in the css_set data structure was used as a
proxy of the number of tasks attached to that css_set. However, that
count is actually not an accurate measure especially with thread mode
support. So a new variable nr_tasks is added to the css_set to keep
track of the actual task count. This new variable is protected by
the css_set_lock. Functions that require the actual task count are
updated to use the new variable.

tj: s/task_count/nr_tasks/ for consistency with cgroup_root->nr_cgrps.
    Refreshed on top of cgroup/for-v4.13 which dropped on
    css_set_populated() -> nr_tasks conversion.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h |  3 +++
 kernel/cgroup/cgroup-v1.c   |  6 +-----
 kernel/cgroup/cgroup.c      | 10 ++++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ec47101cb1bf..3bc4196bf217 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -166,6 +166,9 @@ struct css_set {
 	/* the default cgroup associated with this css_set */
 	struct cgroup *dfl_cgrp;
 
+	/* internal task count, protected by css_set_lock */
+	int nr_tasks;
+
 	/*
 	 * Lists running through all tasks using this cgroup group.
 	 * mg_tasks lists tasks which belong to this cset but are in the
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 85d75152402d..e9ea5f201fac 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -334,10 +334,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
 /**
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
- *
- * Return the number of tasks in the cgroup.  The returned number can be
- * higher than the actual number of tasks due to css_set references from
- * namespace roots and temporary usages.
  */
 static int cgroup_task_count(const struct cgroup *cgrp)
 {
@@ -346,7 +342,7 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 
 	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
-		count += refcount_read(&link->cset->refcount);
+		count += link->cset->nr_tasks;
 	spin_unlock_irq(&css_set_lock);
 	return count;
 }
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8d4e85eae42c..dbfd7028b1c6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -573,6 +573,11 @@ static int css_set_count	= 1;	/* 1 for init_css_set */
 /**
  * css_set_populated - does a css_set contain any tasks?
  * @cset: target css_set
+ *
+ * css_set_populated() should be the same as !!cset->nr_tasks at steady
+ * state. However, css_set_populated() can be called while a task is being
+ * added to or removed from the linked list before the nr_tasks is
+ * properly updated. Hence, we can't just look at ->nr_tasks here.
  */
 static bool css_set_populated(struct css_set *cset)
 {
@@ -1598,6 +1603,7 @@ static void cgroup_enable_task_cg_lists(void)
 				css_set_update_populated(cset, true);
 			list_add_tail(&p->cg_list, &cset->tasks);
 			get_css_set(cset);
+			cset->nr_tasks++;
 		}
 		spin_unlock(&p->sighand->siglock);
 	} while_each_thread(g, p);
@@ -2064,8 +2070,10 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
 			struct css_set *to_cset = cset->mg_dst_cset;
 
 			get_css_set(to_cset);
+			to_cset->nr_tasks++;
 			css_set_move_task(task, from_cset, to_cset, true);
 			put_css_set_locked(from_cset);
+			from_cset->nr_tasks--;
 		}
 	}
 	spin_unlock_irq(&css_set_lock);
@@ -4789,6 +4797,7 @@ void cgroup_post_fork(struct task_struct *child)
 		cset = task_css_set(current);
 		if (list_empty(&child->cg_list)) {
 			get_css_set(cset);
+			cset->nr_tasks++;
 			css_set_move_task(child, NULL, cset, false);
 		}
 		spin_unlock_irq(&css_set_lock);
@@ -4838,6 +4847,7 @@ void cgroup_exit(struct task_struct *tsk)
 	if (!list_empty(&tsk->cg_list)) {
 		spin_lock_irq(&css_set_lock);
 		css_set_move_task(tsk, cset, NULL, false);
+		cset->nr_tasks--;
 		spin_unlock_irq(&css_set_lock);
 	} else {
 		get_css_set(cset);
-- 
cgit v1.2.3


From 33e4f80ee69b5168badf37edbfed796eb48434b9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 12 Jun 2017 22:56:34 +0200
Subject: ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle

The ACPI SCI (System Control Interrupt) is set up as a wakeup IRQ
during suspend-to-idle transitions and, consequently, any events
signaled through it wake up the system from that state.  However,
on some systems some of the events signaled via the ACPI SCI while
suspended to idle should not cause the system to wake up.  In fact,
quite often they should just be discarded.

Arguably, systems should not resume entirely on such events, but in
order to decide which events really should cause the system to resume
and which are spurious, it is necessary to resume up to the point
when ACPI SCIs are actually handled and processed, which is after
executing dpm_resume_noirq() in the system resume path.

For this reasons, add a loop around freeze_enter() in which the
platforms can process events signaled via multiplexed IRQ lines
like the ACPI SCI and add suspend-to-idle hooks that can be
used for this purpose to struct platform_freeze_ops.

In the ACPI case, the ->wake hook is used for checking if the SCI
has triggered while suspended and deferring the interrupt-induced
system wakeup until the events signaled through it are actually
processed sufficiently to decide whether or not the system should
resume.  In turn, the ->sync hook allows all of the relevant event
queues to be flushed so as to prevent events from being missed due
to race conditions.

In addition to that, some ACPI code processing wakeup events needs
to be modified to use the "hard" version of wakeup triggers, so that
it will cause a system resume to happen on device-induced wakeup
events even if the "soft" mechanism to prevent the system from
suspending is not enabled.  However, to preserve the existing
behavior with respect to suspend-to-RAM, this only is done in
the suspend-to-idle case and only if an SCI has occurred while
suspended.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c      |  2 +-
 drivers/acpi/button.c       |  5 +++--
 drivers/acpi/device_pm.c    |  9 ++++++++-
 drivers/acpi/internal.h     |  2 ++
 drivers/acpi/sleep.c        | 37 +++++++++++++++++++++++++++++++++++++
 drivers/base/power/main.c   |  5 -----
 drivers/base/power/wakeup.c | 18 ++++++++++++------
 include/acpi/acpi_bus.h     |  6 +++++-
 include/linux/suspend.h     |  7 +++++--
 kernel/power/process.c      |  2 +-
 kernel/power/suspend.c      | 35 +++++++++++++++++++++++++++++------
 11 files changed, 103 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index d42eeef9d928..1cbb88d938e5 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -782,7 +782,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume)
 	if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) ||
 	    (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) &&
             (battery->capacity_now <= battery->alarm)))
-		pm_wakeup_event(&battery->device->dev, 0);
+		acpi_pm_wakeup_event(&battery->device->dev);
 
 	return result;
 }
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index e19f530f1083..91cfdf377df7 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -217,7 +217,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state)
 	}
 
 	if (state)
-		pm_wakeup_event(&device->dev, 0);
+		acpi_pm_wakeup_event(&device->dev);
 
 	ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
 	if (ret == NOTIFY_DONE)
@@ -402,7 +402,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
 		} else {
 			int keycode;
 
-			pm_wakeup_event(&device->dev, 0);
+			acpi_pm_wakeup_event(&device->dev);
 			if (button->suspended)
 				break;
 
@@ -534,6 +534,7 @@ static int acpi_button_add(struct acpi_device *device)
 		lid_device = device;
 	}
 
+	device_init_wakeup(&device->dev, true);
 	printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device));
 	return 0;
 
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index f13c62c4b117..ca0210213773 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -24,6 +24,7 @@
 #include <linux/pm_qos.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
+#include <linux/suspend.h>
 
 #include "internal.h"
 
@@ -385,6 +386,12 @@ EXPORT_SYMBOL(acpi_bus_power_manageable);
 #ifdef CONFIG_PM
 static DEFINE_MUTEX(acpi_pm_notifier_lock);
 
+void acpi_pm_wakeup_event(struct device *dev)
+{
+	pm_wakeup_dev_event(dev, 0, acpi_s2idle_wakeup());
+}
+EXPORT_SYMBOL_GPL(acpi_pm_wakeup_event);
+
 static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 {
 	struct acpi_device *adev;
@@ -399,7 +406,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
-		__pm_wakeup_event(adev->wakeup.ws, 0);
+		pm_wakeup_ws_event(adev->wakeup.ws, 0, acpi_s2idle_wakeup());
 		if (adev->wakeup.context.func)
 			adev->wakeup.context.func(&adev->wakeup.context);
 	}
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 66229ffa909b..75924ea69071 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -198,8 +198,10 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
                                   Suspend/Resume
   -------------------------------------------------------------------------- */
 #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT
+extern bool acpi_s2idle_wakeup(void);
 extern int acpi_sleep_init(void);
 #else
+static inline bool acpi_s2idle_wakeup(void) { return false; }
 static inline int acpi_sleep_init(void) { return -ENXIO; }
 #endif
 
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a4782c75ebdd..555de11a56b6 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -650,6 +650,8 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = {
 	.recover = acpi_pm_finish,
 };
 
+static bool s2idle_wakeup;
+
 static int acpi_freeze_begin(void)
 {
 	acpi_scan_lock_acquire();
@@ -666,6 +668,33 @@ static int acpi_freeze_prepare(void)
 	return 0;
 }
 
+static void acpi_freeze_wake(void)
+{
+	/*
+	 * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means
+	 * that the SCI has triggered while suspended, so cancel the wakeup in
+	 * case it has not been a wakeup event (the GPEs will be checked later).
+	 */
+	if (acpi_sci_irq_valid() &&
+	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) {
+		pm_system_cancel_wakeup();
+		s2idle_wakeup = true;
+	}
+}
+
+static void acpi_freeze_sync(void)
+{
+	/*
+	 * Process all pending events in case there are any wakeup ones.
+	 *
+	 * The EC driver uses the system workqueue, so that one needs to be
+	 * flushed too.
+	 */
+	acpi_os_wait_events_complete();
+	flush_scheduled_work();
+	s2idle_wakeup = false;
+}
+
 static void acpi_freeze_restore(void)
 {
 	if (acpi_sci_irq_valid())
@@ -682,6 +711,8 @@ static void acpi_freeze_end(void)
 static const struct platform_freeze_ops acpi_freeze_ops = {
 	.begin = acpi_freeze_begin,
 	.prepare = acpi_freeze_prepare,
+	.wake = acpi_freeze_wake,
+	.sync = acpi_freeze_sync,
 	.restore = acpi_freeze_restore,
 	.end = acpi_freeze_end,
 };
@@ -700,9 +731,15 @@ static void acpi_sleep_suspend_setup(void)
 }
 
 #else /* !CONFIG_SUSPEND */
+#define s2idle_wakeup	(false)
 static inline void acpi_sleep_suspend_setup(void) {}
 #endif /* !CONFIG_SUSPEND */
 
+bool acpi_s2idle_wakeup(void)
+{
+	return s2idle_wakeup;
+}
+
 #ifdef CONFIG_PM_SLEEP
 static u32 saved_bm_rld;
 
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 253f860e8981..ef5b6a6e5045 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1095,11 +1095,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 	if (async_error)
 		goto Complete;
 
-	if (pm_wakeup_pending()) {
-		async_error = -EBUSY;
-		goto Complete;
-	}
-
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index c313b600d356..9c36b27996fc 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly;
 /* First wakeup IRQ seen by the kernel in the last cycle. */
 unsigned int pm_wakeup_irq __read_mostly;
 
-/* If set and the system is suspending, terminate the suspend. */
-static bool pm_abort_suspend __read_mostly;
+/* If greater than 0 and the system is suspending, terminate the suspend. */
+static atomic_t pm_abort_suspend __read_mostly;
 
 /*
  * Combined counters of registered wakeup events and wakeup events in progress.
@@ -855,20 +855,26 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
-	return ret || pm_abort_suspend;
+	return ret || atomic_read(&pm_abort_suspend) > 0;
 }
 
 void pm_system_wakeup(void)
 {
-	pm_abort_suspend = true;
+	atomic_inc(&pm_abort_suspend);
 	freeze_wake();
 }
 EXPORT_SYMBOL_GPL(pm_system_wakeup);
 
-void pm_wakeup_clear(void)
+void pm_system_cancel_wakeup(void)
+{
+	atomic_dec(&pm_abort_suspend);
+}
+
+void pm_wakeup_clear(bool reset)
 {
-	pm_abort_suspend = false;
 	pm_wakeup_irq = 0;
+	if (reset)
+		atomic_set(&pm_abort_suspend, 0);
 }
 
 void pm_system_irq_wakeup(unsigned int irq_number)
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 79c0af419300..63a90a624a0f 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -598,15 +598,19 @@ static inline bool acpi_device_always_present(struct acpi_device *adev)
 #endif
 
 #ifdef CONFIG_PM
+void acpi_pm_wakeup_event(struct device *dev);
 acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev,
 			void (*func)(struct acpi_device_wakeup_context *context));
 acpi_status acpi_remove_pm_notifier(struct acpi_device *adev);
 int acpi_pm_device_sleep_state(struct device *, int *, int);
 int acpi_pm_device_run_wake(struct device *, bool);
 #else
+static inline void acpi_pm_wakeup_event(struct device *dev)
+{
+}
 static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev,
 					       struct device *dev,
-				               void (*work_func)(struct work_struct *work))
+					       void (*func)(struct acpi_device_wakeup_context *context))
 {
 	return AE_SUPPORT;
 }
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d9718378a8be..0b1cf32edfd7 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -189,6 +189,8 @@ struct platform_suspend_ops {
 struct platform_freeze_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
+	void (*wake)(void);
+	void (*sync)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
@@ -428,7 +430,8 @@ extern unsigned int pm_wakeup_irq;
 
 extern bool pm_wakeup_pending(void);
 extern void pm_system_wakeup(void);
-extern void pm_wakeup_clear(void);
+extern void pm_system_cancel_wakeup(void);
+extern void pm_wakeup_clear(bool reset);
 extern void pm_system_irq_wakeup(unsigned int irq_number);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
@@ -478,7 +481,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 
 static inline bool pm_wakeup_pending(void) { return false; }
 static inline void pm_system_wakeup(void) {}
-static inline void pm_wakeup_clear(void) {}
+static inline void pm_wakeup_clear(bool reset) {}
 static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 
 static inline void lock_system_sleep(void) {}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index c7209f060eeb..78672d324a6e 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -132,7 +132,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
-	pm_wakeup_clear();
+	pm_wakeup_clear(true);
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 15e6baef5c73..3ecf275d7e44 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -72,6 +72,8 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
+	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
+
 	spin_lock_irq(&suspend_freeze_lock);
 	if (pm_wakeup_pending())
 		goto out;
@@ -84,11 +86,9 @@ static void freeze_enter(void)
 
 	/* Push all the CPUs into the idle loop. */
 	wake_up_all_idle_cpus();
-	pr_debug("PM: suspend-to-idle\n");
 	/* Make the current CPU wait so it can enter the idle loop too. */
 	wait_event(suspend_freeze_wait_head,
 		   suspend_freeze_state == FREEZE_STATE_WAKE);
-	pr_debug("PM: resume from suspend-to-idle\n");
 
 	cpuidle_pause();
 	put_online_cpus();
@@ -98,6 +98,31 @@ static void freeze_enter(void)
  out:
 	suspend_freeze_state = FREEZE_STATE_NONE;
 	spin_unlock_irq(&suspend_freeze_lock);
+
+	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
+}
+
+static void s2idle_loop(void)
+{
+	pr_debug("PM: suspend-to-idle\n");
+
+	do {
+		freeze_enter();
+
+		if (freeze_ops && freeze_ops->wake)
+			freeze_ops->wake();
+
+		dpm_resume_noirq(PMSG_RESUME);
+		if (freeze_ops && freeze_ops->sync)
+			freeze_ops->sync();
+
+		if (pm_wakeup_pending())
+			break;
+
+		pm_wakeup_clear(false);
+	} while (!dpm_suspend_noirq(PMSG_SUSPEND));
+
+	pr_debug("PM: resume from suspend-to-idle\n");
 }
 
 void freeze_wake(void)
@@ -371,10 +396,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
-		trace_suspend_resume(TPS("machine_suspend"), state, true);
-		freeze_enter();
-		trace_suspend_resume(TPS("machine_suspend"), state, false);
-		goto Platform_wake;
+		s2idle_loop();
+		goto Platform_early_resume;
 	}
 
 	error = disable_nonboot_cpus();
-- 
cgit v1.2.3


From 132a324ab62fe4fb8d6dcc2ab4eddb0e93b69afe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Jun 2017 12:49:36 +0100
Subject: KVM: arm64: vgic-v3: Add ICV_IAR1_EL1 handler

Add a handler for reading the guest's view of the ICC_IAR1_EL1
register. This involves finding the highest priority Group-1
interrupt, checking against both PMR and the active group
priority, activating the interrupt and setting the group
priority as active.

Tested-by: Alexander Graf <agraf@suse.de>
Acked-by: David Daney <david.daney@cavium.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 include/linux/irqchip/arm-gic-v3.h |   1 +
 virt/kvm/arm/hyp/vgic-v3-sr.c      | 163 +++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 1fa293a37f4a..d70668fae003 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -405,6 +405,7 @@
 #define ICH_LR_PHYS_ID_SHIFT		32
 #define ICH_LR_PHYS_ID_MASK		(0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
 #define ICH_LR_PRIORITY_SHIFT		48
+#define ICH_LR_PRIORITY_MASK		(0xffULL << ICH_LR_PRIORITY_SHIFT)
 
 /* These are for GICv2 emulation only */
 #define GICH_LR_VIRTUALID		(0x3ffUL << 0)
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 25f09721241f..5a20f8d5bada 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -24,6 +24,7 @@
 
 #define vtr_to_max_lr_idx(v)		((v) & 0xf)
 #define vtr_to_nr_pre_bits(v)		((((u32)(v) >> 26) & 7) + 1)
+#define vtr_to_nr_apr_regs(v)		(1 << (vtr_to_nr_pre_bits(v) - 5))
 
 static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
 {
@@ -381,6 +382,88 @@ static int __hyp_text __vgic_v3_bpr_min(void)
 	return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2));
 }
 
+static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+	u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
+
+	return crm != 8;
+}
+
+#define GICv3_IDLE_PRIORITY	0xff
+
+static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
+						    u32 vmcr,
+						    u64 *lr_val)
+{
+	unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+	u8 priority = GICv3_IDLE_PRIORITY;
+	int i, lr = -1;
+
+	for (i = 0; i < used_lrs; i++) {
+		u64 val = __gic_v3_get_lr(i);
+		u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+		/* Not pending in the state? */
+		if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT)
+			continue;
+
+		/* Group-0 interrupt, but Group-0 disabled? */
+		if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK))
+			continue;
+
+		/* Group-1 interrupt, but Group-1 disabled? */
+		if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK))
+			continue;
+
+		/* Not the highest priority? */
+		if (lr_prio >= priority)
+			continue;
+
+		/* This is a candidate */
+		priority = lr_prio;
+		*lr_val = val;
+		lr = i;
+	}
+
+	if (lr == -1)
+		*lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+	return lr;
+}
+
+static int __hyp_text __vgic_v3_get_highest_active_priority(void)
+{
+	u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+	u32 hap = 0;
+	int i;
+
+	for (i = 0; i < nr_apr_regs; i++) {
+		u32 val;
+
+		/*
+		 * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers
+		 * contain the active priority levels for this VCPU
+		 * for the maximum number of supported priority
+		 * levels, and we return the full priority level only
+		 * if the BPR is programmed to its minimum, otherwise
+		 * we return a combination of the priority level and
+		 * subpriority, as determined by the setting of the
+		 * BPR, but without the full subpriority.
+		 */
+		val  = __vgic_v3_read_ap0rn(i);
+		val |= __vgic_v3_read_ap1rn(i);
+		if (!val) {
+			hap += 32;
+			continue;
+		}
+
+		return (hap + __ffs(val)) << __vgic_v3_bpr_min();
+	}
+
+	return GICv3_IDLE_PRIORITY;
+}
+
 static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr)
 {
 	return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
@@ -401,6 +484,83 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
 	return bpr;
 }
 
+/*
+ * Convert a priority to a preemption level, taking the relevant BPR
+ * into account by zeroing the sub-priority bits.
+ */
+static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
+{
+	unsigned int bpr;
+
+	if (!grp)
+		bpr = __vgic_v3_get_bpr0(vmcr) + 1;
+	else
+		bpr = __vgic_v3_get_bpr1(vmcr);
+
+	return pri & (GENMASK(7, 0) << bpr);
+}
+
+/*
+ * The priority value is independent of any of the BPR values, so we
+ * normalize it using the minumal BPR value. This guarantees that no
+ * matter what the guest does with its BPR, we can always set/get the
+ * same value of a priority.
+ */
+static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
+{
+	u8 pre, ap;
+	u32 val;
+	int apr;
+
+	pre = __vgic_v3_pri_to_pre(pri, vmcr, grp);
+	ap = pre >> __vgic_v3_bpr_min();
+	apr = ap / 32;
+
+	if (!grp) {
+		val = __vgic_v3_read_ap0rn(apr);
+		__vgic_v3_write_ap0rn(val | BIT(ap % 32), apr);
+	} else {
+		val = __vgic_v3_read_ap1rn(apr);
+		__vgic_v3_write_ap1rn(val | BIT(ap % 32), apr);
+	}
+}
+
+static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 lr_val;
+	u8 lr_prio, pmr;
+	int lr, grp;
+
+	grp = __vgic_v3_get_group(vcpu);
+
+	lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+	if (lr < 0)
+		goto spurious;
+
+	if (grp != !!(lr_val & ICH_LR_GROUP))
+		goto spurious;
+
+	pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+	lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+	if (pmr <= lr_prio)
+		goto spurious;
+
+	if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp))
+		goto spurious;
+
+	lr_val &= ~ICH_LR_STATE;
+	/* No active state for LPIs */
+	if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
+		lr_val |= ICH_LR_ACTIVE_BIT;
+	__gic_v3_set_lr(lr_val, lr);
+	__vgic_v3_set_active_priority(lr_prio, vmcr, grp);
+	vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+	return;
+
+spurious:
+	vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
+}
+
 static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
 {
 	vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
@@ -465,6 +625,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
 	is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
 
 	switch (sysreg) {
+	case SYS_ICC_IAR1_EL1:
+		fn = __vgic_v3_read_iar;
+		break;
 	case SYS_ICC_GRPEN1_EL1:
 		if (is_read)
 			fn = __vgic_v3_read_igrpen1;
-- 
cgit v1.2.3


From b6f49035b4bf6e2709f2a5fed3107f5438c1fd02 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Jun 2017 12:49:37 +0100
Subject: KVM: arm64: vgic-v3: Add ICV_EOIR1_EL1 handler

Add a handler for writing the guest's view of the ICC_EOIR1_EL1
register. This involves dropping the priority of the interrupt,
and deactivating it if required (EOImode == 0).

Tested-by: Alexander Graf <agraf@suse.de>
Acked-by: David Daney <david.daney@cavium.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 include/linux/irqchip/arm-gic-v3.h |   2 +
 virt/kvm/arm/hyp/vgic-v3-sr.c      | 120 +++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index d70668fae003..1f458ac6f494 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -417,6 +417,8 @@
 
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
+#define ICH_HCR_EOIcount_SHIFT		27
+#define ICH_HCR_EOIcount_MASK		(0x1f << ICH_HCR_EOIcount_SHIFT)
 
 #define ICH_VMCR_ACK_CTL_SHIFT		2
 #define ICH_VMCR_ACK_CTL_MASK		(1 << ICH_VMCR_ACK_CTL_SHIFT)
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 5a20f8d5bada..e9ff99112c4d 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -432,6 +432,26 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
 	return lr;
 }
 
+static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
+					       int intid, u64 *lr_val)
+{
+	unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+	int i;
+
+	for (i = 0; i < used_lrs; i++) {
+		u64 val = __gic_v3_get_lr(i);
+
+		if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid &&
+		    (val & ICH_LR_ACTIVE_BIT)) {
+			*lr_val = val;
+			return i;
+		}
+	}
+
+	*lr_val = ICC_IAR1_EL1_SPURIOUS;
+	return -1;
+}
+
 static int __hyp_text __vgic_v3_get_highest_active_priority(void)
 {
 	u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
@@ -525,6 +545,44 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
 	}
 }
 
+static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
+{
+	u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+	u32 hap = 0;
+	int i;
+
+	for (i = 0; i < nr_apr_regs; i++) {
+		u32 ap0, ap1;
+		int c0, c1;
+
+		ap0 = __vgic_v3_read_ap0rn(i);
+		ap1 = __vgic_v3_read_ap1rn(i);
+		if (!ap0 && !ap1) {
+			hap += 32;
+			continue;
+		}
+
+		c0 = ap0 ? __ffs(ap0) : 32;
+		c1 = ap1 ? __ffs(ap1) : 32;
+
+		/* Always clear the LSB, which is the highest priority */
+		if (c0 < c1) {
+			ap0 &= ~BIT(c0);
+			__vgic_v3_write_ap0rn(ap0, i);
+			hap += c0;
+		} else {
+			ap1 &= ~BIT(c1);
+			__vgic_v3_write_ap1rn(ap1, i);
+			hap += c1;
+		}
+
+		/* Rescale to 8 bits of priority */
+		return hap << __vgic_v3_bpr_min();
+	}
+
+	return GICv3_IDLE_PRIORITY;
+}
+
 static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
 {
 	u64 lr_val;
@@ -561,6 +619,65 @@ spurious:
 	vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
 }
 
+static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+{
+	lr_val &= ~ICH_LR_ACTIVE_BIT;
+	if (lr_val & ICH_LR_HW) {
+		u32 pid;
+
+		pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
+		gic_write_dir(pid);
+	}
+
+	__gic_v3_set_lr(lr_val, lr);
+}
+
+static void __hyp_text __vgic_v3_bump_eoicount(void)
+{
+	u32 hcr;
+
+	hcr = read_gicreg(ICH_HCR_EL2);
+	hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+	write_gicreg(hcr, ICH_HCR_EL2);
+}
+
+static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 vid = vcpu_get_reg(vcpu, rt);
+	u64 lr_val;
+	u8 lr_prio, act_prio;
+	int lr, grp;
+
+	grp = __vgic_v3_get_group(vcpu);
+
+	/* Drop priority in any case */
+	act_prio = __vgic_v3_clear_highest_active_priority();
+
+	/* If EOIing an LPI, no deactivate to be performed */
+	if (vid >= VGIC_MIN_LPI)
+		return;
+
+	/* EOImode == 1, nothing to be done here */
+	if (vmcr & ICH_VMCR_EOIM_MASK)
+		return;
+
+	lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+	if (lr == -1) {
+		__vgic_v3_bump_eoicount();
+		return;
+	}
+
+	lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+	/* If priorities or group do not match, the guest has fscked-up. */
+	if (grp != !!(lr_val & ICH_LR_GROUP) ||
+	    __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio)
+		return;
+
+	/* Let's now perform the deactivation */
+	__vgic_v3_clear_active_lr(lr, lr_val);
+}
+
 static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
 {
 	vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
@@ -628,6 +745,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
 	case SYS_ICC_IAR1_EL1:
 		fn = __vgic_v3_read_iar;
 		break;
+	case SYS_ICC_EOIR1_EL1:
+		fn = __vgic_v3_write_eoir;
+		break;
 	case SYS_ICC_GRPEN1_EL1:
 		if (is_read)
 			fn = __vgic_v3_read_igrpen1;
-- 
cgit v1.2.3


From 9c7bfc288c71068ab323b802dba2eb87fd08b127 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Jun 2017 12:49:40 +0100
Subject: KVM: arm64: vgic-v3: Enable trapping of Group-1 system registers

In order to be able to trap Group-1 GICv3 system registers, we need to
set ICH_HCR_EL2.TALL1 before entering the guest. This is conditionally
done after having restored the guest's state, and cleared on exit.

Tested-by: Alexander Graf <agraf@suse.de>
Acked-by: David Daney <david.daney@cavium.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 include/linux/irqchip/arm-gic-v3.h |  1 +
 virt/kvm/arm/hyp/vgic-v3-sr.c      | 11 +++++++++++
 virt/kvm/arm/vgic/vgic-v3.c        |  4 ++++
 3 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 1f458ac6f494..6b05d2ac8c54 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -417,6 +417,7 @@
 
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
+#define ICH_HCR_TALL1			(1 << 12)
 #define ICH_HCR_EOIcount_SHIFT		27
 #define ICH_HCR_EOIcount_MASK		(0x1f << ICH_HCR_EOIcount_SHIFT)
 
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index f031e8f088ae..a2a62f030341 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -258,6 +258,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 			cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
 		}
 	} else {
+		if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+			write_gicreg(0, ICH_HCR_EL2);
+
 		cpu_if->vgic_elrsr = 0xffff;
 		cpu_if->vgic_ap0r[0] = 0;
 		cpu_if->vgic_ap0r[1] = 0;
@@ -330,6 +333,14 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 
 		for (i = 0; i < used_lrs; i++)
 			__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+	} else {
+		/*
+		 * If we need to trap system registers, we must write
+		 * ICH_HCR_EL2 anyway, even if no interrupts are being
+		 * injected,
+		 */
+		if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+			write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
 	}
 
 	/*
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index fac6e23cd0b3..722bdebdfbb5 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -21,6 +21,8 @@
 
 #include "vgic.h"
 
+static bool group1_trap;
+
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -258,6 +260,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 
 	/* Get the show on the road... */
 	vgic_v3->vgic_hcr = ICH_HCR_EN;
+	if (group1_trap)
+		vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
 }
 
 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
-- 
cgit v1.2.3


From abf55766f7b062234083ff612446ff8d47e2417e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Jun 2017 12:49:45 +0100
Subject: KVM: arm64: vgic-v3: Enable trapping of Group-0 system registers

In order to be able to trap Group-0 GICv3 system registers, we need to
set ICH_HCR_EL2.TALL0 begore entering the guest. This is conditionnaly
done after having restored the guest's state, and cleared on exit.

Tested-by: Alexander Graf <agraf@suse.de>
Acked-by: David Daney <david.daney@cavium.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 virt/kvm/arm/vgic/vgic-v3.c        | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 6b05d2ac8c54..c7f31a962cfc 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -417,6 +417,7 @@
 
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
+#define ICH_HCR_TALL0			(1 << 11)
 #define ICH_HCR_TALL1			(1 << 12)
 #define ICH_HCR_EOIcount_SHIFT		27
 #define ICH_HCR_EOIcount_MASK		(0x1f << ICH_HCR_EOIcount_SHIFT)
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 4b2b62b73470..39046ee5be25 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -21,6 +21,7 @@
 
 #include "vgic.h"
 
+static bool group0_trap;
 static bool group1_trap;
 
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
@@ -260,6 +261,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 
 	/* Get the show on the road... */
 	vgic_v3->vgic_hcr = ICH_HCR_EN;
+	if (group0_trap)
+		vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
 	if (group1_trap)
 		vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
 }
@@ -492,7 +495,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
 	if (kvm_vgic_global_state.vcpu_base == 0)
 		kvm_info("disabling GICv2 emulation\n");
 
-	if (group1_trap) {
+	if (group0_trap || group1_trap) {
 		kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n");
 		static_branch_enable(&vgic_v3_cpuif_trap);
 	}
-- 
cgit v1.2.3


From ff89511ef29b794d6a9c6b62f5ea76fc013cdae7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Jun 2017 12:49:53 +0100
Subject: KVM: arm64: Enable GICv3 common sysreg trapping via command-line

Now that we're able to safely handle common sysreg access, let's
give the user the opportunity to enable it by passing a specific
command-line option (vgic_v3.common_trap).

Tested-by: Alexander Graf <agraf@suse.de>
Acked-by: David Daney <david.daney@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  4 ++++
 include/linux/irqchip/arm-gic-v3.h              |  1 +
 virt/kvm/arm/vgic/vgic-v3.c                     | 11 ++++++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 88bdc421351f..aa8341e73b35 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1837,6 +1837,10 @@
 			[KVM,ARM] Trap guest accesses to GICv3 group-1
 			system registers
 
+	kvm-arm.vgic_v3_common_trap=
+			[KVM,ARM] Trap guest accesses to GICv3 common
+			system registers
+
 	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
 			(virtualized MMU) support on capable Intel chips.
 			Default is 1 (enabled)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c7f31a962cfc..6a1f87ff94e2 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -417,6 +417,7 @@
 
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
+#define ICH_HCR_TC			(1 << 10)
 #define ICH_HCR_TALL0			(1 << 11)
 #define ICH_HCR_TALL1			(1 << 12)
 #define ICH_HCR_EOIcount_SHIFT		27
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 35c00efc110b..91cf8b4f01f1 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -23,6 +23,7 @@
 
 static bool group0_trap;
 static bool group1_trap;
+static bool common_trap;
 
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
 {
@@ -265,6 +266,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 		vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
 	if (group1_trap)
 		vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
+	if (common_trap)
+		vgic_v3->vgic_hcr |= ICH_HCR_TC;
 }
 
 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -450,6 +453,12 @@ static int __init early_group1_trap_cfg(char *buf)
 }
 early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);
 
+static int __init early_common_trap_cfg(char *buf)
+{
+	return strtobool(buf, &common_trap);
+}
+early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
+
 /**
  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
  * @node:	pointer to the DT node
@@ -508,7 +517,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
 	}
 #endif
 
-	if (group0_trap || group1_trap) {
+	if (group0_trap || group1_trap || common_trap) {
 		kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n");
 		static_branch_enable(&vgic_v3_cpuif_trap);
 	}
-- 
cgit v1.2.3


From 91e0bf81258c07aad27a4833368569ce873cd83e Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 14 Jun 2017 17:37:13 +0100
Subject: ACPI/IORT: Remove iort_node_match()

Commit 316ca8804ea8 ("ACPI/IORT: Remove linker section for IORT entries
probing") removed the linker section for IORT entries probing.

Since those IORT entries were the only iort_node_match() interface
users, the iort_node_match() became obsolete and can then be removed.

Remove the ACPI IORT iort_node_match() interface from the kernel.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c | 15 ---------------
 include/linux/acpi_iort.h |  2 --
 2 files changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index c5fecf97ee2f..8c77598dd6aa 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -234,21 +234,6 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,
 	return NULL;
 }
 
-static acpi_status
-iort_match_type_callback(struct acpi_iort_node *node, void *context)
-{
-	return AE_OK;
-}
-
-bool iort_node_match(u8 type)
-{
-	struct acpi_iort_node *node;
-
-	node = iort_scan_node(type, iort_match_type_callback, NULL);
-
-	return node != NULL;
-}
-
 static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
 					    void *context)
 {
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 3ff9acea8616..8379d406ad2e 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
 void acpi_iort_init(void);
-bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
 void acpi_configure_pmsi_domain(struct device *dev);
@@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
 #else
 static inline void acpi_iort_init(void) { }
-static inline bool iort_node_match(u8 type) { return false; }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 { return req_id; }
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
-- 
cgit v1.2.3


From c926820085437a27b27e78996b2c7a5ad94e8055 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 15 Jun 2017 13:46:00 +0200
Subject: firmware: dmi_scan: Make dmi_walk and dmi_walk_early return real
 error codes

Currently they return -1 on error, which will confuse callers if
they try to interpret it as a normal negative error code.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
---
 drivers/firmware/dmi_scan.c | 9 +++++----
 include/linux/dmi.h         | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 82ee042f075c..fc30249132f5 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -144,7 +144,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 
 	buf = dmi_early_remap(dmi_base, orig_dmi_len);
 	if (buf == NULL)
-		return -1;
+		return -ENOMEM;
 
 	dmi_decode_table(buf, decode, NULL);
 
@@ -1008,7 +1008,8 @@ EXPORT_SYMBOL(dmi_get_date);
  *	@decode: Callback function
  *	@private_data: Private data to be passed to the callback function
  *
- *	Returns -1 when the DMI table can't be reached, 0 on success.
+ *	Returns 0 on success, -ENXIO if DMI is not selected or not present,
+ *	or a different negative error code if DMI walking fails.
  */
 int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	     void *private_data)
@@ -1016,11 +1017,11 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	u8 *buf;
 
 	if (!dmi_available)
-		return -1;
+		return -ENXIO;
 
 	buf = dmi_remap(dmi_base, dmi_len);
 	if (buf == NULL)
-		return -1;
+		return -ENOMEM;
 
 	dmi_decode_table(buf, decode, private_data);
 
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 5e9c74cf8894..9bbf21a516e4 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -136,7 +136,7 @@ static inline int dmi_name_in_vendors(const char *s) { return 0; }
 static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
-	void *private_data) { return -1; }
+	void *private_data) { return -ENXIO; }
 static inline bool dmi_match(enum dmi_field f, const char *str)
 	{ return false; }
 static inline void dmi_memdev_name(u16 handle, const char **bank,
-- 
cgit v1.2.3


From 466749f13e33d892cf9263d7efbc0ea713c23ed7 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 10 Apr 2017 12:27:01 +0200
Subject: gpu: host1x: Flesh out kerneldoc

Improve kerneldoc for the public parts of the host1x infrastructure in
preparation for adding driver-specific part to the GPU documentation.

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/bus.c    | 75 +++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/host1x/syncpt.c | 81 +++++++++++++++++++++++++++++++++++++++------
 include/linux/host1x.h      | 25 ++++++++++++++
 3 files changed, 170 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 561831e1ae2c..a048e3ac523d 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -40,6 +40,9 @@ struct host1x_subdev {
 
 /**
  * host1x_subdev_add() - add a new subdevice with an associated device node
+ * @device: host1x device to add the subdevice to
+ * @driver: host1x driver
+ * @np: device node
  */
 static int host1x_subdev_add(struct host1x_device *device,
 			     struct device_node *np)
@@ -62,6 +65,7 @@ static int host1x_subdev_add(struct host1x_device *device,
 
 /**
  * host1x_subdev_del() - remove subdevice
+ * @subdev: subdevice to remove
  */
 static void host1x_subdev_del(struct host1x_subdev *subdev)
 {
@@ -72,6 +76,8 @@ static void host1x_subdev_del(struct host1x_subdev *subdev)
 
 /**
  * host1x_device_parse_dt() - scan device tree and add matching subdevices
+ * @device: host1x logical device
+ * @driver: host1x driver
  */
 static int host1x_device_parse_dt(struct host1x_device *device,
 				  struct host1x_driver *driver)
@@ -166,6 +172,16 @@ static void host1x_subdev_unregister(struct host1x_device *device,
 	mutex_unlock(&device->subdevs_lock);
 }
 
+/**
+ * host1x_device_init() - initialize a host1x logical device
+ * @device: host1x logical device
+ *
+ * The driver for the host1x logical device can call this during execution of
+ * its &host1x_driver.probe implementation to initialize each of its clients.
+ * The client drivers access the subsystem specific driver data using the
+ * &host1x_client.parent field and driver data associated with it (usually by
+ * calling dev_get_drvdata()).
+ */
 int host1x_device_init(struct host1x_device *device)
 {
 	struct host1x_client *client;
@@ -192,6 +208,15 @@ int host1x_device_init(struct host1x_device *device)
 }
 EXPORT_SYMBOL(host1x_device_init);
 
+/**
+ * host1x_device_exit() - uninitialize host1x logical device
+ * @device: host1x logical device
+ *
+ * When the driver for a host1x logical device is unloaded, it can call this
+ * function to tear down each of its clients. Typically this is done after a
+ * subsystem-specific data structure is removed and the functionality can no
+ * longer be used.
+ */
 int host1x_device_exit(struct host1x_device *device)
 {
 	struct host1x_client *client;
@@ -446,6 +471,14 @@ static void host1x_detach_driver(struct host1x *host1x,
 	mutex_unlock(&host1x->devices_lock);
 }
 
+/**
+ * host1x_register() - register a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to register a host1x controller with
+ * the infrastructure. Note that all Tegra SoC generations have only ever come
+ * with a single host1x instance, so this function is somewhat academic.
+ */
 int host1x_register(struct host1x *host1x)
 {
 	struct host1x_driver *driver;
@@ -464,6 +497,13 @@ int host1x_register(struct host1x *host1x)
 	return 0;
 }
 
+/**
+ * host1x_unregister() - unregister a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to remove a host1x controller from
+ * the infrastructure.
+ */
 int host1x_unregister(struct host1x *host1x)
 {
 	struct host1x_driver *driver;
@@ -513,6 +553,16 @@ static void host1x_device_shutdown(struct device *dev)
 		driver->shutdown(device);
 }
 
+/**
+ * host1x_driver_register_full() - register a host1x driver
+ * @driver: host1x driver
+ * @owner: owner module
+ *
+ * Drivers for host1x logical devices call this function to register a driver
+ * with the infrastructure. Note that since these drive logical devices, the
+ * registration of the driver actually triggers tho logical device creation.
+ * A logical device will be created for each host1x instance.
+ */
 int host1x_driver_register_full(struct host1x_driver *driver,
 				struct module *owner)
 {
@@ -541,6 +591,13 @@ int host1x_driver_register_full(struct host1x_driver *driver,
 }
 EXPORT_SYMBOL(host1x_driver_register_full);
 
+/**
+ * host1x_driver_unregister() - unregister a host1x driver
+ * @driver: host1x driver
+ *
+ * Unbinds the driver from each of the host1x logical devices that it is
+ * bound to, effectively removing the subsystem devices that they represent.
+ */
 void host1x_driver_unregister(struct host1x_driver *driver)
 {
 	driver_unregister(&driver->driver);
@@ -551,6 +608,17 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 }
 EXPORT_SYMBOL(host1x_driver_unregister);
 
+/**
+ * host1x_client_register() - register a host1x client
+ * @client: host1x client
+ *
+ * Registers a host1x client with each host1x controller instance. Note that
+ * each client will only match their parent host1x controller and will only be
+ * associated with that instance. Once all clients have been registered with
+ * their parent host1x controller, the infrastructure will set up the logical
+ * device and call host1x_device_init(), which will in turn call each client's
+ * &host1x_client_ops.init implementation.
+ */
 int host1x_client_register(struct host1x_client *client)
 {
 	struct host1x *host1x;
@@ -576,6 +644,13 @@ int host1x_client_register(struct host1x_client *client)
 }
 EXPORT_SYMBOL(host1x_client_register);
 
+/**
+ * host1x_client_unregister() - unregister a host1x client
+ * @client: host1x client
+ *
+ * Removes a host1x client from its host1x controller instance. If a logical
+ * device has already been initialized, it will be torn down.
+ */
 int host1x_client_unregister(struct host1x_client *client)
 {
 	struct host1x_client *c;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 0ac026cdc30c..048ac9e344ce 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -99,14 +99,24 @@ unlock:
 	return NULL;
 }
 
+/**
+ * host1x_syncpt_id() - retrieve syncpoint ID
+ * @sp: host1x syncpoint
+ *
+ * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is
+ * often used as a value to program into registers that control how hardware
+ * blocks interact with syncpoints.
+ */
 u32 host1x_syncpt_id(struct host1x_syncpt *sp)
 {
 	return sp->id;
 }
 EXPORT_SYMBOL(host1x_syncpt_id);
 
-/*
- * Updates the value sent to hardware.
+/**
+ * host1x_syncpt_incr_max() - update the value sent to hardware
+ * @sp: host1x syncpoint
+ * @incrs: number of increments
  */
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
 {
@@ -175,8 +185,9 @@ u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
 	return sp->base_val;
 }
 
-/*
- * Increment syncpoint value from cpu, updating cache
+/**
+ * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache
+ * @sp: host1x syncpoint
  */
 int host1x_syncpt_incr(struct host1x_syncpt *sp)
 {
@@ -195,8 +206,12 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
 	return host1x_syncpt_is_expired(sp, thresh);
 }
 
-/*
- * Main entrypoint for syncpoint value waits.
+/**
+ * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
+ * @sp: host1x syncpoint
+ * @thresh: threshold
+ * @timeout: maximum time to wait for the syncpoint to reach the given value
+ * @value: return location for the syncpoint value
  */
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		       u32 *value)
@@ -402,6 +417,16 @@ int host1x_syncpt_init(struct host1x *host)
 	return 0;
 }
 
+/**
+ * host1x_syncpt_request() - request a syncpoint
+ * @dev: device requesting the syncpoint
+ * @flags: flags
+ *
+ * host1x client drivers can use this function to allocate a syncpoint for
+ * subsequent use. A syncpoint returned by this function will be reserved for
+ * use by the client exclusively. When no longer using a syncpoint, a host1x
+ * client driver needs to release it using host1x_syncpt_free().
+ */
 struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 					    unsigned long flags)
 {
@@ -411,6 +436,16 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 }
 EXPORT_SYMBOL(host1x_syncpt_request);
 
+/**
+ * host1x_syncpt_free() - free a requested syncpoint
+ * @sp: host1x syncpoint
+ *
+ * Release a syncpoint previously allocated using host1x_syncpt_request(). A
+ * host1x client driver should call this when the syncpoint is no longer in
+ * use. Note that client drivers must ensure that the syncpoint doesn't remain
+ * under the control of hardware after calling this function, otherwise two
+ * clients may end up trying to access the same syncpoint concurrently.
+ */
 void host1x_syncpt_free(struct host1x_syncpt *sp)
 {
 	if (!sp)
@@ -438,9 +473,12 @@ void host1x_syncpt_deinit(struct host1x *host)
 		kfree(sp->name);
 }
 
-/*
- * Read max. It indicates how many operations there are in queue, either in
- * channel or in a software thread.
+/**
+ * host1x_syncpt_read_max() - read maximum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The maximum syncpoint value indicates how many operations there are in
+ * queue, either in channel or in a software thread.
  */
 u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 {
@@ -450,8 +488,12 @@ u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_read_max);
 
-/*
- * Read min, which is a shadow of the current sync point value in hardware.
+/**
+ * host1x_syncpt_read_min() - read minimum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The minimum syncpoint value is a shadow of the current sync point value in
+ * hardware.
  */
 u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 {
@@ -461,6 +503,10 @@ u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_read_min);
 
+/**
+ * host1x_syncpt_read() - read the current syncpoint value
+ * @sp: host1x syncpoint
+ */
 u32 host1x_syncpt_read(struct host1x_syncpt *sp)
 {
 	return host1x_syncpt_load(sp);
@@ -482,6 +528,11 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
 	return host->info->nb_mlocks;
 }
 
+/**
+ * host1x_syncpt_get() - obtain a syncpoint by ID
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
 struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 {
 	if (id >= host->info->nb_pts)
@@ -491,12 +542,20 @@ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 }
 EXPORT_SYMBOL(host1x_syncpt_get);
 
+/**
+ * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint
+ * @sp: host1x syncpoint
+ */
 struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp)
 {
 	return sp ? sp->base : NULL;
 }
 EXPORT_SYMBOL(host1x_syncpt_get_base);
 
+/**
+ * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base
+ * @base: host1x syncpoint wait base
+ */
 u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base)
 {
 	return base->id;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 3d04aa1dc83e..840a8ad627b2 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -32,11 +32,27 @@ enum host1x_class {
 
 struct host1x_client;
 
+/**
+ * struct host1x_client_ops - host1x client operations
+ * @init: host1x client initialization code
+ * @exit: host1x client tear down code
+ */
 struct host1x_client_ops {
 	int (*init)(struct host1x_client *client);
 	int (*exit)(struct host1x_client *client);
 };
 
+/**
+ * struct host1x_client - host1x client structure
+ * @list: list node for the host1x client
+ * @parent: pointer to struct device representing the host1x controller
+ * @dev: pointer to struct device backing this host1x client
+ * @ops: host1x client operations
+ * @class: host1x class represented by this client
+ * @channel: host1x channel associated with this client
+ * @syncpts: array of syncpoints requested for this client
+ * @num_syncpts: number of syncpoints requested for this client
+ */
 struct host1x_client {
 	struct list_head list;
 	struct device *parent;
@@ -251,6 +267,15 @@ void host1x_job_unpin(struct host1x_job *job);
 
 struct host1x_device;
 
+/**
+ * struct host1x_driver - host1x logical device driver
+ * @driver: core driver
+ * @subdevs: table of OF device IDs matching subdevices for this driver
+ * @list: list node for the driver
+ * @probe: called when the host1x logical device is probed
+ * @remove: called when the host1x logical device is removed
+ * @shutdown: called when the host1x logical device is shut down
+ */
 struct host1x_driver {
 	struct device_driver driver;
 
-- 
cgit v1.2.3


From d0fbbdff2e19aabccc1107b7e12ab9f3cbf626ef Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Thu, 15 Jun 2017 02:18:27 +0300
Subject: drm/tegra: Correct copying of waitchecks and disable them in the
 'submit' IOCTL

The waitchecks along with multiple syncpoints per submit are not ready
for use yet, let's forbid them for now.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 60 ++++++++++++++++++++++++++++++++++++++++++---
 drivers/gpu/host1x/job.h    |  7 ------
 include/linux/host1x.h      |  7 ++++++
 3 files changed, 63 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 51e20e015053..0928f2bb4203 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -349,6 +349,36 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 	return 0;
 }
 
+static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest,
+					 struct drm_tegra_waitchk __user *src,
+					 struct drm_file *file)
+{
+	u32 cmdbuf;
+	int err;
+
+	err = get_user(cmdbuf, &src->handle);
+	if (err < 0)
+		return err;
+
+	err = get_user(dest->offset, &src->offset);
+	if (err < 0)
+		return err;
+
+	err = get_user(dest->syncpt_id, &src->syncpt);
+	if (err < 0)
+		return err;
+
+	err = get_user(dest->thresh, &src->thresh);
+	if (err < 0)
+		return err;
+
+	dest->bo = host1x_bo_lookup(file, cmdbuf);
+	if (!dest->bo)
+		return -ENOENT;
+
+	return 0;
+}
+
 int tegra_drm_submit(struct tegra_drm_context *context,
 		     struct drm_tegra_submit *args, struct drm_device *drm,
 		     struct drm_file *file)
@@ -370,6 +400,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	if (args->num_syncpts != 1)
 		return -EINVAL;
 
+	/* We don't yet support waitchks */
+	if (args->num_waitchks != 0)
+		return -EINVAL;
+
 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
 			       args->num_relocs, args->num_waitchks);
 	if (!job)
@@ -458,10 +492,28 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		}
 	}
 
-	if (copy_from_user(job->waitchk, waitchks,
-			   sizeof(*waitchks) * num_waitchks)) {
-		err = -EFAULT;
-		goto fail;
+	/* copy and resolve waitchks from submit */
+	while (num_waitchks--) {
+		struct host1x_waitchk *wait = &job->waitchk[num_waitchks];
+		struct tegra_bo *obj;
+
+		err = host1x_waitchk_copy_from_user(wait,
+						    &waitchks[num_waitchks],
+						    file);
+		if (err < 0)
+			goto fail;
+
+		obj = host1x_to_tegra_bo(wait->bo);
+
+		/*
+		 * The unaligned offset will cause an unaligned write during
+		 * of the waitchks patching, corrupting the commands stream.
+		 */
+		if (wait->offset & 3 ||
+		    wait->offset >= obj->gem.size) {
+			err = -EINVAL;
+			goto fail;
+		}
 	}
 
 	if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 878239c476d2..0debd93a1849 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -34,13 +34,6 @@ struct host1x_cmdbuf {
 	u32 pad;
 };
 
-struct host1x_waitchk {
-	struct host1x_bo *bo;
-	u32 offset;
-	u32 syncpt_id;
-	u32 thresh;
-};
-
 struct host1x_job_unpin_data {
 	struct host1x_bo *bo;
 	struct sg_table *sgt;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 840a8ad627b2..ba0b245da732 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -193,6 +193,13 @@ struct host1x_reloc {
 	unsigned long shift;
 };
 
+struct host1x_waitchk {
+	struct host1x_bo *bo;
+	u32 offset;
+	u32 syncpt_id;
+	u32 thresh;
+};
+
 struct host1x_job {
 	/* When refcount goes to zero, job can be freed */
 	struct kref ref;
-- 
cgit v1.2.3


From 0f563a4bf66e5182f0882efee398f7e6bc0bb1be Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Thu, 15 Jun 2017 02:18:37 +0300
Subject: gpu: host1x: Forbid unrelated SETCLASS opcode in the firewall

Several channels could be made to write the same unit concurrently via
the SETCLASS opcode, trusting userspace is a bad idea. It should be
possible to drop the per-client channel reservation and add a per-unit
locking by inserting MLOCK's to the command stream to re-allow the
SETCLASS opcode, but it will be much more work. Let's forbid the
unit-unrelated class changes for now.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Erik Faye-Lund <kusmabite@gmail.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c  |  1 +
 drivers/gpu/drm/tegra/drm.h  |  1 +
 drivers/gpu/drm/tegra/gr2d.c |  7 +++++++
 drivers/gpu/host1x/job.c     | 24 ++++++++++++++++++++----
 include/linux/host1x.h       |  3 +++
 5 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index b44f1eddb570..4a46ba846a0f 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -532,6 +532,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	}
 
 	job->is_addr_reg = context->client->ops->is_addr_reg;
+	job->is_valid_class = context->client->ops->is_valid_class;
 	job->syncpt_incrs = syncpt.incrs;
 	job->syncpt_id = syncpt.id;
 	job->timeout = 10000;
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 85aa2e3d9d4e..6d6da01282f3 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -83,6 +83,7 @@ struct tegra_drm_client_ops {
 			    struct tegra_drm_context *context);
 	void (*close_channel)(struct tegra_drm_context *context);
 	int (*is_addr_reg)(struct device *dev, u32 class, u32 offset);
+	int (*is_valid_class)(u32 class);
 	int (*submit)(struct tegra_drm_context *context,
 		      struct drm_tegra_submit *args, struct drm_device *drm,
 		      struct drm_file *file);
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 02cd3e37a6ec..fbe0b8b25b42 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -109,10 +109,17 @@ static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 offset)
 	return 0;
 }
 
+static int gr2d_is_valid_class(u32 class)
+{
+	return (class == HOST1X_CLASS_GR2D ||
+		class == HOST1X_CLASS_GR2D_SB);
+}
+
 static const struct tegra_drm_client_ops gr2d_ops = {
 	.open_channel = gr2d_open_channel,
 	.close_channel = gr2d_close_channel,
 	.is_addr_reg = gr2d_is_addr_reg,
+	.is_valid_class = gr2d_is_valid_class,
 	.submit = tegra_drm_submit,
 };
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 54230ec4f81e..ef746f7afb88 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -356,6 +356,9 @@ struct host1x_firewall {
 
 static int check_register(struct host1x_firewall *fw, unsigned long offset)
 {
+	if (!fw->job->is_addr_reg)
+		return 0;
+
 	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
 		if (!fw->num_relocs)
 			return -EINVAL;
@@ -370,6 +373,19 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset)
 	return 0;
 }
 
+static int check_class(struct host1x_firewall *fw, u32 class)
+{
+	if (!fw->job->is_valid_class) {
+		if (fw->class != class)
+			return -EINVAL;
+	} else {
+		if (!fw->job->is_valid_class(fw->class))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int check_mask(struct host1x_firewall *fw)
 {
 	u32 mask = fw->mask;
@@ -443,11 +459,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 {
 	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
 		(g->offset / sizeof(u32));
+	u32 job_class = fw->class;
 	int err = 0;
 
-	if (!fw->job->is_addr_reg)
-		return 0;
-
 	fw->words = g->words;
 	fw->cmdbuf = g->bo;
 	fw->offset = 0;
@@ -467,7 +481,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 			fw->class = word >> 6 & 0x3ff;
 			fw->mask = word & 0x3f;
 			fw->reg = word >> 16 & 0xfff;
-			err = check_mask(fw);
+			err = check_class(fw, job_class);
+			if (!err)
+				err = check_mask(fw);
 			if (err)
 				goto out;
 			break;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index ba0b245da732..b5358f855d9e 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -251,6 +251,9 @@ struct host1x_job {
 	/* Check if register is marked as an address reg */
 	int (*is_addr_reg)(struct device *dev, u32 reg, u32 class);
 
+	/* Check if class belongs to the unit */
+	int (*is_valid_class)(u32 class);
+
 	/* Request a SETCLASS to this class */
 	u32 class;
 
-- 
cgit v1.2.3


From a2b78b0d53f0808ebc2a0368b589a5cb6b672294 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Thu, 15 Jun 2017 02:18:38 +0300
Subject: gpu: host1x: Correct swapped arguments in the is_addr_reg()
 definition

Arguments of the .is_addr_reg() are swapped in the definition of the
function, that is quite confusing.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Erik Faye-Lund <kusmabite@gmail.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/host1x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index b5358f855d9e..476da0e06bb2 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -249,7 +249,7 @@ struct host1x_job {
 	u8 *gather_copy_mapped;
 
 	/* Check if register is marked as an address reg */
-	int (*is_addr_reg)(struct device *dev, u32 reg, u32 class);
+	int (*is_addr_reg)(struct device *dev, u32 class, u32 reg);
 
 	/* Check if class belongs to the unit */
 	int (*is_valid_class)(u32 class);
-- 
cgit v1.2.3


From 8474b02531c4881a762c52ef869c52429e38633f Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 15 Jun 2017 02:18:42 +0300
Subject: gpu: host1x: Refactor channel allocation code

This is largely a rewrite of the Host1x channel allocation code, bringing
several changes:

- The previous code could deadlock due to an interaction
  between the 'reflock' mutex and CDMA timeout handling.
  This gets rid of the mutex.
- Support for more than 32 channels, required for Tegra186
- General refactoring, including better encapsulation
  of channel ownership handling into channel.c

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr2d.c       |   4 +-
 drivers/gpu/drm/tegra/gr3d.c       |   4 +-
 drivers/gpu/drm/tegra/vic.c        |   4 +-
 drivers/gpu/host1x/channel.c       | 147 +++++++++++++++++++++++--------------
 drivers/gpu/host1x/channel.h       |  21 ++++--
 drivers/gpu/host1x/debug.c         |  47 +++++-------
 drivers/gpu/host1x/dev.c           |   7 +-
 drivers/gpu/host1x/dev.h           |   6 +-
 drivers/gpu/host1x/hw/channel_hw.c |   4 -
 include/linux/host1x.h             |   1 -
 10 files changed, 135 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index fbe0b8b25b42..6ea070da7718 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -38,7 +38,7 @@ static int gr2d_init(struct host1x_client *client)
 
 	client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
 	if (!client->syncpts[0]) {
-		host1x_channel_free(gr2d->channel);
+		host1x_channel_put(gr2d->channel);
 		return -ENOMEM;
 	}
 
@@ -57,7 +57,7 @@ static int gr2d_exit(struct host1x_client *client)
 		return err;
 
 	host1x_syncpt_free(client->syncpts[0]);
-	host1x_channel_free(gr2d->channel);
+	host1x_channel_put(gr2d->channel);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 13f0d1b7cd98..cee2ab645cde 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -48,7 +48,7 @@ static int gr3d_init(struct host1x_client *client)
 
 	client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
 	if (!client->syncpts[0]) {
-		host1x_channel_free(gr3d->channel);
+		host1x_channel_put(gr3d->channel);
 		return -ENOMEM;
 	}
 
@@ -67,7 +67,7 @@ static int gr3d_exit(struct host1x_client *client)
 		return err;
 
 	host1x_syncpt_free(client->syncpts[0]);
-	host1x_channel_free(gr3d->channel);
+	host1x_channel_put(gr3d->channel);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index cd804e404a11..47cb1aaa58b1 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -182,7 +182,7 @@ static int vic_init(struct host1x_client *client)
 free_syncpt:
 	host1x_syncpt_free(client->syncpts[0]);
 free_channel:
-	host1x_channel_free(vic->channel);
+	host1x_channel_put(vic->channel);
 detach_device:
 	if (tegra->domain)
 		iommu_detach_device(tegra->domain, vic->dev);
@@ -203,7 +203,7 @@ static int vic_exit(struct host1x_client *client)
 		return err;
 
 	host1x_syncpt_free(client->syncpts[0]);
-	host1x_channel_free(vic->channel);
+	host1x_channel_put(vic->channel);
 
 	if (vic->domain) {
 		iommu_detach_device(vic->domain, vic->dev);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 8f437d924c10..db9b91d1384c 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -24,19 +24,33 @@
 #include "job.h"
 
 /* Constructor for the host1x device list */
-int host1x_channel_list_init(struct host1x *host)
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels)
 {
-	INIT_LIST_HEAD(&host->chlist.list);
-	mutex_init(&host->chlist_mutex);
-
-	if (host->info->nb_channels > BITS_PER_LONG) {
-		WARN(1, "host1x hardware has more channels than supported by the driver\n");
-		return -ENOSYS;
+	chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel),
+				   GFP_KERNEL);
+	if (!chlist->channels)
+		return -ENOMEM;
+
+	chlist->allocated_channels =
+		kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
+			GFP_KERNEL);
+	if (!chlist->allocated_channels) {
+		kfree(chlist->channels);
+		return -ENOMEM;
 	}
 
+	bitmap_zero(chlist->allocated_channels, num_channels);
+
 	return 0;
 }
 
+void host1x_channel_list_free(struct host1x_channel_list *chlist)
+{
+	kfree(chlist->allocated_channels);
+	kfree(chlist->channels);
+}
+
 int host1x_job_submit(struct host1x_job *job)
 {
 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
@@ -47,86 +61,107 @@ EXPORT_SYMBOL(host1x_job_submit);
 
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel)
 {
-	int err = 0;
+	kref_get(&channel->refcount);
 
-	mutex_lock(&channel->reflock);
+	return channel;
+}
+EXPORT_SYMBOL(host1x_channel_get);
 
-	if (channel->refcount == 0)
-		err = host1x_cdma_init(&channel->cdma);
+/**
+ * host1x_channel_get_index() - Attempt to get channel reference by index
+ * @host: Host1x device object
+ * @index: Index of channel
+ *
+ * If channel number @index is currently allocated, increase its refcount
+ * and return a pointer to it. Otherwise, return NULL.
+ */
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index)
+{
+	struct host1x_channel *ch = &host->channel_list.channels[index];
 
-	if (!err)
-		channel->refcount++;
+	if (!kref_get_unless_zero(&ch->refcount))
+		return NULL;
 
-	mutex_unlock(&channel->reflock);
+	return ch;
+}
+
+static void release_channel(struct kref *kref)
+{
+	struct host1x_channel *channel =
+		container_of(kref, struct host1x_channel, refcount);
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	struct host1x_channel_list *chlist = &host->channel_list;
+
+	host1x_hw_cdma_stop(host, &channel->cdma);
+	host1x_cdma_deinit(&channel->cdma);
 
-	return err ? NULL : channel;
+	clear_bit(channel->id, chlist->allocated_channels);
 }
-EXPORT_SYMBOL(host1x_channel_get);
 
 void host1x_channel_put(struct host1x_channel *channel)
 {
-	mutex_lock(&channel->reflock);
+	kref_put(&channel->refcount, release_channel);
+}
+EXPORT_SYMBOL(host1x_channel_put);
 
-	if (channel->refcount == 1) {
-		struct host1x *host = dev_get_drvdata(channel->dev->parent);
+static struct host1x_channel *acquire_unused_channel(struct host1x *host)
+{
+	struct host1x_channel_list *chlist = &host->channel_list;
+	unsigned int max_channels = host->info->nb_channels;
+	unsigned int index;
 
-		host1x_hw_cdma_stop(host, &channel->cdma);
-		host1x_cdma_deinit(&channel->cdma);
+	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
+	if (index >= max_channels) {
+		dev_err(host->dev, "failed to find free channel\n");
+		return NULL;
 	}
 
-	channel->refcount--;
+	chlist->channels[index].id = index;
 
-	mutex_unlock(&channel->reflock);
+	set_bit(index, chlist->allocated_channels);
+
+	return &chlist->channels[index];
 }
-EXPORT_SYMBOL(host1x_channel_put);
 
+/**
+ * host1x_channel_request() - Allocate a channel
+ * @device: Host1x unit this channel will be used to send commands to
+ *
+ * Allocates a new host1x channel for @device. If there are no free channels,
+ * this will sleep until one becomes available. May return NULL if CDMA
+ * initialization fails.
+ */
 struct host1x_channel *host1x_channel_request(struct device *dev)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	unsigned int max_channels = host->info->nb_channels;
-	struct host1x_channel *channel = NULL;
-	unsigned long index;
+	struct host1x_channel_list *chlist = &host->channel_list;
+	struct host1x_channel *channel;
 	int err;
 
-	mutex_lock(&host->chlist_mutex);
+	channel = acquire_unused_channel(host);
+	if (!channel)
+		return NULL;
 
-	index = find_first_zero_bit(&host->allocated_channels, max_channels);
-	if (index >= max_channels)
-		goto fail;
+	kref_init(&channel->refcount);
+	mutex_init(&channel->submitlock);
+	channel->dev = dev;
 
-	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
-	if (!channel)
+	err = host1x_hw_channel_init(host, channel, channel->id);
+	if (err < 0)
 		goto fail;
 
-	err = host1x_hw_channel_init(host, channel, index);
+	err = host1x_cdma_init(&channel->cdma);
 	if (err < 0)
 		goto fail;
 
-	/* Link device to host1x_channel */
-	channel->dev = dev;
-
-	/* Add to channel list */
-	list_add_tail(&channel->list, &host->chlist.list);
-
-	host->allocated_channels |= BIT(index);
-
-	mutex_unlock(&host->chlist_mutex);
 	return channel;
 
 fail:
-	dev_err(dev, "failed to init channel\n");
-	kfree(channel);
-	mutex_unlock(&host->chlist_mutex);
-	return NULL;
-}
-EXPORT_SYMBOL(host1x_channel_request);
+	clear_bit(channel->id, chlist->allocated_channels);
 
-void host1x_channel_free(struct host1x_channel *channel)
-{
-	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	dev_err(dev, "failed to initialize channel\n");
 
-	host->allocated_channels &= ~BIT(channel->id);
-	list_del(&channel->list);
-	kfree(channel);
+	return NULL;
 }
-EXPORT_SYMBOL(host1x_channel_free);
+EXPORT_SYMBOL(host1x_channel_request);
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index df767cf90d51..7068e42d42df 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -20,17 +20,21 @@
 #define __HOST1X_CHANNEL_H
 
 #include <linux/io.h>
+#include <linux/kref.h>
 
 #include "cdma.h"
 
 struct host1x;
+struct host1x_channel;
 
-struct host1x_channel {
-	struct list_head list;
+struct host1x_channel_list {
+	struct host1x_channel *channels;
+	unsigned long *allocated_channels;
+};
 
-	unsigned int refcount;
+struct host1x_channel {
+	struct kref refcount;
 	unsigned int id;
-	struct mutex reflock;
 	struct mutex submitlock;
 	void __iomem *regs;
 	struct device *dev;
@@ -38,9 +42,10 @@ struct host1x_channel {
 };
 
 /* channel list operations */
-int host1x_channel_list_init(struct host1x *host);
-
-#define host1x_for_each_channel(host, channel)				\
-	list_for_each_entry(channel, &host->chlist.list, list)
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels);
+void host1x_channel_list_free(struct host1x_channel_list *chlist);
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index);
 
 #endif
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index d9330fcc62ad..2aae0e63214c 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -43,24 +43,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 	o->fn(o->ctx, o->buf, len);
 }
 
-static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
+static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
 {
 	struct host1x *m = dev_get_drvdata(ch->dev->parent);
 	struct output *o = data;
 
-	mutex_lock(&ch->reflock);
+	mutex_lock(&ch->cdma.lock);
 
-	if (ch->refcount) {
-		mutex_lock(&ch->cdma.lock);
+	if (show_fifo)
+		host1x_hw_show_channel_fifo(m, ch, o);
 
-		if (show_fifo)
-			host1x_hw_show_channel_fifo(m, ch, o);
+	host1x_hw_show_channel_cdma(m, ch, o);
 
-		host1x_hw_show_channel_cdma(m, ch, o);
-		mutex_unlock(&ch->cdma.lock);
-	}
-
-	mutex_unlock(&ch->reflock);
+	mutex_unlock(&ch->cdma.lock);
 
 	return 0;
 }
@@ -94,28 +89,22 @@ static void show_syncpts(struct host1x *m, struct output *o)
 	host1x_debug_output(o, "\n");
 }
 
-static void show_all(struct host1x *m, struct output *o)
+static void show_all(struct host1x *m, struct output *o, bool show_fifo)
 {
-	struct host1x_channel *ch;
+	int i;
 
 	host1x_hw_show_mlocks(m, o);
 	show_syncpts(m, o);
 	host1x_debug_output(o, "---- channels ----\n");
 
-	host1x_for_each_channel(m, ch)
-		show_channels(ch, o, true);
-}
-
-static void show_all_no_fifo(struct host1x *host1x, struct output *o)
-{
-	struct host1x_channel *ch;
-
-	host1x_hw_show_mlocks(host1x, o);
-	show_syncpts(host1x, o);
-	host1x_debug_output(o, "---- channels ----\n");
+	for (i = 0; i < m->info->nb_channels; ++i) {
+		struct host1x_channel *ch = host1x_channel_get_index(m, i);
 
-	host1x_for_each_channel(host1x, ch)
-		show_channels(ch, o, false);
+		if (ch) {
+			show_channel(ch, o, show_fifo);
+			host1x_channel_put(ch);
+		}
+	}
 }
 
 static int host1x_debug_show_all(struct seq_file *s, void *unused)
@@ -125,7 +114,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
 		.ctx = s
 	};
 
-	show_all(s->private, &o);
+	show_all(s->private, &o, true);
 
 	return 0;
 }
@@ -137,7 +126,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
 		.ctx = s
 	};
 
-	show_all_no_fifo(s->private, &o);
+	show_all(s->private, &o, false);
 
 	return 0;
 }
@@ -216,7 +205,7 @@ void host1x_debug_dump(struct host1x *host1x)
 		.fn = write_to_printk
 	};
 
-	show_all(host1x, &o);
+	show_all(host1x, &o, true);
 }
 
 void host1x_debug_dump_syncpts(struct host1x *host1x)
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index f05ebb14fa63..5c1c711a21af 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -198,7 +198,8 @@ static int host1x_probe(struct platform_device *pdev)
 		host->iova_end = geometry->aperture_end;
 	}
 
-	err = host1x_channel_list_init(host);
+	err = host1x_channel_list_init(&host->channel_list,
+				       host->info->nb_channels);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize channel list\n");
 		goto fail_detach_device;
@@ -207,7 +208,7 @@ static int host1x_probe(struct platform_device *pdev)
 	err = clk_prepare_enable(host->clk);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto fail_detach_device;
+		goto fail_free_channels;
 	}
 
 	err = reset_control_deassert(host->rst);
@@ -244,6 +245,8 @@ fail_reset_assert:
 	reset_control_assert(host->rst);
 fail_unprepare_disable:
 	clk_disable_unprepare(host->clk);
+fail_free_channels:
+	host1x_channel_list_free(&host->channel_list);
 fail_detach_device:
 	if (host->domain) {
 		put_iova_domain(&host->iova);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 229d08b6a45e..ffdbc15b749b 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -129,10 +129,8 @@ struct host1x {
 	struct host1x_syncpt *nop_sp;
 
 	struct mutex syncpt_mutex;
-	struct mutex chlist_mutex;
-	struct host1x_channel chlist;
-	unsigned long allocated_channels;
-	unsigned int num_allocated_channels;
+
+	struct host1x_channel_list channel_list;
 
 	struct dentry *debugfs;
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 5e8df78b7acd..8447a56c41ca 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,6 @@ error:
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
 			       unsigned int index)
 {
-	ch->id = index;
-	mutex_init(&ch->reflock);
-	mutex_init(&ch->submitlock);
-
 	ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
 	return 0;
 }
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 476da0e06bb2..630b1a98ab58 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -172,7 +172,6 @@ struct host1x_channel;
 struct host1x_job;
 
 struct host1x_channel *host1x_channel_request(struct device *dev);
-void host1x_channel_free(struct host1x_channel *channel);
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
 void host1x_channel_put(struct host1x_channel *channel);
 int host1x_job_submit(struct host1x_job *job);
-- 
cgit v1.2.3


From 97f6ef6464dbd235a4d9bdfc05d949aab24fc927 Mon Sep 17 00:00:00 2001
From: Xu Yu <yu.a.xu@intel.com>
Date: Wed, 24 May 2017 16:39:55 +0800
Subject: nvme-pci: remap BAR0 to cover admin CQ doorbell for large stride

The existing driver initially maps 8192 bytes of BAR0 which is
intended to cover doorbells of admin SQ and CQ. However, if a
large stride, e.g. 10, is used, the doorbell of admin CQ will
be out of 8192 bytes. Consequently, a page fault will be raised
when the admin CQ doorbell is accessed in nvme_configure_admin_queue().

This patch fixes this issue by remapping BAR0 before accessing
admin CQ doorbell if the initial mapping is not enough.

Signed-off-by: Xu Yu <yu.a.xu@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 65 ++++++++++++++++++++++++++++++++-----------------
 include/linux/nvme.h    |  1 +
 2 files changed, 44 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ebd5cdfc0174..5278ed9811a6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -95,6 +95,7 @@ struct nvme_dev {
 	int q_depth;
 	u32 db_stride;
 	void __iomem *bar;
+	unsigned long bar_mapped_size;
 	struct work_struct reset_work;
 	struct work_struct remove_work;
 	struct timer_list watchdog_timer;
@@ -1320,6 +1321,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 	return 0;
 }
 
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+	return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	if (size <= dev->bar_mapped_size)
+		return 0;
+	if (size > pci_resource_len(pdev, 0))
+		return -ENOMEM;
+	if (dev->bar)
+		iounmap(dev->bar);
+	dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+	if (!dev->bar) {
+		dev->bar_mapped_size = 0;
+		return -ENOMEM;
+	}
+	dev->bar_mapped_size = size;
+	dev->dbs = dev->bar + NVME_REG_DBS;
+
+	return 0;
+}
+
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1327,6 +1354,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 
+	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+	if (result < 0)
+		return result;
+
 	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
 						NVME_CAP_NSSRC(cap) : 0;
 
@@ -1679,16 +1710,12 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
 		nvme_free_host_mem(dev);
 }
 
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
-{
-	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
-}
-
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int result, nr_io_queues, size;
+	int result, nr_io_queues;
+	unsigned long size;
 
 	nr_io_queues = num_online_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1707,20 +1734,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 			nvme_release_cmb(dev);
 	}
 
-	size = db_bar_size(dev, nr_io_queues);
-	if (size > 8192) {
-		iounmap(dev->bar);
-		do {
-			dev->bar = ioremap(pci_resource_start(pdev, 0), size);
-			if (dev->bar)
-				break;
-			if (!--nr_io_queues)
-				return -ENOMEM;
-			size = db_bar_size(dev, nr_io_queues);
-		} while (1);
-		dev->dbs = dev->bar + 4096;
-		adminq->q_db = dev->dbs;
-	}
+	do {
+		size = db_bar_size(dev, nr_io_queues);
+		result = nvme_remap_bar(dev, size);
+		if (!result)
+			break;
+		if (!--nr_io_queues)
+			return -ENOMEM;
+	} while (1);
+	adminq->q_db = dev->dbs;
 
 	/* Deregister the admin queue's interrupt */
 	pci_free_irq(pdev, 0, adminq);
@@ -2240,8 +2262,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (pci_request_mem_regions(pdev, "nvme"))
 		return -ENODEV;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
+	if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
 		goto release;
 
 	return 0;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 51ca4771be2c..706a0fbfe28e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -102,6 +102,7 @@ enum {
 	NVME_REG_ACQ	= 0x0030,	/* Admin CQ Base Address */
 	NVME_REG_CMBLOC = 0x0038,	/* Controller Memory Buffer Location */
 	NVME_REG_CMBSZ	= 0x003c,	/* Controller Memory Buffer Size */
+	NVME_REG_DBS	= 0x1000,	/* SQ 0 Tail Doorbell */
 };
 
 #define NVME_CAP_MQES(cap)	((cap) & 0xffff)
-- 
cgit v1.2.3


From 0945e56994ac855d01c4aecf69bded65c751b894 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 7 Jun 2017 11:45:28 +0200
Subject: scatterlist: add sg_zero_buffer() helper

The sg_zero_buffer() helper is used to zero fill an area in a SG
list.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
[hch: renamed to sg_zero_buffer]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/scatterlist.h |  2 ++
 lib/scatterlist.c           | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index cb3c8fe6acd7..4b3286ac60c8 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
 			    const void *buf, size_t buflen, off_t skip);
 size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 			  void *buf, size_t buflen, off_t skip);
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+		       size_t buflen, off_t skip);
 
 /*
  * Maximum number of entries that will be allocated in one piece, if
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf82242d65..be7b4dd6b68d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
 }
 EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+/**
+ * sg_zero_buffer - Zero-out a part of a SG list
+ * @sgl:		 The SG list
+ * @nents:		 Number of SG entries
+ * @buflen:		 The number of bytes to zero out
+ * @skip:		 Number of bytes to skip before zeroing
+ *
+ * Returns the number of bytes zeroed.
+ **/
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+		       size_t buflen, off_t skip)
+{
+	unsigned int offset = 0;
+	struct sg_mapping_iter miter;
+	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
+
+	sg_miter_start(&miter, sgl, nents, sg_flags);
+
+	if (!sg_miter_skip(&miter, skip))
+		return false;
+
+	while (offset < buflen && sg_miter_next(&miter)) {
+		unsigned int len;
+
+		len = min(miter.length, buflen - offset);
+		memset(miter.addr, 0, len);
+
+		offset += len;
+	}
+
+	sg_miter_stop(&miter);
+	return offset;
+}
+EXPORT_SYMBOL(sg_zero_buffer);
-- 
cgit v1.2.3


From 0add5e8e588c65c5ac6a3255f624260bf889128d Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 7 Jun 2017 11:45:29 +0200
Subject: nvmet: use NVME_IDENTIFY_DATA_SIZE

Use NVME_IDENTIFY_DATA_SIZE define instead of hard coding the magic
4096 value.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.com>
[hch: converted three more users]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/lightnvm.c    | 2 +-
 drivers/nvme/host/pci.c         | 4 ++--
 drivers/nvme/target/admin-cmd.c | 4 ++--
 drivers/nvme/target/discovery.c | 2 +-
 include/linux/nvme.h            | 2 ++
 5 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 2d7a2889866f..e1ef8e9b41cb 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
 }
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cd1725095531..63e5a3d3f0dc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -183,8 +183,8 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f97006322..96c144325443 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ out:
 
 static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 {
-	static const int buf_size = 4096;
+	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmet_ns *ns;
 	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -504,7 +504,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		}
 		break;
 	case nvme_admin_identify:
-		req->data_len = 4096;
+		req->data_len = NVME_IDENTIFY_DATA_SIZE;
 		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_NS:
 			req->execute = nvmet_execute_identify_ns;
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597e81fc..c7a90384dd75 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 	case nvme_admin_identify:
-		req->data_len = 4096;
+		req->data_len = NVME_IDENTIFY_DATA_SIZE;
 		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_CTRL:
 			req->execute =
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 706a0fbfe28e..782d557c5535 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -665,6 +665,8 @@ struct nvme_identify {
 	__u32			rsvd11[5];
 };
 
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
 struct nvme_features {
 	__u8			opcode;
 	__u8			flags;
-- 
cgit v1.2.3


From af8b86e9a7ffb9528e745b7ea25b18545699482c Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 7 Jun 2017 11:45:30 +0200
Subject: nvme: introduce NVMe Namespace Identification Descriptor structures

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 782d557c5535..f2344aa923e8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -290,6 +290,7 @@ enum {
 	NVME_ID_CNS_NS			= 0x00,
 	NVME_ID_CNS_CTRL		= 0x01,
 	NVME_ID_CNS_NS_ACTIVE_LIST	= 0x02,
+	NVME_ID_CNS_NS_DESC_LIST	= 0x03,
 	NVME_ID_CNS_NS_PRESENT_LIST	= 0x10,
 	NVME_ID_CNS_NS_PRESENT		= 0x11,
 	NVME_ID_CNS_CTRL_NS_LIST	= 0x12,
@@ -316,6 +317,22 @@ enum {
 	NVME_NS_DPS_PI_TYPE3	= 3,
 };
 
+struct nvme_ns_id_desc {
+	__u8 nidt;
+	__u8 nidl;
+	__le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN	8
+#define NVME_NIDT_NGUID_LEN	16
+#define NVME_NIDT_UUID_LEN	16
+
+enum {
+	NVME_NIDT_EUI64		= 0x01,
+	NVME_NIDT_NGUID		= 0x02,
+	NVME_NIDT_UUID		= 0x03,
+};
+
 struct nvme_smart_log {
 	__u8			critical_warning;
 	__u8			temperature[2];
-- 
cgit v1.2.3


From c61d788b8b1fe57aaf03ac0b5c636c7388ebfd20 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 7 Jun 2017 11:45:36 +0200
Subject: nvmet: allow overriding the NVMe VS via configfs

Allow overriding the announced NVMe Version of a via configfs.

This is particularly helpful when debugging new features for the host
or target side without bumping the hard coded version (as the target
might not be fully compliant to the announced version yet).

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Guan Junxiong <guanjunxiong@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/configfs.c | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h           |  4 ++++
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 83bfe28fe7da..a358ecd93e11 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -650,8 +650,45 @@ out_unlock:
 
 CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
 
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+					      char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	if (NVME_TERTIARY(subsys->ver))
+		return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+				(int)NVME_MAJOR(subsys->ver),
+				(int)NVME_MINOR(subsys->ver),
+				(int)NVME_TERTIARY(subsys->ver));
+	else
+		return snprintf(page, PAGE_SIZE, "%d.%d\n",
+				(int)NVME_MAJOR(subsys->ver),
+				(int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+					       const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	int major, minor, tertiary = 0;
+	int ret;
+
+
+	ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+	if (ret != 2 && ret != 3)
+		return -EINVAL;
+
+	down_write(&nvmet_config_sem);
+	subsys->ver = NVME_VS(major, minor, tertiary);
+	up_write(&nvmet_config_sem);
+
+	return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
 static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_allow_any_host,
+	&nvmet_subsys_attr_version,
 	NULL,
 };
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f2344aa923e8..acb484935603 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1085,4 +1085,8 @@ struct nvme_completion {
 #define NVME_VS(major, minor, tertiary) \
 	(((major) << 16) | ((minor) << 8) | (tertiary))
 
+#define NVME_MAJOR(ver)		((ver) >> 16)
+#define NVME_MINOR(ver)		(((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver)	((ver) & 0xff)
+
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 435e809058bafaa8f0bf8f55f37508b01734c9a5 Mon Sep 17 00:00:00 2001
From: Guan Junxiong <guanjunxiong@huawei.com>
Date: Tue, 13 Jun 2017 09:26:15 +0800
Subject: nvme: add fields into identify controller data structure

Add the new to NVMe 1.3 fields EDSTT, DSTO, FWUG, HCTMA, MNTMT, MXTMT,
and SANICAP into the idenfity controller data structure.

Signed-off-by: Guan Junxiong <guanjunxiong@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index acb484935603..6d476f242ee6 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -209,9 +209,15 @@ struct nvme_id_ctrl {
 	__u8			tnvmcap[16];
 	__u8			unvmcap[16];
 	__le32			rpmbs;
-	__u8			rsvd316[4];
+	__le16			edstt;
+	__u8			dsto;
+	__u8			fwug;
 	__le16			kas;
-	__u8			rsvd322[190];
+	__le16			hctma;
+	__le16			mntmt;
+	__le16			mxtmt;
+	__le32			sanicap;
+	__u8			rsvd332[180];
 	__u8			sqes;
 	__u8			cqes;
 	__le16			maxcmd;
-- 
cgit v1.2.3


From 3c4d7559159bfe1e3b94df3a657b2cda3a34e218 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Wed, 14 Jun 2017 11:37:39 -0700
Subject: tls: kernel TLS support

Software implementation of transport layer security, implemented using ULP
infrastructure.  tcp proto_ops are replaced with tls equivalents of sendmsg and
sendpage.

Only symmetric crypto is done in the kernel, keys are passed by setsockopt
after the handshake is complete.  All control messages are supported via CMSG
data - the actual symmetric encryption is the same, just the message type needs
to be passed separately.

For user API, please see Documentation patch.

Pieces that can be shared between hw and sw implementation
are in tls_main.c

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS              |  10 +
 include/linux/socket.h   |   1 +
 include/net/tls.h        | 237 +++++++++++++++
 include/uapi/linux/tls.h |  79 +++++
 net/Kconfig              |   1 +
 net/Makefile             |   1 +
 net/tls/Kconfig          |  12 +
 net/tls/Makefile         |   7 +
 net/tls/tls_main.c       | 487 ++++++++++++++++++++++++++++++
 net/tls/tls_sw.c         | 772 +++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 1607 insertions(+)
 create mode 100644 include/net/tls.h
 create mode 100644 include/uapi/linux/tls.h
 create mode 100644 net/tls/Kconfig
 create mode 100644 net/tls/Makefile
 create mode 100644 net/tls/tls_main.c
 create mode 100644 net/tls/tls_sw.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 10f158ee95a3..71a74555afdf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8978,6 +8978,16 @@ F:	net/ipv6/
 F:	include/net/ip*
 F:	arch/x86/net/*
 
+NETWORKING [TLS]
+M:	Ilya Lesokhin <ilyal@mellanox.com>
+M:	Aviad Yehezkel <aviadye@mellanox.com>
+M:	Dave Watson <davejwatson@fb.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	net/tls/*
+F:	include/uapi/linux/tls.h
+F:	include/net/tls.h
+
 NETWORKING [IPSEC]
 M:	Steffen Klassert <steffen.klassert@secunet.com>
 M:	Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 082027457825..8b13db5163cc 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -334,6 +334,7 @@ struct ucred {
 #define SOL_ALG		279
 #define SOL_NFC		280
 #define SOL_KCM		281
+#define SOL_TLS		282
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/net/tls.h b/include/net/tls.h
new file mode 100644
index 000000000000..b89d397dd62f
--- /dev/null
+++ b/include/net/tls.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _TLS_OFFLOAD_H
+#define _TLS_OFFLOAD_H
+
+#include <linux/types.h>
+
+#include <uapi/linux/tls.h>
+
+
+/* Maximum data size carried in a TLS record */
+#define TLS_MAX_PAYLOAD_SIZE		((size_t)1 << 14)
+
+#define TLS_HEADER_SIZE			5
+#define TLS_NONCE_OFFSET		TLS_HEADER_SIZE
+
+#define TLS_CRYPTO_INFO_READY(info)	((info)->cipher_type)
+
+#define TLS_RECORD_TYPE_DATA		0x17
+
+#define TLS_AAD_SPACE_SIZE		13
+
+struct tls_sw_context {
+	struct crypto_aead *aead_send;
+
+	/* Sending context */
+	char aad_space[TLS_AAD_SPACE_SIZE];
+
+	unsigned int sg_plaintext_size;
+	int sg_plaintext_num_elem;
+	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS];
+
+	unsigned int sg_encrypted_size;
+	int sg_encrypted_num_elem;
+	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS];
+
+	/* AAD | sg_plaintext_data | sg_tag */
+	struct scatterlist sg_aead_in[2];
+	/* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */
+	struct scatterlist sg_aead_out[2];
+};
+
+enum {
+	TLS_PENDING_CLOSED_RECORD
+};
+
+struct tls_context {
+	union {
+		struct tls_crypto_info crypto_send;
+		struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
+	};
+
+	void *priv_ctx;
+
+	u16 prepend_size;
+	u16 tag_size;
+	u16 overhead_size;
+	u16 iv_size;
+	char *iv;
+	u16 rec_seq_size;
+	char *rec_seq;
+
+	struct scatterlist *partially_sent_record;
+	u16 partially_sent_offset;
+	unsigned long flags;
+
+	u16 pending_open_record_frags;
+	int (*push_pending_record)(struct sock *sk, int flags);
+	void (*free_resources)(struct sock *sk);
+
+	void (*sk_write_space)(struct sock *sk);
+	void (*sk_proto_close)(struct sock *sk, long timeout);
+
+	int  (*setsockopt)(struct sock *sk, int level,
+			   int optname, char __user *optval,
+			   unsigned int optlen);
+	int  (*getsockopt)(struct sock *sk, int level,
+			   int optname, char __user *optval,
+			   int __user *optlen);
+};
+
+int wait_on_pending_writer(struct sock *sk, long *timeo);
+int tls_sk_query(struct sock *sk, int optname, char __user *optval,
+		int __user *optlen);
+int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
+		  unsigned int optlen);
+
+
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx);
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+		    int offset, size_t size, int flags);
+void tls_sw_close(struct sock *sk, long timeout);
+
+void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
+void tls_icsk_clean_acked(struct sock *sk);
+
+int tls_push_sg(struct sock *sk, struct tls_context *ctx,
+		struct scatterlist *sg, u16 first_offset,
+		int flags);
+int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
+				   int flags, long *timeo);
+
+static inline bool tls_is_pending_closed_record(struct tls_context *ctx)
+{
+	return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+}
+
+static inline int tls_complete_pending_work(struct sock *sk,
+					    struct tls_context *ctx,
+					    int flags, long *timeo)
+{
+	int rc = 0;
+
+	if (unlikely(sk->sk_write_pending))
+		rc = wait_on_pending_writer(sk, timeo);
+
+	if (!rc && tls_is_pending_closed_record(ctx))
+		rc = tls_push_pending_closed_record(sk, ctx, flags, timeo);
+
+	return rc;
+}
+
+static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
+{
+	return !!ctx->partially_sent_record;
+}
+
+static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
+{
+	return tls_ctx->pending_open_record_frags;
+}
+
+static inline void tls_err_abort(struct sock *sk)
+{
+	sk->sk_err = -EBADMSG;
+	sk->sk_error_report(sk);
+}
+
+static inline bool tls_bigint_increment(unsigned char *seq, int len)
+{
+	int i;
+
+	for (i = len - 1; i >= 0; i--) {
+		++seq[i];
+		if (seq[i] != 0)
+			break;
+	}
+
+	return (i == -1);
+}
+
+static inline void tls_advance_record_sn(struct sock *sk,
+					 struct tls_context *ctx)
+{
+	if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
+		tls_err_abort(sk);
+	tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+			     ctx->iv_size);
+}
+
+static inline void tls_fill_prepend(struct tls_context *ctx,
+			     char *buf,
+			     size_t plaintext_len,
+			     unsigned char record_type)
+{
+	size_t pkt_len, iv_size = ctx->iv_size;
+
+	pkt_len = plaintext_len + iv_size + ctx->tag_size;
+
+	/* we cover nonce explicit here as well, so buf should be of
+	 * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
+	 */
+	buf[0] = record_type;
+	buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version);
+	buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version);
+	/* we can use IV for nonce explicit according to spec */
+	buf[3] = pkt_len >> 8;
+	buf[4] = pkt_len & 0xFF;
+	memcpy(buf + TLS_NONCE_OFFSET,
+	       ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
+}
+
+static inline struct tls_context *tls_get_ctx(const struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	return icsk->icsk_ulp_data;
+}
+
+static inline struct tls_sw_context *tls_sw_ctx(
+		const struct tls_context *tls_ctx)
+{
+	return (struct tls_sw_context *)tls_ctx->priv_ctx;
+}
+
+static inline struct tls_offload_context *tls_offload_ctx(
+		const struct tls_context *tls_ctx)
+{
+	return (struct tls_offload_context *)tls_ctx->priv_ctx;
+}
+
+int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
+		      unsigned char *record_type);
+
+#endif /* _TLS_OFFLOAD_H */
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
new file mode 100644
index 000000000000..cc1d21db35d8
--- /dev/null
+++ b/include/uapi/linux/tls.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UAPI_LINUX_TLS_H
+#define _UAPI_LINUX_TLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+/* TLS socket options */
+#define TLS_TX			1	/* Set transmit parameters */
+
+/* Supported versions */
+#define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
+#define TLS_VERSION_MAJOR(ver)	(((ver) >> 8) & 0xFF)
+
+#define TLS_VERSION_NUMBER(id)	((((id##_VERSION_MAJOR) & 0xFF) << 8) |	\
+				 ((id##_VERSION_MINOR) & 0xFF))
+
+#define TLS_1_2_VERSION_MAJOR	0x3
+#define TLS_1_2_VERSION_MINOR	0x3
+#define TLS_1_2_VERSION		TLS_VERSION_NUMBER(TLS_1_2)
+
+/* Supported ciphers */
+#define TLS_CIPHER_AES_GCM_128				51
+#define TLS_CIPHER_AES_GCM_128_IV_SIZE			8
+#define TLS_CIPHER_AES_GCM_128_KEY_SIZE		16
+#define TLS_CIPHER_AES_GCM_128_SALT_SIZE		4
+#define TLS_CIPHER_AES_GCM_128_TAG_SIZE		16
+#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE		8
+
+#define TLS_SET_RECORD_TYPE	1
+
+struct tls_crypto_info {
+	__u16 version;
+	__u16 cipher_type;
+};
+
+struct tls12_crypto_info_aes_gcm_128 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE];
+	unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE];
+};
+
+#endif /* _UAPI_LINUX_TLS_H */
diff --git a/net/Kconfig b/net/Kconfig
index 102f781a0131..7d57ef34b79c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -55,6 +55,7 @@ menu "Networking options"
 
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
+source "net/tls/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 9086ffbb5085..bed80fa398b7 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_LLC)		+= llc/
 obj-$(CONFIG_NET)		+= ethernet/ 802/ sched/ netlink/ bpf/
 obj-$(CONFIG_NETFILTER)		+= netfilter/
 obj-$(CONFIG_INET)		+= ipv4/
+obj-$(CONFIG_TLS)		+= tls/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
 obj-$(CONFIG_NET)		+= ipv6/
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
new file mode 100644
index 000000000000..61e532964c82
--- /dev/null
+++ b/net/tls/Kconfig
@@ -0,0 +1,12 @@
+#
+# TLS configuration
+#
+config TLS
+	tristate "Transport Layer Security support"
+	depends on NET
+	default m
+	---help---
+	Enable kernel support for TLS protocol. This allows symmetric
+	encryption handling of the TLS protocol to be done in-kernel.
+
+	If unsure, say M.
diff --git a/net/tls/Makefile b/net/tls/Makefile
new file mode 100644
index 000000000000..a930fd1c4f7b
--- /dev/null
+++ b/net/tls/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the TLS subsystem.
+#
+
+obj-$(CONFIG_TLS) += tls.o
+
+tls-y := tls_main.o tls_sw.o
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
new file mode 100644
index 000000000000..2ebc328bda96
--- /dev/null
+++ b/net/tls/tls_main.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+
+#include <net/tcp.h>
+#include <net/inet_common.h>
+#include <linux/highmem.h>
+#include <linux/netdevice.h>
+#include <linux/sched/signal.h>
+
+#include <net/tls.h>
+
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_DESCRIPTION("Transport Layer Security Support");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct proto tls_base_prot;
+static struct proto tls_sw_prot;
+
+int wait_on_pending_writer(struct sock *sk, long *timeo)
+{
+	int rc = 0;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	while (1) {
+		if (!*timeo) {
+			rc = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			rc = sock_intr_errno(*timeo);
+			break;
+		}
+
+		if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait))
+			break;
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return rc;
+}
+
+int tls_push_sg(struct sock *sk,
+		struct tls_context *ctx,
+		struct scatterlist *sg,
+		u16 first_offset,
+		int flags)
+{
+	int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST;
+	int ret = 0;
+	struct page *p;
+	size_t size;
+	int offset = first_offset;
+
+	size = sg->length - offset;
+	offset += sg->offset;
+
+	while (1) {
+		if (sg_is_last(sg))
+			sendpage_flags = flags;
+
+		/* is sending application-limited? */
+		tcp_rate_check_app_limited(sk);
+		p = sg_page(sg);
+retry:
+		ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags);
+
+		if (ret != size) {
+			if (ret > 0) {
+				offset += ret;
+				size -= ret;
+				goto retry;
+			}
+
+			offset -= sg->offset;
+			ctx->partially_sent_offset = offset;
+			ctx->partially_sent_record = (void *)sg;
+			return ret;
+		}
+
+		put_page(p);
+		sk_mem_uncharge(sk, sg->length);
+		sg = sg_next(sg);
+		if (!sg)
+			break;
+
+		offset = sg->offset;
+		size = sg->length;
+	}
+
+	clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+
+	return 0;
+}
+
+static int tls_handle_open_record(struct sock *sk, int flags)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (tls_is_pending_open_record(ctx))
+		return ctx->push_pending_record(sk, flags);
+
+	return 0;
+}
+
+int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
+		      unsigned char *record_type)
+{
+	struct cmsghdr *cmsg;
+	int rc = -EINVAL;
+
+	for_each_cmsghdr(cmsg, msg) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+		if (cmsg->cmsg_level != SOL_TLS)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case TLS_SET_RECORD_TYPE:
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
+				return -EINVAL;
+
+			if (msg->msg_flags & MSG_MORE)
+				return -EINVAL;
+
+			rc = tls_handle_open_record(sk, msg->msg_flags);
+			if (rc)
+				return rc;
+
+			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
+			rc = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return rc;
+}
+
+int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
+				   int flags, long *timeo)
+{
+	struct scatterlist *sg;
+	u16 offset;
+
+	if (!tls_is_partially_sent_record(ctx))
+		return ctx->push_pending_record(sk, flags);
+
+	sg = ctx->partially_sent_record;
+	offset = ctx->partially_sent_offset;
+
+	ctx->partially_sent_record = NULL;
+	return tls_push_sg(sk, ctx, sg, offset, flags);
+}
+
+static void tls_write_space(struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
+		gfp_t sk_allocation = sk->sk_allocation;
+		int rc;
+		long timeo = 0;
+
+		sk->sk_allocation = GFP_ATOMIC;
+		rc = tls_push_pending_closed_record(sk, ctx,
+						    MSG_DONTWAIT |
+						    MSG_NOSIGNAL,
+						    &timeo);
+		sk->sk_allocation = sk_allocation;
+
+		if (rc < 0)
+			return;
+	}
+
+	ctx->sk_write_space(sk);
+}
+
+static void tls_sk_proto_close(struct sock *sk, long timeout)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	void (*sk_proto_close)(struct sock *sk, long timeout);
+
+	lock_sock(sk);
+
+	if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+		tls_handle_open_record(sk, 0);
+
+	if (ctx->partially_sent_record) {
+		struct scatterlist *sg = ctx->partially_sent_record;
+
+		while (1) {
+			put_page(sg_page(sg));
+			sk_mem_uncharge(sk, sg->length);
+
+			if (sg_is_last(sg))
+				break;
+			sg++;
+		}
+	}
+	ctx->free_resources(sk);
+	kfree(ctx->rec_seq);
+	kfree(ctx->iv);
+
+	sk_proto_close = ctx->sk_proto_close;
+	kfree(ctx);
+
+	release_sock(sk);
+	sk_proto_close(sk, timeout);
+}
+
+static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
+				int __user *optlen)
+{
+	int rc = 0;
+	struct tls_context *ctx = tls_get_ctx(sk);
+	struct tls_crypto_info *crypto_info;
+	int len;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if (!optval || (len < sizeof(*crypto_info))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (!ctx) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	/* get user crypto info */
+	crypto_info = &ctx->crypto_send;
+
+	if (!TLS_CRYPTO_INFO_READY(crypto_info)) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	if (len == sizeof(crypto_info)) {
+		rc = copy_to_user(optval, crypto_info, sizeof(*crypto_info));
+		goto out;
+	}
+
+	switch (crypto_info->cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		struct tls12_crypto_info_aes_gcm_128 *
+		  crypto_info_aes_gcm_128 =
+		  container_of(crypto_info,
+			       struct tls12_crypto_info_aes_gcm_128,
+			       info);
+
+		if (len != sizeof(*crypto_info_aes_gcm_128)) {
+			rc = -EINVAL;
+			goto out;
+		}
+		lock_sock(sk);
+		memcpy(crypto_info_aes_gcm_128->iv, ctx->iv,
+		       TLS_CIPHER_AES_GCM_128_IV_SIZE);
+		release_sock(sk);
+		rc = copy_to_user(optval,
+				  crypto_info_aes_gcm_128,
+				  sizeof(*crypto_info_aes_gcm_128));
+		break;
+	}
+	default:
+		rc = -EINVAL;
+	}
+
+out:
+	return rc;
+}
+
+static int do_tls_getsockopt(struct sock *sk, int optname,
+			     char __user *optval, int __user *optlen)
+{
+	int rc = 0;
+
+	switch (optname) {
+	case TLS_TX:
+		rc = do_tls_getsockopt_tx(sk, optval, optlen);
+		break;
+	default:
+		rc = -ENOPROTOOPT;
+		break;
+	}
+	return rc;
+}
+
+static int tls_getsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, int __user *optlen)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (level != SOL_TLS)
+		return ctx->getsockopt(sk, level, optname, optval, optlen);
+
+	return do_tls_getsockopt(sk, optname, optval, optlen);
+}
+
+static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
+				unsigned int optlen)
+{
+	struct tls_crypto_info *crypto_info, tmp_crypto_info;
+	struct tls_context *ctx = tls_get_ctx(sk);
+	struct proto *prot = NULL;
+	int rc = 0;
+
+	if (!optval || (optlen < sizeof(*crypto_info))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
+	if (rc) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	/* check version */
+	if (tmp_crypto_info.version != TLS_1_2_VERSION) {
+		rc = -ENOTSUPP;
+		goto out;
+	}
+
+	/* get user crypto info */
+	crypto_info = &ctx->crypto_send;
+
+	/* Currently we don't support set crypto info more than one time */
+	if (TLS_CRYPTO_INFO_READY(crypto_info))
+		goto out;
+
+	switch (tmp_crypto_info.cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
+			rc = -EINVAL;
+			goto out;
+		}
+		rc = copy_from_user(
+		  crypto_info,
+		  optval,
+		  sizeof(struct tls12_crypto_info_aes_gcm_128));
+
+		if (rc) {
+			rc = -EFAULT;
+			goto err_crypto_info;
+		}
+		break;
+	}
+	default:
+		rc = -EINVAL;
+		goto out;
+	}
+
+	ctx->sk_write_space = sk->sk_write_space;
+	sk->sk_write_space = tls_write_space;
+
+	ctx->sk_proto_close = sk->sk_prot->close;
+
+	/* currently SW is default, we will have ethtool in future */
+	rc = tls_set_sw_offload(sk, ctx);
+	prot = &tls_sw_prot;
+	if (rc)
+		goto err_crypto_info;
+
+	sk->sk_prot = prot;
+	goto out;
+
+err_crypto_info:
+	memset(crypto_info, 0, sizeof(*crypto_info));
+out:
+	return rc;
+}
+
+static int do_tls_setsockopt(struct sock *sk, int optname,
+			     char __user *optval, unsigned int optlen)
+{
+	int rc = 0;
+
+	switch (optname) {
+	case TLS_TX:
+		lock_sock(sk);
+		rc = do_tls_setsockopt_tx(sk, optval, optlen);
+		release_sock(sk);
+		break;
+	default:
+		rc = -ENOPROTOOPT;
+		break;
+	}
+	return rc;
+}
+
+static int tls_setsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, unsigned int optlen)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (level != SOL_TLS)
+		return ctx->setsockopt(sk, level, optname, optval, optlen);
+
+	return do_tls_setsockopt(sk, optname, optval, optlen);
+}
+
+static int tls_init(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tls_context *ctx;
+	int rc = 0;
+
+	/* allocate tls context */
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	icsk->icsk_ulp_data = ctx;
+	ctx->setsockopt = sk->sk_prot->setsockopt;
+	ctx->getsockopt = sk->sk_prot->getsockopt;
+	sk->sk_prot = &tls_base_prot;
+out:
+	return rc;
+}
+
+static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
+	.name			= "tls",
+	.owner			= THIS_MODULE,
+	.init			= tls_init,
+};
+
+static int __init tls_register(void)
+{
+	tls_base_prot			= tcp_prot;
+	tls_base_prot.setsockopt	= tls_setsockopt;
+	tls_base_prot.getsockopt	= tls_getsockopt;
+
+	tls_sw_prot			= tls_base_prot;
+	tls_sw_prot.sendmsg		= tls_sw_sendmsg;
+	tls_sw_prot.sendpage            = tls_sw_sendpage;
+	tls_sw_prot.close               = tls_sk_proto_close;
+
+	tcp_register_ulp(&tcp_tls_ulp_ops);
+
+	return 0;
+}
+
+static void __exit tls_unregister(void)
+{
+	tcp_unregister_ulp(&tcp_tls_ulp_ops);
+}
+
+module_init(tls_register);
+module_exit(tls_unregister);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
new file mode 100644
index 000000000000..fa596fa71ba7
--- /dev/null
+++ b/net/tls/tls_sw.c
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
+ * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
+ * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <crypto/aead.h>
+
+#include <net/tls.h>
+
+static inline void tls_make_aad(int recv,
+				char *buf,
+				size_t size,
+				char *record_sequence,
+				int record_sequence_size,
+				unsigned char record_type)
+{
+	memcpy(buf, record_sequence, record_sequence_size);
+
+	buf[8] = record_type;
+	buf[9] = TLS_1_2_VERSION_MAJOR;
+	buf[10] = TLS_1_2_VERSION_MINOR;
+	buf[11] = size >> 8;
+	buf[12] = size & 0xFF;
+}
+
+static void trim_sg(struct sock *sk, struct scatterlist *sg,
+		    int *sg_num_elem, unsigned int *sg_size, int target_size)
+{
+	int i = *sg_num_elem - 1;
+	int trim = *sg_size - target_size;
+
+	if (trim <= 0) {
+		WARN_ON(trim < 0);
+		return;
+	}
+
+	*sg_size = target_size;
+	while (trim >= sg[i].length) {
+		trim -= sg[i].length;
+		sk_mem_uncharge(sk, sg[i].length);
+		put_page(sg_page(&sg[i]));
+		i--;
+
+		if (i < 0)
+			goto out;
+	}
+
+	sg[i].length -= trim;
+	sk_mem_uncharge(sk, trim);
+
+out:
+	*sg_num_elem = i + 1;
+}
+
+static void trim_both_sgl(struct sock *sk, int target_size)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+	trim_sg(sk, ctx->sg_plaintext_data,
+		&ctx->sg_plaintext_num_elem,
+		&ctx->sg_plaintext_size,
+		target_size);
+
+	if (target_size > 0)
+		target_size += tls_ctx->overhead_size;
+
+	trim_sg(sk, ctx->sg_encrypted_data,
+		&ctx->sg_encrypted_num_elem,
+		&ctx->sg_encrypted_size,
+		target_size);
+}
+
+static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+		    int *sg_num_elem, unsigned int *sg_size,
+		    int first_coalesce)
+{
+	struct page_frag *pfrag;
+	unsigned int size = *sg_size;
+	int num_elem = *sg_num_elem, use = 0, rc = 0;
+	struct scatterlist *sge;
+	unsigned int orig_offset;
+
+	len -= size;
+	pfrag = sk_page_frag(sk);
+
+	while (len > 0) {
+		if (!sk_page_frag_refill(sk, pfrag)) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		use = min_t(int, len, pfrag->size - pfrag->offset);
+
+		if (!sk_wmem_schedule(sk, use)) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		sk_mem_charge(sk, use);
+		size += use;
+		orig_offset = pfrag->offset;
+		pfrag->offset += use;
+
+		sge = sg + num_elem - 1;
+		if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
+		    sg->offset + sg->length == orig_offset) {
+			sg->length += use;
+		} else {
+			sge++;
+			sg_unmark_end(sge);
+			sg_set_page(sge, pfrag->page, use, orig_offset);
+			get_page(pfrag->page);
+			++num_elem;
+			if (num_elem == MAX_SKB_FRAGS) {
+				rc = -ENOSPC;
+				break;
+			}
+		}
+
+		len -= use;
+	}
+	goto out;
+
+out:
+	*sg_size = size;
+	*sg_num_elem = num_elem;
+	return rc;
+}
+
+static int alloc_encrypted_sg(struct sock *sk, int len)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	int rc = 0;
+
+	rc = alloc_sg(sk, len, ctx->sg_encrypted_data,
+		      &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0);
+
+	return rc;
+}
+
+static int alloc_plaintext_sg(struct sock *sk, int len)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	int rc = 0;
+
+	rc = alloc_sg(sk, len, ctx->sg_plaintext_data,
+		      &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
+		      tls_ctx->pending_open_record_frags);
+
+	return rc;
+}
+
+static void free_sg(struct sock *sk, struct scatterlist *sg,
+		    int *sg_num_elem, unsigned int *sg_size)
+{
+	int i, n = *sg_num_elem;
+
+	for (i = 0; i < n; ++i) {
+		sk_mem_uncharge(sk, sg[i].length);
+		put_page(sg_page(&sg[i]));
+	}
+	*sg_num_elem = 0;
+	*sg_size = 0;
+}
+
+static void tls_free_both_sg(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+	free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem,
+		&ctx->sg_encrypted_size);
+
+	free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
+		&ctx->sg_plaintext_size);
+}
+
+static int tls_do_encryption(struct tls_context *tls_ctx,
+			     struct tls_sw_context *ctx, size_t data_len,
+			     gfp_t flags)
+{
+	unsigned int req_size = sizeof(struct aead_request) +
+		crypto_aead_reqsize(ctx->aead_send);
+	struct aead_request *aead_req;
+	int rc;
+
+	aead_req = kmalloc(req_size, flags);
+	if (!aead_req)
+		return -ENOMEM;
+
+	ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size;
+	ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size;
+
+	aead_request_set_tfm(aead_req, ctx->aead_send);
+	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+	aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
+			       data_len, tls_ctx->iv);
+	rc = crypto_aead_encrypt(aead_req);
+
+	ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
+	ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
+
+	kfree(aead_req);
+	return rc;
+}
+
+static int tls_push_record(struct sock *sk, int flags,
+			   unsigned char record_type)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	int rc;
+
+	sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
+	sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
+
+	tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size,
+		     tls_ctx->rec_seq, tls_ctx->rec_seq_size,
+		     record_type);
+
+	tls_fill_prepend(tls_ctx,
+			 page_address(sg_page(&ctx->sg_encrypted_data[0])) +
+			 ctx->sg_encrypted_data[0].offset,
+			 ctx->sg_plaintext_size, record_type);
+
+	tls_ctx->pending_open_record_frags = 0;
+	set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags);
+
+	rc = tls_do_encryption(tls_ctx, ctx, ctx->sg_plaintext_size,
+			       sk->sk_allocation);
+	if (rc < 0) {
+		/* If we are called from write_space and
+		 * we fail, we need to set this SOCK_NOSPACE
+		 * to trigger another write_space in the future.
+		 */
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		return rc;
+	}
+
+	free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
+		&ctx->sg_plaintext_size);
+
+	ctx->sg_encrypted_num_elem = 0;
+	ctx->sg_encrypted_size = 0;
+
+	/* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
+	rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
+	if (rc < 0 && rc != -EAGAIN)
+		tls_err_abort(sk);
+
+	tls_advance_record_sn(sk, tls_ctx);
+	return rc;
+}
+
+static int tls_sw_push_pending_record(struct sock *sk, int flags)
+{
+	return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA);
+}
+
+static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      int length)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct page *pages[MAX_SKB_FRAGS];
+
+	size_t offset;
+	ssize_t copied, use;
+	int i = 0;
+	unsigned int size = ctx->sg_plaintext_size;
+	int num_elem = ctx->sg_plaintext_num_elem;
+	int rc = 0;
+	int maxpages;
+
+	while (length > 0) {
+		i = 0;
+		maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
+		if (maxpages == 0) {
+			rc = -EFAULT;
+			goto out;
+		}
+		copied = iov_iter_get_pages(from, pages,
+					    length,
+					    maxpages, &offset);
+		if (copied <= 0) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+		iov_iter_advance(from, copied);
+
+		length -= copied;
+		size += copied;
+		while (copied) {
+			use = min_t(int, copied, PAGE_SIZE - offset);
+
+			sg_set_page(&ctx->sg_plaintext_data[num_elem],
+				    pages[i], use, offset);
+			sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
+			sk_mem_charge(sk, use);
+
+			offset = 0;
+			copied -= use;
+
+			++i;
+			++num_elem;
+		}
+	}
+
+out:
+	ctx->sg_plaintext_size = size;
+	ctx->sg_plaintext_num_elem = num_elem;
+	return rc;
+}
+
+static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     int bytes)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct scatterlist *sg = ctx->sg_plaintext_data;
+	int copy, i, rc = 0;
+
+	for (i = tls_ctx->pending_open_record_frags;
+	     i < ctx->sg_plaintext_num_elem; ++i) {
+		copy = sg[i].length;
+		if (copy_from_iter(
+				page_address(sg_page(&sg[i])) + sg[i].offset,
+				copy, from) != copy) {
+			rc = -EFAULT;
+			goto out;
+		}
+		bytes -= copy;
+
+		++tls_ctx->pending_open_record_frags;
+
+		if (!bytes)
+			break;
+	}
+
+out:
+	return rc;
+}
+
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	int ret = 0;
+	int required_size;
+	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	bool eor = !(msg->msg_flags & MSG_MORE);
+	size_t try_to_copy, copied = 0;
+	unsigned char record_type = TLS_RECORD_TYPE_DATA;
+	int record_room;
+	bool full_record;
+	int orig_size;
+
+	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+		return -ENOTSUPP;
+
+	lock_sock(sk);
+
+	if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
+		goto send_end;
+
+	if (unlikely(msg->msg_controllen)) {
+		ret = tls_proccess_cmsg(sk, msg, &record_type);
+		if (ret)
+			goto send_end;
+	}
+
+	while (msg_data_left(msg)) {
+		if (sk->sk_err) {
+			ret = sk->sk_err;
+			goto send_end;
+		}
+
+		orig_size = ctx->sg_plaintext_size;
+		full_record = false;
+		try_to_copy = msg_data_left(msg);
+		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
+		if (try_to_copy >= record_room) {
+			try_to_copy = record_room;
+			full_record = true;
+		}
+
+		required_size = ctx->sg_plaintext_size + try_to_copy +
+				tls_ctx->overhead_size;
+
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+alloc_encrypted:
+		ret = alloc_encrypted_sg(sk, required_size);
+		if (ret) {
+			if (ret != -ENOSPC)
+				goto wait_for_memory;
+
+			/* Adjust try_to_copy according to the amount that was
+			 * actually allocated. The difference is due
+			 * to max sg elements limit
+			 */
+			try_to_copy -= required_size - ctx->sg_encrypted_size;
+			full_record = true;
+		}
+
+		if (full_record || eor) {
+			ret = zerocopy_from_iter(sk, &msg->msg_iter,
+						 try_to_copy);
+			if (ret)
+				goto fallback_to_reg_send;
+
+			copied += try_to_copy;
+			ret = tls_push_record(sk, msg->msg_flags, record_type);
+			if (!ret)
+				continue;
+			if (ret == -EAGAIN)
+				goto send_end;
+
+			copied -= try_to_copy;
+fallback_to_reg_send:
+			iov_iter_revert(&msg->msg_iter,
+					ctx->sg_plaintext_size - orig_size);
+			trim_sg(sk, ctx->sg_plaintext_data,
+				&ctx->sg_plaintext_num_elem,
+				&ctx->sg_plaintext_size,
+				orig_size);
+		}
+
+		required_size = ctx->sg_plaintext_size + try_to_copy;
+alloc_plaintext:
+		ret = alloc_plaintext_sg(sk, required_size);
+		if (ret) {
+			if (ret != -ENOSPC)
+				goto wait_for_memory;
+
+			/* Adjust try_to_copy according to the amount that was
+			 * actually allocated. The difference is due
+			 * to max sg elements limit
+			 */
+			try_to_copy -= required_size - ctx->sg_plaintext_size;
+			full_record = true;
+
+			trim_sg(sk, ctx->sg_encrypted_data,
+				&ctx->sg_encrypted_num_elem,
+				&ctx->sg_encrypted_size,
+				ctx->sg_plaintext_size +
+				tls_ctx->overhead_size);
+		}
+
+		ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
+		if (ret)
+			goto trim_sgl;
+
+		copied += try_to_copy;
+		if (full_record || eor) {
+push_record:
+			ret = tls_push_record(sk, msg->msg_flags, record_type);
+			if (ret) {
+				if (ret == -ENOMEM)
+					goto wait_for_memory;
+
+				goto send_end;
+			}
+		}
+
+		continue;
+
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		ret = sk_stream_wait_memory(sk, &timeo);
+		if (ret) {
+trim_sgl:
+			trim_both_sgl(sk, orig_size);
+			goto send_end;
+		}
+
+		if (tls_is_pending_closed_record(tls_ctx))
+			goto push_record;
+
+		if (ctx->sg_encrypted_size < required_size)
+			goto alloc_encrypted;
+
+		goto alloc_plaintext;
+	}
+
+send_end:
+	ret = sk_stream_error(sk, msg->msg_flags, ret);
+
+	release_sock(sk);
+	return copied ? copied : ret;
+}
+
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+		    int offset, size_t size, int flags)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	int ret = 0;
+	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+	bool eor;
+	size_t orig_size = size;
+	unsigned char record_type = TLS_RECORD_TYPE_DATA;
+	struct scatterlist *sg;
+	bool full_record;
+	int record_room;
+
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST))
+		return -ENOTSUPP;
+
+	/* No MSG_EOR from splice, only look at MSG_MORE */
+	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
+
+	lock_sock(sk);
+
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
+		goto sendpage_end;
+
+	/* Call the sk_stream functions to manage the sndbuf mem. */
+	while (size > 0) {
+		size_t copy, required_size;
+
+		if (sk->sk_err) {
+			ret = sk->sk_err;
+			goto sendpage_end;
+		}
+
+		full_record = false;
+		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
+		copy = size;
+		if (copy >= record_room) {
+			copy = record_room;
+			full_record = true;
+		}
+		required_size = ctx->sg_plaintext_size + copy +
+			      tls_ctx->overhead_size;
+
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+alloc_payload:
+		ret = alloc_encrypted_sg(sk, required_size);
+		if (ret) {
+			if (ret != -ENOSPC)
+				goto wait_for_memory;
+
+			/* Adjust copy according to the amount that was
+			 * actually allocated. The difference is due
+			 * to max sg elements limit
+			 */
+			copy -= required_size - ctx->sg_plaintext_size;
+			full_record = true;
+		}
+
+		get_page(page);
+		sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
+		sg_set_page(sg, page, copy, offset);
+		ctx->sg_plaintext_num_elem++;
+
+		sk_mem_charge(sk, copy);
+		offset += copy;
+		size -= copy;
+		ctx->sg_plaintext_size += copy;
+		tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem;
+
+		if (full_record || eor ||
+		    ctx->sg_plaintext_num_elem ==
+		    ARRAY_SIZE(ctx->sg_plaintext_data)) {
+push_record:
+			ret = tls_push_record(sk, flags, record_type);
+			if (ret) {
+				if (ret == -ENOMEM)
+					goto wait_for_memory;
+
+				goto sendpage_end;
+			}
+		}
+		continue;
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		ret = sk_stream_wait_memory(sk, &timeo);
+		if (ret) {
+			trim_both_sgl(sk, ctx->sg_plaintext_size);
+			goto sendpage_end;
+		}
+
+		if (tls_is_pending_closed_record(tls_ctx))
+			goto push_record;
+
+		goto alloc_payload;
+	}
+
+sendpage_end:
+	if (orig_size > size)
+		ret = orig_size - size;
+	else
+		ret = sk_stream_error(sk, flags, ret);
+
+	release_sock(sk);
+	return ret;
+}
+
+void tls_sw_free_resources(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+	if (ctx->aead_send)
+		crypto_free_aead(ctx->aead_send);
+
+	tls_free_both_sg(sk);
+
+	kfree(ctx);
+}
+
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
+{
+	char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
+	struct tls_crypto_info *crypto_info;
+	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
+	struct tls_sw_context *sw_ctx;
+	u16 nonce_size, tag_size, iv_size, rec_seq_size;
+	char *iv, *rec_seq;
+	int rc = 0;
+
+	if (!ctx) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (ctx->priv_ctx) {
+		rc = -EEXIST;
+		goto out;
+	}
+
+	sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
+	if (!sw_ctx) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
+	ctx->free_resources = tls_sw_free_resources;
+
+	crypto_info = &ctx->crypto_send;
+	switch (crypto_info->cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
+		tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+		iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
+		iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
+		rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
+		rec_seq =
+		 ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
+		gcm_128_info =
+			(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+		break;
+	}
+	default:
+		rc = -EINVAL;
+		goto out;
+	}
+
+	ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	ctx->tag_size = tag_size;
+	ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
+	ctx->iv_size = iv_size;
+	ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+			  GFP_KERNEL);
+	if (!ctx->iv) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+	ctx->rec_seq_size = rec_seq_size;
+	ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+	if (!ctx->rec_seq) {
+		rc = -ENOMEM;
+		goto free_iv;
+	}
+	memcpy(ctx->rec_seq, rec_seq, rec_seq_size);
+
+	sg_init_table(sw_ctx->sg_encrypted_data,
+		      ARRAY_SIZE(sw_ctx->sg_encrypted_data));
+	sg_init_table(sw_ctx->sg_plaintext_data,
+		      ARRAY_SIZE(sw_ctx->sg_plaintext_data));
+
+	sg_init_table(sw_ctx->sg_aead_in, 2);
+	sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
+		   sizeof(sw_ctx->aad_space));
+	sg_unmark_end(&sw_ctx->sg_aead_in[1]);
+	sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
+	sg_init_table(sw_ctx->sg_aead_out, 2);
+	sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
+		   sizeof(sw_ctx->aad_space));
+	sg_unmark_end(&sw_ctx->sg_aead_out[1]);
+	sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+
+	if (!sw_ctx->aead_send) {
+		sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0);
+		if (IS_ERR(sw_ctx->aead_send)) {
+			rc = PTR_ERR(sw_ctx->aead_send);
+			sw_ctx->aead_send = NULL;
+			goto free_rec_seq;
+		}
+	}
+
+	ctx->push_pending_record = tls_sw_push_pending_record;
+
+	memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+	rc = crypto_aead_setkey(sw_ctx->aead_send, keyval,
+				TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+	if (rc)
+		goto free_aead;
+
+	rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
+	if (!rc)
+		goto out;
+
+free_aead:
+	crypto_free_aead(sw_ctx->aead_send);
+	sw_ctx->aead_send = NULL;
+free_rec_seq:
+	kfree(ctx->rec_seq);
+	ctx->rec_seq = NULL;
+free_iv:
+	kfree(ctx->iv);
+	ctx->iv = NULL;
+out:
+	return rc;
+}
-- 
cgit v1.2.3


From 83ad357dee467f63574de35752bc40033deab30e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 14 Jun 2017 22:17:20 +0200
Subject: skbuff: make skb_put_zero() return void

It's nicer to return void, since then there's no need to
cast to any structures. Currently none of the users have
a cast, but a number of future conversions do.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1151b50892d1..01ea64d0783a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1904,9 +1904,9 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
-static inline unsigned char *skb_put_zero(struct sk_buff *skb, unsigned int len)
+static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len)
 {
-	unsigned char *tmp = skb_put(skb, len);
+	void *tmp = skb_put(skb, len);
 
 	memset(tmp, 0, len);
 
-- 
cgit v1.2.3


From a9bc67de0c5713a8675bfe33bfe9cb36c7934589 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Wed, 14 Jun 2017 21:04:14 +0200
Subject: regulator: tps65910: wire up sleep control configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables configuring the PMIC's sleep mode via device-tree.

A pointer indirection to sleep mode data is removed, as it simplifies
the implementation slightly. In current kernel tree, platform data
structure is not used outside MFD cell drivers.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/mfd/tps65910.txt |  4 ++++
 drivers/mfd/tps65910.c                             | 22 +++++++++++++++-------
 include/linux/mfd/tps65910.h                       |  2 +-
 3 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/tps65910.txt b/Documentation/devicetree/bindings/mfd/tps65910.txt
index 38833e63a59f..8af1202b381d 100644
--- a/Documentation/devicetree/bindings/mfd/tps65910.txt
+++ b/Documentation/devicetree/bindings/mfd/tps65910.txt
@@ -61,6 +61,10 @@ Optional properties:
   There should be 9 entries here, one for each gpio.
 - ti,system-power-controller: Telling whether or not this pmic is controlling
   the system power.
+- ti,sleep-enable: Enable SLEEP state.
+- ti,sleep-keep-therm: Keep thermal monitoring on in sleep state.
+- ti,sleep-keep-ck32k: Keep the 32KHz clock output on in sleep state.
+- ti,sleep-keep-hsclk: Keep high speed internal clock on in sleep state.
 
 Regulator Optional properties:
 - ti,regulator-ext-sleep-control: enable external sleep
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 11cab1582f2f..8263605f6d2f 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -328,11 +328,7 @@ static int tps65910_sleepinit(struct tps65910 *tps65910,
 		goto err_sleep_init;
 	}
 
-	/* Return if there is no sleep keepon data. */
-	if (!pmic_pdata->slp_keepon)
-		return 0;
-
-	if (pmic_pdata->slp_keepon->therm_keepon) {
+	if (pmic_pdata->slp_keepon.therm_keepon) {
 		ret = tps65910_reg_set_bits(tps65910,
 				TPS65910_SLEEP_KEEP_RES_ON,
 				SLEEP_KEEP_RES_ON_THERM_KEEPON_MASK);
@@ -342,7 +338,7 @@ static int tps65910_sleepinit(struct tps65910 *tps65910,
 		}
 	}
 
-	if (pmic_pdata->slp_keepon->clkout32k_keepon) {
+	if (pmic_pdata->slp_keepon.clkout32k_keepon) {
 		ret = tps65910_reg_set_bits(tps65910,
 				TPS65910_SLEEP_KEEP_RES_ON,
 				SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_MASK);
@@ -352,7 +348,7 @@ static int tps65910_sleepinit(struct tps65910 *tps65910,
 		}
 	}
 
-	if (pmic_pdata->slp_keepon->i2chs_keepon) {
+	if (pmic_pdata->slp_keepon.i2chs_keepon) {
 		ret = tps65910_reg_set_bits(tps65910,
 				TPS65910_SLEEP_KEEP_RES_ON,
 				SLEEP_KEEP_RES_ON_I2CHS_KEEPON_MASK);
@@ -415,6 +411,18 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
 	prop = of_property_read_bool(np, "ti,en-ck32k-xtal");
 	board_info->en_ck32k_xtal = prop;
 
+	prop = of_property_read_bool(np, "ti,sleep-enable");
+	board_info->en_dev_slp = prop;
+
+	prop = of_property_read_bool(np, "ti,sleep-keep-therm");
+	board_info->slp_keepon.therm_keepon = prop;
+
+	prop = of_property_read_bool(np, "ti,sleep-keep-ck32k");
+	board_info->slp_keepon.clkout32k_keepon = prop;
+
+	prop = of_property_read_bool(np, "ti,sleep-keep-hsclk");
+	board_info->slp_keepon.i2chs_keepon = prop;
+
 	board_info->irq = client->irq;
 	board_info->irq_base = -1;
 	board_info->pm_off = of_property_read_bool(np,
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index ffb21e79204d..deffdcd0236f 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -879,7 +879,7 @@ struct tps65910_board {
 	bool en_ck32k_xtal;
 	bool en_dev_slp;
 	bool pm_off;
-	struct tps65910_sleep_keepon_data *slp_keepon;
+	struct tps65910_sleep_keepon_data slp_keepon;
 	bool en_gpio_sleep[TPS6591X_MAX_NUM_GPIO];
 	unsigned long regulator_ext_sleep_control[TPS65910_NUM_REGS];
 	struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS];
-- 
cgit v1.2.3


From bd10838af2d918994a27c702e9910fb71bb9c304 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 28 May 2017 15:24:17 +0300
Subject: net/mlx5: Fix some spelling mistakes

Fixed few places where endianness was misspelled and
one spot whwere output was:

CHECK: 'endianess' may be misspelled - perhaps 'endianness'?
CHECK: 'ouput' may be misspelled - perhaps 'output'?

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 +++++-----
 include/linux/mlx5/mlx5_ifc.h                  |  4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 852a6a75db98..2ab505d1e8e3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -439,7 +439,7 @@ static void get_atomic_caps(struct mlx5_ib_dev *dev,
 	u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
 	u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
 	u8 atomic_req_8B_endianness_mode =
-		MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+		MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
 
 	/* Check if HW supports 8 bytes standard atomic operations and capable
 	 * of host endianness respond
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 10d282841f5b..46efaab9da46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -874,7 +874,7 @@ static const char *deliv_status_to_str(u8 status)
 	case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
 		return "command input length error";
 	case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
-		return "command ouput length error";
+		return "command output length error";
 	case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
 		return "reserved fields not cleared";
 	case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index dc890944c4ea..3319968ed789 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -356,7 +356,7 @@ static void mlx5_disable_msix(struct mlx5_core_dev *dev)
 	kfree(priv->msix_arr);
 }
 
-struct mlx5_reg_host_endianess {
+struct mlx5_reg_host_endianness {
 	u8	he;
 	u8      rsvd[15];
 };
@@ -475,7 +475,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
 
 	req_endianness =
 		MLX5_CAP_ATOMIC(dev,
-				supported_atomic_req_8B_endianess_mode_1);
+				supported_atomic_req_8B_endianness_mode_1);
 
 	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
 		return 0;
@@ -487,7 +487,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
 
 	/* Set requestor to host endianness */
-	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
 		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
 
 	err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
@@ -562,8 +562,8 @@ query_ex:
 
 static int set_hca_ctrl(struct mlx5_core_dev *dev)
 {
-	struct mlx5_reg_host_endianess he_in;
-	struct mlx5_reg_host_endianess he_out;
+	struct mlx5_reg_host_endianness he_in;
+	struct mlx5_reg_host_endianness he_out;
 	int err;
 
 	if (!mlx5_core_is_pf(dev))
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 32b044e953d2..1fd144662491 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -661,9 +661,9 @@ enum {
 struct mlx5_ifc_atomic_caps_bits {
 	u8         reserved_at_0[0x40];
 
-	u8         atomic_req_8B_endianess_mode[0x2];
+	u8         atomic_req_8B_endianness_mode[0x2];
 	u8         reserved_at_42[0x4];
-	u8         supported_atomic_req_8B_endianess_mode_1[0x1];
+	u8         supported_atomic_req_8B_endianness_mode_1[0x1];
 
 	u8         reserved_at_47[0x19];
 
-- 
cgit v1.2.3


From 432609a4cdfb1c3e3a58e6e37b3501e42bfc50ab Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 14 Jun 2017 11:52:33 +0300
Subject: net/mlx5e: Move and optimize query out of buffer function

Move "query queue counter out of buffer" helper function out of
qp.c to en_main.c, since mlx5e netdev driver is the only one to use it.

Also allocate the output buffer on the stack instead of the heap, to reduce
number of heap allocs on update_stats work.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  9 +++++++--
 drivers/net/ethernet/mellanox/mlx5/core/qp.c      | 20 --------------------
 include/linux/mlx5/qp.h                           |  2 --
 3 files changed, 7 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 50184021624e..8bb0241df069 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -289,12 +289,17 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv)
 static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
 {
 	struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
+	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
+	int err;
 
 	if (!priv->q_counter)
 		return;
 
-	mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter,
-				      &qcnt->rx_out_of_buffer);
+	err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out));
+	if (err)
+		return;
+
+	qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer);
 }
 
 static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index da0f18f93616..340f281c9801 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -518,23 +518,3 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
-
-int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id,
-				  u32 *out_of_buffer)
-{
-	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
-	void *out;
-	int err;
-
-	out = kvzalloc(outlen, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen);
-	if (!err)
-		*out_of_buffer = MLX5_GET(query_q_counter_out, out,
-					  out_of_buffer);
-
-	kfree(out);
-	return err;
-}
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index bef80d0a0e30..1f637f4d1265 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -569,8 +569,6 @@ int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id);
 int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id);
 int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
 			      int reset, void *out, int out_size);
-int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id,
-				  u32 *out_of_buffer);
 
 static inline const char *mlx5_qp_type_str(int type)
 {
-- 
cgit v1.2.3


From 4525abeaae54560254a1bb8970b3d4c225d32ef4 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 9 Feb 2017 13:20:46 +0200
Subject: net/mlx5: Expose command polling interface

Add a new interface for commands execution that allows the
caller to wait for the command's completion in a busy-wait
loop (polling mode).

This is useful if we want to execute a command in a polling mode
while the driver is working in events mode for the rest of
the commands.
This interface will be used in the downstream patches.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 30 ++++++++++++++++++++-------
 include/linux/mlx5/driver.h                   |  3 +++
 2 files changed, 26 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c1d8b0bcde75..4d5bd01f1ebb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -785,6 +785,8 @@ static void cmd_work_handler(struct work_struct *work)
 	struct mlx5_cmd_layout *lay;
 	struct semaphore *sem;
 	unsigned long flags;
+	bool poll_cmd = ent->polling;
+
 
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
@@ -845,7 +847,7 @@ static void cmd_work_handler(struct work_struct *work)
 	iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
 	mmiowb();
 	/* if not in polling don't use ent after this point */
-	if (cmd->mode == CMD_MODE_POLLING) {
+	if (cmd->mode == CMD_MODE_POLLING || poll_cmd) {
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();
@@ -889,7 +891,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int err;
 
-	if (cmd->mode == CMD_MODE_POLLING) {
+	if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
 		wait_for_completion(&ent->done);
 	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
 		ent->ret = -ETIMEDOUT;
@@ -917,7 +919,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 			   struct mlx5_cmd_msg *out, void *uout, int uout_size,
 			   mlx5_cmd_cbk_t callback,
 			   void *context, int page_queue, u8 *status,
-			   u8 token)
+			   u8 token, bool force_polling)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -935,6 +937,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 		return PTR_ERR(ent);
 
 	ent->token = token;
+	ent->polling = force_polling;
 
 	if (!callback)
 		init_completion(&ent->done);
@@ -1535,7 +1538,8 @@ static int is_manage_pages(void *in)
 }
 
 static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
-		    int out_size, mlx5_cmd_cbk_t callback, void *context)
+		    int out_size, mlx5_cmd_cbk_t callback, void *context,
+		    bool force_polling)
 {
 	struct mlx5_cmd_msg *inb;
 	struct mlx5_cmd_msg *outb;
@@ -1580,7 +1584,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	}
 
 	err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
-			      pages_queue, &status, token);
+			      pages_queue, &status, token, force_polling);
 	if (err)
 		goto out_out;
 
@@ -1608,7 +1612,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 {
 	int err;
 
-	err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+	err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
 	return err ? : mlx5_cmd_check(dev, in, out);
 }
 EXPORT_SYMBOL(mlx5_cmd_exec);
@@ -1617,10 +1621,22 @@ int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
 		     void *out, int out_size, mlx5_cmd_cbk_t callback,
 		     void *context)
 {
-	return cmd_exec(dev, in, in_size, out, out_size, callback, context);
+	return cmd_exec(dev, in, in_size, out, out_size, callback, context,
+			false);
 }
 EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+			  void *out, int out_size)
+{
+	int err;
+
+	err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+
+	return err ? : mlx5_cmd_check(dev, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_polling);
+
 static void destroy_msg_cache(struct mlx5_core_dev *dev)
 {
 	struct cmd_msg_cache *ch;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6ea2f5734e37..bf15e87da8fa 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -817,6 +817,7 @@ struct mlx5_cmd_work_ent {
 	u64			ts1;
 	u64			ts2;
 	u16			op;
+	bool			polling;
 };
 
 struct mlx5_pas {
@@ -915,6 +916,8 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
 		     void *out, int out_size, mlx5_cmd_cbk_t callback,
 		     void *context);
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+			  void *out, int out_size);
 void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
 
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
-- 
cgit v1.2.3


From 8812c24d28f4972c4f2b9998bf30b1f2a1b62adf Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 9 Feb 2017 14:20:12 +0200
Subject: net/mlx5: Add fast unload support in shutdown flow

Adding a support to flush all HW resources with one FW command and
skip all the heavy unload flows of the driver on kernel shutdown.
There's no need to free all the SW context since a new fresh kernel
will be loaded afterwards.

Regarding the FW resources, they should be closed, otherwise we will
have leakage in the FW. To accelerate this flow, we execute one command
in the beginning that tells the FW that the driver isn't going to close
any of the FW resources and asks the FW to clean up everything.
Once the commands complete, it's safe to close the PCI resources and
finish the routine.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c       | 28 +++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/health.c   |  4 +--
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 32 ++++++++++++++++++++--
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  3 +-
 include/linux/mlx5/mlx5_ifc.h                      | 14 ++++++++--
 5 files changed, 73 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 1bc14d0fded8..e9489e8d08bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -195,3 +195,31 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
 	MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+	int force_state;
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev, force_teardown)) {
+		mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+	MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE);
+
+	ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	force_state = MLX5_GET(teardown_hca_out, out, force_state);
+	if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+		mlx5_core_err(dev, "teardown with force mode failed\n");
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index c6679b21884e..0648a659b21d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -111,14 +111,14 @@ static int in_fatal(struct mlx5_core_dev *dev)
 	return 0;
 }
 
-void mlx5_enter_error_state(struct mlx5_core_dev *dev)
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
 {
 	mutex_lock(&dev->intf_state_mutex);
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		goto unlock;
 
 	mlx5_core_err(dev, "start\n");
-	if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
+	if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
 		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 		trigger_cmd_completions(dev);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 39e7e523a0dd..715eeab59999 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1418,7 +1418,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
 	dev_info(&pdev->dev, "%s was called\n", __func__);
 
-	mlx5_enter_error_state(dev);
+	mlx5_enter_error_state(dev, false);
 	mlx5_unload_one(dev, priv, false);
 	/* In case of kernel call drain the health wq */
 	if (state) {
@@ -1505,15 +1505,43 @@ static const struct pci_error_handlers mlx5_err_handler = {
 	.resume		= mlx5_pci_resume
 };
 
+static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
+{
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev, force_teardown)) {
+		mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
+		return -EAGAIN;
+	}
+
+	ret = mlx5_cmd_force_teardown_hca(dev);
+	if (ret) {
+		mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+		return ret;
+	}
+
+	mlx5_enter_error_state(dev, true);
+
+	return 0;
+}
+
 static void shutdown(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct mlx5_priv *priv = &dev->priv;
+	int err;
 
 	dev_info(&pdev->dev, "Shutdown was called\n");
 	/* Notify mlx5 clients that the kernel is being shut down */
 	set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
-	mlx5_unload_one(dev, priv, false);
+	err = mlx5_try_fast_unload(dev);
+	if (err)
+		mlx5_unload_one(dev, priv, false);
 	mlx5_pci_disable_device(dev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 1fd279c0338d..5ccdf43e58a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -83,12 +83,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 		     unsigned long param);
 void mlx5_core_page_fault(struct mlx5_core_dev *dev,
 			  struct mlx5_pagefault *pfault);
 void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
-void mlx5_enter_error_state(struct mlx5_core_dev *dev);
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 void mlx5_recover_device(struct mlx5_core_dev *dev);
 int mlx5_sriov_init(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1fd144662491..e86ef880a149 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -801,7 +801,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         max_indirection[0x8];
 	u8         fixed_buffer_size[0x1];
 	u8         log_max_mrw_sz[0x7];
-	u8         reserved_at_110[0x2];
+	u8         force_teardown[0x1];
+	u8         reserved_at_111[0x1];
 	u8         log_max_bsf_list_size[0x6];
 	u8         umr_extended_translation_offset[0x1];
 	u8         null_mkey[0x1];
@@ -3094,18 +3095,25 @@ struct mlx5_ifc_tsar_element_bits {
 	u8         reserved_at_10[0x10];
 };
 
+enum {
+	MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0,
+	MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1,
+};
+
 struct mlx5_ifc_teardown_hca_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x3f];
+
+	u8         force_state[0x1];
 };
 
 enum {
 	MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE  = 0x0,
-	MLX5_TEARDOWN_HCA_IN_PROFILE_PANIC_CLOSE     = 0x1,
+	MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE     = 0x1,
 };
 
 struct mlx5_ifc_teardown_hca_in_bits {
-- 
cgit v1.2.3


From c12c48ce869d72029d70666f615cbd8f67fc14e9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 4 Jun 2017 10:59:15 +0900
Subject: libnvdimm, label: add v1.2 interleave-set-cookie algorithm

The interleave-set-cookie algorithm is extended to incorporate all the
same components that are used to generate an nvdimm unique-id. For
backwards compatibility we still maintain the old v1.1 definition.

Reported-by: Nicholas Moulin <nicholas.w.moulin@intel.com>
Reported-by: Kaushik Kanetkar <kaushik.a.kanetkar@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c        | 53 +++++++++++++++++++++++++++++++++++++++--
 drivers/nvdimm/label.c          |  3 ++-
 drivers/nvdimm/namespace_devs.c |  9 +++++--
 drivers/nvdimm/nd.h             |  3 ++-
 drivers/nvdimm/region_devs.c    | 43 +++++++++++++++++++++++++++++----
 include/linux/libnvdimm.h       |  5 +++-
 6 files changed, 104 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 097eff0b963d..e744ab38eaf9 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1663,12 +1663,29 @@ struct nfit_set_info {
 	} mapping[0];
 };
 
+struct nfit_set_info2 {
+	struct nfit_set_info_map2 {
+		u64 region_offset;
+		u32 serial_number;
+		u16 vendor_id;
+		u16 manufacturing_date;
+		u8  manufacturing_location;
+		u8  reserved[31];
+	} mapping[0];
+};
+
 static size_t sizeof_nfit_set_info(int num_mappings)
 {
 	return sizeof(struct nfit_set_info)
 		+ num_mappings * sizeof(struct nfit_set_info_map);
 }
 
+static size_t sizeof_nfit_set_info2(int num_mappings)
+{
+	return sizeof(struct nfit_set_info2)
+		+ num_mappings * sizeof(struct nfit_set_info_map2);
+}
+
 static int cmp_map_compat(const void *m0, const void *m1)
 {
 	const struct nfit_set_info_map *map0 = m0;
@@ -1690,6 +1707,18 @@ static int cmp_map(const void *m0, const void *m1)
 	return 0;
 }
 
+static int cmp_map2(const void *m0, const void *m1)
+{
+	const struct nfit_set_info_map2 *map0 = m0;
+	const struct nfit_set_info_map2 *map1 = m1;
+
+	if (map0->region_offset < map1->region_offset)
+		return -1;
+	else if (map0->region_offset > map1->region_offset)
+		return 1;
+	return 0;
+}
+
 /* Retrieve the nth entry referencing this spa */
 static struct acpi_nfit_memory_map *memdev_from_spa(
 		struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
@@ -1711,6 +1740,7 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
 	struct device *dev = acpi_desc->dev;
 	struct nd_interleave_set *nd_set;
 	u16 nr = ndr_desc->num_mappings;
+	struct nfit_set_info2 *info2;
 	struct nfit_set_info *info;
 
 	if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
@@ -1725,9 +1755,15 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
 	info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
+
+	info2 = devm_kzalloc(dev, sizeof_nfit_set_info2(nr), GFP_KERNEL);
+	if (!info2)
+		return -ENOMEM;
+
 	for (i = 0; i < nr; i++) {
 		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
 		struct nfit_set_info_map *map = &info->mapping[i];
+		struct nfit_set_info_map2 *map2 = &info2->mapping[i];
 		struct nvdimm *nvdimm = mapping->nvdimm;
 		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 		struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
@@ -1740,19 +1776,32 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
 
 		map->region_offset = memdev->region_offset;
 		map->serial_number = nfit_mem->dcr->serial_number;
+
+		map2->region_offset = memdev->region_offset;
+		map2->serial_number = nfit_mem->dcr->serial_number;
+		map2->vendor_id = nfit_mem->dcr->vendor_id;
+		map2->manufacturing_date = nfit_mem->dcr->manufacturing_date;
+		map2->manufacturing_location = nfit_mem->dcr->manufacturing_location;
 	}
 
+	/* v1.1 namespaces */
 	sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
 			cmp_map, NULL);
-	nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+	nd_set->cookie1 = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+
+	/* v1.2 namespaces */
+	sort(&info2->mapping[0], nr, sizeof(struct nfit_set_info_map2),
+			cmp_map2, NULL);
+	nd_set->cookie2 = nd_fletcher64(info2, sizeof_nfit_set_info2(nr), 0);
 
-	/* support namespaces created with the wrong sort order */
+	/* support v1.1 namespaces created with the wrong sort order */
 	sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
 			cmp_map_compat, NULL);
 	nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
 
 	ndr_desc->nd_set = nd_set;
 	devm_kfree(dev, info);
+	devm_kfree(dev, info2);
 
 	return 0;
 }
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index d6233d220bfd..1aacd4866c76 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -553,7 +553,6 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
 		int pos)
 {
-	u64 cookie = nd_region_interleave_set_cookie(nd_region);
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_label_ent *label_ent, *victim = NULL;
 	struct nd_namespace_label *nd_label;
@@ -563,11 +562,13 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	unsigned long *free;
 	u32 nslot, slot;
 	size_t offset;
+	u64 cookie;
 	int rc;
 
 	if (!preamble_next(ndd, &nsindex, &free, &nslot))
 		return -ENXIO;
 
+	cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
 	nd_label_gen_id(&label_id, nspm->uuid, 0);
 	for_each_dpa_resource(ndd, res)
 		if (strcmp(res->name, label_id.id) == 0)
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 2f9dfbd2dbec..51f304fe8a52 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1698,10 +1698,11 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
  * @nd_label: target pmem namespace label to evaluate
  */
 struct device *create_namespace_pmem(struct nd_region *nd_region,
+		struct nd_namespace_index *nsindex,
 		struct nd_namespace_label *nd_label)
 {
+	u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
 	u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
-	u64 cookie = nd_region_interleave_set_cookie(nd_region);
 	struct nd_label_ent *label_ent;
 	struct nd_namespace_pmem *nspm;
 	struct nd_mapping *nd_mapping;
@@ -2108,7 +2109,11 @@ static struct device **scan_labels(struct nd_region *nd_region)
 				goto err;
 			devs[count++] = dev;
 		} else {
-			dev = create_namespace_pmem(nd_region, nd_label);
+			struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+			struct nd_namespace_index *nsindex;
+
+			nsindex = to_namespace_index(ndd, ndd->ns_current);
+			dev = create_namespace_pmem(nd_region, nsindex, nd_label);
 			if (IS_ERR(dev)) {
 				switch (PTR_ERR(dev)) {
 				case -EAGAIN:
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 28d9f4481547..ad4e518940c9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -336,7 +336,8 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
-u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
+		struct nd_namespace_index *nsindex);
 u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
 void nvdimm_bus_lock(struct device *dev);
 void nvdimm_bus_unlock(struct device *dev);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index b550edf2571f..282b8991ea83 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -307,13 +307,41 @@ static ssize_t set_cookie_show(struct device *dev,
 {
 	struct nd_region *nd_region = to_nd_region(dev);
 	struct nd_interleave_set *nd_set = nd_region->nd_set;
+	ssize_t rc = 0;
 
 	if (is_nd_pmem(dev) && nd_set)
 		/* pass, should be precluded by region_visible */;
 	else
 		return -ENXIO;
 
-	return sprintf(buf, "%#llx\n", nd_set->cookie);
+	/*
+	 * The cookie to show depends on which specification of the
+	 * labels we are using. If there are not labels then default to
+	 * the v1.1 namespace label cookie definition. To read all this
+	 * data we need to wait for probing to settle.
+	 */
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	if (nd_region->ndr_mappings) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+
+		if (ndd) {
+			struct nd_namespace_index *nsindex;
+
+			nsindex = to_namespace_index(ndd, ndd->ns_current);
+			rc = sprintf(buf, "%#llx\n",
+					nd_region_interleave_set_cookie(nd_region,
+						nsindex));
+		}
+	}
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	if (rc)
+		return rc;
+	return sprintf(buf, "%#llx\n", nd_set->cookie1);
 }
 static DEVICE_ATTR_RO(set_cookie);
 
@@ -564,13 +592,18 @@ struct attribute_group nd_region_attribute_group = {
 };
 EXPORT_SYMBOL_GPL(nd_region_attribute_group);
 
-u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
+		struct nd_namespace_index *nsindex)
 {
 	struct nd_interleave_set *nd_set = nd_region->nd_set;
 
-	if (nd_set)
-		return nd_set->cookie;
-	return 0;
+	if (!nd_set)
+		return 0;
+
+	if (nsindex && __le16_to_cpu(nsindex->major) == 1
+			&& __le16_to_cpu(nsindex->minor) == 1)
+		return nd_set->cookie1;
+	return nd_set->cookie2;
 }
 
 u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 6c807017128d..722cdf21429f 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -71,7 +71,10 @@ struct nd_cmd_desc {
 };
 
 struct nd_interleave_set {
-	u64 cookie;
+	/* v1.1 definition of the interleave-set-cookie algorithm */
+	u64 cookie1;
+	/* v1.2 definition of the interleave-set-cookie algorithm */
+	u64 cookie2;
 	/* compatibility with initial buggy Linux implementation */
 	u64 altcookie;
 };
-- 
cgit v1.2.3


From f979b13c3cc51584882bffa32965f34e5afa3b9b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 4 Jun 2017 12:12:07 +0900
Subject: libnvdimm, label: honor the lba size specified in v1.2 labels

Previously we only honored the lba size for blk-aperture mode
namespaces. For pmem namespaces the lba size was just assumed to be 512.
With the new v1.2 label definition and compatibility with other
operating environments, the ->lbasize property is now respected for pmem
namespaces.

Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/namespace_devs.c | 65 +++++++++++++++++++++++++++++++++--------
 drivers/nvdimm/nd.h             |  1 +
 drivers/nvdimm/pmem.c           |  1 +
 include/linux/nd.h              |  2 ++
 4 files changed, 57 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 51f304fe8a52..e034b003a5e2 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -163,6 +163,29 @@ bool pmem_should_map_pages(struct device *dev)
 }
 EXPORT_SYMBOL(pmem_should_map_pages);
 
+unsigned int pmem_sector_size(struct nd_namespace_common *ndns)
+{
+	if (is_namespace_pmem(&ndns->dev)) {
+		struct nd_namespace_pmem *nspm;
+
+		nspm = to_nd_namespace_pmem(&ndns->dev);
+		if (nspm->lbasize == 0 || nspm->lbasize == 512)
+			/* default */;
+		else if (nspm->lbasize == 4096)
+			return 4096;
+		else
+			dev_WARN(&ndns->dev, "unsupported sector size: %ld\n",
+					nspm->lbasize);
+	}
+
+	/*
+	 * There is no namespace label (is_namespace_io()), or the label
+	 * indicates the default sector size.
+	 */
+	return 512;
+}
+EXPORT_SYMBOL(pmem_sector_size);
+
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name)
 {
@@ -1283,28 +1306,49 @@ static ssize_t resource_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(resource);
 
-static const unsigned long ns_lbasize_supported[] = { 512, 520, 528,
+static const unsigned long blk_lbasize_supported[] = { 512, 520, 528,
 	4096, 4104, 4160, 4224, 0 };
 
+static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 };
+
 static ssize_t sector_size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+	if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
 
-	if (!is_namespace_blk(dev))
-		return -ENXIO;
+		return nd_sector_size_show(nsblk->lbasize,
+				blk_lbasize_supported, buf);
+	}
 
-	return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf);
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		return nd_sector_size_show(nspm->lbasize,
+				pmem_lbasize_supported, buf);
+	}
+	return -ENXIO;
 }
 
 static ssize_t sector_size_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
-	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
 	struct nd_region *nd_region = to_nd_region(dev->parent);
+	const unsigned long *supported;
+	unsigned long *lbasize;
 	ssize_t rc = 0;
 
-	if (!is_namespace_blk(dev))
+	if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		lbasize = &nsblk->lbasize;
+		supported = blk_lbasize_supported;
+	} else if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		lbasize = &nspm->lbasize;
+		supported = pmem_lbasize_supported;
+	} else
 		return -ENXIO;
 
 	device_lock(dev);
@@ -1312,8 +1356,7 @@ static ssize_t sector_size_store(struct device *dev,
 	if (to_ndns(dev)->claim)
 		rc = -EBUSY;
 	if (rc >= 0)
-		rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
-				ns_lbasize_supported);
+		rc = nd_sector_size_store(dev, buf, lbasize, supported);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
 	dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
@@ -1458,9 +1501,6 @@ static umode_t namespace_visible(struct kobject *kobj,
 		if (a == &dev_attr_size.attr)
 			return 0644;
 
-		if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
-			return 0;
-
 		return a->mode;
 	}
 
@@ -1795,6 +1835,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
 				NSLABEL_NAME_LEN, GFP_KERNEL);
 		nspm->uuid = kmemdup((void __force *) label0->uuid,
 				NSLABEL_UUID_LEN, GFP_KERNEL);
+		nspm->lbasize = __le64_to_cpu(label0->lbasize);
 	}
 
 	if (!nspm->alt_name || !nspm->uuid) {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ad4e518940c9..17cecb38dfc9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -356,6 +356,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
 int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name);
+unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
 		struct badblocks *bb, const struct resource *res);
 #if IS_ENABLED(CONFIG_ND_CLAIM)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..5c45e178bd4a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -342,6 +342,7 @@ static int pmem_attach_disk(struct device *dev,
 	blk_queue_write_cache(q, true, true);
 	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
+	blk_queue_logical_block_size(q, pmem_sector_size(ndns));
 	blk_queue_max_hw_sectors(q, UINT_MAX);
 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 194b8e002ea7..d8f5023b49ae 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -75,12 +75,14 @@ struct nd_namespace_io {
 /**
  * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory
  * @nsio: device and system physical address range to drive
+ * @lbasize: logical sector size for the namespace in block-device-mode
  * @alt_name: namespace name supplied in the dimm label
  * @uuid: namespace name supplied in the dimm label
  * @id: ida allocated id
  */
 struct nd_namespace_pmem {
 	struct nd_namespace_io nsio;
+	unsigned long lbasize;
 	char *alt_name;
 	u8 *uuid;
 	int id;
-- 
cgit v1.2.3


From faec6f8a1cd2c44e439de35ab3328c5cf7bf52d8 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jun 2017 11:10:51 -0700
Subject: libnvdimm, label: populate the type_guid property for v1.2 namespaces

The type_guid refers to the "Address Range Type GUID" for the region
backing a namespace as defined the ACPI NFIT (NVDIMM Firmware Interface
Table). This 'type' identifier specifies an access mechanism for the
given namespace. This capability replaces the confusing usage of the
'NSLABEL_FLAG_LOCAL' flag to indicate a block-aperture-mode namespace.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c        | 15 +++++++----
 drivers/nvdimm/label.c          |  6 +++++
 drivers/nvdimm/namespace_devs.c | 57 +++++++++++++++++++++++++++--------------
 include/linux/libnvdimm.h       |  3 +++
 4 files changed, 57 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index e744ab38eaf9..436cfdd1215b 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1743,15 +1743,17 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
 	struct nfit_set_info2 *info2;
 	struct nfit_set_info *info;
 
+	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+	if (!nd_set)
+		return -ENOMEM;
+	ndr_desc->nd_set = nd_set;
+	guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid);
+
 	if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
 		/* pass */;
 	else
 		return 0;
 
-	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
-	if (!nd_set)
-		return -ENOMEM;
-
 	info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
@@ -2228,7 +2230,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	struct nd_blk_region_desc *ndbr_desc;
 	struct nfit_mem *nfit_mem;
-	int blk_valid = 0;
+	int blk_valid = 0, rc;
 
 	if (!nvdimm) {
 		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2260,6 +2262,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		ndbr_desc = to_blk_region_desc(ndr_desc);
 		ndbr_desc->enable = acpi_nfit_blk_region_enable;
 		ndbr_desc->do_io = acpi_desc->blk_do_io;
+		rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
+		if (rc)
+			return rc;
 		nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
 				ndr_desc);
 		if (!nfit_spa->nd_region)
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 1aacd4866c76..d8b87d3a0ebe 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -553,6 +553,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
 		int pos)
 {
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_label_ent *label_ent, *victim = NULL;
 	struct nd_namespace_label *nd_label;
@@ -597,6 +598,8 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	nd_label->rawsize = __cpu_to_le64(resource_size(res));
 	nd_label->dpa = __cpu_to_le64(res->start);
 	nd_label->slot = __cpu_to_le32(slot);
+	if (namespace_label_has(ndd, type_guid))
+		guid_copy(&nd_label->type_guid, &nd_set->type_guid);
 	nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
 
 	/* update label */
@@ -684,6 +687,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		int num_labels)
 {
 	int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_namespace_label *nd_label;
 	struct nd_label_ent *label_ent, *e;
@@ -788,6 +792,8 @@ static int __blk_label_update(struct nd_region *nd_region,
 		nd_label->rawsize = __cpu_to_le64(resource_size(res));
 		nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
 		nd_label->slot = __cpu_to_le32(slot);
+		if (namespace_label_has(ndd, type_guid))
+			guid_copy(&nd_label->type_guid, &nd_set->type_guid);
 
 		/* update label */
 		offset = nd_label_offset(ndd, nd_label);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index e034b003a5e2..e101aec186c7 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1639,6 +1639,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_interleave_set *nd_set = nd_region->nd_set;
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 		struct nd_label_ent *label_ent;
 		bool found_uuid = false;
 
@@ -1659,8 +1661,17 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 			if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
 				continue;
 
+			if (namespace_label_has(ndd, type_guid)
+					&& !guid_equal(&nd_set->type_guid,
+						&nd_label->type_guid)) {
+				dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
+						nd_set->type_guid.b,
+						nd_label->type_guid.b);
+				continue;
+			}
+
 			if (found_uuid) {
-				dev_dbg(to_ndd(nd_mapping)->dev,
+				dev_dbg(ndd->dev,
 						"%s duplicate entry for uuid\n",
 						__func__);
 				return false;
@@ -2047,12 +2058,21 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
 {
 
 	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_namespace_blk *nsblk;
 	char name[NSLABEL_NAME_LEN];
 	struct device *dev = NULL;
 	struct resource *res;
 
+	if (namespace_label_has(ndd, type_guid)
+			&& !guid_equal(&nd_set->type_guid,
+				&nd_label->type_guid)) {
+		dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
+				nd_set->type_guid.b, nd_label->type_guid.b);
+		return ERR_PTR(-EAGAIN);
+	}
+
 	nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
 	if (!nsblk)
 		return ERR_PTR(-ENOMEM);
@@ -2144,31 +2164,30 @@ static struct device **scan_labels(struct nd_region *nd_region)
 		kfree(devs);
 		devs = __devs;
 
-		if (is_nd_blk(&nd_region->dev)) {
+		if (is_nd_blk(&nd_region->dev))
 			dev = create_namespace_blk(nd_region, nd_label, count);
-			if (IS_ERR(dev))
-				goto err;
-			devs[count++] = dev;
-		} else {
+		else {
 			struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 			struct nd_namespace_index *nsindex;
 
 			nsindex = to_namespace_index(ndd, ndd->ns_current);
 			dev = create_namespace_pmem(nd_region, nsindex, nd_label);
-			if (IS_ERR(dev)) {
-				switch (PTR_ERR(dev)) {
-				case -EAGAIN:
-					/* skip invalid labels */
-					continue;
-				case -ENODEV:
-					/* fallthrough to seed creation */
-					break;
-				default:
-					goto err;
-				}
-			} else
-				devs[count++] = dev;
 		}
+
+		if (IS_ERR(dev)) {
+			switch (PTR_ERR(dev)) {
+			case -EAGAIN:
+				/* skip invalid labels */
+				continue;
+			case -ENODEV:
+				/* fallthrough to seed creation */
+				break;
+			default:
+				goto err;
+			}
+		} else
+			devs[count++] = dev;
+
 	}
 
 	dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 722cdf21429f..4b9f178c82e6 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/sizes.h>
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 enum {
 	/* when a dimm supports both PMEM and BLK access a label is required */
@@ -77,6 +78,8 @@ struct nd_interleave_set {
 	u64 cookie2;
 	/* compatibility with initial buggy Linux implementation */
 	u64 altcookie;
+
+	guid_t type_guid;
 };
 
 struct nd_mapping_desc {
-- 
cgit v1.2.3


From b3fde74ea195d2f9f49830a29f971a0aab4cd67a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 4 Jun 2017 10:18:39 +0900
Subject: libnvdimm, label: add address abstraction identifiers

Starting with v1.2 labels, 'address abstractions' can be hinted via an
address abstraction id that implies an info-block format. The standard
address abstraction in the specification is the v2 format of the
Block-Translation-Table (BTT). Support for that is saved for a later
patch, for now we add support for the Linux supported address
abstractions BTT (v1), PFN, and DAX.

The new 'holder_class' attribute for namespace devices is added for
tooling to specify the 'abstraction_guid' to store in the namespace label.
For v1.1 labels this field is undefined and any setting of
'holder_class' away from the default 'none' value will only have effect
until the driver is unloaded. Setting 'holder_class' requires that
whatever device tries to claim the namespace must be of the specified
class.

Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/btt_devs.c       |  8 +++++
 drivers/nvdimm/claim.c          | 28 ++++++++++++++++
 drivers/nvdimm/core.c           |  3 ++
 drivers/nvdimm/dax_devs.c       |  8 +++++
 drivers/nvdimm/label.c          | 58 ++++++++++++++++++++++++++++++++
 drivers/nvdimm/label.h          |  5 +++
 drivers/nvdimm/namespace_devs.c | 74 +++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/nd.h             |  1 +
 drivers/nvdimm/pfn_devs.c       |  8 +++++
 include/linux/nd.h              | 10 ++++++
 10 files changed, 203 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 4c989bb9a8a0..31d875a91569 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -295,6 +295,14 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 	if (ndns->force_raw)
 		return -ENODEV;
 
+	switch (ndns->claim_class) {
+	case NVDIMM_CCLASS_NONE:
+	case NVDIMM_CCLASS_BTT:
+		break;
+	default:
+		return -ENODEV;
+	}
+
 	nvdimm_bus_lock(&ndns->dev);
 	btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
 	nvdimm_bus_unlock(&ndns->dev);
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 7ceb5fa4f2a1..de9b1cce242e 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -184,6 +184,34 @@ ssize_t nd_namespace_store(struct device *dev,
 	}
 
 	ndns = to_ndns(found);
+
+	switch (ndns->claim_class) {
+	case NVDIMM_CCLASS_NONE:
+		break;
+	case NVDIMM_CCLASS_BTT:
+		if (!is_nd_btt(dev)) {
+			len = -EBUSY;
+			goto out_attach;
+		}
+		break;
+	case NVDIMM_CCLASS_PFN:
+		if (!is_nd_pfn(dev)) {
+			len = -EBUSY;
+			goto out_attach;
+		}
+		break;
+	case NVDIMM_CCLASS_DAX:
+		if (!is_nd_dax(dev)) {
+			len = -EBUSY;
+			goto out_attach;
+		}
+		break;
+	default:
+		len = -EBUSY;
+		goto out_attach;
+		break;
+	}
+
 	if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
 		dev_dbg(dev, "%s too small to host\n", name);
 		len = -ENXIO;
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 2dee908e4bae..ed0bf174d128 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -699,6 +699,9 @@ static __init int libnvdimm_init(void)
 	rc = nd_region_init();
 	if (rc)
 		goto err_region;
+
+	nd_label_init();
+
 	return 0;
  err_region:
 	nvdimm_exit();
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index c1b6556aea6e..59f676381ae5 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -111,6 +111,14 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
 	if (ndns->force_raw)
 		return -ENODEV;
 
+	switch (ndns->claim_class) {
+	case NVDIMM_CCLASS_NONE:
+	case NVDIMM_CCLASS_DAX:
+		break;
+	default:
+		return -ENODEV;
+	}
+
 	nvdimm_bus_lock(&ndns->dev);
 	nd_dax = nd_dax_alloc(nd_region);
 	nd_pfn = &nd_dax->nd_pfn;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index c503362a03c7..837bf21c8555 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -12,6 +12,7 @@
  */
 #include <linux/device.h>
 #include <linux/ndctl.h>
+#include <linux/uuid.h>
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/nd.h>
@@ -19,6 +20,10 @@
 #include "label.h"
 #include "nd.h"
 
+static guid_t nvdimm_btt_guid;
+static guid_t nvdimm_pfn_guid;
+static guid_t nvdimm_dax_guid;
+
 static u32 best_seq(u32 a, u32 b)
 {
 	a &= NSINDEX_SEQ_MASK;
@@ -565,10 +570,44 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
 		- (unsigned long) to_namespace_index(ndd, 0);
 }
 
+enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
+{
+	if (guid_equal(guid, &nvdimm_btt_guid))
+		return NVDIMM_CCLASS_BTT;
+	else if (guid_equal(guid, &nvdimm_pfn_guid))
+		return NVDIMM_CCLASS_PFN;
+	else if (guid_equal(guid, &nvdimm_dax_guid))
+		return NVDIMM_CCLASS_DAX;
+	else if (guid_equal(guid, &guid_null))
+		return NVDIMM_CCLASS_NONE;
+
+	return NVDIMM_CCLASS_UNKNOWN;
+}
+
+static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
+	guid_t *target)
+{
+	if (claim_class == NVDIMM_CCLASS_BTT)
+		return &nvdimm_btt_guid;
+	else if (claim_class == NVDIMM_CCLASS_PFN)
+		return &nvdimm_pfn_guid;
+	else if (claim_class == NVDIMM_CCLASS_DAX)
+		return &nvdimm_dax_guid;
+	else if (claim_class == NVDIMM_CCLASS_UNKNOWN) {
+		/*
+		 * If we're modifying a namespace for which we don't
+		 * know the claim_class, don't touch the existing guid.
+		 */
+		return target;
+	} else
+		return &guid_null;
+}
+
 static int __pmem_label_update(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
 		int pos)
 {
+	struct nd_namespace_common *ndns = &nspm->nsio.common;
 	struct nd_interleave_set *nd_set = nd_region->nd_set;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_label_ent *label_ent, *victim = NULL;
@@ -616,6 +655,10 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	nd_label->slot = __cpu_to_le32(slot);
 	if (namespace_label_has(ndd, type_guid))
 		guid_copy(&nd_label->type_guid, &nd_set->type_guid);
+	if (namespace_label_has(ndd, abstraction_guid))
+		guid_copy(&nd_label->abstraction_guid,
+				to_abstraction_guid(ndns->claim_class,
+					&nd_label->abstraction_guid));
 	if (namespace_label_has(ndd, checksum)) {
 		u64 sum;
 
@@ -711,6 +754,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 {
 	int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
 	struct nd_interleave_set *nd_set = nd_region->nd_set;
+	struct nd_namespace_common *ndns = &nsblk->common;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_namespace_label *nd_label;
 	struct nd_label_ent *label_ent, *e;
@@ -848,6 +892,11 @@ static int __blk_label_update(struct nd_region *nd_region,
 		nd_label->slot = __cpu_to_le32(slot);
 		if (namespace_label_has(ndd, type_guid))
 			guid_copy(&nd_label->type_guid, &nd_set->type_guid);
+		if (namespace_label_has(ndd, abstraction_guid))
+			guid_copy(&nd_label->abstraction_guid,
+					to_abstraction_guid(ndns->claim_class,
+						&nd_label->abstraction_guid));
+
 		if (namespace_label_has(ndd, checksum)) {
 			u64 sum;
 
@@ -1101,3 +1150,12 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region,
 
 	return __blk_label_update(nd_region, nd_mapping, nsblk, count);
 }
+
+int __init nd_label_init(void)
+{
+	WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid));
+	WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid));
+	WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid));
+
+	return 0;
+}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index f39bfb31f72f..7c8e2cc9e73e 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -112,6 +112,10 @@ struct nd_namespace_label {
 	__le64 checksum;
 };
 
+#define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a"
+#define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225"
+#define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088"
+
 /**
  * struct nd_label_id - identifier string for dpa allocation
  * @id: "{blk|pmem}-<namespace uuid>"
@@ -142,6 +146,7 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
 u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
 bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
 u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
+enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid);
 struct nd_region;
 struct nd_namespace_pmem;
 struct nd_namespace_blk;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 7aba9a569c8e..f05d9b0672bf 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1425,6 +1425,69 @@ static ssize_t holder_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(holder);
 
+static ssize_t __holder_class_store(struct device *dev, const char *buf)
+{
+	struct nd_namespace_common *ndns = to_ndns(dev);
+
+	if (dev->driver || ndns->claim)
+		return -EBUSY;
+
+	if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0)
+		ndns->claim_class = NVDIMM_CCLASS_BTT;
+	else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0)
+		ndns->claim_class = NVDIMM_CCLASS_PFN;
+	else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0)
+		ndns->claim_class = NVDIMM_CCLASS_DAX;
+	else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0)
+		ndns->claim_class = NVDIMM_CCLASS_NONE;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static ssize_t holder_class_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	rc = __holder_class_store(dev, buf);
+	if (rc >= 0)
+		rc = nd_namespace_label_update(nd_region, dev);
+	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc < 0 ? rc : len;
+}
+
+static ssize_t holder_class_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_namespace_common *ndns = to_ndns(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	if (ndns->claim_class == NVDIMM_CCLASS_NONE)
+		rc = sprintf(buf, "\n");
+	else if (ndns->claim_class == NVDIMM_CCLASS_BTT)
+		rc = sprintf(buf, "btt\n");
+	else if (ndns->claim_class == NVDIMM_CCLASS_PFN)
+		rc = sprintf(buf, "pfn\n");
+	else if (ndns->claim_class == NVDIMM_CCLASS_DAX)
+		rc = sprintf(buf, "dax\n");
+	else
+		rc = sprintf(buf, "<unknown>\n");
+	device_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(holder_class);
+
 static ssize_t mode_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -1483,6 +1546,7 @@ static struct attribute *nd_namespace_attributes[] = {
 	&dev_attr_force_raw.attr,
 	&dev_attr_sector_size.attr,
 	&dev_attr_dpa_extents.attr,
+	&dev_attr_holder_class.attr,
 	NULL,
 };
 
@@ -1506,6 +1570,7 @@ static umode_t namespace_visible(struct kobject *kobj,
 
 	if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
 			|| a == &dev_attr_holder.attr
+			|| a == &dev_attr_holder_class.attr
 			|| a == &dev_attr_force_raw.attr
 			|| a == &dev_attr_mode.attr)
 		return a->mode;
@@ -1827,6 +1892,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
 	/* Calculate total size and populate namespace properties from label0 */
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_namespace_label *label0;
+		struct nvdimm_drvdata *ndd;
 
 		nd_mapping = &nd_region->mapping[i];
 		label_ent = list_first_entry_or_null(&nd_mapping->labels,
@@ -1847,6 +1913,11 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
 		nspm->uuid = kmemdup((void __force *) label0->uuid,
 				NSLABEL_UUID_LEN, GFP_KERNEL);
 		nspm->lbasize = __le64_to_cpu(label0->lbasize);
+		ndd = to_ndd(nd_mapping);
+		if (namespace_label_has(ndd, abstraction_guid))
+			nspm->nsio.common.claim_class
+				= to_nvdimm_cclass(&label0->abstraction_guid);
+
 	}
 
 	if (!nspm->alt_name || !nspm->uuid) {
@@ -2091,6 +2162,9 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
 	nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
 	nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
 			GFP_KERNEL);
+	if (namespace_label_has(ndd, abstraction_guid))
+		nsblk->common.claim_class
+			= to_nvdimm_cclass(&nd_label->abstraction_guid);
 	if (!nsblk->uuid)
 		goto blk_err;
 	memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 17cecb38dfc9..8cabd836df0e 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -235,6 +235,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf,
 		unsigned long *current_lbasize, const unsigned long *supported);
 int __init nvdimm_init(void);
 int __init nd_region_init(void);
+int __init nd_label_init(void);
 void nvdimm_exit(void);
 void nd_region_exit(void);
 struct nvdimm;
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index a6c403600d19..5e4041276d6f 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -471,6 +471,14 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
 	if (ndns->force_raw)
 		return -ENODEV;
 
+	switch (ndns->claim_class) {
+	case NVDIMM_CCLASS_NONE:
+	case NVDIMM_CCLASS_PFN:
+		break;
+	default:
+		return -ENODEV;
+	}
+
 	nvdimm_bus_lock(&ndns->dev);
 	nd_pfn = nd_pfn_alloc(nd_region);
 	pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index d8f5023b49ae..96069c543890 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -21,6 +21,14 @@ enum nvdimm_event {
 	NVDIMM_REVALIDATE_POISON,
 };
 
+enum nvdimm_claim_class {
+	NVDIMM_CCLASS_NONE,
+	NVDIMM_CCLASS_BTT,
+	NVDIMM_CCLASS_PFN,
+	NVDIMM_CCLASS_DAX,
+	NVDIMM_CCLASS_UNKNOWN,
+};
+
 struct nd_device_driver {
 	struct device_driver drv;
 	unsigned long type;
@@ -41,12 +49,14 @@ static inline struct nd_device_driver *to_nd_device_driver(
  * @force_raw: ignore other personalities for the namespace (e.g. btt)
  * @dev: device model node
  * @claim: when set a another personality has taken ownership of the namespace
+ * @claim_class: restrict claim type to a given class
  * @rw_bytes: access the raw namespace capacity with byte-aligned transfers
  */
 struct nd_namespace_common {
 	int force_raw;
 	struct device dev;
 	struct device *claim;
+	enum nvdimm_claim_class claim_class;
 	int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
 			void *buf, size_t size, int rw, unsigned long flags);
 };
-- 
cgit v1.2.3


From fec53774fd043038e57ac737d90e8d58975d6e92 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 21:56:49 -0700
Subject: filesystem-dax: convert to dax_copy_from_iter()

Now that all possible providers of the dax_operations copy_from_iter
method are implemented, switch filesytem-dax to call the driver rather
than copy_to_iter_pmem.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 50 ---------------------------------------------
 fs/dax.c                    |  3 ++-
 include/linux/pmem.h        | 24 ----------------------
 3 files changed, 2 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 0ff8fe71b255..60e8edbe0205 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -65,56 +65,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 		clwb(p);
 }
 
-/**
- * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
- * @addr:	PMEM destination address
- * @bytes:	number of bytes to copy
- * @i:		iterator with source data
- *
- * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- */
-static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
-		struct iov_iter *i)
-{
-	size_t len;
-
-	/* TODO: skip the write-back by always using non-temporal stores */
-	len = copy_from_iter_nocache(addr, bytes, i);
-
-	/*
-	 * In the iovec case on x86_64 copy_from_iter_nocache() uses
-	 * non-temporal stores for the bulk of the transfer, but we need
-	 * to manually flush if the transfer is unaligned. A cached
-	 * memory copy is used when destination or size is not naturally
-	 * aligned. That is:
-	 *   - Require 8-byte alignment when size is 8 bytes or larger.
-	 *   - Require 4-byte alignment when size is 4 bytes.
-	 *
-	 * In the non-iovec case the entire destination needs to be
-	 * flushed.
-	 */
-	if (iter_is_iovec(i)) {
-		unsigned long flushed, dest = (unsigned long) addr;
-
-		if (bytes < 8) {
-			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
-				arch_wb_cache_pmem(addr, bytes);
-		} else {
-			if (!IS_ALIGNED(dest, 8)) {
-				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
-				arch_wb_cache_pmem(addr, 1);
-			}
-
-			flushed = dest - (unsigned long) addr;
-			if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
-				arch_wb_cache_pmem(addr + bytes - 1, 1);
-		}
-	} else
-		arch_wb_cache_pmem(addr, bytes);
-
-	return len;
-}
-
 /**
  * arch_clear_pmem - zero a PMEM memory range
  * @addr:	virtual start address
diff --git a/fs/dax.c b/fs/dax.c
index 2a6889b3585f..b459948de427 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1054,7 +1054,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 			map_len = end - pos;
 
 		if (iov_iter_rw(iter) == WRITE)
-			map_len = copy_from_iter_pmem(kaddr, map_len, iter);
+			map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr,
+					map_len, iter);
 		else
 			map_len = copy_to_iter(kaddr, map_len, iter);
 		if (map_len <= 0) {
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 71ecf3d46aac..9d542a5600e4 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -31,13 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 	BUG();
 }
 
-static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
-		struct iov_iter *i)
-{
-	BUG();
-	return 0;
-}
-
 static inline void arch_clear_pmem(void *addr, size_t size)
 {
 	BUG();
@@ -79,23 +72,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
 		memcpy(dst, src, n);
 }
 
-/**
- * copy_from_iter_pmem - copy data from an iterator to PMEM
- * @addr:	PMEM destination address
- * @bytes:	number of bytes to copy
- * @i:		iterator with source data
- *
- * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * See blkdev_issue_flush() note for memcpy_to_pmem().
- */
-static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
-		struct iov_iter *i)
-{
-	if (arch_has_pmem_api())
-		return arch_copy_from_iter_pmem(addr, bytes, i);
-	return copy_from_iter_nocache(addr, bytes, i);
-}
-
 /**
  * clear_pmem - zero a PMEM memory range
  * @addr:	virtual start address
-- 
cgit v1.2.3


From 3c1cebff23cdca01c421411e953a9e239f2b9ef9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 12:58:19 -0700
Subject: dax, pmem: introduce an optional 'flush' dax_operation

Filesystem-DAX flushes caches whenever it writes to the address returned
through dax_direct_access() and when writing back dirty radix entries.
That flushing is only required in the pmem case, so add a dax operation
to allow pmem to take this extra action, but skip it for other dax
capable devices that do not provide a flush routine.

An example for this differentiation might be a volatile ram disk where
there is no expectation of persistence. In fact the pmem driver itself might
front such an address range specified by the NFIT. So, this "no flush"
property might be something passed down by the bus / libnvdimm.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pmem.c | 7 +++++++
 include/linux/dax.h   | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2f3aefe565c6..823b07774244 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -242,9 +242,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 	return copy_from_iter_flushcache(addr, bytes, i);
 }
 
+static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t size)
+{
+	wb_cache_pmem(addr, size);
+}
+
 static const struct dax_operations pmem_dax_ops = {
 	.direct_access = pmem_dax_direct_access,
 	.copy_from_iter = pmem_copy_from_iter,
+	.flush = pmem_dax_flush,
 };
 
 static void pmem_release_queue(void *q)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 28e398f8c59e..407dd3ff6e54 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -19,6 +19,8 @@ struct dax_operations {
 	/* copy_from_iter: dax-driver override for default copy_from_iter */
 	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
 			struct iov_iter *);
+	/* flush: optional driver-specific cache management after writes */
+	void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
 };
 
 #if IS_ENABLED(CONFIG_DAX)
-- 
cgit v1.2.3


From abebfbe2f7315dd3ec9a0c69596a76e32beb5749 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 13:02:52 -0700
Subject: dm: add ->flush() dax operation support

Allow device-mapper to route flush operations to the
per-target implementation. In order for the device stacking to work we
need a dax_dev and a pgoff relative to that device. This gives each
layer of the stack the information it needs to look up the operation
pointer for the next level.

This conceptually allows for an array of mixed device drivers with
varying flush implementations.

Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c           | 11 +++++++++++
 drivers/md/dm-linear.c        | 15 +++++++++++++++
 drivers/md/dm-stripe.c        | 20 ++++++++++++++++++++
 drivers/md/dm.c               | 19 +++++++++++++++++++
 include/linux/dax.h           |  2 ++
 include/linux/device-mapper.h |  3 +++
 6 files changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index dd299e55f65d..b7729e4d351a 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -185,6 +185,17 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 }
 EXPORT_SYMBOL_GPL(dax_copy_from_iter);
 
+void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t size)
+{
+	if (!dax_alive(dax_dev))
+		return;
+
+	if (dax_dev->ops->flush)
+		dax_dev->ops->flush(dax_dev, pgoff, addr, size);
+}
+EXPORT_SYMBOL_GPL(dax_flush);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 0841ec1bfbad..25e661974319 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -173,6 +173,20 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr,
+		size_t size)
+{
+	struct linear_c *lc = ti->private;
+	struct block_device *bdev = lc->dev->bdev;
+	struct dax_device *dax_dev = lc->dev->dax_dev;
+	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+	dev_sector = linear_map_sector(ti, sector);
+	if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff))
+		return;
+	dax_flush(dax_dev, pgoff, addr, size);
+}
+
 static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 3, 0},
@@ -186,6 +200,7 @@ static struct target_type linear_target = {
 	.iterate_devices = linear_iterate_devices,
 	.direct_access = linear_dax_direct_access,
 	.dax_copy_from_iter = linear_dax_copy_from_iter,
+	.dax_flush = linear_dax_flush,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 1ef914f9ca72..8e73517967b6 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -351,6 +351,25 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr,
+		size_t size)
+{
+	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+	struct stripe_c *sc = ti->private;
+	struct dax_device *dax_dev;
+	struct block_device *bdev;
+	uint32_t stripe;
+
+	stripe_map_sector(sc, sector, &stripe, &dev_sector);
+	dev_sector += sc->stripe[stripe].physical_start;
+	dax_dev = sc->stripe[stripe].dev->dax_dev;
+	bdev = sc->stripe[stripe].dev->bdev;
+
+	if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff))
+		return;
+	dax_flush(dax_dev, pgoff, addr, size);
+}
+
 /*
  * Stripe status:
  *
@@ -471,6 +490,7 @@ static struct target_type stripe_target = {
 	.io_hints = stripe_io_hints,
 	.direct_access = stripe_dax_direct_access,
 	.dax_copy_from_iter = stripe_dax_copy_from_iter,
+	.dax_flush = stripe_dax_flush,
 };
 
 int __init dm_stripe_init(void)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7faaceb52819..09b3efdc8abf 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -994,6 +994,24 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 	return ret;
 }
 
+static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t size)
+{
+	struct mapped_device *md = dax_get_private(dax_dev);
+	sector_t sector = pgoff * PAGE_SECTORS;
+	struct dm_target *ti;
+	int srcu_idx;
+
+	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+	if (!ti)
+		goto out;
+	if (ti->type->dax_flush)
+		ti->type->dax_flush(ti, pgoff, addr, size);
+ out:
+	dm_put_live_table(md, srcu_idx);
+}
+
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
  * allowed for all bio types except REQ_PREFLUSH.
@@ -2885,6 +2903,7 @@ static const struct block_device_operations dm_blk_dops = {
 static const struct dax_operations dm_dax_ops = {
 	.direct_access = dm_dax_direct_access,
 	.copy_from_iter = dm_dax_copy_from_iter,
+	.flush = dm_dax_flush,
 };
 
 /*
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 407dd3ff6e54..1f6b6072af64 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -82,6 +82,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
+void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+		size_t size);
 
 /*
  * We use lowest available bit in exceptional entry for locking, one bit for
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 11c8a0a92f9c..67bfe8ddcb32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -134,6 +134,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn);
 typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
 		void *addr, size_t bytes, struct iov_iter *i);
+typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr,
+		size_t size);
 #define PAGE_SECTORS (PAGE_SIZE / 512)
 
 void dm_error(const char *message);
@@ -184,6 +186,7 @@ struct target_type {
 	dm_io_hints_fn io_hints;
 	dm_dax_direct_access_fn direct_access;
 	dm_dax_copy_from_iter_fn dax_copy_from_iter;
+	dm_dax_flush_fn dax_flush;
 
 	/* For internal device-mapper use. */
 	struct list_head list;
-- 
cgit v1.2.3


From 81f558701ae8d5677635118751b1b4043094c7e9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 13:12:20 -0700
Subject: x86, dax: replace clear_pmem() with open coded memset +
 dax_ops->flush

The clear_pmem() helper simply combines a memset() plus a cache flush.
Now that the flush routine is optionally provided by the dax device
driver we can avoid unnecessary cache management on dax devices fronting
volatile memory.

With clear_pmem() gone we can follow on with a patch to make pmem cache
management completely defined within the pmem driver.

Cc: <x86@kernel.org>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 13 -------------
 fs/dax.c                    |  3 ++-
 include/linux/pmem.h        | 21 ---------------------
 3 files changed, 2 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 60e8edbe0205..f4c119d253f3 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -65,19 +65,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 		clwb(p);
 }
 
-/**
- * arch_clear_pmem - zero a PMEM memory range
- * @addr:	virtual start address
- * @size:	number of bytes to zero
- *
- * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- */
-static inline void arch_clear_pmem(void *addr, size_t size)
-{
-	memset(addr, 0, size);
-	arch_wb_cache_pmem(addr, size);
-}
-
 static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
 	clflush_cache_range(addr, size);
diff --git a/fs/dax.c b/fs/dax.c
index 0933fc460ada..554b8e7d921c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -975,7 +975,8 @@ int __dax_zero_page_range(struct block_device *bdev,
 			dax_read_unlock(id);
 			return rc;
 		}
-		clear_pmem(kaddr + offset, size);
+		memset(kaddr + offset, 0, size);
+		dax_flush(dax_dev, pgoff, kaddr + offset, size);
 		dax_read_unlock(id);
 	}
 	return 0;
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 9d542a5600e4..772bd02a5b52 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 	BUG();
 }
 
-static inline void arch_clear_pmem(void *addr, size_t size)
-{
-	BUG();
-}
-
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	BUG();
@@ -72,22 +67,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
 		memcpy(dst, src, n);
 }
 
-/**
- * clear_pmem - zero a PMEM memory range
- * @addr:	virtual start address
- * @size:	number of bytes to zero
- *
- * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * See blkdev_issue_flush() note for memcpy_to_pmem().
- */
-static inline void clear_pmem(void *addr, size_t size)
-{
-	if (arch_has_pmem_api())
-		arch_clear_pmem(addr, size);
-	else
-		memset(addr, 0, size);
-}
-
 /**
  * invalidate_pmem - flush a pmem range from the cache hierarchy
  * @addr:	virtual start address
-- 
cgit v1.2.3


From 4e4f00a9b51a1c52ebdd728a1caeb3b9fe48c39d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 22:40:44 -0700
Subject: x86, dax, libnvdimm: remove wb_cache_pmem() indirection

With all handling of the CONFIG_ARCH_HAS_PMEM_API case being moved to
libnvdimm and the pmem driver directly we do not need to provide global
wrappers and fallbacks in the CONFIG_ARCH_HAS_PMEM_API=n case. The pmem
driver will simply not link to arch_wb_cache_pmem() in that case.  Same
as before, pmem flushing is only defined for x86_64, via
clean_cache_range(), but it is straightforward to add other archs in the
future.

arch_wb_cache_pmem() is an exported function since the pmem module needs
to find it, but it is privately declared in drivers/nvdimm/pmem.h because
there are no consumers outside of the pmem driver.

Cc: <x86@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 21 ---------------------
 arch/x86/lib/usercopy_64.c  |  6 ++++++
 drivers/nvdimm/pmem.c       |  2 +-
 drivers/nvdimm/pmem.h       |  8 ++++++++
 include/linux/pmem.h        | 19 -------------------
 5 files changed, 15 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index f4c119d253f3..4759a179aa52 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -44,27 +44,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 		BUG();
 }
 
-/**
- * arch_wb_cache_pmem - write back a cache range with CLWB
- * @vaddr:	virtual start address
- * @size:	number of bytes to write back
- *
- * Write back a cache range using the CLWB (cache line write back)
- * instruction. Note that @size is internally rounded up to be cache
- * line size aligned.
- */
-static inline void arch_wb_cache_pmem(void *addr, size_t size)
-{
-	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
-	unsigned long clflush_mask = x86_clflush_size - 1;
-	void *vend = addr + size;
-	void *p;
-
-	for (p = (void *)((unsigned long)addr & ~clflush_mask);
-	     p < vend; p += x86_clflush_size)
-		clwb(p);
-}
-
 static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
 	clflush_cache_range(addr, size);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f42d2fd86ca3..75d3776123cc 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -97,6 +97,12 @@ static void clean_cache_range(void *addr, size_t size)
 		clwb(p);
 }
 
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+	clean_cache_range(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
 long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
 {
 	unsigned long flushed, dest = (unsigned long) dst;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 823b07774244..3b87702d46bb 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -245,7 +245,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff,
 		void *addr, size_t size)
 {
-	wb_cache_pmem(addr, size);
+	arch_wb_cache_pmem(addr, size);
 }
 
 static const struct dax_operations pmem_dax_ops = {
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 7f4dbd72a90a..c4b3371c7f88 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -5,6 +5,14 @@
 #include <linux/pfn_t.h>
 #include <linux/fs.h>
 
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+void arch_wb_cache_pmem(void *addr, size_t size);
+#else
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
+{
+}
+#endif
+
 /* this definition is in it's own header for tools/testing/nvdimm to consume */
 struct pmem_device {
 	/* One contiguous memory region per device */
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 772bd02a5b52..33ae761f010a 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 	BUG();
 }
 
-static inline void arch_wb_cache_pmem(void *addr, size_t size)
-{
-	BUG();
-}
-
 static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
 	BUG();
@@ -80,18 +75,4 @@ static inline void invalidate_pmem(void *addr, size_t size)
 	if (arch_has_pmem_api())
 		arch_invalidate_pmem(addr, size);
 }
-
-/**
- * wb_cache_pmem - write back processor cache for PMEM memory range
- * @addr:	virtual start address
- * @size:	number of bytes to write back
- *
- * Write back the processor cache range starting at 'addr' for 'size' bytes.
- * See blkdev_issue_flush() note for memcpy_to_pmem().
- */
-static inline void wb_cache_pmem(void *addr, size_t size)
-{
-	if (arch_has_pmem_api())
-		arch_wb_cache_pmem(addr, size);
-}
 #endif /* __PMEM_H__ */
-- 
cgit v1.2.3


From 6b8190d61a622e095f04451437953acd2d74b371 Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Thu, 15 Jun 2017 10:44:30 -0600
Subject: nvme: implement NS Optimal IO Boundary from 1.3 Spec

The NVMe 1.3 spec introduces Namespace Optimal IO Boundaries (NOIOB),
which standardizes the stripe mechanism we currently have quirks for.
This patch implements the necessary logic to handle this new feature.

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 9 +++++++++
 drivers/nvme/host/nvme.h | 1 +
 include/linux/nvme.h     | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4ff5114f467d..0ddd6b9af7fc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1080,6 +1080,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+	u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+	blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
 static void nvme_config_discard(struct nvme_ns *ns)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
@@ -1139,12 +1145,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	if (ns->lba_shift == 0)
 		ns->lba_shift = 9;
 	bs = 1 << ns->lba_shift;
+	ns->noiob = le16_to_cpu(id->noiob);
 
 	blk_mq_freeze_queue(disk->queue);
 
 	if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
 		nvme_prep_integrity(disk, id, bs);
 	blk_queue_logical_block_size(ns->queue, bs);
+	if (ns->noiob)
+		nvme_set_chunk_size(ns);
 	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
 		nvme_init_integrity(ns);
 	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f27c58b860f4..ec8c7363934d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -202,6 +202,7 @@ struct nvme_ns {
 	bool ext;
 	u8 pi_type;
 	unsigned long flags;
+	u16 noiob;
 
 #define NVME_NS_REMOVING 0
 #define NVME_NS_DEAD     1
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6d476f242ee6..291587a0743f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -282,7 +282,7 @@ struct nvme_id_ns {
 	__le16			nabsn;
 	__le16			nabo;
 	__le16			nabspf;
-	__u16			rsvd46;
+	__le16			noiob;
 	__u8			nvmcap[16];
 	__u8			rsvd64[40];
 	__u8			nguid[16];
-- 
cgit v1.2.3


From 59ae1d127ac0ae404baf414c434ba2651b793f46 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Jun 2017 14:29:20 +0200
Subject: networking: introduce and use skb_put_data()

A common pattern with skb_put() is to just want to memcpy()
some data into the new space, introduce skb_put_data() for
this.

An spatch similar to the one for skb_put_zero() converts many
of the places using it:

    @@
    identifier p, p2;
    expression len, skb, data;
    type t, t2;
    @@
    (
    -p = skb_put(skb, len);
    +p = skb_put_data(skb, data, len);
    |
    -p = (t)skb_put(skb, len);
    +p = skb_put_data(skb, data, len);
    )
    (
    p2 = (t2)p;
    -memcpy(p2, data, len);
    |
    -memcpy(p, data, len);
    )

    @@
    type t, t2;
    identifier p, p2;
    expression skb, data;
    @@
    t *p;
    ...
    (
    -p = skb_put(skb, sizeof(t));
    +p = skb_put_data(skb, data, sizeof(t));
    |
    -p = (t *)skb_put(skb, sizeof(t));
    +p = skb_put_data(skb, data, sizeof(t));
    )
    (
    p2 = (t2)p;
    -memcpy(p2, data, sizeof(*p));
    |
    -memcpy(p, data, sizeof(*p));
    )

    @@
    expression skb, len, data;
    @@
    -memcpy(skb_put(skb, len), data, len);
    +skb_put_data(skb, data, len);

(again, manually post-processed to retain some comments)

Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/fore200e.c                             |  2 +-
 drivers/atm/he.c                                   |  2 +-
 drivers/atm/idt77252.c                             | 11 ++----
 drivers/atm/solos-pci.c                            |  2 +-
 drivers/bluetooth/bfusb.c                          |  6 +--
 drivers/bluetooth/bluecard_cs.c                    |  2 +-
 drivers/bluetooth/btmrvl_main.c                    |  2 +-
 drivers/bluetooth/btqcomsmd.c                      |  2 +-
 drivers/bluetooth/btusb.c                          | 12 +++---
 drivers/bluetooth/hci_bcsp.c                       | 16 ++++----
 drivers/bluetooth/hci_h4.c                         |  2 +-
 drivers/bluetooth/hci_h5.c                         | 12 +++---
 drivers/bluetooth/hci_intel.c                      |  7 ++--
 drivers/bluetooth/hci_ll.c                         |  2 +-
 drivers/bluetooth/hci_mrvl.c                       |  2 +-
 drivers/bluetooth/hci_qca.c                        |  2 +-
 drivers/char/pcmcia/synclink_cs.c                  |  2 +-
 drivers/firewire/net.c                             |  2 +-
 drivers/isdn/capi/capi.c                           |  2 +-
 drivers/isdn/capi/capidrv.c                        |  2 +-
 drivers/isdn/hardware/avm/b1.c                     |  6 +--
 drivers/isdn/hardware/avm/b1dma.c                  |  6 +--
 drivers/isdn/hardware/avm/c4.c                     |  6 +--
 drivers/isdn/hardware/avm/t1isa.c                  |  6 +--
 drivers/isdn/hardware/mISDN/hfcmulti.c             |  5 +--
 drivers/isdn/hardware/mISDN/hfcsusb.c              |  2 +-
 drivers/isdn/hisax/amd7930_fn.c                    |  3 +-
 drivers/isdn/hisax/avm_pci.c                       |  5 ++-
 drivers/isdn/hisax/diva.c                          |  6 ++-
 drivers/isdn/hisax/elsa_ser.c                      |  4 +-
 drivers/isdn/hisax/hfc_usb.c                       |  2 +-
 drivers/isdn/hisax/hisax_fcpcipnp.c                |  3 +-
 drivers/isdn/hisax/hisax_isac.c                    |  4 +-
 drivers/isdn/hisax/hscx_irq.c                      |  6 ++-
 drivers/isdn/hisax/icc.c                           |  2 +-
 drivers/isdn/hisax/ipacx.c                         |  8 ++--
 drivers/isdn/hisax/isac.c                          |  2 +-
 drivers/isdn/hisax/isar.c                          |  6 +--
 drivers/isdn/hisax/isdnl2.c                        |  4 +-
 drivers/isdn/hisax/jade_irq.c                      |  6 ++-
 drivers/isdn/hisax/l3_1tr6.c                       |  8 ++--
 drivers/isdn/hisax/l3dss1.c                        | 28 ++++++-------
 drivers/isdn/hisax/l3ni1.c                         | 32 +++++++--------
 drivers/isdn/hisax/netjet.c                        |  2 +-
 drivers/isdn/hisax/st5481_usb.c                    |  2 +-
 drivers/isdn/hisax/w6692.c                         |  9 +++--
 drivers/isdn/hysdn/hycapi.c                        | 31 +++++++--------
 drivers/isdn/hysdn/hysdn_net.c                     |  2 +-
 drivers/isdn/i4l/isdn_ppp.c                        |  3 +-
 drivers/isdn/i4l/isdn_tty.c                        |  2 +-
 drivers/isdn/i4l/isdn_v110.c                       |  6 +--
 drivers/isdn/isdnloop/isdnloop.c                   |  2 +-
 drivers/isdn/mISDN/dsp_cmx.c                       |  3 +-
 drivers/isdn/mISDN/layer2.c                        |  8 ++--
 drivers/isdn/mISDN/tei.c                           |  2 +-
 drivers/media/dvb-core/dvb_net.c                   |  3 +-
 drivers/media/radio/wl128x/fmdrv_common.c          |  2 +-
 drivers/misc/ti-st/st_core.c                       |  2 +-
 drivers/misc/ti-st/st_kim.c                        |  2 +-
 drivers/net/bonding/bond_alb.c                     |  3 +-
 drivers/net/caif/caif_hsi.c                        |  6 +--
 drivers/net/caif/caif_serial.c                     |  3 +-
 drivers/net/caif/caif_spi.c                        |  3 +-
 drivers/net/caif/caif_virtio.c                     |  2 +-
 drivers/net/can/slcan.c                            |  3 +-
 drivers/net/ethernet/3com/3c515.c                  |  6 +--
 drivers/net/ethernet/3com/3c59x.c                  |  5 +--
 drivers/net/ethernet/aeroflex/greth.c              |  3 +-
 drivers/net/ethernet/agere/et131x.c                |  2 +-
 drivers/net/ethernet/apple/macmace.c               |  2 +-
 drivers/net/ethernet/aurora/nb8800.c               |  4 +-
 drivers/net/ethernet/cadence/macb.c                |  2 +-
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  4 +-
 drivers/net/ethernet/cirrus/cs89x0.c               |  7 ++--
 drivers/net/ethernet/dec/tulip/de4x5.c             |  6 +--
 drivers/net/ethernet/dec/tulip/interrupt.c         | 12 +++---
 drivers/net/ethernet/dec/tulip/uli526x.c           |  6 +--
 drivers/net/ethernet/ec_bhf.c                      |  2 +-
 drivers/net/ethernet/fealnx.c                      |  4 +-
 drivers/net/ethernet/i825xx/82596.c                |  3 +-
 drivers/net/ethernet/i825xx/lib82596.c             |  3 +-
 drivers/net/ethernet/intel/e1000/e1000_main.c      |  2 +-
 drivers/net/ethernet/marvell/mvneta.c              | 10 ++---
 drivers/net/ethernet/micrel/ksz884x.c              |  3 +-
 drivers/net/ethernet/nxp/lpc_eth.c                 |  7 ++--
 drivers/net/ethernet/qlogic/qede/qede_fp.c         |  3 +-
 drivers/net/ethernet/qlogic/qlge/qlge_main.c       |  7 ++--
 drivers/net/ethernet/silan/sc92031.c               | 10 ++---
 drivers/net/fjes/fjes_main.c                       |  3 +-
 drivers/net/hamradio/mkiss.c                       |  2 +-
 drivers/net/hippi/rrunner.c                        |  4 +-
 drivers/net/hyperv/netvsc_drv.c                    |  2 +-
 drivers/net/ieee802154/at86rf230.c                 |  2 +-
 drivers/net/ieee802154/ca8210.c                    |  2 +-
 drivers/net/ieee802154/mrf24j40.c                  |  2 +-
 drivers/net/irda/smsc-ircc2.c                      |  2 +-
 drivers/net/irda/vlsi_ir.c                         |  2 +-
 drivers/net/ppp/ppp_async.c                        |  3 +-
 drivers/net/ppp/ppp_synctty.c                      |  3 +-
 drivers/net/slip/slip.c                            |  2 +-
 drivers/net/usb/asix_common.c                      |  4 +-
 drivers/net/usb/cdc-phonet.c                       |  2 +-
 drivers/net/usb/cdc_mbim.c                         |  2 +-
 drivers/net/usb/cdc_ncm.c                          |  6 +--
 drivers/net/usb/gl620a.c                           |  3 +-
 drivers/net/usb/hso.c                              | 13 +++---
 drivers/net/usb/ipheth.c                           |  2 +-
 drivers/net/usb/lg-vl600.c                         |  2 +-
 drivers/net/usb/qmi_wwan.c                         |  2 +-
 drivers/net/virtio_net.c                           |  2 +-
 drivers/net/wan/farsync.c                          |  2 +-
 drivers/net/wan/hdlc_ppp.c                         |  4 +-
 drivers/net/wan/x25_asy.c                          |  2 +-
 drivers/net/wimax/i2400m/netdev.c                  |  2 +-
 drivers/net/wireless/admtek/adm8211.c              |  6 +--
 drivers/net/wireless/ath/ath10k/mac.c              |  5 +--
 drivers/net/wireless/ath/ath10k/wmi.c              |  5 +--
 drivers/net/wireless/ath/ath9k/channel.c           |  5 +--
 drivers/net/wireless/ath/ath9k/wmi.c               |  3 +-
 drivers/net/wireless/ath/carl9170/rx.c             |  6 +--
 drivers/net/wireless/ath/wil6210/wmi.c             |  2 +-
 drivers/net/wireless/atmel/atmel.c                 |  5 +--
 drivers/net/wireless/broadcom/b43legacy/dma.c      |  2 +-
 drivers/net/wireless/intel/ipw2x00/ipw2200.c       |  5 ++-
 drivers/net/wireless/intel/ipw2x00/libipw_tx.c     |  6 +--
 drivers/net/wireless/intel/iwlegacy/3945.c         |  2 +-
 drivers/net/wireless/intel/iwlegacy/4965-mac.c     |  2 +-
 drivers/net/wireless/intel/iwlwifi/dvm/rx.c        |  2 +-
 drivers/net/wireless/intel/iwlwifi/dvm/tx.c        |  3 +-
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c        |  4 +-
 drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c  |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rx.c        |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c      |  5 +--
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c       |  5 +--
 .../net/wireless/intersil/hostap/hostap_80211_tx.c |  2 +-
 drivers/net/wireless/intersil/hostap/hostap_ap.c   |  2 +-
 drivers/net/wireless/intersil/hostap/hostap_hw.c   | 11 +++---
 drivers/net/wireless/intersil/hostap/hostap_main.c |  2 +-
 drivers/net/wireless/intersil/orinoco/main.c       |  2 +-
 drivers/net/wireless/intersil/p54/p54spi.c         |  4 +-
 drivers/net/wireless/intersil/p54/txrx.c           |  5 ++-
 drivers/net/wireless/mac80211_hwsim.c              |  5 +--
 drivers/net/wireless/marvell/libertas/if_sdio.c    |  4 +-
 drivers/net/wireless/marvell/mwifiex/11n_aggr.c    |  2 +-
 drivers/net/wireless/marvell/mwifiex/cfg80211.c    | 10 ++---
 drivers/net/wireless/marvell/mwifiex/tdls.c        |  8 ++--
 drivers/net/wireless/mediatek/mt7601u/dma.c        |  4 +-
 drivers/net/wireless/mediatek/mt7601u/mcu.c        |  2 +-
 .../net/wireless/quantenna/qtnfmac/pearl/pcie.c    |  2 +-
 .../net/wireless/quantenna/qtnfmac/qlink_util.h    |  3 +-
 drivers/net/wireless/ralink/rt2x00/rt2x00debug.c   |  5 +--
 drivers/net/wireless/realtek/rtlwifi/pci.c         |  3 +-
 .../net/wireless/realtek/rtlwifi/rtl8188ee/fw.c    |  3 +-
 .../net/wireless/realtek/rtlwifi/rtl8192se/fw.c    |  7 ++--
 drivers/net/wireless/realtek/rtlwifi/usb.c         |  2 +-
 drivers/net/wireless/rsi/rsi_91x_mgmt.c            |  8 ++--
 drivers/net/wireless/st/cw1200/scan.c              |  2 +-
 drivers/net/wireless/ti/wl1251/main.c              |  2 +-
 drivers/net/wireless/ti/wlcore/cmd.c               |  4 +-
 drivers/net/wireless/ti/wlcore/rx.c                |  4 +-
 drivers/net/wireless/zydas/zd1201.c                | 26 ++++++------
 drivers/net/wireless/zydas/zd1211rw/zd_mac.c       |  2 +-
 drivers/nfc/fdp/fdp.c                              |  3 +-
 drivers/nfc/fdp/i2c.c                              |  2 +-
 drivers/nfc/nfcmrvl/fw_dnld.c                      |  7 ++--
 drivers/nfc/nfcmrvl/i2c.c                          |  2 +-
 drivers/nfc/nfcmrvl/usb.c                          |  4 +-
 drivers/nfc/nxp-nci/firmware.c                     |  3 +-
 drivers/nfc/nxp-nci/i2c.c                          |  5 +--
 drivers/nfc/pn533/pn533.c                          | 28 ++++++-------
 drivers/nfc/pn533/usb.c                            |  4 +-
 drivers/nfc/port100.c                              | 14 +++----
 drivers/nfc/s3fwrn5/firmware.c                     |  4 +-
 drivers/nfc/s3fwrn5/i2c.c                          |  2 +-
 drivers/nfc/st21nfca/dep.c                         |  2 +-
 drivers/nfc/st21nfca/i2c.c                         |  2 +-
 drivers/rpmsg/rpmsg_char.c                         |  2 +-
 drivers/s390/net/ctcm_fsms.c                       |  7 ++--
 drivers/s390/net/ctcm_main.c                       | 10 ++---
 drivers/s390/net/ctcm_mpc.c                        | 46 +++++++++-------------
 drivers/s390/net/lcs.c                             |  2 +-
 drivers/s390/net/netiucv.c                         | 10 ++---
 drivers/s390/net/qeth_core_main.c                  | 10 ++---
 drivers/staging/gdm724x/gdm_lte.c                  | 25 +++++-------
 drivers/staging/ks7010/ks_hostif.c                 | 11 +++---
 drivers/staging/most/aim-network/networking.c      |  8 ++--
 drivers/staging/octeon/ethernet-rx.c               | 10 ++---
 drivers/staging/rtl8188eu/core/rtw_recv.c          |  4 +-
 drivers/staging/rtl8188eu/os_dep/mon.c             |  2 +-
 drivers/staging/rtl8192e/rtllib_rx.c               | 11 ++----
 drivers/staging/rtl8192e/rtllib_softmac.c          |  9 ++---
 drivers/staging/rtl8192e/rtllib_tx.c               | 12 ++----
 drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c  |  9 ++---
 .../staging/rtl8192u/ieee80211/ieee80211_softmac.c |  3 +-
 drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c  |  7 ++--
 drivers/staging/rtl8192u/r819xU_cmdpkt.c           |  3 +-
 drivers/staging/rtl8712/rtl8712_recv.c             |  3 +-
 drivers/staging/rtl8723bs/os_dep/recv_linux.c      |  4 +-
 drivers/staging/wilc1000/linux_mon.c               |  6 +--
 drivers/staging/wilc1000/linux_wlan.c              |  2 +-
 drivers/staging/wlan-ng/hfa384x_usb.c              |  6 +--
 drivers/tty/ipwireless/network.c                   |  2 +-
 drivers/tty/n_gsm.c                                |  2 +-
 drivers/tty/synclink.c                             |  2 +-
 drivers/tty/synclink_gt.c                          |  2 +-
 drivers/tty/synclinkmp.c                           |  2 +-
 drivers/usb/gadget/function/f_ncm.c                | 11 +++---
 drivers/usb/gadget/function/f_phonet.c             |  2 +-
 include/linux/mISDNif.h                            |  2 +-
 include/linux/skbuff.h                             | 10 +++++
 lib/nlattr.c                                       |  2 +-
 net/batman-adv/bat_iv_ogm.c                        |  4 +-
 net/batman-adv/bat_v_ogm.c                         |  6 +--
 net/batman-adv/fragmentation.c                     |  3 +-
 net/bluetooth/cmtp/core.c                          |  2 +-
 net/bluetooth/hci_core.c                           |  2 +-
 net/bluetooth/hci_request.c                        |  2 +-
 net/bluetooth/hci_sock.c                           |  8 ++--
 net/bluetooth/hidp/core.c                          |  2 +-
 net/bluetooth/l2cap_core.c                         |  4 +-
 net/bluetooth/mgmt_util.c                          |  4 +-
 net/bluetooth/rfcomm/tty.c                         |  2 +-
 net/bridge/netfilter/nft_reject_bridge.c           |  6 +--
 net/can/bcm.c                                      |  6 +--
 net/decnet/dn_nsp_out.c                            | 10 ++---
 net/ieee802154/6lowpan/tx.c                        |  7 ++--
 net/ipv6/mcast.c                                   |  4 +-
 net/irda/ircomm/ircomm_tty.c                       |  2 +-
 net/irda/irlap_frame.c                             |  6 +--
 net/key/af_key.c                                   |  3 +-
 net/mac80211/ibss.c                                |  2 +-
 net/mac80211/mesh.c                                |  8 ++--
 net/mac80211/mlme.c                                | 16 ++++----
 net/mac80211/offchannel.c                          |  3 +-
 net/mac80211/rx.c                                  |  2 +-
 net/mac80211/tdls.c                                | 32 ++++++---------
 net/mac80211/tx.c                                  | 22 +++++------
 net/mac80211/util.c                                |  5 +--
 net/netlink/af_netlink.c                           |  2 +-
 net/nfc/digital_dep.c                              | 17 +++-----
 net/nfc/hci/core.c                                 |  6 +--
 net/nfc/llcp_commands.c                            | 15 ++++---
 net/nfc/llcp_core.c                                |  2 +-
 net/nfc/nci/core.c                                 |  4 +-
 net/nfc/nci/data.c                                 |  2 +-
 net/nfc/nci/hci.c                                  |  7 ++--
 net/nfc/nci/uart.c                                 |  2 +-
 net/qrtr/qrtr.c                                    |  2 +-
 net/sctp/output.c                                  |  3 +-
 net/sctp/sm_make_chunk.c                           |  4 +-
 net/vmw_vsock/virtio_transport_common.c            |  6 +--
 net/x25/x25_subr.c                                 | 21 ++++------
 252 files changed, 622 insertions(+), 741 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 637c3e6b0f9e..7584ae1ded85 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1104,7 +1104,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp
 	/* Make device DMA transfer visible to CPU.  */
 	fore200e->bus->dma_sync_for_cpu(fore200e, buffer->data.dma_addr, rpd->rsd[ i ].length, DMA_FROM_DEVICE);
 	
-	memcpy(skb_put(skb, rpd->rsd[ i ].length), buffer->data.align_addr, rpd->rsd[ i ].length);
+	skb_put_data(skb, buffer->data.align_addr, rpd->rsd[i].length);
 
 	/* Now let the device get at it again.  */
 	fore200e->bus->dma_sync_for_device(fore200e, buffer->data.dma_addr, rpd->rsd[ i ].length, DMA_FROM_DEVICE);
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 3617659b9184..461da2bce8ef 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1735,7 +1735,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 		__net_timestamp(skb);
 
 		list_for_each_entry(heb, &he_vcc->buffers, entry)
-			memcpy(skb_put(skb, heb->len), &heb->data, heb->len);
+			skb_put_data(skb, &heb->data, heb->len);
 
 		switch (vcc->qos.aal) {
 			case ATM_AAL0:
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 5ec109533bb9..4e64de380bda 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1090,8 +1090,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 
 			*((u32 *) sb->data) = aal0;
 			skb_put(sb, sizeof(u32));
-			memcpy(skb_put(sb, ATM_CELL_PAYLOAD),
-			       cell, ATM_CELL_PAYLOAD);
+			skb_put_data(sb, cell, ATM_CELL_PAYLOAD);
 
 			ATM_SKB(sb)->vcc = vcc;
 			__net_timestamp(sb);
@@ -1159,8 +1158,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 				return;
 			}
 			skb_queue_walk(&rpp->queue, sb)
-				memcpy(skb_put(skb, sb->len),
-				       sb->data, sb->len);
+				skb_put_data(skb, sb->data, sb->len);
 
 			recycle_rx_pool_skb(card, rpp);
 
@@ -1322,8 +1320,7 @@ idt77252_rx_raw(struct idt77252_dev *card)
 
 		*((u32 *) sb->data) = header;
 		skb_put(sb, sizeof(u32));
-		memcpy(skb_put(sb, ATM_CELL_PAYLOAD), &(queue->data[16]),
-		       ATM_CELL_PAYLOAD);
+		skb_put_data(sb, &(queue->data[16]), ATM_CELL_PAYLOAD);
 
 		ATM_SKB(sb)->vcc = vcc;
 		__net_timestamp(sb);
@@ -2014,7 +2011,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags)
 	}
 	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 
-	memcpy(skb_put(skb, 52), cell, 52);
+	skb_put_data(skb, cell, 52);
 
 	return idt77252_send_skb(vcc, skb, 1);
 }
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 9115b292e680..077dd15c3a40 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -493,7 +493,7 @@ static int send_command(struct solos_card *card, int dev, const char *buf, size_
 	header->vci = cpu_to_le16(0);
 	header->type = cpu_to_le16(PKT_COMMAND);
 
-	memcpy(skb_put(skb, size), buf, size);
+	skb_put_data(skb, buf, size);
 
 	fpga_queue(card, dev, skb, NULL);
 
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 3bf4ec60e073..ab090a313a5f 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -335,7 +335,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch
 	}
 
 	if (len > 0)
-		memcpy(skb_put(data->reassembly, len), buf, len);
+		skb_put_data(data->reassembly, buf, len);
 
 	if (hdr & 0x08) {
 		hci_recv_frame(data->hdev, data->reassembly);
@@ -505,7 +505,7 @@ static int bfusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 		buf[1] = 0x00;
 		buf[2] = (size == BFUSB_MAX_BLOCK_SIZE) ? 0 : size;
 
-		memcpy(skb_put(nskb, 3), buf, 3);
+		skb_put_data(nskb, buf, 3);
 		skb_copy_from_linear_data_offset(skb, sent, skb_put(nskb, size), size);
 
 		sent  += size;
@@ -516,7 +516,7 @@ static int bfusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 	if ((nskb->len % data->bulk_pkt_size) == 0) {
 		buf[0] = 0xdd;
 		buf[1] = 0x00;
-		memcpy(skb_put(nskb, 2), buf, 2);
+		skb_put_data(nskb, buf, 2);
 	}
 
 	read_lock(&data->lock);
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 007c0a45f31b..1d30c116b2ee 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -597,7 +597,7 @@ static int bluecard_hci_set_baud_rate(struct hci_dev *hdev, int baud)
 		break;
 	}
 
-	memcpy(skb_put(skb, sizeof(cmd)), cmd, sizeof(cmd));
+	skb_put_data(skb, cmd, sizeof(cmd));
 
 	skb_queue_tail(&(info->txq), skb);
 
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index c38cb5b91291..24a188eab360 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -194,7 +194,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
 	hdr->plen = len;
 
 	if (len)
-		memcpy(skb_put(skb, len), param, len);
+		skb_put_data(skb, param, len);
 
 	hci_skb_pkt_type(skb) = MRVL_VENDOR_PKT;
 
diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c
index ef730c173d4b..d00c4fdae924 100644
--- a/drivers/bluetooth/btqcomsmd.c
+++ b/drivers/bluetooth/btqcomsmd.c
@@ -43,7 +43,7 @@ static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type,
 	}
 
 	hci_skb_pkt_type(skb) = type;
-	memcpy(skb_put(skb, count), data, count);
+	skb_put_data(skb, data, count);
 
 	return hci_recv_frame(hdev, skb);
 }
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index bfd5f4bdec80..c7ea398e65c1 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -478,7 +478,7 @@ static int btusb_recv_intr(struct btusb_data *data, void *buffer, int count)
 		}
 
 		len = min_t(uint, hci_skb_expect(skb), count);
-		memcpy(skb_put(skb, len), buffer, len);
+		skb_put_data(skb, buffer, len);
 
 		count -= len;
 		buffer += len;
@@ -533,7 +533,7 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count)
 		}
 
 		len = min_t(uint, hci_skb_expect(skb), count);
-		memcpy(skb_put(skb, len), buffer, len);
+		skb_put_data(skb, buffer, len);
 
 		count -= len;
 		buffer += len;
@@ -590,7 +590,7 @@ static int btusb_recv_isoc(struct btusb_data *data, void *buffer, int count)
 		}
 
 		len = min_t(uint, hci_skb_expect(skb), count);
-		memcpy(skb_put(skb, len), buffer, len);
+		skb_put_data(skb, buffer, len);
 
 		count -= len;
 		buffer += len;
@@ -934,8 +934,8 @@ static void btusb_diag_complete(struct urb *urb)
 
 		skb = bt_skb_alloc(urb->actual_length, GFP_ATOMIC);
 		if (skb) {
-			memcpy(skb_put(skb, urb->actual_length),
-			       urb->transfer_buffer, urb->actual_length);
+			skb_put_data(skb, urb->transfer_buffer,
+				     urb->actual_length);
 			hci_recv_diag(hdev, skb);
 		}
 	} else if (urb->status == -ENOENT) {
@@ -2395,7 +2395,7 @@ static int marvell_config_oob_wake(struct hci_dev *hdev)
 		return -ENOMEM;
 	}
 
-	memcpy(skb_put(skb, sizeof(cmd)), cmd, sizeof(cmd));
+	skb_put_data(skb, cmd, sizeof(cmd));
 	hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
 
 	ret = btusb_send_frame(hdev, skb);
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index 910ec968f022..d880f4e33c75 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -125,7 +125,7 @@ static void bcsp_slip_msgdelim(struct sk_buff *skb)
 {
 	const char pkt_delim = 0xc0;
 
-	memcpy(skb_put(skb, 1), &pkt_delim, 1);
+	skb_put_data(skb, &pkt_delim, 1);
 }
 
 static void bcsp_slip_one_byte(struct sk_buff *skb, u8 c)
@@ -135,13 +135,13 @@ static void bcsp_slip_one_byte(struct sk_buff *skb, u8 c)
 
 	switch (c) {
 	case 0xc0:
-		memcpy(skb_put(skb, 2), &esc_c0, 2);
+		skb_put_data(skb, &esc_c0, 2);
 		break;
 	case 0xdb:
-		memcpy(skb_put(skb, 2), &esc_db, 2);
+		skb_put_data(skb, &esc_db, 2);
 		break;
 	default:
-		memcpy(skb_put(skb, 1), &c, 1);
+		skb_put_data(skb, &c, 1);
 	}
 }
 
@@ -423,7 +423,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu)
 		BT_DBG("Found a LE conf pkt");
 		if (!nskb)
 			return;
-		memcpy(skb_put(nskb, 4), conf_rsp_pkt, 4);
+		skb_put_data(nskb, conf_rsp_pkt, 4);
 		hci_skb_pkt_type(nskb) = BCSP_LE_PKT;
 
 		skb_queue_head(&bcsp->unrel, nskb);
@@ -447,7 +447,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char
 			bcsp->rx_esc_state = BCSP_ESCSTATE_ESC;
 			break;
 		default:
-			memcpy(skb_put(bcsp->rx_skb, 1), &byte, 1);
+			skb_put_data(bcsp->rx_skb, &byte, 1);
 			if ((bcsp->rx_skb->data[0] & 0x40) != 0 &&
 			    bcsp->rx_state != BCSP_W4_CRC)
 				bcsp_crc_update(&bcsp->message_crc, byte);
@@ -458,7 +458,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char
 	case BCSP_ESCSTATE_ESC:
 		switch (byte) {
 		case 0xdc:
-			memcpy(skb_put(bcsp->rx_skb, 1), &c0, 1);
+			skb_put_data(bcsp->rx_skb, &c0, 1);
 			if ((bcsp->rx_skb->data[0] & 0x40) != 0 &&
 			    bcsp->rx_state != BCSP_W4_CRC)
 				bcsp_crc_update(&bcsp->message_crc, 0xc0);
@@ -467,7 +467,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char
 			break;
 
 		case 0xdd:
-			memcpy(skb_put(bcsp->rx_skb, 1), &db, 1);
+			skb_put_data(bcsp->rx_skb, &db, 1);
 			if ((bcsp->rx_skb->data[0] & 0x40) != 0 &&
 			    bcsp->rx_state != BCSP_W4_CRC)
 				bcsp_crc_update(&bcsp->message_crc, 0xdb);
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 82e5a32b87a4..4e328d7d47bb 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -209,7 +209,7 @@ struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb,
 		}
 
 		len = min_t(uint, hci_skb_expect(skb) - skb->len, count);
-		memcpy(skb_put(skb, len), buffer, len);
+		skb_put_data(skb, buffer, len);
 
 		count -= len;
 		buffer += len;
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index 90d0456b6744..c0e4e26dc30d 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -109,7 +109,7 @@ static void h5_link_control(struct hci_uart *hu, const void *data, size_t len)
 
 	hci_skb_pkt_type(nskb) = HCI_3WIRE_LINK_PKT;
 
-	memcpy(skb_put(nskb, len), data, len);
+	skb_put_data(nskb, data, len);
 
 	skb_queue_tail(&h5->unrel, nskb);
 }
@@ -487,7 +487,7 @@ static void h5_unslip_one_byte(struct h5 *h5, unsigned char c)
 		}
 	}
 
-	memcpy(skb_put(h5->rx_skb, 1), byte, 1);
+	skb_put_data(h5->rx_skb, byte, 1);
 	h5->rx_pending--;
 
 	BT_DBG("unsliped 0x%02hhx, rx_pending %zu", *byte, h5->rx_pending);
@@ -579,7 +579,7 @@ static void h5_slip_delim(struct sk_buff *skb)
 {
 	const char delim = SLIP_DELIMITER;
 
-	memcpy(skb_put(skb, 1), &delim, 1);
+	skb_put_data(skb, &delim, 1);
 }
 
 static void h5_slip_one_byte(struct sk_buff *skb, u8 c)
@@ -589,13 +589,13 @@ static void h5_slip_one_byte(struct sk_buff *skb, u8 c)
 
 	switch (c) {
 	case SLIP_DELIMITER:
-		memcpy(skb_put(skb, 2), &esc_delim, 2);
+		skb_put_data(skb, &esc_delim, 2);
 		break;
 	case SLIP_ESC:
-		memcpy(skb_put(skb, 2), &esc_esc, 2);
+		skb_put_data(skb, &esc_esc, 2);
 		break;
 	default:
-		memcpy(skb_put(skb, 1), &c, 1);
+		skb_put_data(skb, &c, 1);
 	}
 }
 
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index 851bee82df2a..16e728577cd8 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -185,7 +185,7 @@ static int intel_lpm_suspend(struct hci_uart *hu)
 		return -ENOMEM;
 	}
 
-	memcpy(skb_put(skb, sizeof(suspend)), suspend, sizeof(suspend));
+	skb_put_data(skb, suspend, sizeof(suspend));
 	hci_skb_pkt_type(skb) = HCI_LPM_PKT;
 
 	set_bit(STATE_LPM_TRANSACTION, &intel->flags);
@@ -270,8 +270,7 @@ static int intel_lpm_host_wake(struct hci_uart *hu)
 		return -ENOMEM;
 	}
 
-	memcpy(skb_put(skb, sizeof(lpm_resume_ack)), lpm_resume_ack,
-	       sizeof(lpm_resume_ack));
+	skb_put_data(skb, lpm_resume_ack, sizeof(lpm_resume_ack));
 	hci_skb_pkt_type(skb) = HCI_LPM_PKT;
 
 	/* LPM flow is a priority, enqueue packet at list head */
@@ -522,7 +521,7 @@ static int intel_set_baudrate(struct hci_uart *hu, unsigned int speed)
 		return -ENOMEM;
 	}
 
-	memcpy(skb_put(skb, sizeof(speed_cmd)), speed_cmd, sizeof(speed_cmd));
+	skb_put_data(skb, speed_cmd, sizeof(speed_cmd));
 	hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
 
 	hci_uart_set_flow_control(hu, true);
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 2b16d48d82ee..cc2fa78b434e 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -413,7 +413,7 @@ static int ll_recv(struct hci_uart *hu, const void *data, int count)
 	while (count) {
 		if (ll->rx_count) {
 			len = min_t(unsigned int, ll->rx_count, count);
-			memcpy(skb_put(ll->rx_skb, len), ptr, len);
+			skb_put_data(ll->rx_skb, ptr, len);
 			ll->rx_count -= len; count -= len; ptr += len;
 
 			if (ll->rx_count)
diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c
index bbc4b39b1dbf..ffb00669346f 100644
--- a/drivers/bluetooth/hci_mrvl.c
+++ b/drivers/bluetooth/hci_mrvl.c
@@ -328,7 +328,7 @@ static int mrvl_load_firmware(struct hci_dev *hdev, const char *name)
 		}
 		bt_cb(skb)->pkt_type = MRVL_RAW_DATA;
 
-		memcpy(skb_put(skb, mrvl->tx_len), fw_ptr, mrvl->tx_len);
+		skb_put_data(skb, fw_ptr, mrvl->tx_len);
 		fw_ptr += mrvl->tx_len;
 
 		set_bit(STATE_FW_REQ_PENDING, &mrvl->flags);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index f242dfd0c2e2..b55f01320631 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -869,7 +869,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
 	}
 
 	/* Assign commands to change baudrate and packet type. */
-	memcpy(skb_put(skb, sizeof(cmd)), cmd, sizeof(cmd));
+	skb_put_data(skb, cmd, sizeof(cmd));
 	hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
 
 	skb_queue_tail(&qca->txq, skb);
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index d136db1a10f0..62be953e5fb0 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4235,7 +4235,7 @@ static void hdlcdev_rx(MGSLPC_INFO *info, char *buf, int size)
 		return;
 	}
 
-	memcpy(skb_put(skb, size), buf, size);
+	skb_put_data(skb, buf, size);
 
 	skb->protocol = hdlc_type_trans(skb, dev);
 
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 5d3640264f2d..d5040bbd34e8 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -600,7 +600,7 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 			return -ENOMEM;
 		}
 		skb_reserve(skb, LL_RESERVED_SPACE(net));
-		memcpy(skb_put(skb, len), buf, len);
+		skb_put_data(skb, buf, len);
 
 		return fwnet_finish_incoming_packet(net, skb, source_node_id,
 						    is_broadcast, ether_type);
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 6a2df3297e77..77be17590866 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1058,7 +1058,7 @@ static int capinc_tty_write(struct tty_struct *tty,
 	}
 
 	skb_reserve(skb, CAPI_DATA_B3_REQ_LEN);
-	memcpy(skb_put(skb, count), buf, count);
+	skb_put_data(skb, buf, count);
 
 	__skb_queue_tail(&mp->outqueue, skb);
 	mp->outbytes += skb->len;
diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 85cfa4f8691f..89dd1303a98a 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -516,7 +516,7 @@ static void send_message(capidrv_contr *card, _cmsg *cmsg)
 		printk(KERN_ERR "capidrv::send_message: can't allocate mem\n");
 		return;
 	}
-	memcpy(skb_put(skb, len), cmsg->buf, len);
+	skb_put_data(skb, cmsg->buf, len);
 	if (capi20_put_message(&global.ap, skb) != CAPI_NOERROR)
 		kfree_skb(skb);
 }
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index 9fdbd99c7547..b1833d08a5fe 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -529,8 +529,8 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr)
 			printk(KERN_ERR "%s: incoming packet dropped\n",
 			       card->name);
 		} else {
-			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-			memcpy(skb_put(skb, DataB3Len), card->databuf, DataB3Len);
+			skb_put_data(skb, card->msgbuf, MsgLen);
+			skb_put_data(skb, card->databuf, DataB3Len);
 			capi_ctr_handle_message(ctrl, ApplId, skb);
 		}
 		break;
@@ -544,7 +544,7 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr)
 			       card->name);
 			spin_unlock_irqrestore(&card->lock, flags);
 		} else {
-			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
+			skb_put_data(skb, card->msgbuf, MsgLen);
 			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF)
 				capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
 						     CAPIMSG_NCCI(skb->data),
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 818bd8f231db..9538a9e5e1a8 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -474,8 +474,8 @@ static void b1dma_handle_rx(avmcard *card)
 			printk(KERN_ERR "%s: incoming packet dropped\n",
 			       card->name);
 		} else {
-			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-			memcpy(skb_put(skb, DataB3Len), card->databuf, DataB3Len);
+			skb_put_data(skb, card->msgbuf, MsgLen);
+			skb_put_data(skb, card->databuf, DataB3Len);
 			capi_ctr_handle_message(ctrl, ApplId, skb);
 		}
 		break;
@@ -488,7 +488,7 @@ static void b1dma_handle_rx(avmcard *card)
 			printk(KERN_ERR "%s: incoming packet dropped\n",
 			       card->name);
 		} else {
-			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
+			skb_put_data(skb, card->msgbuf, MsgLen);
 			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) {
 				spin_lock(&card->lock);
 				capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 17beb2869dc1..40c7e2cf423b 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -536,8 +536,8 @@ static void c4_handle_rx(avmcard *card)
 			printk(KERN_ERR "%s: incoming packet dropped\n",
 			       card->name);
 		} else {
-			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-			memcpy(skb_put(skb, DataB3Len), card->databuf, DataB3Len);
+			skb_put_data(skb, card->msgbuf, MsgLen);
+			skb_put_data(skb, card->databuf, DataB3Len);
 			capi_ctr_handle_message(ctrl, ApplId, skb);
 		}
 		break;
@@ -555,7 +555,7 @@ static void c4_handle_rx(avmcard *card)
 			printk(KERN_ERR "%s: incoming packet dropped\n",
 			       card->name);
 		} else {
-			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
+			skb_put_data(skb, card->msgbuf, MsgLen);
 			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF)
 				capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
 						     CAPIMSG_NCCI(skb->data),
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index 9516203c735f..9f80d20ced87 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -171,8 +171,8 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
 				printk(KERN_ERR "%s: incoming packet dropped\n",
 				       card->name);
 			} else {
-				memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-				memcpy(skb_put(skb, DataB3Len), card->databuf, DataB3Len);
+				skb_put_data(skb, card->msgbuf, MsgLen);
+				skb_put_data(skb, card->databuf, DataB3Len);
 				capi_ctr_handle_message(ctrl, ApplId, skb);
 			}
 			break;
@@ -186,7 +186,7 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
 				printk(KERN_ERR "%s: incoming packet dropped\n",
 				       card->name);
 			} else {
-				memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
+				skb_put_data(skb, card->msgbuf, MsgLen);
 				if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3)
 					capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
 							     CAPIMSG_NCCI(skb->data),
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 961c07ee47b7..aea0c9616ea5 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -1926,7 +1926,7 @@ hfcmulti_dtmf(struct hfc_multi *hc)
 			hh = mISDN_HEAD_P(skb);
 			hh->prim = PH_CONTROL_IND;
 			hh->id = DTMF_HFC_COEF;
-			memcpy(skb_put(skb, 512), hc->chan[ch].coeff, 512);
+			skb_put_data(skb, hc->chan[ch].coeff, 512);
 			recv_Bchannel_skb(bch, skb);
 		}
 	}
@@ -2332,8 +2332,7 @@ next_frame:
 				skb = *sp;
 				*sp = mI_alloc_skb(skb->len, GFP_ATOMIC);
 				if (*sp) {
-					memcpy(skb_put(*sp, skb->len),
-					       skb->data, skb->len);
+					skb_put_data(*sp, skb->data, skb->len);
 					skb_trim(skb, 0);
 				} else {
 					printk(KERN_DEBUG "%s: No mem\n",
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 114f3bcba1b0..17cc879ad2bb 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -893,7 +893,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
 		}
 	}
 
-	memcpy(skb_put(rx_skb, len), data, len);
+	skb_put_data(rx_skb, data, len);
 
 	if (hdlc) {
 		/* we have a complete hdlc packet */
diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c
index 3a4c2f9e19e9..dcf4c2a9fcea 100644
--- a/drivers/isdn/hisax/amd7930_fn.c
+++ b/drivers/isdn/hisax/amd7930_fn.c
@@ -317,7 +317,8 @@ Amd7930_empty_Dfifo(struct IsdnCardState *cs, int flag)
 							debugl1(cs, "%s", cs->dlog);
 						}
 						/* moves received data in sk-buffer */
-						memcpy(skb_put(skb, cs->rcvidx), cs->rcvbuf, cs->rcvidx);
+						skb_put_data(skb, cs->rcvbuf,
+							     cs->rcvidx);
 						skb_queue_tail(&cs->rq, skb);
 					}
 				}
diff --git a/drivers/isdn/hisax/avm_pci.c b/drivers/isdn/hisax/avm_pci.c
index d1427bd6452d..daf3742cdef6 100644
--- a/drivers/isdn/hisax/avm_pci.c
+++ b/drivers/isdn/hisax/avm_pci.c
@@ -378,8 +378,9 @@ HDLC_irq(struct BCState *bcs, u_int stat) {
 					if (!(skb = dev_alloc_skb(bcs->hw.hdlc.rcvidx)))
 						printk(KERN_WARNING "HDLC: receive out of memory\n");
 					else {
-						memcpy(skb_put(skb, bcs->hw.hdlc.rcvidx),
-						       bcs->hw.hdlc.rcvbuf, bcs->hw.hdlc.rcvidx);
+						skb_put_data(skb,
+							     bcs->hw.hdlc.rcvbuf,
+							     bcs->hw.hdlc.rcvidx);
 						skb_queue_tail(&bcs->rqueue, skb);
 					}
 					bcs->hw.hdlc.rcvidx = 0;
diff --git a/drivers/isdn/hisax/diva.c b/drivers/isdn/hisax/diva.c
index 079336e593f9..3fc94e7741ae 100644
--- a/drivers/isdn/hisax/diva.c
+++ b/drivers/isdn/hisax/diva.c
@@ -511,7 +511,8 @@ Memhscx_interrupt(struct IsdnCardState *cs, u_char val, u_char hscx)
 				if (!(skb = dev_alloc_skb(count)))
 					printk(KERN_WARNING "HSCX: receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), bcs->hw.hscx.rcvbuf, count);
+					skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+						     count);
 					skb_queue_tail(&bcs->rqueue, skb);
 				}
 			}
@@ -526,7 +527,8 @@ Memhscx_interrupt(struct IsdnCardState *cs, u_char val, u_char hscx)
 			if (!(skb = dev_alloc_skb(fifo_size)))
 				printk(KERN_WARNING "HiSax: receive out of memory\n");
 			else {
-				memcpy(skb_put(skb, fifo_size), bcs->hw.hscx.rcvbuf, fifo_size);
+				skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+					     fifo_size);
 				skb_queue_tail(&bcs->rqueue, skb);
 			}
 			bcs->hw.hscx.rcvidx = 0;
diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c
index a2a358c1dc8e..999effd7a276 100644
--- a/drivers/isdn/hisax/elsa_ser.c
+++ b/drivers/isdn/hisax/elsa_ser.c
@@ -333,8 +333,8 @@ static inline void receive_chars(struct IsdnCardState *cs,
 		if (!(skb = dev_alloc_skb(cs->hw.elsa.rcvcnt)))
 			printk(KERN_WARNING "ElsaSER: receive out of memory\n");
 		else {
-			memcpy(skb_put(skb, cs->hw.elsa.rcvcnt), cs->hw.elsa.rcvbuf,
-			       cs->hw.elsa.rcvcnt);
+			skb_put_data(skb, cs->hw.elsa.rcvbuf,
+				     cs->hw.elsa.rcvcnt);
 			skb_queue_tail(&cs->hw.elsa.bcs->rqueue, skb);
 		}
 		schedule_event(cs->hw.elsa.bcs, B_RCVBUFREADY);
diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c
index 6dbd1f1da14f..ef4748083efd 100644
--- a/drivers/isdn/hisax/hfc_usb.c
+++ b/drivers/isdn/hisax/hfc_usb.c
@@ -799,7 +799,7 @@ collect_rx_frame(usb_fifo *fifo, __u8 *data, int len, int finish)
 	}
 	if (len) {
 		if (fifo->skbuff->len + len < fifo->max_size) {
-			memcpy(skb_put(fifo->skbuff, len), data, len);
+			skb_put_data(fifo->skbuff, data, len);
 		} else {
 			DBG(HFCUSB_DBG_FIFO_ERR,
 			    "HCF-USB: got frame exceeded fifo->max_size(%d) fifo(%d)",
diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.c b/drivers/isdn/hisax/hisax_fcpcipnp.c
index 5e8a5d967162..5a9f39ed1d5d 100644
--- a/drivers/isdn/hisax/hisax_fcpcipnp.c
+++ b/drivers/isdn/hisax/hisax_fcpcipnp.c
@@ -495,8 +495,7 @@ static inline void hdlc_rpr_irq(struct fritz_bcs *bcs, u32 stat)
 			if (!skb) {
 				printk(KERN_WARNING "HDLC: receive out of memory\n");
 			} else {
-				memcpy(skb_put(skb, bcs->rcvidx), bcs->rcvbuf,
-				       bcs->rcvidx);
+				skb_put_data(skb, bcs->rcvbuf, bcs->rcvidx);
 				DBG_SKB(1, skb);
 				B_L1L2(bcs, PH_DATA | INDICATION, skb);
 			}
diff --git a/drivers/isdn/hisax/hisax_isac.c b/drivers/isdn/hisax/hisax_isac.c
index 5154c252a25f..0f36375478c5 100644
--- a/drivers/isdn/hisax/hisax_isac.c
+++ b/drivers/isdn/hisax/hisax_isac.c
@@ -557,7 +557,7 @@ static inline void isac_rme_interrupt(struct isac *isac)
 		DBG(DBG_WARN, "no memory, dropping\n");
 		goto out;
 	}
-	memcpy(skb_put(skb, count), isac->rcvbuf, count);
+	skb_put_data(skb, isac->rcvbuf, count);
 	DBG_SKB(DBG_RPACKET, skb);
 	D_L1L2(isac, PH_DATA | INDICATION, skb);
 out:
@@ -687,7 +687,7 @@ static inline void isacsx_rme_interrupt(struct isac *isac)
 		DBG(DBG_WARN, "no memory, dropping");
 		goto out;
 	}
-	memcpy(skb_put(skb, count), isac->rcvbuf, count);
+	skb_put_data(skb, isac->rcvbuf, count);
 	DBG_SKB(DBG_RPACKET, skb);
 	D_L1L2(isac, PH_DATA | INDICATION, skb);
 out:
diff --git a/drivers/isdn/hisax/hscx_irq.c b/drivers/isdn/hisax/hscx_irq.c
index a8d6188402c6..0d7e783c8bef 100644
--- a/drivers/isdn/hisax/hscx_irq.c
+++ b/drivers/isdn/hisax/hscx_irq.c
@@ -169,7 +169,8 @@ hscx_interrupt(struct IsdnCardState *cs, u_char val, u_char hscx)
 				if (!(skb = dev_alloc_skb(count)))
 					printk(KERN_WARNING "HSCX: receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), bcs->hw.hscx.rcvbuf, count);
+					skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+						     count);
 					skb_queue_tail(&bcs->rqueue, skb);
 				}
 			}
@@ -184,7 +185,8 @@ hscx_interrupt(struct IsdnCardState *cs, u_char val, u_char hscx)
 			if (!(skb = dev_alloc_skb(fifo_size)))
 				printk(KERN_WARNING "HiSax: receive out of memory\n");
 			else {
-				memcpy(skb_put(skb, fifo_size), bcs->hw.hscx.rcvbuf, fifo_size);
+				skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+					     fifo_size);
 				skb_queue_tail(&bcs->rqueue, skb);
 			}
 			bcs->hw.hscx.rcvidx = 0;
diff --git a/drivers/isdn/hisax/icc.c b/drivers/isdn/hisax/icc.c
index c7c3797a817e..8d1804572b32 100644
--- a/drivers/isdn/hisax/icc.c
+++ b/drivers/isdn/hisax/icc.c
@@ -217,7 +217,7 @@ icc_interrupt(struct IsdnCardState *cs, u_char val)
 				if (!(skb = alloc_skb(count, GFP_ATOMIC)))
 					printk(KERN_WARNING "HiSax: D receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), cs->rcvbuf, count);
+					skb_put_data(skb, cs->rcvbuf, count);
 					skb_queue_tail(&cs->rq, skb);
 				}
 			}
diff --git a/drivers/isdn/hisax/ipacx.c b/drivers/isdn/hisax/ipacx.c
index 43effe7082ed..c426b4fea28a 100644
--- a/drivers/isdn/hisax/ipacx.c
+++ b/drivers/isdn/hisax/ipacx.c
@@ -350,7 +350,7 @@ dch_int(struct IsdnCardState *cs)
 				if (!(skb = dev_alloc_skb(count)))
 					printk(KERN_WARNING "HiSax dch_int(): receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), cs->rcvbuf, count);
+					skb_put_data(skb, cs->rcvbuf, count);
 					skb_queue_tail(&cs->rq, skb);
 				}
 			}
@@ -627,7 +627,8 @@ bch_int(struct IsdnCardState *cs, u_char hscx)
 				if (!(skb = dev_alloc_skb(count)))
 					printk(KERN_WARNING "HiSax bch_int(): receive frame out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), bcs->hw.hscx.rcvbuf, count);
+					skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+						     count);
 					skb_queue_tail(&bcs->rqueue, skb);
 				}
 			}
@@ -644,7 +645,8 @@ bch_int(struct IsdnCardState *cs, u_char hscx)
 			if (!(skb = dev_alloc_skb(B_FIFO_SIZE)))
 				printk(KERN_WARNING "HiSax bch_int(): receive transparent out of memory\n");
 			else {
-				memcpy(skb_put(skb, B_FIFO_SIZE), bcs->hw.hscx.rcvbuf, B_FIFO_SIZE);
+				skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+					     B_FIFO_SIZE);
 				skb_queue_tail(&bcs->rqueue, skb);
 			}
 			bcs->hw.hscx.rcvidx = 0;
diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c
index 4273b4548825..ea965f29a555 100644
--- a/drivers/isdn/hisax/isac.c
+++ b/drivers/isdn/hisax/isac.c
@@ -222,7 +222,7 @@ isac_interrupt(struct IsdnCardState *cs, u_char val)
 				if (!skb)
 					printk(KERN_WARNING "HiSax: D receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), cs->rcvbuf, count);
+					skb_put_data(skb, cs->rcvbuf, count);
 					skb_queue_tail(&cs->rq, skb);
 				}
 			}
diff --git a/drivers/isdn/hisax/isar.c b/drivers/isdn/hisax/isar.c
index 0dc60b287c4b..98b4b67ea337 100644
--- a/drivers/isdn/hisax/isar.c
+++ b/drivers/isdn/hisax/isar.c
@@ -458,7 +458,7 @@ send_DLE_ETX(struct BCState *bcs)
 	struct sk_buff *skb;
 
 	if ((skb = dev_alloc_skb(2))) {
-		memcpy(skb_put(skb, 2), dleetx, 2);
+		skb_put_data(skb, dleetx, 2);
 		skb_queue_tail(&bcs->rqueue, skb);
 		schedule_event(bcs, B_RCVBUFREADY);
 	} else {
@@ -550,8 +550,8 @@ isar_rcv_frame(struct IsdnCardState *cs, struct BCState *bcs)
 				} else if (!(skb = dev_alloc_skb(bcs->hw.isar.rcvidx - 2))) {
 					printk(KERN_WARNING "ISAR: receive out of memory\n");
 				} else {
-					memcpy(skb_put(skb, bcs->hw.isar.rcvidx - 2),
-					       bcs->hw.isar.rcvbuf, bcs->hw.isar.rcvidx - 2);
+					skb_put_data(skb, bcs->hw.isar.rcvbuf,
+						     bcs->hw.isar.rcvidx - 2);
 					skb_queue_tail(&bcs->rqueue, skb);
 					schedule_event(bcs, B_RCVBUFREADY);
 				}
diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c
index c53a53f6efb6..1a40ed04cb52 100644
--- a/drivers/isdn/hisax/isdnl2.c
+++ b/drivers/isdn/hisax/isdnl2.c
@@ -433,7 +433,7 @@ send_uframe(struct PStack *st, u_char cmd, u_char cr)
 		printk(KERN_WARNING "isdl2 can't alloc sbbuff for send_uframe\n");
 		return;
 	}
-	memcpy(skb_put(skb, i), tmp, i);
+	skb_put_data(skb, tmp, i);
 	enqueue_super(st, skb);
 }
 
@@ -894,7 +894,7 @@ enquiry_cr(struct PStack *st, u_char typ, u_char cr, u_char pf)
 		printk(KERN_WARNING "isdl2 can't alloc sbbuff for enquiry_cr\n");
 		return;
 	}
-	memcpy(skb_put(skb, i), tmp, i);
+	skb_put_data(skb, tmp, i);
 	enqueue_super(st, skb);
 }
 
diff --git a/drivers/isdn/hisax/jade_irq.c b/drivers/isdn/hisax/jade_irq.c
index b930da9b5aa6..a89e2df911c5 100644
--- a/drivers/isdn/hisax/jade_irq.c
+++ b/drivers/isdn/hisax/jade_irq.c
@@ -147,7 +147,8 @@ jade_interrupt(struct IsdnCardState *cs, u_char val, u_char jade)
 				if (!(skb = dev_alloc_skb(count)))
 					printk(KERN_WARNING "JADE %s receive out of memory\n", (jade ? "B" : "A"));
 				else {
-					memcpy(skb_put(skb, count), bcs->hw.hscx.rcvbuf, count);
+					skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+						     count);
 					skb_queue_tail(&bcs->rqueue, skb);
 				}
 			}
@@ -162,7 +163,8 @@ jade_interrupt(struct IsdnCardState *cs, u_char val, u_char jade)
 			if (!(skb = dev_alloc_skb(fifo_size)))
 				printk(KERN_WARNING "HiSax: receive out of memory\n");
 			else {
-				memcpy(skb_put(skb, fifo_size), bcs->hw.hscx.rcvbuf, fifo_size);
+				skb_put_data(skb, bcs->hw.hscx.rcvbuf,
+					     fifo_size);
 				skb_queue_tail(&bcs->rqueue, skb);
 			}
 			bcs->hw.hscx.rcvidx = 0;
diff --git a/drivers/isdn/hisax/l3_1tr6.c b/drivers/isdn/hisax/l3_1tr6.c
index 875402e76d0a..da0a1c6aa329 100644
--- a/drivers/isdn/hisax/l3_1tr6.c
+++ b/drivers/isdn/hisax/l3_1tr6.c
@@ -149,7 +149,7 @@ l3_1tr6_setup_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	L3DelTimer(&pc->timer);
 	L3AddTimer(&pc->timer, T303, CC_T303);
 	newl3state(pc, 1);
@@ -497,7 +497,7 @@ l3_1tr6_setup_rsp(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3DelTimer(&pc->timer);
 	L3AddTimer(&pc->timer, T313, CC_T313);
@@ -543,7 +543,7 @@ l3_1tr6_disconnect_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3AddTimer(&pc->timer, T305, CC_T305);
 }
@@ -602,7 +602,7 @@ l3_1tr6_t305(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3AddTimer(&pc->timer, T308, CC_T308_1);
 }
diff --git a/drivers/isdn/hisax/l3dss1.c b/drivers/isdn/hisax/l3dss1.c
index cda700664e9c..18a3484b1f7e 100644
--- a/drivers/isdn/hisax/l3dss1.c
+++ b/drivers/isdn/hisax/l3dss1.c
@@ -525,7 +525,7 @@ l3dss1_message_cause(struct l3_process *pc, u_char mt, u_char cause)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
 
@@ -551,7 +551,7 @@ l3dss1_status_send(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
 
@@ -587,7 +587,7 @@ l3dss1_msg_without_setup(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	dss1_release_l3_process(pc);
 }
@@ -944,7 +944,7 @@ l3dss1_msg_with_uus(struct l3_process *pc, u_char cmd)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 } /* l3dss1_msg_with_uus */
 
@@ -1420,7 +1420,7 @@ l3dss1_setup_req(struct l3_process *pc, u_char pr,
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	L3DelTimer(&pc->timer);
 	L3AddTimer(&pc->timer, T303, CC_T303);
 	newl3state(pc, 1);
@@ -1786,7 +1786,7 @@ l3dss1_disconnect_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	newl3state(pc, 11);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3AddTimer(&pc->timer, T305, CC_T305);
@@ -1848,7 +1848,7 @@ l3dss1_reject_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	pc->st->l3.l3l4(pc->st, CC_RELEASE | INDICATION, pc);
 	newl3state(pc, 0);
@@ -2145,7 +2145,7 @@ static void l3dss1_redir_req(struct l3_process *pc, u_char pr, void *arg)
 
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l))) return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 } /* l3dss1_redir_req */
@@ -2216,7 +2216,7 @@ static int l3dss1_cmd_global(struct PStack *st, isdn_ctrl *ic)
 				if (pc) dss1_release_l3_process(pc);
 				return (-2);
 			}
-			memcpy(skb_put(skb, l), temp, l);
+			skb_put_data(skb, temp, l);
 
 			if (pc)
 			{ pc->prot.dss1.invoke_id = id; /* remember id */
@@ -2359,7 +2359,7 @@ l3dss1_t305(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	newl3state(pc, 19);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3AddTimer(&pc->timer, T308, CC_T308_1);
@@ -2528,7 +2528,7 @@ l3dss1_suspend_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	newl3state(pc, 15);
 	L3AddTimer(&pc->timer, T319, CC_T319);
@@ -2603,7 +2603,7 @@ l3dss1_resume_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	newl3state(pc, 17);
 	L3AddTimer(&pc->timer, T318, CC_T318);
@@ -2721,7 +2721,7 @@ l3dss1_global_restart(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	newl3state(pc, 0);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
@@ -2929,7 +2929,7 @@ global_handler(struct PStack *st, int mt, struct sk_buff *skb)
 		l = p - tmp;
 		if (!(skb = l3_alloc_skb(l)))
 			return;
-		memcpy(skb_put(skb, l), tmp, l);
+		skb_put_data(skb, tmp, l);
 		l3_msg(proc->st, DL_DATA | REQUEST, skb);
 	} else {
 		if (st->l3.debug & L3_DEB_STATE) {
diff --git a/drivers/isdn/hisax/l3ni1.c b/drivers/isdn/hisax/l3ni1.c
index 8dc791bfaa6f..ea311e7df48e 100644
--- a/drivers/isdn/hisax/l3ni1.c
+++ b/drivers/isdn/hisax/l3ni1.c
@@ -454,7 +454,7 @@ l3ni1_message_plus_chid(struct l3_process *pc, u_char mt)
 
 	if (!(skb = l3_alloc_skb(7)))
 		return;
-	memcpy(skb_put(skb, 7), tmp, 7);
+	skb_put_data(skb, tmp, 7);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
 
@@ -475,7 +475,7 @@ l3ni1_message_cause(struct l3_process *pc, u_char mt, u_char cause)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
 
@@ -501,7 +501,7 @@ l3ni1_status_send(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
 
@@ -537,7 +537,7 @@ l3ni1_msg_without_setup(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	ni1_release_l3_process(pc);
 }
@@ -894,7 +894,7 @@ l3ni1_msg_with_uus(struct l3_process *pc, u_char cmd)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 } /* l3ni1_msg_with_uus */
 
@@ -1274,7 +1274,7 @@ l3ni1_setup_req(struct l3_process *pc, u_char pr,
 	{
 		return;
 	}
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	L3DelTimer(&pc->timer);
 	L3AddTimer(&pc->timer, T303, CC_T303);
 	newl3state(pc, 1);
@@ -1640,7 +1640,7 @@ l3ni1_disconnect_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	newl3state(pc, 11);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3AddTimer(&pc->timer, T305, CC_T305);
@@ -1704,7 +1704,7 @@ l3ni1_reject_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	pc->st->l3.l3l4(pc->st, CC_RELEASE | INDICATION, pc);
 	newl3state(pc, 0);
@@ -2001,7 +2001,7 @@ static void l3ni1_redir_req(struct l3_process *pc, u_char pr, void *arg)
 
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l))) return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 } /* l3ni1_redir_req */
@@ -2076,7 +2076,7 @@ static int l3ni1_cmd_global(struct PStack *st, isdn_ctrl *ic)
 				if (pc) ni1_release_l3_process(pc);
 				return (-2);
 			}
-			memcpy(skb_put(skb, l), temp, l);
+			skb_put_data(skb, temp, l);
 
 			if (pc)
 			{ pc->prot.ni1.invoke_id = id; /* remember id */
@@ -2219,7 +2219,7 @@ l3ni1_t305(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	newl3state(pc, 19);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	L3AddTimer(&pc->timer, T308, CC_T308_1);
@@ -2388,7 +2388,7 @@ l3ni1_suspend_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	newl3state(pc, 15);
 	L3AddTimer(&pc->timer, T319, CC_T319);
@@ -2463,7 +2463,7 @@ l3ni1_resume_req(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 	newl3state(pc, 17);
 	L3AddTimer(&pc->timer, T318, CC_T318);
@@ -2582,7 +2582,7 @@ l3ni1_global_restart(struct l3_process *pc, u_char pr, void *arg)
 	l = p - tmp;
 	if (!(skb = l3_alloc_skb(l)))
 		return;
-	memcpy(skb_put(skb, l), tmp, l);
+	skb_put_data(skb, tmp, l);
 	newl3state(pc, 0);
 	l3_msg(pc->st, DL_DATA | REQUEST, skb);
 }
@@ -2655,7 +2655,7 @@ static void l3ni1_SendSpid(struct l3_process *pc, u_char pr, struct sk_buff *skb
 	*p++ = IE_SPID;
 	*p++ = l;
 
-	memcpy(skb_put(skb, l), pSPID, l);
+	skb_put_data(skb, pSPID, l);
 
 	newl3state(pc, iNewState);
 
@@ -2873,7 +2873,7 @@ global_handler(struct PStack *st, int mt, struct sk_buff *skb)
 		l = p - tmp;
 		if (!(skb = l3_alloc_skb(l)))
 			return;
-		memcpy(skb_put(skb, l), tmp, l);
+		skb_put_data(skb, tmp, l);
 		l3_msg(proc->st, DL_DATA | REQUEST, skb);
 	} else {
 		if (st->l3.debug & L3_DEB_STATE) {
diff --git a/drivers/isdn/hisax/netjet.c b/drivers/isdn/hisax/netjet.c
index 233e432e06f6..b7f54fa29228 100644
--- a/drivers/isdn/hisax/netjet.c
+++ b/drivers/isdn/hisax/netjet.c
@@ -383,7 +383,7 @@ static void got_frame(struct BCState *bcs, int count) {
 	if (!(skb = dev_alloc_skb(count)))
 		printk(KERN_WARNING "TIGER: receive out of memory\n");
 	else {
-		memcpy(skb_put(skb, count), bcs->hw.tiger.rcvbuf, count);
+		skb_put_data(skb, bcs->hw.tiger.rcvbuf, count);
 		skb_queue_tail(&bcs->rqueue, skb);
 	}
 	test_and_set_bit(B_RCVBUFREADY, &bcs->event);
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c
index a0fdbc074b98..1cb9930d5e24 100644
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -527,7 +527,7 @@ static void usb_in_complete(struct urb *urb)
 				WARNING("receive out of memory\n");
 				break;
 			}
-			memcpy(skb_put(skb, status), in->rcvbuf, status);
+			skb_put_data(skb, in->rcvbuf, status);
 			in->hisax_if->l1l2(in->hisax_if, PH_DATA | INDICATION, skb);
 		} else if (status == -HDLC_CRC_ERROR) {
 			INFO("CRC error");
diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c
index c99f0ec58a01..6f6733b7c1e4 100644
--- a/drivers/isdn/hisax/w6692.c
+++ b/drivers/isdn/hisax/w6692.c
@@ -309,7 +309,9 @@ W6692B_interrupt(struct IsdnCardState *cs, u_char bchan)
 				if (!(skb = dev_alloc_skb(count)))
 					printk(KERN_WARNING "W6692: Bchan receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), bcs->hw.w6692.rcvbuf, count);
+					skb_put_data(skb,
+						     bcs->hw.w6692.rcvbuf,
+						     count);
 					skb_queue_tail(&bcs->rqueue, skb);
 				}
 			}
@@ -332,7 +334,8 @@ W6692B_interrupt(struct IsdnCardState *cs, u_char bchan)
 			if (!(skb = dev_alloc_skb(W_B_FIFO_THRESH)))
 				printk(KERN_WARNING "HiSax: receive out of memory\n");
 			else {
-				memcpy(skb_put(skb, W_B_FIFO_THRESH), bcs->hw.w6692.rcvbuf, W_B_FIFO_THRESH);
+				skb_put_data(skb, bcs->hw.w6692.rcvbuf,
+					     W_B_FIFO_THRESH);
 				skb_queue_tail(&bcs->rqueue, skb);
 			}
 			bcs->hw.w6692.rcvidx = 0;
@@ -441,7 +444,7 @@ StartW6692:
 				if (!(skb = alloc_skb(count, GFP_ATOMIC)))
 					printk(KERN_WARNING "HiSax: D receive out of memory\n");
 				else {
-					memcpy(skb_put(skb, count), cs->rcvbuf, count);
+					skb_put_data(skb, cs->rcvbuf, count);
 					skb_queue_tail(&cs->rq, skb);
 				}
 			}
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index 93bae94314a6..87119b517508 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -171,16 +171,16 @@ hycapi_register_internal(struct capi_ctr *ctrl, __u16 appl,
 		       card->myid);
 		return;
 	}
-	memcpy(skb_put(skb, sizeof(__u16)), &len, sizeof(__u16));
-	memcpy(skb_put(skb, sizeof(__u16)), &appl, sizeof(__u16));
+	skb_put_data(skb, &len, sizeof(__u16));
+	skb_put_data(skb, &appl, sizeof(__u16));
 	memcpy(skb_put(skb, sizeof(__u8)), &_command, sizeof(_command));
 	memcpy(skb_put(skb, sizeof(__u8)), &_subcommand, sizeof(_subcommand));
-	memcpy(skb_put(skb, sizeof(__u16)), &MessageNumber, sizeof(__u16));
-	memcpy(skb_put(skb, sizeof(__u16)), &MessageBufferSize, sizeof(__u16));
-	memcpy(skb_put(skb, sizeof(__u16)), &(rp->level3cnt), sizeof(__u16));
-	memcpy(skb_put(skb, sizeof(__u16)), &(rp->datablkcnt), sizeof(__u16));
-	memcpy(skb_put(skb, sizeof(__u16)), &(rp->datablklen), sizeof(__u16));
-	memcpy(skb_put(skb, slen), ExtFeatureDefaults, slen);
+	skb_put_data(skb, &MessageNumber, sizeof(__u16));
+	skb_put_data(skb, &MessageBufferSize, sizeof(__u16));
+	skb_put_data(skb, &(rp->level3cnt), sizeof(__u16));
+	skb_put_data(skb, &(rp->datablkcnt), sizeof(__u16));
+	skb_put_data(skb, &(rp->datablklen), sizeof(__u16));
+	skb_put_data(skb, ExtFeatureDefaults, slen);
 	hycapi_applications[appl - 1].ctrl_mask |= (1 << (ctrl->cnr - 1));
 	hycapi_send_message(ctrl, skb);
 }
@@ -279,11 +279,11 @@ static void hycapi_release_internal(struct capi_ctr *ctrl, __u16 appl)
 		       card->myid);
 		return;
 	}
-	memcpy(skb_put(skb, sizeof(__u16)), &len, sizeof(__u16));
-	memcpy(skb_put(skb, sizeof(__u16)), &appl, sizeof(__u16));
+	skb_put_data(skb, &len, sizeof(__u16));
+	skb_put_data(skb, &appl, sizeof(__u16));
 	memcpy(skb_put(skb, sizeof(__u8)), &_command, sizeof(_command));
 	memcpy(skb_put(skb, sizeof(__u8)), &_subcommand, sizeof(_subcommand));
-	memcpy(skb_put(skb, sizeof(__u16)), &MessageNumber, sizeof(__u16));
+	skb_put_data(skb, &MessageNumber, sizeof(__u16));
 	hycapi_send_message(ctrl, skb);
 	hycapi_applications[appl - 1].ctrl_mask &= ~(1 << (ctrl->cnr - 1));
 }
@@ -557,10 +557,9 @@ hycapi_rx_capipkt(hysdn_card *card, unsigned char *buf, unsigned short len)
 			       card->myid);
 			return;
 		}
-		memcpy(skb_put(skb, MsgLen), buf, MsgLen);
-		memcpy(skb_put(skb, 2 * sizeof(__u32)), CP64, 2 * sizeof(__u32));
-		memcpy(skb_put(skb, len - MsgLen), buf + MsgLen,
-		       len - MsgLen);
+		skb_put_data(skb, buf, MsgLen);
+		skb_put_data(skb, CP64, 2 * sizeof(__u32));
+		skb_put_data(skb, buf + MsgLen, len - MsgLen);
 		CAPIMSG_SETLEN(skb->data, 30);
 	} else {
 		if (!(skb = alloc_skb(len, GFP_ATOMIC))) {
@@ -568,7 +567,7 @@ hycapi_rx_capipkt(hysdn_card *card, unsigned char *buf, unsigned short len)
 			       card->myid);
 			return;
 		}
-		memcpy(skb_put(skb, len), buf, len);
+		skb_put_data(skb, buf, len);
 	}
 	switch (CAPIMSG_CMD(skb->data))
 	{
diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c
index b93a4e9a8d34..8e9c34f33d86 100644
--- a/drivers/isdn/hysdn/hysdn_net.c
+++ b/drivers/isdn/hysdn/hysdn_net.c
@@ -201,7 +201,7 @@ hysdn_rx_netpkt(hysdn_card *card, unsigned char *buf, unsigned short len)
 		return;
 	}
 	/* copy the data */
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, buf, len);
 
 	/* determine the used protocol */
 	skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 8aa158a09180..9ce23cf3d7d2 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -2258,8 +2258,7 @@ static void isdn_ppp_ccp_xmit_reset(struct ippp_struct *is, int proto,
 
 	/* Now stuff remaining bytes */
 	if (len) {
-		p = skb_put(skb, len);
-		memcpy(p, data, len);
+		p = skb_put_data(skb, data, len);
 	}
 
 	/* skb is now ready for xmit */
diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index ddd8207e4e54..d30130c8d0f3 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -474,7 +474,7 @@ isdn_tty_senddown(modem_info *info)
 		return;
 	}
 	skb_reserve(skb, skb_res);
-	memcpy(skb_put(skb, buflen), info->port.xmit_buf, buflen);
+	skb_put_data(skb, info->port.xmit_buf, buflen);
 	info->xmit_count = 0;
 #ifdef CONFIG_ISDN_AUDIO
 	if (info->vonline & 2) {
diff --git a/drivers/isdn/i4l/isdn_v110.c b/drivers/isdn/i4l/isdn_v110.c
index 52827a80c51f..8b74ce412524 100644
--- a/drivers/isdn/i4l/isdn_v110.c
+++ b/drivers/isdn/i4l/isdn_v110.c
@@ -421,7 +421,7 @@ isdn_v110_sync(isdn_v110_stream *v)
 	}
 	if ((skb = dev_alloc_skb(v->framelen + v->skbres))) {
 		skb_reserve(skb, v->skbres);
-		memcpy(skb_put(skb, v->framelen), v->OfflineFrame, v->framelen);
+		skb_put_data(skb, v->OfflineFrame, v->framelen);
 	}
 	return skb;
 }
@@ -441,7 +441,7 @@ isdn_v110_idle(isdn_v110_stream *v)
 	}
 	if ((skb = dev_alloc_skb(v->framelen + v->skbres))) {
 		skb_reserve(skb, v->skbres);
-		memcpy(skb_put(skb, v->framelen), v->OnlineFrame, v->framelen);
+		skb_put_data(skb, v->OnlineFrame, v->framelen);
 	}
 	return skb;
 }
@@ -486,7 +486,7 @@ isdn_v110_encode(isdn_v110_stream *v, struct sk_buff *skb)
 	}
 	skb_reserve(nskb, v->skbres + sizeof(int));
 	if (skb->len == 0) {
-		memcpy(skb_put(nskb, v->framelen), v->OnlineFrame, v->framelen);
+		skb_put_data(nskb, v->OnlineFrame, v->framelen);
 		*((int *)skb_push(nskb, sizeof(int))) = 0;
 		return nskb;
 	}
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index ef9c8e4f1fa2..7ac7badb8f55 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -479,7 +479,7 @@ isdnloop_fake(isdnloop_card *card, char *s, int ch)
 	}
 	if (ch >= 0)
 		sprintf(skb_put(skb, 3), "%02d;", ch);
-	memcpy(skb_put(skb, strlen(s)), s, strlen(s));
+	skb_put_data(skb, s, strlen(s));
 	skb_queue_tail(&card->dqueue, skb);
 	return 0;
 }
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
index 8e3aa002767b..d4b6f01a3f0e 100644
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ b/drivers/isdn/mISDN/dsp_cmx.c
@@ -1595,8 +1595,7 @@ send_packet:
 				thh = mISDN_HEAD_P(txskb);
 				thh->prim = DL_DATA_REQ;
 				thh->id = 0;
-				memcpy(skb_put(txskb, len), nskb->data + preload,
-				       len);
+				skb_put_data(txskb, nskb->data + preload, len);
 				/* queue (trigger later) */
 				skb_queue_tail(&dsp->sendq, txskb);
 			}
diff --git a/drivers/isdn/mISDN/layer2.c b/drivers/isdn/mISDN/layer2.c
index 5eb380a25903..7243a6746f8b 100644
--- a/drivers/isdn/mISDN/layer2.c
+++ b/drivers/isdn/mISDN/layer2.c
@@ -176,7 +176,7 @@ l2up_create(struct layer2 *l2, u_int prim, int len, void *arg)
 	hh->prim = prim;
 	hh->id = (l2->ch.nr << 16) | l2->ch.addr;
 	if (len)
-		memcpy(skb_put(skb, len), arg, len);
+		skb_put_data(skb, arg, len);
 	err = l2->up->send(l2->up, skb);
 	if (err) {
 		printk(KERN_WARNING "%s: dev %s err=%d\n", __func__,
@@ -235,7 +235,7 @@ l2down_create(struct layer2 *l2, u_int prim, u_int id, int len, void *arg)
 	hh->prim = prim;
 	hh->id = id;
 	if (len)
-		memcpy(skb_put(skb, len), arg, len);
+		skb_put_data(skb, arg, len);
 	err = l2down_raw(l2, skb);
 	if (err)
 		dev_kfree_skb(skb);
@@ -640,7 +640,7 @@ send_uframe(struct layer2 *l2, struct sk_buff *skb, u_char cmd, u_char cr)
 			return;
 		}
 	}
-	memcpy(skb_put(skb, i), tmp, i);
+	skb_put_data(skb, tmp, i);
 	enqueue_super(l2, skb);
 }
 
@@ -1125,7 +1125,7 @@ enquiry_cr(struct layer2 *l2, u_char typ, u_char cr, u_char pf)
 		       mISDNDevName4ch(&l2->ch), __func__);
 		return;
 	}
-	memcpy(skb_put(skb, i), tmp, i);
+	skb_put_data(skb, tmp, i);
 	enqueue_super(l2, skb);
 }
 
diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c
index 592f597d8951..908127efccf8 100644
--- a/drivers/isdn/mISDN/tei.c
+++ b/drivers/isdn/mISDN/tei.c
@@ -312,7 +312,7 @@ teiup_create(struct manager *mgr, u_int prim, int len, void *arg)
 	hh->prim = prim;
 	hh->id = (mgr->ch.nr << 16) | mgr->ch.addr;
 	if (len)
-		memcpy(skb_put(skb, len), arg, len);
+		skb_put_data(skb, arg, len);
 	err = mgr->up->send(mgr->up, skb);
 	if (err) {
 		printk(KERN_WARNING "%s: err=%d\n", __func__, err);
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 9947b342633e..bbaf0a8cae8b 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -828,8 +828,7 @@ static void dvb_net_ule(struct net_device *dev, const u8 *buf, size_t buf_len)
 
 		/* Copy data into our current skb. */
 		h.how_much = min(h.priv->ule_sndu_remain, (int)h.ts_remain);
-		memcpy(skb_put(h.priv->ule_skb, h.how_much),
-		       h.from_where, h.how_much);
+		skb_put_data(h.priv->ule_skb, h.from_where, h.how_much);
 		h.priv->ule_sndu_remain -= h.how_much;
 		h.ts_remain -= h.how_much;
 		h.from_where += h.how_much;
diff --git a/drivers/media/radio/wl128x/fmdrv_common.c b/drivers/media/radio/wl128x/fmdrv_common.c
index 588e2d61c3b4..c67e055a12c9 100644
--- a/drivers/media/radio/wl128x/fmdrv_common.c
+++ b/drivers/media/radio/wl128x/fmdrv_common.c
@@ -442,7 +442,7 @@ static int fm_send_cmd(struct fmdev *fmdev, u8 fm_op, u16 type,	void *payload,
 		fm_cb(skb)->fm_op = *((u8 *)payload + 2);
 	}
 	if (payload != NULL)
-		memcpy(skb_put(skb, payload_len), payload, payload_len);
+		skb_put_data(skb, payload, payload_len);
 
 	fm_cb(skb)->completion = wait_completion;
 	skb_queue_tail(&fmdev->tx_q, skb);
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 00051590e00f..eda8d407be28 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -262,7 +262,7 @@ void st_int_recv(void *disc_data,
 	while (count) {
 		if (st_gdata->rx_count) {
 			len = min_t(unsigned int, st_gdata->rx_count, count);
-			memcpy(skb_put(st_gdata->rx_skb, len), ptr, len);
+			skb_put_data(st_gdata->rx_skb, ptr, len);
 			st_gdata->rx_count -= len;
 			count -= len;
 			ptr += len;
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index bf0d7708beac..e74413f882ab 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -152,7 +152,7 @@ static void kim_int_recv(struct kim_data_s *kim_gdata,
 	while (count) {
 		if (kim_gdata->rx_count) {
 			len = min_t(unsigned int, kim_gdata->rx_count, count);
-			memcpy(skb_put(kim_gdata->rx_skb, len), ptr, len);
+			skb_put_data(kim_gdata->rx_skb, ptr, len);
 			kim_gdata->rx_count -= len;
 			count -= len;
 			ptr += len;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 7d7a3cec149a..b796db7dd621 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -936,8 +936,7 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
 	if (!skb)
 		return;
 
-	data = skb_put(skb, size);
-	memcpy(data, &pkt, size);
+	data = skb_put_data(skb, &pkt, size);
 
 	skb_reset_mac_header(skb);
 	skb->network_header = skb->mac_header + ETH_HLEN;
diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index 71a7c3b44fdd..4534326e20ac 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -454,8 +454,7 @@ static int cfhsi_rx_desc(struct cfhsi_desc *desc, struct cfhsi *cfhsi)
 		}
 		caif_assert(skb != NULL);
 
-		dst = skb_put(skb, len);
-		memcpy(dst, pfrm, len);
+		dst = skb_put_data(skb, pfrm, len);
 
 		skb->protocol = htons(ETH_P_CAIF);
 		skb_reset_mac_header(skb);
@@ -585,8 +584,7 @@ static int cfhsi_rx_pld(struct cfhsi_desc *desc, struct cfhsi *cfhsi)
 		}
 		caif_assert(skb != NULL);
 
-		dst = skb_put(skb, len);
-		memcpy(dst, pcffrm, len);
+		dst = skb_put_data(skb, pcffrm, len);
 
 		skb->protocol = htons(ETH_P_CAIF);
 		skb_reset_mac_header(skb);
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 76e1d3545105..5c57be2082ba 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -198,8 +198,7 @@ static void ldisc_receive(struct tty_struct *tty, const u8 *data,
 	skb = netdev_alloc_skb(ser->dev, count+1);
 	if (skb == NULL)
 		return;
-	p = skb_put(skb, count);
-	memcpy(p, data, count);
+	p = skb_put_data(skb, data, count);
 
 	skb->protocol = htons(ETH_P_CAIF);
 	skb_reset_mac_header(skb);
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index fc21afe852b9..24a5f5ca2037 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -548,8 +548,7 @@ int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len)
 		skb = netdev_alloc_skb(cfspi->ndev, pkt_len + 1);
 		caif_assert(skb != NULL);
 
-		dst = skb_put(skb, pkt_len);
-		memcpy(dst, src, pkt_len);
+		dst = skb_put_data(skb, src, pkt_len);
 		src += pkt_len;
 
 		skb->protocol = htons(ETH_P_CAIF);
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index 1794ea0420b7..c3d104feee13 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -242,7 +242,7 @@ static struct sk_buff *cfv_alloc_and_copy_skb(int *err,
 
 	skb_reserve(skb, cfv->rx_hr + pad_len);
 
-	memcpy(skb_put(skb, cfpkt_len), frm + cfv->rx_hr, cfpkt_len);
+	skb_put_data(skb, frm + cfv->rx_hr, cfpkt_len);
 	return skb;
 }
 
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index 6a6e896e52fa..5d067c1b987f 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -216,8 +216,7 @@ static void slc_bump(struct slcan *sl)
 	can_skb_prv(skb)->ifindex = sl->dev->ifindex;
 	can_skb_prv(skb)->skbcnt = 0;
 
-	memcpy(skb_put(skb, sizeof(struct can_frame)),
-	       &cf, sizeof(struct can_frame));
+	skb_put_data(skb, &cf, sizeof(struct can_frame));
 
 	sl->dev->stats.rx_packets++;
 	sl->dev->stats.rx_bytes += cf.can_dlc;
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index e7b1fa56b290..c5987f518cb2 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -1370,9 +1370,9 @@ static int boomerang_rx(struct net_device *dev)
 			    (skb = netdev_alloc_skb(dev, pkt_len + 4)) != NULL) {
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
-				memcpy(skb_put(skb, pkt_len),
-				       isa_bus_to_virt(vp->rx_ring[entry].
-						   addr), pkt_len);
+				skb_put_data(skb,
+					     isa_bus_to_virt(vp->rx_ring[entry].addr),
+					     pkt_len);
 				rx_copy++;
 			} else {
 				void *temp;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 14cff6017756..3b516ebeeddb 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -2628,9 +2628,8 @@ boomerang_rx(struct net_device *dev)
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				/* 'skb_put()' points to the start of sk_buff data area. */
-				memcpy(skb_put(skb, pkt_len),
-					   vp->rx_skbuff[entry]->data,
-					   pkt_len);
+				skb_put_data(skb, vp->rx_skbuff[entry]->data,
+					     pkt_len);
 				pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				vp->rx_copy++;
 			} else {
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index d8e133ced7b8..4309be3724ad 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -807,7 +807,8 @@ static int greth_rx(struct net_device *dev, int limit)
 				if (netif_msg_pktdata(greth))
 					greth_print_rx_packet(phys_to_virt(dma_addr), pkt_len);
 
-				memcpy(skb_put(skb, pkt_len), phys_to_virt(dma_addr), pkt_len);
+				skb_put_data(skb, phys_to_virt(dma_addr),
+					     pkt_len);
 
 				skb->protocol = eth_type_trans(skb, dev);
 				dev->stats.rx_bytes += pkt_len;
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 87a11b9f0ea5..54eff90e2f02 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -2282,7 +2282,7 @@ static struct rfd *nic_rx_pkts(struct et131x_adapter *adapter)
 
 	adapter->netdev->stats.rx_bytes += rfd->len;
 
-	memcpy(skb_put(skb, rfd->len), fbr->virt[buff_index], rfd->len);
+	skb_put_data(skb, fbr->virt[buff_index], rfd->len);
 
 	skb->protocol = eth_type_trans(skb, adapter->netdev);
 	skb->ip_summed = CHECKSUM_NONE;
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index 857df9c45f04..f17a160dbff2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -663,7 +663,7 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 			return;
 		}
 		skb_reserve(skb, 2);
-		memcpy(skb_put(skb, frame_length), mf->data, frame_length);
+		skb_put_data(skb, mf->data, frame_length);
 
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index 5711fbbd6ae3..041cfb7952f8 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -251,7 +251,7 @@ static void nb8800_receive(struct net_device *dev, unsigned int i,
 
 	if (len <= RX_COPYBREAK) {
 		dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE);
-		memcpy(skb_put(skb, len), data, len);
+		skb_put_data(skb, data, len);
 		dma_sync_single_for_device(&dev->dev, dma, len,
 					   DMA_FROM_DEVICE);
 	} else {
@@ -264,7 +264,7 @@ static void nb8800_receive(struct net_device *dev, unsigned int i,
 		}
 
 		dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
-		memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR);
+		skb_put_data(skb, data, RX_COPYHDR);
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 				offset + RX_COPYHDR, len - RX_COPYHDR,
 				RX_BUF_SIZE);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 91f7492623d3..4b0168bcbc8a 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2992,7 +2992,7 @@ static void at91ether_rx(struct net_device *dev)
 		skb = netdev_alloc_skb(dev, pktlen + 2);
 		if (skb) {
 			skb_reserve(skb, 2);
-			memcpy(skb_put(skb, pktlen), p_recv, pktlen);
+			skb_put_data(skb, p_recv, pktlen);
 
 			skb->protocol = eth_type_trans(skb, dev);
 			dev->stats.rx_packets++;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index bf483932ff25..4b3507972243 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -443,8 +443,8 @@ static inline void octeon_fast_packet_next(struct octeon_droq *droq,
 					   int copy_len,
 					   int idx)
 {
-	memcpy(skb_put(nicbuf, copy_len),
-	       get_rbd(droq->recv_buf_list[idx].buffer), copy_len);
+	skb_put_data(nicbuf, get_rbd(droq->recv_buf_list[idx].buffer),
+		     copy_len);
 }
 
 /**
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index da5b58b853e2..410a0a95130b 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -450,11 +450,10 @@ skip_this_frame:
 
 	if (bp + length > lp->end_dma_buff) {
 		int semi_cnt = lp->end_dma_buff - bp;
-		memcpy(skb_put(skb, semi_cnt), bp, semi_cnt);
-		memcpy(skb_put(skb, length - semi_cnt), lp->dma_buff,
-		       length - semi_cnt);
+		skb_put_data(skb, bp, semi_cnt);
+		skb_put_data(skb, lp->dma_buff, length - semi_cnt);
 	} else {
-		memcpy(skb_put(skb, length), bp, length);
+		skb_put_data(skb, bp, length);
 	}
 	bp += (length + 3) & ~3;
 	if (bp >= lp->end_dma_buff)
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index fd6bcf024729..47be5018d35d 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -3624,10 +3624,10 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     skb_reserve(p, 2);	                               /* Align */
     if (index < lp->rx_old) {                          /* Wrapped buffer */
 	short tlen = (lp->rxRingSize - lp->rx_old) * RX_BUFF_SZ;
-	memcpy(skb_put(p,tlen),lp->rx_bufs + lp->rx_old * RX_BUFF_SZ,tlen);
-	memcpy(skb_put(p,len-tlen),lp->rx_bufs,len-tlen);
+	skb_put_data(p, lp->rx_bufs + lp->rx_old * RX_BUFF_SZ, tlen);
+	skb_put_data(p, lp->rx_bufs, len - tlen);
     } else {                                           /* Linear buffer */
-	memcpy(skb_put(p,len),lp->rx_bufs + lp->rx_old * RX_BUFF_SZ,len);
+	skb_put_data(p, lp->rx_bufs + lp->rx_old * RX_BUFF_SZ, len);
     }
 
     return p;
diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c
index ba6ae24acf62..8df80880ecaa 100644
--- a/drivers/net/ethernet/dec/tulip/interrupt.c
+++ b/drivers/net/ethernet/dec/tulip/interrupt.c
@@ -218,9 +218,9 @@ int tulip_poll(struct napi_struct *napi, int budget)
                                                         pkt_len);
                                        skb_put(skb, pkt_len);
 #else
-                                       memcpy(skb_put(skb, pkt_len),
-                                              tp->rx_buffers[entry].skb->data,
-                                              pkt_len);
+                                       skb_put_data(skb,
+                                                    tp->rx_buffers[entry].skb->data,
+                                                    pkt_len);
 #endif
                                        pci_dma_sync_single_for_device(tp->pdev,
 								      tp->rx_buffers[entry].mapping,
@@ -444,9 +444,9 @@ static int tulip_rx(struct net_device *dev)
 						 pkt_len);
 				skb_put(skb, pkt_len);
 #else
-				memcpy(skb_put(skb, pkt_len),
-				       tp->rx_buffers[entry].skb->data,
-				       pkt_len);
+				skb_put_data(skb,
+					     tp->rx_buffers[entry].skb->data,
+					     pkt_len);
 #endif
 				pci_dma_sync_single_for_device(tp->pdev,
 							       tp->rx_buffers[entry].mapping,
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index 8d98b259d1ba..7fc248efc4ba 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -864,9 +864,9 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info
 					skb = new_skb;
 					/* size less than COPY_SIZE, allocate a rxlen SKB */
 					skb_reserve(skb, 2); /* 16byte align */
-					memcpy(skb_put(skb, rxlen),
-					       skb_tail_pointer(rxptr->rx_skb_ptr),
-					       rxlen);
+					skb_put_data(skb,
+						     skb_tail_pointer(rxptr->rx_skb_ptr),
+						     rxlen);
 					uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
 				} else
 					skb_put(skb, rxlen);
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 278f139f2a22..4ee042c034a1 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -223,7 +223,7 @@ static void ec_bhf_process_rx(struct ec_bhf_priv *priv)
 
 		skb = netdev_alloc_skb_ip_align(priv->net_dev, pkt_size);
 		if (skb) {
-			memcpy(skb_put(skb, pkt_size), data, pkt_size);
+			skb_put_data(skb, data, pkt_size);
 			skb->protocol = eth_type_trans(skb, priv->net_dev);
 			priv->stat_rx_bytes += pkt_size;
 
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 610f9c07c21d..e92859dab7ae 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -1711,8 +1711,8 @@ static int netdev_rx(struct net_device *dev)
 					np->cur_rx->skbuff->data, pkt_len);
 				skb_put(skb, pkt_len);
 #else
-				memcpy(skb_put(skb, pkt_len),
-					np->cur_rx->skbuff->data, pkt_len);
+				skb_put_data(skb, np->cur_rx->skbuff->data,
+					     pkt_len);
 #endif
 				pci_dma_sync_single_for_device(np->pci_dev,
 							       np->cur_rx->buffer,
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index 945883842533..d719668a6684 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -809,7 +809,8 @@ memory_squeeze:
 				if (!rx_in_place) {
 					/* 16 byte align the data fields */
 					skb_reserve(skb, 2);
-					memcpy(skb_put(skb,pkt_len), rbd->v_data, pkt_len);
+					skb_put_data(skb, rbd->v_data,
+						     pkt_len);
 				}
 				skb->protocol=eth_type_trans(skb,dev);
 				skb->len = pkt_len;
diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c
index e86773325cbe..8449c58f01fd 100644
--- a/drivers/net/ethernet/i825xx/lib82596.c
+++ b/drivers/net/ethernet/i825xx/lib82596.c
@@ -727,7 +727,8 @@ memory_squeeze:
 					dma_sync_single_for_cpu(dev->dev.parent,
 								(dma_addr_t)SWAP32(rbd->b_data),
 								PKT_BUF_SZ, DMA_FROM_DEVICE);
-					memcpy(skb_put(skb, pkt_len), rbd->v_data, pkt_len);
+					skb_put_data(skb, rbd->v_data,
+						     pkt_len);
 					dma_sync_single_for_device(dev->dev.parent,
 								   (dma_addr_t)SWAP32(rbd->b_data),
 								   PKT_BUF_SZ, DMA_FROM_DEVICE);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index bd8b05fe8258..98375e1e1185 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -4345,7 +4345,7 @@ static struct sk_buff *e1000_copybreak(struct e1000_adapter *adapter,
 	dma_sync_single_for_cpu(&adapter->pdev->dev, buffer_info->dma,
 				length, DMA_FROM_DEVICE);
 
-	memcpy(skb_put(skb, length), data, length);
+	skb_put_data(skb, data, length);
 
 	return skb;
 }
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d297011b535d..0aab74c2a209 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1976,9 +1976,8 @@ err_drop_frame:
 						      MVNETA_MH_SIZE + NET_SKB_PAD,
 						      rx_bytes,
 						      DMA_FROM_DEVICE);
-			memcpy(skb_put(skb, rx_bytes),
-			       data + MVNETA_MH_SIZE + NET_SKB_PAD,
-			       rx_bytes);
+			skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
+				     rx_bytes);
 
 			skb->protocol = eth_type_trans(skb, dev);
 			mvneta_rx_csum(pp, rx_status, skb);
@@ -2103,9 +2102,8 @@ err_drop_frame:
 			                              MVNETA_MH_SIZE + NET_SKB_PAD,
 			                              rx_bytes,
 			                              DMA_FROM_DEVICE);
-			memcpy(skb_put(skb, rx_bytes),
-			       data + MVNETA_MH_SIZE + NET_SKB_PAD,
-			       rx_bytes);
+			skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
+				     rx_bytes);
 
 			skb->protocol = eth_type_trans(skb, dev);
 			mvneta_rx_csum(pp, rx_status, skb);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index ee1c78abab0b..e798fbe08600 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -5020,8 +5020,7 @@ static inline int rx_proc(struct net_device *dev, struct ksz_hw* hw,
 		 */
 		skb_reserve(skb, 2);
 
-		memcpy(skb_put(skb, packet_len),
-			dma_buf->skb->data, packet_len);
+		skb_put_data(skb, dma_buf->skb->data, packet_len);
 	} while (0);
 
 	skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 9c7ffd649e9a..828bfd93cb54 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -959,11 +959,10 @@ static int __lpc_handle_recv(struct net_device *ndev, int budget)
 			if (!skb) {
 				ndev->stats.rx_dropped++;
 			} else {
-				prdbuf = skb_put(skb, len);
-
 				/* Copy packet from buffer */
-				memcpy(prdbuf, pldat->rx_buff_v +
-					rxconsidx * ENET_MAXF_SIZE, len);
+				prdbuf = skb_put_data(skb,
+						      pldat->rx_buff_v + rxconsidx * ENET_MAXF_SIZE,
+						      len);
 
 				/* Pass to upper layer */
 				skb->protocol = eth_type_trans(skb, ndev);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 892eb98290f6..6fc854b120b0 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1079,8 +1079,7 @@ static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
 	 * re-use the already allcoated & mapped memory.
 	 */
 	if (len + pad <= edev->rx_copybreak) {
-		memcpy(skb_put(skb, len),
-		       page_address(page) + offset, len);
+		skb_put_data(skb, page_address(page) + offset, len);
 		qede_reuse_page(rxq, bd);
 		goto out;
 	}
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 1188d420fe53..9feec7009443 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -1577,7 +1577,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev,
 		rx_ring->rx_dropped++;
 		goto err_out;
 	}
-	memcpy(skb_put(skb, hlen), addr, hlen);
+	skb_put_data(skb, addr, hlen);
 	netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
 		     "%d bytes of headers and data in large. Chain page to new skb and pull tail.\n",
 		     length);
@@ -1654,7 +1654,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev,
 				    dma_unmap_len(sbq_desc, maplen),
 				    PCI_DMA_FROMDEVICE);
 
-	memcpy(skb_put(new_skb, length), skb->data, length);
+	skb_put_data(new_skb, skb->data, length);
 
 	pci_dma_sync_single_for_device(qdev->pdev,
 				       dma_unmap_addr(sbq_desc, mapaddr),
@@ -1817,8 +1817,7 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
 						    dma_unmap_len
 						    (sbq_desc, maplen),
 						    PCI_DMA_FROMDEVICE);
-			memcpy(skb_put(skb, length),
-			       sbq_desc->p.skb->data, length);
+			skb_put_data(skb, sbq_desc->p.skb->data, length);
 			pci_dma_sync_single_for_device(qdev->pdev,
 						       dma_unmap_addr
 						       (sbq_desc,
diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c
index 751c81848f35..c07fd594fe71 100644
--- a/drivers/net/ethernet/silan/sc92031.c
+++ b/drivers/net/ethernet/silan/sc92031.c
@@ -795,12 +795,12 @@ static void _sc92031_rx_tasklet(struct net_device *dev)
 		}
 
 		if ((rx_ring_offset + pkt_size) > RX_BUF_LEN) {
-			memcpy(skb_put(skb, RX_BUF_LEN - rx_ring_offset),
-				rx_ring + rx_ring_offset, RX_BUF_LEN - rx_ring_offset);
-			memcpy(skb_put(skb, pkt_size - (RX_BUF_LEN - rx_ring_offset)),
-				rx_ring, pkt_size - (RX_BUF_LEN - rx_ring_offset));
+			skb_put_data(skb, rx_ring + rx_ring_offset,
+				     RX_BUF_LEN - rx_ring_offset);
+			skb_put_data(skb, rx_ring,
+				     pkt_size - (RX_BUF_LEN - rx_ring_offset));
 		} else {
-			memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size);
+			skb_put_data(skb, rx_ring + rx_ring_offset, pkt_size);
 		}
 
 		skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index b56e07fa44a8..750954be5a74 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1156,8 +1156,7 @@ static int fjes_poll(struct napi_struct *napi, int budget)
 				hw->ep_shm_info[cur_epid].net_stats
 							 .rx_errors += 1;
 			} else {
-				memcpy(skb_put(skb, frame_len),
-				       frame, frame_len);
+				skb_put_data(skb, frame, frame_len);
 				skb->protocol = eth_type_trans(skb, netdev);
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 4a40a3d825b4..aec6c26563cf 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -298,7 +298,7 @@ static void ax_bump(struct mkiss *ax)
 		return;
 	}
 
-	memcpy(skb_put(skb,count), ax->rbuff, count);
+	skb_put_data(skb, ax->rbuff, count);
 	skb->protocol = ax25_type_trans(skb, ax->dev);
 	netif_rx(skb);
 	ax->dev->stats.rx_packets++;
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 1ce6239a4849..7683fd544344 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -962,8 +962,8 @@ static void rx_int(struct net_device *dev, u32 rxlimit, u32 index)
 								    pkt_len,
 								    PCI_DMA_FROMDEVICE);
 
-					memcpy(skb_put(skb, pkt_len),
-					       rx_skb->data, pkt_len);
+					skb_put_data(skb, rx_skb->data,
+						     pkt_len);
 
 					pci_dma_sync_single_for_device(rrpriv->pci_dev,
 								       desc->addr.addrlo,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index b65a97ecb78e..9a6c5864bc04 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -593,7 +593,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 	 * Copy to skb. This copy is needed here since the memory pointed by
 	 * hv_netvsc_packet cannot be deallocated
 	 */
-	memcpy(skb_put(skb, buflen), data, buflen);
+	skb_put_data(skb, data, buflen);
 
 	skb->protocol = eth_type_trans(skb, net);
 
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 76ba7ecfe142..548d9d026a85 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -723,7 +723,7 @@ at86rf230_rx_read_frame_complete(void *context)
 		return;
 	}
 
-	memcpy(skb_put(skb, len), buf + 2, len);
+	skb_put_data(skb, buf + 2, len);
 	ieee802154_rx_irqsafe(lp->hw, skb, lqi);
 	kfree(ctx);
 }
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 7a218549c80a..a626c539fb17 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -1875,7 +1875,7 @@ static int ca8210_skb_rx(
 copy_payload:
 	/* Add <msdulen> bytes of space to the back of the buffer */
 	/* Copy msdu to skb */
-	memcpy(skb_put(skb, msdulen), &data_ind[29], msdulen);
+	skb_put_data(skb, &data_ind[29], msdulen);
 
 	ieee802154_rx_irqsafe(hw, skb, mpdulinkquality);
 	return 0;
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index bd63289c55e8..7d334963dc08 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -774,7 +774,7 @@ static void mrf24j40_handle_rx_read_buf_complete(void *context)
 		return;
 	}
 
-	memcpy(skb_put(skb, len), rx_local_buf, len);
+	skb_put_data(skb, rx_local_buf, len);
 	ieee802154_rx_irqsafe(devrec->hw, skb, 0);
 
 #ifdef DEBUG
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 23ed89ae5ddc..19a55cba6beb 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -1459,7 +1459,7 @@ static void smsc_ircc_dma_receive_complete(struct smsc_ircc_cb *self)
 	/* Make sure IP header gets aligned */
 	skb_reserve(skb, 1);
 
-	memcpy(skb_put(skb, len), self->rx_buff.data, len);
+	skb_put_data(skb, self->rx_buff.data, len);
 	self->netdev->stats.rx_packets++;
 	self->netdev->stats.rx_bytes += len;
 
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 15b920086251..6638784c082e 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -578,7 +578,7 @@ static int vlsi_process_rx(struct vlsi_ring *r, struct ring_descr *rd)
 	skb = rd->skb;
 	rd->skb = NULL;
 	skb->dev = ndev;
-	memcpy(skb_put(skb,len), rd->buf, len);
+	skb_put_data(skb, rd->buf, len);
 	skb_reset_mac_header(skb);
 	if (in_interrupt())
 		netif_rx(skb);
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index feb9569e3345..32c72db654e2 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -894,8 +894,7 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf,
 				/* packet overflowed MRU */
 				ap->state |= SC_TOSS;
 			} else {
-				sp = skb_put(skb, n);
-				memcpy(sp, buf, n);
+				sp = skb_put_data(skb, buf, n);
 				if (ap->state & SC_ESCAPE) {
 					sp[0] ^= PPP_TRANS;
 					ap->state &= ~SC_ESCAPE;
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 9ae53986cb4a..ce2300c0bcbf 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -697,8 +697,7 @@ ppp_sync_input(struct syncppp *ap, const unsigned char *buf,
 		goto err;
 	}
 
-	p = skb_put(skb, count);
-	memcpy(p, buf, count);
+	p = skb_put_data(skb, buf, count);
 
 	/* strip address/control field if present */
 	p = skb->data;
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 74b907206aa7..436dd78c396a 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -364,7 +364,7 @@ static void sl_bump(struct slip *sl)
 		return;
 	}
 	skb->dev = dev;
-	memcpy(skb_put(skb, count), sl->rbuff, count);
+	skb_put_data(skb, sl->rbuff, count);
 	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IP);
 	netif_rx_ni(skb);
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 125cff57c759..90facc5ecab0 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -167,8 +167,8 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 		}
 
 		if (rx->ax_skb) {
-			data = skb_put(rx->ax_skb, copy_length);
-			memcpy(data, skb->data + offset, copy_length);
+			data = skb_put_data(rx->ax_skb, skb->data + offset,
+					    copy_length);
 			if (!rx->remaining)
 				usbnet_skb_return(dev, rx->ax_skb);
 		}
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index c7a350bbaaa7..2952cb570996 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -162,7 +162,7 @@ static void rx_complete(struct urb *req)
 			skb = pnd->rx_skb = netdev_alloc_skb(dev, 12);
 			if (likely(skb)) {
 				/* Can't use pskb_pull() on page in IRQ */
-				memcpy(skb_put(skb, 1), page_address(page), 1);
+				skb_put_data(skb, page_address(page), 1);
 				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 						page, 1, req->actual_length,
 						PAGE_SIZE);
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index a6b997cffd3b..18fa45fc979b 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -399,7 +399,7 @@ static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_
 	memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN);
 
 	/* add datagram */
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, buf, len);
 
 	/* map MBIM session to VLAN */
 	if (tci)
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 7f02954772c6..8a4c8a1b9dd3 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1180,7 +1180,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ndp16->dpe16[index].wDatagramLength = cpu_to_le16(skb->len);
 		ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len);
 		ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16));
-		memcpy(skb_put(skb_out, skb->len), skb->data, skb->len);
+		skb_put_data(skb_out, skb->data, skb->len);
 		ctx->tx_curr_frame_payload += skb->len;	/* count real tx payload data */
 		dev_kfree_skb_any(skb);
 		skb = NULL;
@@ -1229,7 +1229,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
 		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max);
 		nth16->wNdpIndex = cpu_to_le16(skb_out->len);
-		memcpy(skb_put(skb_out, ctx->max_ndp_size), ctx->delayed_ndp16, ctx->max_ndp_size);
+		skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size);
 
 		/* Zero out delayed NDP - signature checking will naturally fail. */
 		ndp16 = memset(ctx->delayed_ndp16, 0, ctx->max_ndp_size);
@@ -1497,7 +1497,7 @@ next_ndp:
 			skb = netdev_alloc_skb_ip_align(dev->net,  len);
 			if (!skb)
 				goto error;
-			memcpy(skb_put(skb, len), skb_in->data + offset, len);
+			skb_put_data(skb, skb_in->data + offset, len);
 			usbnet_skb_return(dev, skb);
 			payload += len;	/* count payload bytes in this NTB */
 		}
diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c
index 1cc24e6f23e2..29276e54bb8b 100644
--- a/drivers/net/usb/gl620a.c
+++ b/drivers/net/usb/gl620a.c
@@ -121,8 +121,7 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		if (gl_skb) {
 
 			// copy the packet data to the new skb
-			memcpy(skb_put(gl_skb, size),
-					packet->packet_data, size);
+			skb_put_data(gl_skb, packet->packet_data, size);
 			usbnet_skb_return(dev, gl_skb);
 		}
 
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 00067a0c51ca..908ada4ca21c 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -911,11 +911,9 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
 
 				/* Copy what we got so far. make room for iphdr
 				 * after tail. */
-				tmp_rx_buf =
-				    skb_put(odev->skb_rx_buf,
-					    sizeof(struct iphdr));
-				memcpy(tmp_rx_buf, (char *)&(odev->rx_ip_hdr),
-				       sizeof(struct iphdr));
+				tmp_rx_buf = skb_put_data(odev->skb_rx_buf,
+							  (char *)&(odev->rx_ip_hdr),
+							  sizeof(struct iphdr));
 
 				/* ETH_HLEN */
 				odev->rx_buf_size = sizeof(struct iphdr);
@@ -934,8 +932,9 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
 			/* Copy the rest of the bytes that are left in the
 			 * buffer into the waiting sk_buf. */
 			/* Make room for temp_bytes after tail. */
-			tmp_rx_buf = skb_put(odev->skb_rx_buf, temp_bytes);
-			memcpy(tmp_rx_buf, ip_pkt + buffer_offset, temp_bytes);
+			tmp_rx_buf = skb_put_data(odev->skb_rx_buf,
+						  ip_pkt + buffer_offset,
+						  temp_bytes);
 
 			odev->rx_buf_missing -= temp_bytes;
 			count -= temp_bytes;
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 76465b117b72..0f213ea22c75 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -253,7 +253,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
 		return;
 	}
 
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, buf, len);
 	skb->dev = dev->net;
 	skb->protocol = eth_type_trans(skb, dev->net);
 
diff --git a/drivers/net/usb/lg-vl600.c b/drivers/net/usb/lg-vl600.c
index 5714107533bb..d633492bf9eb 100644
--- a/drivers/net/usb/lg-vl600.c
+++ b/drivers/net/usb/lg-vl600.c
@@ -135,7 +135,7 @@ static int vl600_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		}
 
 		buf = s->current_rx_buf;
-		memcpy(skb_put(buf, skb->len), skb->data, skb->len);
+		skb_put_data(buf, skb->data, skb->len);
 	} else if (skb->len < 4) {
 		netif_err(dev, ifup, dev->net, "Frame too short\n");
 		dev->net->stats.rx_length_errors++;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 32a22f4e8356..ffd229ec8352 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -188,7 +188,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 			goto skip;
 		}
 
-		memcpy(skb_put(skbn, len), skb->data + offset, len);
+		skb_put_data(skbn, skb->data + offset, len);
 		if (netif_rx(skbn) != NET_RX_SUCCESS)
 			return 0;
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1f8c15cb63b0..6bacbd2f0eca 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -305,7 +305,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	copy = len;
 	if (copy > skb_tailroom(skb))
 		copy = skb_tailroom(skb);
-	memcpy(skb_put(skb, copy), p, copy);
+	skb_put_data(skb, p, copy);
 
 	len -= copy;
 	offset += copy;
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 33265eb50420..bd46b2552980 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -857,7 +857,7 @@ fst_rx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 
 	dbg(DBG_TX, "fst_rx_dma_complete\n");
 	pi = port->index;
-	memcpy(skb_put(skb, len), card->rx_dma_handle_host, len);
+	skb_put_data(skb, card->rx_dma_handle_host, len);
 
 	/* Reset buffer descriptor */
 	FST_WRB(card, rxDescrRing[pi][rxp].bits, DMA_OWN);
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 47fdb87d3567..f5b4ad45831a 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -234,9 +234,9 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code,
 	cp->len = htons(sizeof(struct cp_header) + magic_len + len);
 
 	if (magic_len)
-		memcpy(skb_put(skb, magic_len), &magic, magic_len);
+		skb_put_data(skb, &magic, magic_len);
 	if (len)
-		memcpy(skb_put(skb, len), data, len);
+		skb_put_data(skb, data, len);
 
 #if DEBUG_CP
 	BUG_ON(code >= CP_CODES);
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 878b05d06fc7..40ee80c03c94 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -202,7 +202,7 @@ static void x25_asy_bump(struct x25_asy *sl)
 		return;
 	}
 	skb_push(skb, 1);	/* LAPB internal control */
-	memcpy(skb_put(skb, count), sl->rbuff, count);
+	skb_put_data(skb, sl->rbuff, count);
 	skb->protocol = x25_type_trans(skb, sl->dev);
 	err = lapb_data_received(skb->dev, skb);
 	if (err != LAPB_OK) {
diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index 7f64e74d746b..dd7f3168c07d 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c
@@ -488,7 +488,7 @@ void i2400m_net_rx(struct i2400m *i2400m, struct sk_buff *skb_rx,
 			net_dev->stats.rx_dropped++;
 			goto error_skb_realloc;
 		}
-		memcpy(skb_put(skb, buf_len), buf, buf_len);
+		skb_put_data(skb, buf, buf_len);
 	}
 	i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
 				  skb->data - ETH_HLEN,
diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c
index ed626f568b58..5f64f3928c35 100644
--- a/drivers/net/wireless/admtek/adm8211.c
+++ b/drivers/net/wireless/admtek/adm8211.c
@@ -390,9 +390,9 @@ static void adm8211_interrupt_rci(struct ieee80211_hw *dev)
 					priv->pdev,
 					priv->rx_buffers[entry].mapping,
 					pktlen, PCI_DMA_FROMDEVICE);
-				memcpy(skb_put(skb, pktlen),
-				       skb_tail_pointer(priv->rx_buffers[entry].skb),
-				       pktlen);
+				skb_put_data(skb,
+					     skb_tail_pointer(priv->rx_buffers[entry].skb),
+					     pktlen);
 				pci_dma_sync_single_for_device(
 					priv->pdev,
 					priv->rx_buffers[entry].mapping,
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 4674ff33d320..16cf250f6c39 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3475,9 +3475,8 @@ static void ath10k_tx_h_add_p2p_noa_ie(struct ath10k *ar,
 		if (arvif->u.ap.noa_data)
 			if (!pskb_expand_head(skb, 0, arvif->u.ap.noa_len,
 					      GFP_ATOMIC))
-				memcpy(skb_put(skb, arvif->u.ap.noa_len),
-				       arvif->u.ap.noa_data,
-				       arvif->u.ap.noa_len);
+				skb_put_data(skb, arvif->u.ap.noa_data,
+					     arvif->u.ap.noa_len);
 		spin_unlock_bh(&ar->data_lock);
 	}
 }
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 6afc8d27f0d5..a66e2482897f 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -3304,9 +3304,8 @@ static void ath10k_wmi_update_noa(struct ath10k *ar, struct ath10k_vif *arvif,
 
 	if (arvif->u.ap.noa_data)
 		if (!pskb_expand_head(bcn, 0, arvif->u.ap.noa_len, GFP_ATOMIC))
-			memcpy(skb_put(bcn, arvif->u.ap.noa_len),
-			       arvif->u.ap.noa_data,
-			       arvif->u.ap.noa_len);
+			skb_put_data(bcn, arvif->u.ap.noa_data,
+			             arvif->u.ap.noa_len);
 }
 
 static int ath10k_wmi_op_pull_swba_ev(struct ath10k *ar, struct sk_buff *skb,
diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c
index 373b1e9457fd..f0439f2d566b 100644
--- a/drivers/net/wireless/ath/ath9k/channel.c
+++ b/drivers/net/wireless/ath/ath9k/channel.c
@@ -1005,7 +1005,7 @@ static void ath_scan_send_probe(struct ath_softc *sc,
 		info->flags |= IEEE80211_TX_CTL_NO_CCK_RATE;
 
 	if (req->ie_len)
-		memcpy(skb_put(skb, req->ie_len), req->ie, req->ie_len);
+		skb_put_data(skb, req->ie, req->ie_len);
 
 	skb_set_queue_mapping(skb, IEEE80211_AC_VO);
 
@@ -1521,8 +1521,7 @@ void ath9k_beacon_add_noa(struct ath_softc *sc, struct ath_vif *avp,
 	noa_desc = !!avp->offchannel_duration + !!avp->noa_duration;
 	noa_len = 2 + sizeof(struct ieee80211_p2p_noa_desc) * noa_desc;
 
-	hdr = skb_put(skb, sizeof(noa_ie_hdr));
-	memcpy(hdr, noa_ie_hdr, sizeof(noa_ie_hdr));
+	hdr = skb_put_data(skb, noa_ie_hdr, sizeof(noa_ie_hdr));
 	hdr[1] = sizeof(noa_ie_hdr) + noa_len - 2;
 	hdr[7] = noa_len;
 
diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
index 9c16e2a6d185..c51c69b1ad96 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.c
+++ b/drivers/net/wireless/ath/ath9k/wmi.c
@@ -312,8 +312,7 @@ int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
 	skb_reserve(skb, headroom);
 
 	if (cmd_len != 0 && cmd_buf != NULL) {
-		data = (u8 *) skb_put(skb, cmd_len);
-		memcpy(data, cmd_buf, cmd_len);
+		data = skb_put_data(skb, cmd_buf, cmd_len);
 	}
 
 	mutex_lock(&wmi->op_mutex);
diff --git a/drivers/net/wireless/ath/carl9170/rx.c b/drivers/net/wireless/ath/carl9170/rx.c
index b2166726b05d..705063259c8f 100644
--- a/drivers/net/wireless/ath/carl9170/rx.c
+++ b/drivers/net/wireless/ath/carl9170/rx.c
@@ -481,7 +481,7 @@ static struct sk_buff *carl9170_rx_copy_data(u8 *buf, int len)
 	skb = dev_alloc_skb(len + reserved);
 	if (likely(skb)) {
 		skb_reserve(skb, reserved);
-		memcpy(skb_put(skb, len), buf, len);
+		skb_put_data(skb, buf, len);
 	}
 
 	return skb;
@@ -916,7 +916,7 @@ static void carl9170_rx_stream(struct ar9170 *ar, void *buf, unsigned int len)
 				}
 			}
 
-			memcpy(skb_put(ar->rx_failover, tlen), tbuf, tlen);
+			skb_put_data(ar->rx_failover, tbuf, tlen);
 			ar->rx_failover_missing -= tlen;
 
 			if (ar->rx_failover_missing <= 0) {
@@ -958,7 +958,7 @@ static void carl9170_rx_stream(struct ar9170 *ar, void *buf, unsigned int len)
 			 * the rx - descriptor comes round again.
 			 */
 
-			memcpy(skb_put(ar->rx_failover, tlen), tbuf, tlen);
+			skb_put_data(ar->rx_failover, tbuf, tlen);
 			ar->rx_failover_missing = clen - tlen;
 			return;
 		}
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 814c35645b73..cff9c585972f 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -681,7 +681,7 @@ static void wmi_evt_eapol_rx(struct wil6210_priv *wil, int id,
 	ether_addr_copy(eth->h_dest, ndev->dev_addr);
 	ether_addr_copy(eth->h_source, evt->src_mac);
 	eth->h_proto = cpu_to_be16(ETH_P_PAE);
-	memcpy(skb_put(skb, eapol_len), evt->eapol, eapol_len);
+	skb_put_data(skb, evt->eapol, eapol_len);
 	skb->protocol = eth_type_trans(skb, ndev);
 	if (likely(netif_rx_ni(skb) == NET_RX_SUCCESS)) {
 		ndev->stats.rx_packets++;
diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index 27b110dc8cc6..b68436b23a63 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -1036,9 +1036,8 @@ static void frag_rx_path(struct atmel_private *priv,
 				priv->dev->stats.rx_dropped++;
 			} else {
 				skb_reserve(skb, 2);
-				memcpy(skb_put(skb, priv->frag_len + 12),
-				       priv->rx_buf,
-				       priv->frag_len + 12);
+				skb_put_data(skb, priv->rx_buf,
+				             priv->frag_len + 12);
 				skb->protocol = eth_type_trans(skb, priv->dev);
 				skb->ip_summed = CHECKSUM_NONE;
 				netif_rx(skb);
diff --git a/drivers/net/wireless/broadcom/b43legacy/dma.c b/drivers/net/wireless/broadcom/b43legacy/dma.c
index f9dd892b9f27..cfa617ddb2f1 100644
--- a/drivers/net/wireless/broadcom/b43legacy/dma.c
+++ b/drivers/net/wireless/broadcom/b43legacy/dma.c
@@ -1072,7 +1072,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring,
 			goto out_unmap_hdr;
 		}
 
-		memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len);
+		skb_put_data(bounce_skb, skb->data, skb->len);
 		memcpy(bounce_skb->cb, skb->cb, sizeof(skb->cb));
 		bounce_skb->dev = skb->dev;
 		skb_set_queue_mapping(bounce_skb, skb_get_queue_mapping(skb));
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index bbc579b647b6..e0c690b48d4e 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -10274,8 +10274,9 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct libipw_txb *txb,
 
 				printk(KERN_INFO "Adding frag %d %d...\n",
 				       j, size);
-				memcpy(skb_put(skb, size),
-				       txb->fragments[j]->data + hdr_len, size);
+				skb_put_data(skb,
+					     txb->fragments[j]->data + hdr_len,
+					     size);
 			}
 			dev_kfree_skb_any(txb->fragments[i]);
 			txb->fragments[i] = skb;
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
index 048f1e3ada11..5339d1eeb2f7 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
@@ -359,7 +359,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto failed;
 
 		skb_reserve(skb_new, crypt->ops->extra_msdu_prefix_len);
-		memcpy(skb_put(skb_new, hdr_len), &header, hdr_len);
+		skb_put_data(skb_new, &header, hdr_len);
 		snapped = 1;
 		libipw_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)),
 				    ether_type);
@@ -470,9 +470,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_reserve(skb_frag,
 				    crypt->ops->extra_mpdu_prefix_len);
 
-		frag_hdr =
-		    (struct libipw_hdr_3addrqos *)skb_put(skb_frag, hdr_len);
-		memcpy(frag_hdr, &header, hdr_len);
+		frag_hdr = skb_put_data(skb_frag, &header, hdr_len);
 
 		/* If this is not the last fragment, then add the MOREFRAGS
 		 * bit to the frame control */
diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c
index 080ea8155b90..dbf164d48ed3 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945.c
@@ -520,7 +520,7 @@ il3945_pass_packet_to_mac80211(struct il_priv *il, struct il_rx_buf *rxb,
 	 * and do not consume a full page
 	 */
 	if (len <= SMALL_PACKET_SIZE) {
-		memcpy(skb_put(skb, len), rx_hdr->payload, len);
+		skb_put_data(skb, rx_hdr->payload, len);
 	} else {
 		skb_add_rx_frag(skb, 0, rxb->page,
 				(void *)rx_hdr->payload - (void *)pkt, len,
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index 49a2ff15ddae..5b51fba75595 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -606,7 +606,7 @@ il4965_pass_packet_to_mac80211(struct il_priv *il, struct ieee80211_hdr *hdr,
 	}
 
 	if (len <= SMALL_PACKET_SIZE) {
-		memcpy(skb_put(skb, len), hdr, len);
+		skb_put_data(skb, hdr, len);
 	} else {
 		skb_add_rx_frag(skb, 0, rxb->page, (void *)hdr - rxb_addr(rxb),
 				len, PAGE_SIZE << il->hw_params.rx_page_order);
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
index adfd6307edca..eaad7389b67c 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
@@ -657,7 +657,7 @@ static void iwlagn_pass_packet_to_mac80211(struct iwl_priv *priv,
 	 */
 	hdrlen = (len <= skb_tailroom(skb)) ? len : sizeof(*hdr);
 
-	memcpy(skb_put(skb, hdrlen), hdr, hdrlen);
+	skb_put_data(skb, hdr, hdrlen);
 	fraglen = len - hdrlen;
 
 	if (fraglen) {
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c
index 4b97371c3b42..adaa2f0097cc 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c
@@ -319,8 +319,7 @@ int iwlagn_tx_skb(struct iwl_priv *priv,
 		if (noa_data &&
 		    pskb_expand_head(skb, 0, noa_data->length,
 				     GFP_ATOMIC) == 0) {
-			memcpy(skb_put(skb, noa_data->length),
-			       noa_data->data, noa_data->length);
+			skb_put_data(skb, noa_data->data, noa_data->length);
 			hdr = (struct ieee80211_hdr *)skb->data;
 		}
 	}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 119a3bd92c50..7a56a0ac151c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1431,7 +1431,7 @@ static void iwl_mvm_report_wakeup_reasons(struct iwl_mvm *mvm,
 			if (!pkt)
 				goto report;
 
-			memcpy(skb_put(pkt, hdrlen), pktdata, hdrlen);
+			skb_put_data(pkt, pktdata, hdrlen);
 			pktdata += hdrlen;
 			pktsize -= hdrlen;
 
@@ -1463,7 +1463,7 @@ static void iwl_mvm_report_wakeup_reasons(struct iwl_mvm *mvm,
 			pktsize -= ivlen + icvlen;
 			pktdata += ivlen;
 
-			memcpy(skb_put(pkt, pktsize), pktdata, pktsize);
+			skb_put_data(pkt, pktdata, pktsize);
 
 			if (ieee80211_data_to_8023(pkt, vif->addr, vif->type))
 				goto report;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index fd2fc46e2fe5..15d13017c1df 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1596,7 +1596,7 @@ void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm,
 					       rx_status.band);
 
 	/* copy the data */
-	memcpy(skb_put(skb, size), sb->data, size);
+	skb_put_data(skb, sb->data, size);
 	memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status));
 
 	/* pass it as regular rx to mac80211 */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index fd1dd06c4f18..2c07719aa45c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -133,7 +133,7 @@ static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
 	 */
 	hdrlen = (len <= skb_tailroom(skb)) ? len : hdrlen + crypt_len + 8;
 
-	memcpy(skb_put(skb, hdrlen), hdr, hdrlen);
+	skb_put_data(skb, hdr, hdrlen);
 	fraglen = len - hdrlen;
 
 	if (fraglen) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 966cd7543629..cf48390f6f68 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -183,9 +183,8 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr,
 	 * present before copying packet data.
 	 */
 	hdrlen += crypt_len;
-	memcpy(skb_put(skb, hdrlen), hdr, hdrlen);
-	memcpy(skb_put(skb, headlen - hdrlen), (u8 *)hdr + hdrlen + pad_len,
-	       headlen - hdrlen);
+	skb_put_data(skb, hdr, hdrlen);
+	skb_put_data(skb, (u8 *)hdr + hdrlen + pad_len, headlen - hdrlen);
 
 	fraglen = len - headlen;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 386950a2d616..01013d273aa7 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -2141,8 +2141,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 							htons(ETH_P_IPV6),
 						    data_left);
 
-			memcpy(skb_put(csum_skb, tcp_hdrlen(skb)),
-			       tcph, tcp_hdrlen(skb));
+			skb_put_data(csum_skb, tcph, tcp_hdrlen(skb));
 			skb_reset_transport_header(csum_skb);
 			csum_skb->csum_start =
 				(unsigned char *)tcp_hdr(csum_skb) -
@@ -2176,7 +2175,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 			dma_addr_t tb_phys;
 
 			if (trans_pcie->sw_csum_tx)
-				memcpy(skb_put(csum_skb, size), tso.data, size);
+				skb_put_data(csum_skb, tso.data, size);
 
 			tb_phys = dma_map_single(trans->dev, tso.data,
 						 size, DMA_TO_DEVICE);
diff --git a/drivers/net/wireless/intersil/hostap/hostap_80211_tx.c b/drivers/net/wireless/intersil/hostap/hostap_80211_tx.c
index 055e11d353ca..c1b10d5117ad 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_80211_tx.c
@@ -242,7 +242,7 @@ netdev_tx_t hostap_data_start_xmit(struct sk_buff *skb,
 		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
 	memcpy(skb_push(skb, hdr_len), &hdr, hdr_len);
 	if (use_wds == WDS_OWN_FRAME) {
-		memcpy(skb_put(skb, ETH_ALEN), &hdr.addr4, ETH_ALEN);
+		skb_put_data(skb, &hdr.addr4, ETH_ALEN);
 	}
 
 	iface->stats.tx_packets++;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index 89b5987303a4..91757defb9be 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -1000,7 +1000,7 @@ static void prism2_send_mgmt(struct net_device *dev,
 	hdrlen = hostap_80211_get_hdrlen(cpu_to_le16(type_subtype));
 	hdr = skb_put_zero(skb, hdrlen);
 	if (body)
-		memcpy(skb_put(skb, body_len), body, body_len);
+		skb_put_data(skb, body, body_len);
 
 	/* FIX: ctrl::ack sending used special HFA384X_TX_CTRL_802_11
 	 * tx_control instead of using local->tx_control */
diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c
index d4f0b730796e..72b46eaf3de2 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_hw.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c
@@ -2005,7 +2005,7 @@ static void prism2_rx(local_info_t *local)
 		goto rx_dropped;
 	}
 	skb->dev = dev;
-	memcpy(skb_put(skb, hdr_len), &rxdesc, hdr_len);
+	skb_put_data(skb, &rxdesc, hdr_len);
 
 	if (len > 0)
 		res = hfa384x_from_bap(dev, BAP0, skb_put(skb, len), len);
@@ -2209,9 +2209,9 @@ static void hostap_tx_callback(local_info_t *local,
 		return;
 	}
 
-	memcpy(skb_put(skb, hdrlen), (void *) &txdesc->frame_control, hdrlen);
+	skb_put_data(skb, (void *)&txdesc->frame_control, hdrlen);
 	if (payload)
-		memcpy(skb_put(skb, len), payload, len);
+		skb_put_data(skb, payload, len);
 
 	skb->dev = local->dev;
 	skb_reset_mac_header(skb);
@@ -2362,8 +2362,7 @@ static void prism2_txexc(local_info_t *local)
 		struct sk_buff *skb;
 		skb = dev_alloc_skb(sizeof(txdesc));
 		if (skb) {
-			memcpy(skb_put(skb, sizeof(txdesc)), &txdesc,
-			       sizeof(txdesc));
+			skb_put_data(skb, &txdesc, sizeof(txdesc));
 			skb_queue_tail(&local->sta_tx_exc_list, skb);
 			tasklet_schedule(&local->sta_tx_exc_tasklet);
 		}
@@ -2460,7 +2459,7 @@ static void prism2_info(local_info_t *local)
 		goto out;
 	}
 
-	memcpy(skb_put(skb, sizeof(info)), &info, sizeof(info));
+	skb_put_data(skb, &info, sizeof(info));
 	if (left > 0 && hfa384x_from_bap(dev, BAP0, skb_put(skb, left), left))
 	{
 		spin_unlock(&local->baplock);
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index 400f9b5d620e..a3c066f90afc 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -1045,7 +1045,7 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype,
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, dst, ETH_ALEN);
 	if (body)
-		memcpy(skb_put(skb, bodylen), body, bodylen);
+		skb_put_data(skb, body, bodylen);
 
 	meta = (struct hostap_skb_tx_data *) skb->cb;
 	memset(meta, 0, sizeof(*meta));
diff --git a/drivers/net/wireless/intersil/orinoco/main.c b/drivers/net/wireless/intersil/orinoco/main.c
index d9128bb25e85..f7abc439fb92 100644
--- a/drivers/net/wireless/intersil/orinoco/main.c
+++ b/drivers/net/wireless/intersil/orinoco/main.c
@@ -792,7 +792,7 @@ static void orinoco_rx_monitor(struct net_device *dev, u16 rxfid,
 	}
 
 	/* Copy the 802.11 header to the skb */
-	memcpy(skb_put(skb, hdrlen), &(desc->frame_ctl), hdrlen);
+	skb_put_data(skb, &(desc->frame_ctl), hdrlen);
 	skb_reset_mac_header(skb);
 
 	/* If any, copy the data from the card to the skb */
diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c
index 7ab2f43ab425..e41bf042352e 100644
--- a/drivers/net/wireless/intersil/p54/p54spi.c
+++ b/drivers/net/wireless/intersil/p54/p54spi.c
@@ -372,9 +372,9 @@ static int p54spi_rx(struct p54s_priv *priv)
 	}
 
 	if (len <= READAHEAD_SZ) {
-		memcpy(skb_put(skb, len), rx_head + 1, len);
+		skb_put_data(skb, rx_head + 1, len);
 	} else {
-		memcpy(skb_put(skb, READAHEAD_SZ), rx_head + 1, READAHEAD_SZ);
+		skb_put_data(skb, rx_head + 1, READAHEAD_SZ);
 		p54spi_spi_read(priv, SPI_ADRS_DMA_DATA,
 				skb_put(skb, len - READAHEAD_SZ),
 				len - READAHEAD_SZ);
diff --git a/drivers/net/wireless/intersil/p54/txrx.c b/drivers/net/wireless/intersil/p54/txrx.c
index 60f9b678ef74..b00c07d72f95 100644
--- a/drivers/net/wireless/intersil/p54/txrx.c
+++ b/drivers/net/wireless/intersil/p54/txrx.c
@@ -905,8 +905,9 @@ void p54_tx_80211(struct ieee80211_hw *dev,
 		if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) {
 			/* reserve space for the MIC key */
 			len += 8;
-			memcpy(skb_put(skb, 8), &(info->control.hw_key->key
-				[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]), 8);
+			skb_put_data(skb,
+				     &(info->control.hw_key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]),
+				     8);
 		}
 		/* reserve some space for ICV */
 		len += info->control.hw_key->icv_len;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index c854a557998b..1d6e180052b8 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2020,8 +2020,7 @@ static void hw_scan_work(struct work_struct *work)
 			memcpy(mgmt->bssid, req->bssid, ETH_ALEN);
 
 			if (req->ie_len)
-				memcpy(skb_put(probe, req->ie_len), req->ie,
-				       req->ie_len);
+				skb_put_data(probe, req->ie, req->ie_len);
 
 			local_bh_disable();
 			mac80211_hwsim_tx_frame(hwsim->hw, probe,
@@ -3021,7 +3020,7 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 		goto err;
 
 	/* Copy the data */
-	memcpy(skb_put(skb, frame_data_len), frame_data, frame_data_len);
+	skb_put_data(skb, frame_data, frame_data_len);
 
 	data2 = get_hwsim_data_ref_from_addr(dst);
 	if (!data2)
diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c
index e0196208ab0d..a9e2b06b3175 100644
--- a/drivers/net/wireless/marvell/libertas/if_sdio.c
+++ b/drivers/net/wireless/marvell/libertas/if_sdio.c
@@ -256,9 +256,7 @@ static int if_sdio_handle_data(struct if_sdio_card *card,
 
 	skb_reserve(skb, NET_IP_ALIGN);
 
-	data = skb_put(skb, size);
-
-	memcpy(data, buffer, size);
+	data = skb_put_data(skb, buffer, size);
 
 	lbs_process_rxed_packet(card->priv, skb);
 
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
index 53e67526f40d..bc12c37e7501 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
@@ -81,7 +81,7 @@ mwifiex_11n_form_amsdu_pkt(struct sk_buff *skb_aggr,
 	tx_header->eth803_hdr.h_proto = htons(skb_src->len + LLC_SNAP_LEN);
 
 	/* Add payload */
-	memcpy(skb_put(skb_aggr, skb_src->len), skb_src->data, skb_src->len);
+	skb_put_data(skb_aggr, skb_src->data, skb_src->len);
 
 	/* Add padding for new MSDU to start from 4 byte boundary */
 	*pad = (4 - ((unsigned long)skb_aggr->tail & 0x3)) % 4;
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 025bc06a19d6..c20e4944ef87 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -176,12 +176,10 @@ mwifiex_form_mgmt_frame(struct sk_buff *skb, const u8 *buf, size_t len)
 	memcpy(skb_push(skb, sizeof(pkt_type)), &pkt_type, sizeof(pkt_type));
 
 	/* Add packet data and address4 */
-	memcpy(skb_put(skb, sizeof(struct ieee80211_hdr_3addr)), buf,
-	       sizeof(struct ieee80211_hdr_3addr));
-	memcpy(skb_put(skb, ETH_ALEN), addr, ETH_ALEN);
-	memcpy(skb_put(skb, len - sizeof(struct ieee80211_hdr_3addr)),
-	       buf + sizeof(struct ieee80211_hdr_3addr),
-	       len - sizeof(struct ieee80211_hdr_3addr));
+	skb_put_data(skb, buf, sizeof(struct ieee80211_hdr_3addr));
+	skb_put_data(skb, addr, ETH_ALEN);
+	skb_put_data(skb, buf + sizeof(struct ieee80211_hdr_3addr),
+		     len - sizeof(struct ieee80211_hdr_3addr));
 
 	skb->priority = LOW_PRIO_TID;
 	__net_timestamp(skb);
diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c
index c76b7315af55..d38555fe4284 100644
--- a/drivers/net/wireless/marvell/mwifiex/tdls.c
+++ b/drivers/net/wireless/marvell/mwifiex/tdls.c
@@ -679,8 +679,7 @@ int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer,
 			return ret;
 		}
 		if (extra_ies_len)
-			memcpy(skb_put(skb, extra_ies_len), extra_ies,
-			       extra_ies_len);
+			skb_put_data(skb, extra_ies, extra_ies_len);
 		mwifiex_tdls_add_link_ie(skb, priv->curr_addr, peer,
 					 priv->cfg_bssid);
 		break;
@@ -693,8 +692,7 @@ int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer,
 			return ret;
 		}
 		if (extra_ies_len)
-			memcpy(skb_put(skb, extra_ies_len), extra_ies,
-			       extra_ies_len);
+			skb_put_data(skb, extra_ies, extra_ies_len);
 		mwifiex_tdls_add_link_ie(skb, peer, priv->curr_addr,
 					 priv->cfg_bssid);
 		break;
@@ -865,7 +863,7 @@ int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer,
 	}
 
 	if (extra_ies_len)
-		memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+		skb_put_data(skb, extra_ies, extra_ies_len);
 
 	/* the TDLS link IE is always added last we are the responder */
 
diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
index a8bc064bc14f..660267b359e4 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.c
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
@@ -52,7 +52,7 @@ mt7601u_rx_skb_from_seg(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 		goto bad_frame;
 
 	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_L2PAD)) {
-		memcpy(skb_put(skb, hdr_len), data, hdr_len);
+		skb_put_data(skb, data, hdr_len);
 
 		data += hdr_len + 2;
 		true_len -= hdr_len;
@@ -63,7 +63,7 @@ mt7601u_rx_skb_from_seg(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 	copy = (true_len <= skb_tailroom(skb)) ? true_len : hdr_len + 8;
 	frag = true_len - copy;
 
-	memcpy(skb_put(skb, copy), data, copy);
+	skb_put_data(skb, data, copy);
 	data += copy;
 
 	if (frag) {
diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c
index a9f5f398b2f8..65a8004418ea 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mcu.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c
@@ -68,7 +68,7 @@ mt7601u_mcu_msg_alloc(struct mt7601u_dev *dev, const void *data, int len)
 	skb = alloc_skb(len + MT_DMA_HDR_LEN + 4, GFP_KERNEL);
 	if (skb) {
 		skb_reserve(skb, MT_DMA_HDR_LEN);
-		memcpy(skb_put(skb, len), data, len);
+		skb_put_data(skb, data, len);
 	}
 
 	return skb;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
index 4814d90c8040..f93b27f3a236 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
@@ -213,7 +213,7 @@ static void qtnf_pcie_control_rx_callback(void *arg, const u8 *buf, size_t len)
 		return;
 	}
 
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, buf, len);
 
 	qtnf_trans_handle_rx_ctl_packet(bus, skb);
 }
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
index d8de484b5995..9844ff0add2b 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
@@ -35,8 +35,7 @@ qtnf_cmd_skb_put_buffer(struct sk_buff *skb, const u8 *buf_src, size_t len)
 {
 	u8 *buf_dst;
 
-	buf_dst = skb_put(skb, len);
-	memcpy(buf_dst, buf_src, len);
+	buf_dst = skb_put_data(skb, buf_src, len);
 }
 
 static inline void qtnf_cmd_skb_put_tlv_arr(struct sk_buff *skb,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
index 4a1bca1b1e26..b70985e126bf 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
@@ -203,9 +203,8 @@ void rt2x00debug_dump_frame(struct rt2x00_dev *rt2x00dev,
 	dump_hdr->timestamp_usec = cpu_to_le32(timestamp.tv_usec);
 
 	if (!(skbdesc->flags & SKBDESC_DESC_IN_SKB))
-		memcpy(skb_put(skbcopy, skbdesc->desc_len), skbdesc->desc,
-		       skbdesc->desc_len);
-	memcpy(skb_put(skbcopy, skb->len), skb->data, skb->len);
+		skb_put_data(skbcopy, skbdesc->desc, skbdesc->desc_len);
+	skb_put_data(skbcopy, skb->data, skb->len);
 
 	skb_queue_tail(&intf->frame_dump_skbqueue, skbcopy);
 	wake_up_interruptible(&intf->frame_dump_waitqueue);
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 2e6b888bd417..0c1f8307e179 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -735,8 +735,7 @@ static void _rtl_pci_rx_to_mac80211(struct ieee80211_hw *hw,
 		if (likely(uskb)) {
 			memcpy(IEEE80211_SKB_RXCB(uskb), &rx_status,
 			       sizeof(rx_status));
-			pdata = (u8 *)skb_put(uskb, skb->len);
-			memcpy(pdata, skb->data, skb->len);
+			pdata = skb_put_data(uskb, skb->data, skb->len);
 			dev_kfree_skb_any(skb);
 			ieee80211_rx_irqsafe(hw, uskb);
 		} else {
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c
index 21ed9ad3be7a..a2eca669873b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c
@@ -620,8 +620,7 @@ void rtl88e_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished)
 		      u1rsvdpageloc, 3);
 
 	skb = dev_alloc_skb(totalpacketlen);
-	memcpy(skb_put(skb, totalpacketlen),
-	       &reserved_page_packet, totalpacketlen);
+	skb_put_data(skb, &reserved_page_packet, totalpacketlen);
 
 	rtstatus = rtl_cmd_send_packet(hw, skb);
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
index 89a0a28b8b20..dd3ba4810e7d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
@@ -188,10 +188,9 @@ static bool _rtl92s_firmware_downloadcode(struct ieee80211_hw *hw,
 		if (!skb)
 			return false;
 		skb_reserve(skb, extra_descoffset);
-		seg_ptr = (u8 *)skb_put(skb, (u32)(frag_length -
-					extra_descoffset));
-		memcpy(seg_ptr, code_virtual_address + frag_offset,
-		       (u32)(frag_length - extra_descoffset));
+		seg_ptr = skb_put_data(skb,
+				       code_virtual_address + frag_offset,
+				       (u32)(frag_length - extra_descoffset));
 
 		tcb_desc = (struct rtl_tcb_desc *)(skb->cb);
 		tcb_desc->queue_index = TXCMD_QUEUE;
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 4d989b8ab185..5590d07d0918 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -653,7 +653,7 @@ static void _rtl_rx_completed(struct urb *_urb)
 		/* reserve some space for mac80211's radiotap */
 		skb_reserve(skb, __RADIO_TAP_SIZE_RSV);
 
-		memcpy(skb_put(skb, size), _urb->transfer_buffer, size);
+		skb_put_data(skb, _urb->transfer_buffer, size);
 
 		skb_queue_tail(&rtlusb->rx_queue, skb);
 		tasklet_schedule(&rtlusb->rx_work_tasklet);
diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index fac87c06357b..4433cec4367c 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -412,11 +412,9 @@ static int rsi_mgmt_pkt_to_core(struct rsi_common *common,
 			return -ENOMEM;
 		}
 
-		buffer = skb_put(skb, msg_len);
-
-		memcpy(buffer,
-		       (u8 *)(msg +  FRAME_DESC_SZ + pad_bytes),
-		       msg_len);
+		buffer = skb_put_data(skb,
+				      (u8 *)(msg + FRAME_DESC_SZ + pad_bytes),
+				      msg_len);
 
 		pkt_recv = buffer[0];
 
diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c
index 0a0ff7e31f5b..cc2ce60f4f09 100644
--- a/drivers/net/wireless/st/cw1200/scan.c
+++ b/drivers/net/wireless/st/cw1200/scan.c
@@ -84,7 +84,7 @@ int cw1200_hw_scan(struct ieee80211_hw *hw,
 		return -ENOMEM;
 
 	if (req->ie_len)
-		memcpy(skb_put(frame.skb, req->ie_len), req->ie, req->ie_len);
+		skb_put_data(frame.skb, req->ie, req->ie_len);
 
 	/* will be unlocked in cw1200_scan_work() */
 	down(&priv->scan.lock);
diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index bbf7604889b7..08f0477f78d9 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -1036,7 +1036,7 @@ static int wl1251_op_hw_scan(struct ieee80211_hw *hw,
 		goto out_idle;
 	}
 	if (req->ie_len)
-		memcpy(skb_put(skb, req->ie_len), req->ie, req->ie_len);
+		skb_put_data(skb, req->ie, req->ie_len);
 
 	ret = wl1251_cmd_template_set(wl, CMD_PROBE_REQ, skb->data,
 				      skb->len);
diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c
index 4a39fb13c478..229f4d01f239 100644
--- a/drivers/net/wireless/ti/wlcore/cmd.c
+++ b/drivers/net/wireless/ti/wlcore/cmd.c
@@ -1156,9 +1156,9 @@ int wl12xx_cmd_build_probe_req(struct wl1271 *wl, struct wl12xx_vif *wlvif,
 		goto out;
 	}
 	if (ie0_len)
-		memcpy(skb_put(skb, ie0_len), ie0, ie0_len);
+		skb_put_data(skb, ie0, ie0_len);
 	if (ie1_len)
-		memcpy(skb_put(skb, ie1_len), ie1, ie1_len);
+		skb_put_data(skb, ie1, ie1_len);
 
 	if (sched_scan &&
 	    (wl->quirks & WLCORE_QUIRK_DUAL_PROBE_TMPL)) {
diff --git a/drivers/net/wireless/ti/wlcore/rx.c b/drivers/net/wireless/ti/wlcore/rx.c
index 52a55f9acd80..53cd6d4d5b50 100644
--- a/drivers/net/wireless/ti/wlcore/rx.c
+++ b/drivers/net/wireless/ti/wlcore/rx.c
@@ -174,15 +174,13 @@ static int wl1271_rx_handle_data(struct wl1271 *wl, u8 *data, u32 length,
 	/* reserve the unaligned payload(if any) */
 	skb_reserve(skb, reserved);
 
-	buf = skb_put(skb, pkt_data_len);
-
 	/*
 	 * Copy packets from aggregation buffer to the skbs without rx
 	 * descriptor and with packet payload aligned care. In case of unaligned
 	 * packets copy the packets in offset of 2 bytes guarantee IP header
 	 * payload aligned to 4 bytes.
 	 */
-	memcpy(buf, data + sizeof(*desc), pkt_data_len);
+	buf = skb_put_data(skb, data + sizeof(*desc), pkt_data_len);
 	if (rx_align == WLCORE_RX_BUF_PADDED)
 		skb_pull(skb, RX_BUF_ALIGN);
 
diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c
index de7ff395977a..7f586d76cf17 100644
--- a/drivers/net/wireless/zydas/zd1201.c
+++ b/drivers/net/wireless/zydas/zd1201.c
@@ -326,13 +326,13 @@ static void zd1201_usbrx(struct urb *urb)
 			if (!(skb = dev_alloc_skb(datalen+24)))
 				goto resubmit;
 			
-			memcpy(skb_put(skb, 2), &data[datalen-16], 2);
-			memcpy(skb_put(skb, 2), &data[datalen-2], 2);
-			memcpy(skb_put(skb, 6), &data[datalen-14], 6);
-			memcpy(skb_put(skb, 6), &data[datalen-22], 6);
-			memcpy(skb_put(skb, 6), &data[datalen-8], 6);
-			memcpy(skb_put(skb, 2), &data[datalen-24], 2);
-			memcpy(skb_put(skb, len), data, len);
+			skb_put_data(skb, &data[datalen - 16], 2);
+			skb_put_data(skb, &data[datalen - 2], 2);
+			skb_put_data(skb, &data[datalen - 14], 6);
+			skb_put_data(skb, &data[datalen - 22], 6);
+			skb_put_data(skb, &data[datalen - 8], 6);
+			skb_put_data(skb, &data[datalen - 24], 2);
+			skb_put_data(skb, data, len);
 			skb->protocol = eth_type_trans(skb, zd->dev);
 			zd->dev->stats.rx_packets++;
 			zd->dev->stats.rx_bytes += skb->len;
@@ -359,9 +359,9 @@ static void zd1201_usbrx(struct urb *urb)
 				frag->skb = skb;
 				frag->seq = seq & IEEE80211_SCTL_SEQ;
 				skb_reserve(skb, 2);
-				memcpy(skb_put(skb, 12), &data[datalen-14], 12);
-				memcpy(skb_put(skb, 2), &data[6], 2);
-				memcpy(skb_put(skb, len), data+8, len);
+				skb_put_data(skb, &data[datalen - 14], 12);
+				skb_put_data(skb, &data[6], 2);
+				skb_put_data(skb, data + 8, len);
 				hlist_add_head(&frag->fnode, &zd->fraglist);
 				goto resubmit;
 			}
@@ -385,9 +385,9 @@ static void zd1201_usbrx(struct urb *urb)
 			if (!skb)
 				goto resubmit;
 			skb_reserve(skb, 2);
-			memcpy(skb_put(skb, 12), &data[datalen-14], 12);
-			memcpy(skb_put(skb, 2), &data[6], 2);
-			memcpy(skb_put(skb, len), data+8, len);
+			skb_put_data(skb, &data[datalen - 14], 12);
+			skb_put_data(skb, &data[6], 2);
+			skb_put_data(skb, data + 8, len);
 		}
 		skb->protocol = eth_type_trans(skb, zd->dev);
 		zd->dev->stats.rx_packets++;
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
index fe6517a621b0..2d929d2edb00 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
@@ -1103,7 +1103,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length)
 	}
 
 	/* FIXME : could we avoid this big memcpy ? */
-	memcpy(skb_put(skb, length), buffer, length);
+	skb_put_data(skb, buffer, length);
 
 	memcpy(IEEE80211_SKB_RXCB(skb), &stats, sizeof(stats));
 	ieee80211_rx_irqsafe(hw, skb);
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 7c1eaea3b685..badd8167ac73 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -228,8 +228,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 
 		skb_reserve(skb, NCI_CTRL_HDR_SIZE);
 
-		memcpy(skb_put(skb, payload_size), fw->data + (fw->size - len),
-		       payload_size);
+		skb_put_data(skb, fw->data + (fw->size - len), payload_size);
 
 		rc = nci_send_data(ndev, conn_id, skb);
 
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 712936f5d2d6..0877e2283f35 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -186,7 +186,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb)
 				goto flush;
 			}
 
-			memcpy(skb_put(*skb, len), tmp, len);
+			skb_put_data(*skb, tmp, len);
 			fdp_nci_i2c_dump_skb(&client->dev, "fdp_rd", *skb);
 
 			fdp_nci_i2c_remove_len_lrc(*skb);
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index c38bdd6a5a82..7c710458568e 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -324,10 +324,9 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
 			out_skb = alloc_lc_skb(priv, priv->fw_dnld.chunk_len);
 			if (!out_skb)
 				return -ENOMEM;
-			memcpy(skb_put(out_skb, priv->fw_dnld.chunk_len),
-			       ((uint8_t *)priv->fw_dnld.fw->data) +
-			       priv->fw_dnld.offset,
-			       priv->fw_dnld.chunk_len);
+			skb_put_data(out_skb,
+				     ((uint8_t *)priv->fw_dnld.fw->data) + priv->fw_dnld.offset,
+				     priv->fw_dnld.chunk_len);
 			nci_send_frame(priv->ndev, out_skb);
 			priv->fw_dnld.substate = SUBSTATE_WAIT_DATA_CREDIT;
 		}
diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index 78b7aa835c81..ffec103702f1 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -60,7 +60,7 @@ static int nfcmrvl_i2c_read(struct nfcmrvl_i2c_drv_data *drv_data,
 		return -ENOMEM;
 
 	/* Copy NCI header into the SKB */
-	memcpy(skb_put(*skb, NCI_CTRL_HDR_SIZE), &nci_hdr, NCI_CTRL_HDR_SIZE);
+	skb_put_data(*skb, &nci_hdr, NCI_CTRL_HDR_SIZE);
 
 	if (nci_hdr.plen) {
 		/* Read the NCI payload */
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 585a0f20835b..699aa9d16575 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -83,8 +83,8 @@ static void nfcmrvl_bulk_complete(struct urb *urb)
 		if (!skb) {
 			nfc_err(&drv_data->udev->dev, "failed to alloc mem\n");
 		} else {
-			memcpy(skb_put(skb, urb->actual_length),
-			       urb->transfer_buffer, urb->actual_length);
+			skb_put_data(skb, urb->transfer_buffer,
+				     urb->actual_length);
 			if (nfcmrvl_nci_recv_frame(drv_data->priv, skb) < 0)
 				nfc_err(&drv_data->udev->dev,
 					"corrupted Rx packet\n");
diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c
index 553011f58339..99ffee1dfd1e 100644
--- a/drivers/nfc/nxp-nci/firmware.c
+++ b/drivers/nfc/nxp-nci/firmware.c
@@ -124,8 +124,7 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info)
 	header |= chunk_len & NXP_NCI_FW_FRAME_LEN_MASK;
 	put_unaligned_be16(header, skb_put(skb, NXP_NCI_FW_HDR_LEN));
 
-	memcpy(skb_put(skb, chunk_len), fw_info->data + fw_info->written,
-	       chunk_len);
+	skb_put_data(skb, fw_info->data + fw_info->written, chunk_len);
 
 	crc = nxp_nci_fw_crc(skb->data, chunk_len + NXP_NCI_FW_HDR_LEN);
 	put_unaligned_be16(crc, skb_put(skb, NXP_NCI_FW_CRC_LEN));
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index ff22d761183c..198585bbc771 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -135,7 +135,7 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy,
 		goto fw_read_exit;
 	}
 
-	memcpy(skb_put(*skb, NXP_NCI_FW_HDR_LEN), &header, NXP_NCI_FW_HDR_LEN);
+	skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN);
 
 	r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len);
 	if (r != frame_len) {
@@ -176,8 +176,7 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy,
 		goto nci_read_exit;
 	}
 
-	memcpy(skb_put(*skb, NCI_CTRL_HDR_SIZE), (void *) &header,
-	       NCI_CTRL_HDR_SIZE);
+	skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE);
 
 	r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen);
 	if (r != header.plen) {
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 70c304504a29..9200bb308e42 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -1035,11 +1035,10 @@ static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev)
 	*skb_put(skb, 1) = PN533_INIT_TARGET_DEP;
 
 	/* MIFARE params */
-	memcpy(skb_put(skb, 6), mifare_params, 6);
+	skb_put_data(skb, mifare_params, 6);
 
 	/* Felica params */
-	felica = skb_put(skb, 18);
-	memcpy(felica, felica_params, 18);
+	felica = skb_put_data(skb, felica_params, 18);
 	get_random_bytes(felica + 2, 6);
 
 	/* NFCID3 */
@@ -1049,8 +1048,7 @@ static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev)
 	/* General bytes */
 	*skb_put(skb, 1) = gbytes_len;
 
-	gb = skb_put(skb, gbytes_len);
-	memcpy(gb, gbytes, gbytes_len);
+	gb = skb_put_data(skb, gbytes, gbytes_len);
 
 	/* Len Tk */
 	*skb_put(skb, 1) = 0;
@@ -1384,15 +1382,14 @@ static int pn533_poll_dep(struct nfc_dev *nfc_dev)
 	*next = 0;
 
 	/* Copy passive data */
-	memcpy(skb_put(skb, PASSIVE_DATA_LEN), passive_data, PASSIVE_DATA_LEN);
+	skb_put_data(skb, passive_data, PASSIVE_DATA_LEN);
 	*next |= 1;
 
 	/* Copy NFCID3 (which is NFCID2 from SENSF_RES) */
-	memcpy(skb_put(skb, NFC_NFCID3_MAXSIZE), nfcid3,
-	       NFC_NFCID3_MAXSIZE);
+	skb_put_data(skb, nfcid3, NFC_NFCID3_MAXSIZE);
 	*next |= 2;
 
-	memcpy(skb_put(skb, dev->gb_len), dev->gb, dev->gb_len);
+	skb_put_data(skb, dev->gb, dev->gb_len);
 	*next |= 4; /* We have some Gi */
 
 	rc = pn533_send_cmd_async(dev, PN533_CMD_IN_JUMP_FOR_DEP, skb,
@@ -1472,7 +1469,7 @@ static struct sk_buff *pn533_alloc_poll_in_frame(struct pn533 *dev,
 	if (!skb)
 		return NULL;
 
-	memcpy(skb_put(skb, mod->len), &mod->data, mod->len);
+	skb_put_data(skb, &mod->data, mod->len);
 
 	return skb;
 }
@@ -1858,7 +1855,7 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	*next = 0;
 
 	/* Copy passive data */
-	memcpy(skb_put(skb, PASSIVE_DATA_LEN), passive_data, PASSIVE_DATA_LEN);
+	skb_put_data(skb, passive_data, PASSIVE_DATA_LEN);
 	*next |= 1;
 
 	/* Copy NFCID3 (which is NFCID2 from SENSF_RES) */
@@ -1866,12 +1863,11 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
 		memcpy(skb_put(skb, NFC_NFCID3_MAXSIZE), target->nfcid2,
 		       target->nfcid2_len);
 	else
-		memcpy(skb_put(skb, NFC_NFCID3_MAXSIZE), nfcid3,
-		       NFC_NFCID3_MAXSIZE);
+		skb_put_data(skb, nfcid3, NFC_NFCID3_MAXSIZE);
 	*next |= 2;
 
 	if (gb != NULL && gb_len > 0) {
-		memcpy(skb_put(skb, gb_len), gb, gb_len);
+		skb_put_data(skb, gb, gb_len);
 		*next |= 4; /* We have some Gi */
 	} else {
 		*next = 0;
@@ -2100,7 +2096,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb)
 				*skb_push(frag, sizeof(u8)) =  1; /* TG */
 		}
 
-		memcpy(skb_put(frag, frag_size), skb->data, frag_size);
+		skb_put_data(frag, skb->data, frag_size);
 
 		/* Reduce the size of incoming buffer */
 		skb_pull(skb, frag_size);
@@ -2375,7 +2371,7 @@ static int pn533_set_configuration(struct pn533 *dev, u8 cfgitem, u8 *cfgdata,
 		return -ENOMEM;
 
 	*skb_put(skb, sizeof(cfgitem)) = cfgitem;
-	memcpy(skb_put(skb, cfgdata_len), cfgdata, cfgdata_len);
+	skb_put_data(skb, cfgdata, cfgdata_len);
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_RF_CONFIGURATION, skb);
 	if (IS_ERR(resp))
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index 8ed203ea21ea..e153e8b64bb8 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -75,8 +75,8 @@ static void pn533_recv_response(struct urb *urb)
 		if (!skb) {
 			nfc_err(&phy->udev->dev, "failed to alloc memory\n");
 		} else {
-			memcpy(skb_put(skb, urb->actual_length),
-			       urb->transfer_buffer, urb->actual_length);
+			skb_put_data(skb, urb->transfer_buffer,
+				     urb->actual_length);
 		}
 	}
 
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index 19be93e177fe..e1260da73d45 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -1089,9 +1089,8 @@ static int port100_in_set_rf(struct nfc_digital_dev *ddev, u8 rf)
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, sizeof(struct port100_in_rf_setting)),
-	       &in_rf_settings[rf],
-	       sizeof(struct port100_in_rf_setting));
+	skb_put_data(skb, &in_rf_settings[rf],
+		     sizeof(struct port100_in_rf_setting));
 
 	resp = port100_send_cmd_sync(dev, PORT100_CMD_IN_SET_RF, skb);
 
@@ -1133,7 +1132,7 @@ static int port100_in_set_framing(struct nfc_digital_dev *ddev, int param)
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, size), protocols, size);
+	skb_put_data(skb, protocols, size);
 
 	resp = port100_send_cmd_sync(dev, PORT100_CMD_IN_SET_PROTOCOL, skb);
 
@@ -1247,9 +1246,8 @@ static int port100_tg_set_rf(struct nfc_digital_dev *ddev, u8 rf)
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, sizeof(struct port100_tg_rf_setting)),
-	       &tg_rf_settings[rf],
-	       sizeof(struct port100_tg_rf_setting));
+	skb_put_data(skb, &tg_rf_settings[rf],
+		     sizeof(struct port100_tg_rf_setting));
 
 	resp = port100_send_cmd_sync(dev, PORT100_CMD_TG_SET_RF, skb);
 
@@ -1291,7 +1289,7 @@ static int port100_tg_set_framing(struct nfc_digital_dev *ddev, int param)
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, size), protocols, size);
+	skb_put_data(skb, protocols, size);
 
 	resp = port100_send_cmd_sync(dev, PORT100_CMD_TG_SET_PROTOCOL, skb);
 
diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index 5f97da1947e3..38548bd970cd 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -76,9 +76,9 @@ static int s3fwrn5_fw_prep_msg(struct s3fwrn5_fw_info *fw_info,
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, S3FWRN5_FW_HDR_SIZE), &hdr, S3FWRN5_FW_HDR_SIZE);
+	skb_put_data(skb, &hdr, S3FWRN5_FW_HDR_SIZE);
 	if (len)
-		memcpy(skb_put(skb, len), data, len);
+		skb_put_data(skb, data, len);
 
 	*msg = skb;
 
diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c
index 3ed0adf6479b..3f09d7fd2285 100644
--- a/drivers/nfc/s3fwrn5/i2c.c
+++ b/drivers/nfc/s3fwrn5/i2c.c
@@ -157,7 +157,7 @@ static int s3fwrn5_i2c_read(struct s3fwrn5_i2c_phy *phy)
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, hdr_size), hdr, hdr_size);
+	skb_put_data(skb, hdr, hdr_size);
 
 	if (data_len == 0)
 		goto out;
diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c
index 798a32bbac5d..ada7b114b6c1 100644
--- a/drivers/nfc/st21nfca/dep.c
+++ b/drivers/nfc/st21nfca/dep.c
@@ -564,7 +564,7 @@ int st21nfca_im_send_atr_req(struct nfc_hci_dev *hdev, u8 *gb, size_t gb_len)
 	atr_req->ppi = ST21NFCA_LR_BITS_PAYLOAD_SIZE_254B;
 	if (gb_len) {
 		atr_req->ppi |= ST21NFCA_GB_BIT;
-		memcpy(skb_put(skb, gb_len), gb, gb_len);
+		skb_put_data(skb, gb, gb_len);
 	}
 	atr_req->length = sizeof(struct st21nfca_atr_req) + hdev->gb_len;
 
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 02a920ca07c8..94d0b913b627 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -407,7 +407,7 @@ static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy,
 			phy->current_read_len = 0;
 		}
 
-		memcpy(skb_put(skb, len), buf, len);
+		skb_put_data(skb, buf, len);
 
 		if (skb->data[skb->len - 1] == ST21NFCA_SOF_EOF) {
 			phy->current_read_len = 0;
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index 0ca2ccc09ca6..2576284f99a7 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -116,7 +116,7 @@ static int rpmsg_ept_cb(struct rpmsg_device *rpdev, void *buf, int len,
 	if (!skb)
 		return -ENOMEM;
 
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, buf, len);
 
 	spin_lock(&eptdev->queue_lock);
 	skb_queue_tail(&eptdev->queue, skb);
diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c
index 730d9619400e..e9847ce3860d 100644
--- a/drivers/s390/net/ctcm_fsms.c
+++ b/drivers/s390/net/ctcm_fsms.c
@@ -1279,7 +1279,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
 		       __func__, data_space);
 
 	while ((skb = skb_dequeue(&ch->collect_queue))) {
-		memcpy(skb_put(ch->trans_skb, skb->len), skb->data, skb->len);
+		skb_put_data(ch->trans_skb, skb->data, skb->len);
 		p_header = (struct pdu *)
 			(skb_tail_pointer(ch->trans_skb) - skb->len);
 		p_header->pdu_flag = 0x00;
@@ -1431,13 +1431,12 @@ static void ctcmpc_chx_rx(fsm_instance *fi, int event, void *arg)
 			break;
 		case MPCG_STATE_FLOWC:
 		case MPCG_STATE_READY:
-			memcpy(skb_put(new_skb, block_len),
-					       skb->data, block_len);
+			skb_put_data(new_skb, skb->data, block_len);
 			skb_queue_tail(&ch->io_queue, new_skb);
 			tasklet_schedule(&ch->ch_tasklet);
 			break;
 		default:
-			memcpy(skb_put(new_skb, len), skb->data, len);
+			skb_put_data(new_skb, skb->data, len);
 			skb_queue_tail(&ch->io_queue, new_skb);
 			tasklet_hi_schedule(&ch->ch_tasklet);
 			break;
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 198842ce6876..99121352c57b 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -522,7 +522,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			ctcm_clear_busy(ch->netdev);
 			return -ENOMEM;
 		} else {
-			memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+			skb_put_data(nskb, skb->data, skb->len);
 			atomic_inc(&nskb->users);
 			atomic_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
@@ -638,7 +638,7 @@ static void ctcmpc_send_sweep_req(struct channel *rch)
 	header->th.th_seq_num	= 0x00;
 	header->sw.th_last_seq	= ch->th_seq_num;
 
-	memcpy(skb_put(sweep_skb, TH_SWEEP_LENGTH), header, TH_SWEEP_LENGTH);
+	skb_put_data(sweep_skb, header, TH_SWEEP_LENGTH);
 
 	kfree(header);
 
@@ -728,7 +728,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 		if (!nskb) {
 			goto nomem_exit;
 		} else {
-			memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+			skb_put_data(nskb, skb->data, skb->len);
 			atomic_inc(&nskb->users);
 			atomic_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
@@ -809,7 +809,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 		skb_reset_tail_pointer(ch->trans_skb);
 		ch->trans_skb->len = 0;
 		ch->ccw[1].count = skb->len;
-		memcpy(skb_put(ch->trans_skb, skb->len), skb->data, skb->len);
+		skb_put_data(ch->trans_skb, skb->data, skb->len);
 		atomic_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 		ccw_idx = 0;
@@ -960,7 +960,7 @@ static int ctcmpc_tx(struct sk_buff *skb, struct net_device *dev)
 		}
 		newskb->protocol = skb->protocol;
 		skb_reserve(newskb, TH_HEADER_LENGTH + PDU_HEADER_LENGTH);
-		memcpy(skb_put(newskb, skb->len), skb->data, skb->len);
+		skb_put_data(newskb, skb->data, skb->len);
 		dev_kfree_skb_any(skb);
 		skb = newskb;
 	}
diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
index c103fc7efe9f..f8be39634f03 100644
--- a/drivers/s390/net/ctcm_mpc.c
+++ b/drivers/s390/net/ctcm_mpc.c
@@ -667,7 +667,7 @@ static void ctcmpc_send_sweep_resp(struct channel *rch)
 	header->th.th_seq_num	= 0x00;
 	header->sw.th_last_seq	= ch->th_seq_num;
 
-	memcpy(skb_put(sweep_skb, TH_SWEEP_LENGTH), header, TH_SWEEP_LENGTH);
+	skb_put_data(sweep_skb, header, TH_SWEEP_LENGTH);
 
 	kfree(header);
 
@@ -974,9 +974,8 @@ void mpc_channel_action(struct channel *ch, int direction, int action)
 		skb_reset_tail_pointer(ch->xid_skb);
 		ch->xid_skb->len = 0;
 
-		memcpy(skb_put(ch->xid_skb, grp->xid_skb->len),
-				grp->xid_skb->data,
-				grp->xid_skb->len);
+		skb_put_data(ch->xid_skb, grp->xid_skb->data,
+			     grp->xid_skb->len);
 
 		ch->xid->xid2_dlc_type =
 			((CHANNEL_DIRECTION(ch->flags) == CTCM_READ)
@@ -1149,7 +1148,7 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 				fsm_event(grp->fsm, MPCG_EVENT_INOP, dev);
 						goto done;
 			}
-			memcpy(skb_put(skb, new_len), pskb->data, new_len);
+			skb_put_data(skb, pskb->data, new_len);
 
 			skb_reset_mac_header(skb);
 			skb->dev = pskb->dev;
@@ -1297,16 +1296,15 @@ struct mpc_group *ctcmpc_init_mpc_group(struct ctcm_priv *priv)
 	/*  base xid for all channels in group  */
 	grp->xid_skb_data = grp->xid_skb->data;
 	grp->xid_th = (struct th_header *)grp->xid_skb->data;
-	memcpy(skb_put(grp->xid_skb, TH_HEADER_LENGTH),
-			&thnorm, TH_HEADER_LENGTH);
+	skb_put_data(grp->xid_skb, &thnorm, TH_HEADER_LENGTH);
 
 	grp->xid = (struct xid2 *)skb_tail_pointer(grp->xid_skb);
-	memcpy(skb_put(grp->xid_skb, XID2_LENGTH), &init_xid, XID2_LENGTH);
+	skb_put_data(grp->xid_skb, &init_xid, XID2_LENGTH);
 	grp->xid->xid2_adj_id = jiffies | 0xfff00000;
 	grp->xid->xid2_sender_id = jiffies;
 
 	grp->xid_id = skb_tail_pointer(grp->xid_skb);
-	memcpy(skb_put(grp->xid_skb, 4), "VTAM", 4);
+	skb_put_data(grp->xid_skb, "VTAM", 4);
 
 	grp->rcvd_xid_skb =
 		__dev_alloc_skb(MPC_BUFSIZE_DEFAULT, GFP_ATOMIC|GFP_DMA);
@@ -1318,8 +1316,7 @@ struct mpc_group *ctcmpc_init_mpc_group(struct ctcm_priv *priv)
 	}
 	grp->rcvd_xid_data = grp->rcvd_xid_skb->data;
 	grp->rcvd_xid_th = (struct th_header *)grp->rcvd_xid_skb->data;
-	memcpy(skb_put(grp->rcvd_xid_skb, TH_HEADER_LENGTH),
-			&thnorm, TH_HEADER_LENGTH);
+	skb_put_data(grp->rcvd_xid_skb, &thnorm, TH_HEADER_LENGTH);
 	grp->saved_xid2 = NULL;
 	priv->xid = grp->xid;
 	priv->mpcg = grp;
@@ -1410,8 +1407,7 @@ static void mpc_action_go_inop(fsm_instance *fi, int event, void *arg)
 	skb_reset_tail_pointer(grp->rcvd_xid_skb);
 	grp->rcvd_xid_skb->len = 0;
 	grp->rcvd_xid_th = (struct th_header *)grp->rcvd_xid_skb->data;
-	memcpy(skb_put(grp->rcvd_xid_skb, TH_HEADER_LENGTH), &thnorm,
-	       TH_HEADER_LENGTH);
+	skb_put_data(grp->rcvd_xid_skb, &thnorm, TH_HEADER_LENGTH);
 
 	if (grp->send_qllc_disc == 1) {
 		grp->send_qllc_disc = 0;
@@ -1590,8 +1586,7 @@ static int mpc_validate_xid(struct mpcg_info *mpcginfo)
 		grp->saved_xid2 =
 			(struct xid2 *)skb_tail_pointer(grp->rcvd_xid_skb);
 
-		memcpy(skb_put(grp->rcvd_xid_skb,
-					XID2_LENGTH), xid, XID2_LENGTH);
+		skb_put_data(grp->rcvd_xid_skb, xid, XID2_LENGTH);
 		grp->rcvd_xid_skb->data = grp->rcvd_xid_data;
 
 		skb_reset_tail_pointer(grp->rcvd_xid_skb);
@@ -1908,17 +1903,15 @@ static void mpc_action_doxid7(fsm_instance *fsm, int event, void *arg)
 				if (fsm_getstate(ch->fsm) == CH_XID7_PENDING1) {
 					fsm_newstate(ch->fsm, CH_XID7_PENDING2);
 					ch->ccw[8].cmd_code = CCW_CMD_SENSE_CMD;
-					memcpy(skb_put(ch->xid_skb,
-							TH_HEADER_LENGTH),
-					       &thdummy, TH_HEADER_LENGTH);
+					skb_put_data(ch->xid_skb, &thdummy,
+						     TH_HEADER_LENGTH);
 					send = 1;
 				}
 			} else if (fsm_getstate(ch->fsm) < CH_XID7_PENDING2) {
 					fsm_newstate(ch->fsm, CH_XID7_PENDING2);
 					ch->ccw[8].cmd_code = CCW_CMD_WRITE_CTL;
-					memcpy(skb_put(ch->xid_skb,
-						       TH_HEADER_LENGTH),
-					       &thnorm, TH_HEADER_LENGTH);
+					skb_put_data(ch->xid_skb, &thnorm,
+						     TH_HEADER_LENGTH);
 					send = 1;
 			}
 		} else {
@@ -1926,17 +1919,16 @@ static void mpc_action_doxid7(fsm_instance *fsm, int event, void *arg)
 			if (grp->roll == YSIDE) {
 				if (fsm_getstate(ch->fsm) < CH_XID7_PENDING4) {
 					fsm_newstate(ch->fsm, CH_XID7_PENDING4);
-					memcpy(skb_put(ch->xid_skb,
-						       TH_HEADER_LENGTH),
-					       &thnorm, TH_HEADER_LENGTH);
+					skb_put_data(ch->xid_skb, &thnorm,
+						     TH_HEADER_LENGTH);
 					ch->ccw[8].cmd_code = CCW_CMD_WRITE_CTL;
 					send = 1;
 				}
 			} else if (fsm_getstate(ch->fsm) == CH_XID7_PENDING3) {
 				fsm_newstate(ch->fsm, CH_XID7_PENDING4);
 				ch->ccw[8].cmd_code = CCW_CMD_SENSE_CMD;
-				memcpy(skb_put(ch->xid_skb, TH_HEADER_LENGTH),
-						&thdummy, TH_HEADER_LENGTH);
+				skb_put_data(ch->xid_skb, &thdummy,
+					     TH_HEADER_LENGTH);
 				send = 1;
 			}
 		}
@@ -2122,7 +2114,7 @@ static int mpc_send_qllc_discontact(struct net_device *dev)
 			return -ENOMEM;
 		}
 
-		memcpy(skb_put(skb, new_len), qllcptr, new_len);
+		skb_put_data(skb, qllcptr, new_len);
 		kfree(qllcptr);
 
 		if (skb_headroom(skb) < 4) {
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 211b31d9f157..337bacb43d68 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1796,7 +1796,7 @@ lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len)
 		card->stats.rx_dropped++;
 		return;
 	}
-	memcpy(skb_put(skb, skb_len), skb_data, skb_len);
+	skb_put_data(skb, skb_data, skb_len);
 	skb->protocol =	card->lan_type_trans(skb, card->dev);
 	card->stats.rx_bytes += skb_len;
 	card->stats.rx_packets++;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index fa732bd86729..7db427c0a6a4 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -759,8 +759,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 	spin_lock_irqsave(&conn->collect_lock, saveflags);
 	while ((skb = skb_dequeue(&conn->collect_queue))) {
 		header.next = conn->tx_buff->len + skb->len + NETIUCV_HDRLEN;
-		memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header,
-		       NETIUCV_HDRLEN);
+		skb_put_data(conn->tx_buff, &header, NETIUCV_HDRLEN);
 		skb_copy_from_linear_data(skb,
 					  skb_put(conn->tx_buff, skb->len),
 					  skb->len);
@@ -780,7 +779,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 	}
 
 	header.next = 0;
-	memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN);
+	skb_put_data(conn->tx_buff, &header, NETIUCV_HDRLEN);
 	conn->prof.send_stamp = jiffies;
 	txmsg.class = 0;
 	txmsg.tag = 0;
@@ -1201,8 +1200,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 				return rc;
 			} else {
 				skb_reserve(nskb, NETIUCV_HDRLEN);
-				memcpy(skb_put(nskb, skb->len),
-				       skb->data, skb->len);
+				skb_put_data(nskb, skb->data, skb->len);
 			}
 			copied = 1;
 		}
@@ -1212,7 +1210,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 		header.next = nskb->len + NETIUCV_HDRLEN;
 		memcpy(skb_push(nskb, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN);
 		header.next = 0;
-		memcpy(skb_put(nskb, NETIUCV_HDRLEN), &header,  NETIUCV_HDRLEN);
+		skb_put_data(nskb, &header, NETIUCV_HDRLEN);
 
 		fsm_newstate(conn->fsm, CONN_STATE_TX);
 		conn->prof.send_stamp = jiffies;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 1fb92e870040..08338f27c82c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5147,12 +5147,11 @@ static inline int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
 
 		skb_reserve(*pskb, ETH_HLEN);
 		if (data_len <= QETH_RX_PULL_LEN) {
-			memcpy(skb_put(*pskb, data_len), element->addr + offset,
-				data_len);
+			skb_put_data(*pskb, element->addr + offset, data_len);
 		} else {
 			get_page(page);
-			memcpy(skb_put(*pskb, QETH_RX_PULL_LEN),
-			       element->addr + offset, QETH_RX_PULL_LEN);
+			skb_put_data(*pskb, element->addr + offset,
+				     QETH_RX_PULL_LEN);
 			skb_fill_page_desc(*pskb, *pfrag, page,
 				offset + QETH_RX_PULL_LEN,
 				data_len - QETH_RX_PULL_LEN);
@@ -5248,8 +5247,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
 				    &skb, offset, &frag, data_len))
 					goto no_mem;
 			} else {
-				memcpy(skb_put(skb, data_len), data_ptr,
-					data_len);
+				skb_put_data(skb, data_ptr, data_len);
 			}
 		}
 		skb_len -= data_len;
diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c
index cf809987f79f..9ab6ce231f11 100644
--- a/drivers/staging/gdm724x/gdm_lte.c
+++ b/drivers/staging/gdm724x/gdm_lte.c
@@ -161,12 +161,9 @@ static int gdm_lte_emulate_arp(struct sk_buff *skb_in, u32 nic_type)
 		return -ENOMEM;
 	skb_reserve(skb_out, NET_IP_ALIGN);
 
-	memcpy(skb_put(skb_out, mac_header_len), mac_header_data,
-	       mac_header_len);
-	memcpy(skb_put(skb_out, sizeof(struct arphdr)), arp_out,
-	       sizeof(struct arphdr));
-	memcpy(skb_put(skb_out, sizeof(struct arpdata)), arp_data_out,
-	       sizeof(struct arpdata));
+	skb_put_data(skb_out, mac_header_data, mac_header_len);
+	skb_put_data(skb_out, arp_out, sizeof(struct arphdr));
+	skb_put_data(skb_out, arp_data_out, sizeof(struct arpdata));
 
 	skb_out->protocol = ((struct ethhdr *)mac_header_data)->h_proto;
 	skb_out->dev = skb_in->dev;
@@ -322,14 +319,10 @@ static int gdm_lte_emulate_ndp(struct sk_buff *skb_in, u32 nic_type)
 		return -ENOMEM;
 	skb_reserve(skb_out, NET_IP_ALIGN);
 
-	memcpy(skb_put(skb_out, mac_header_len), mac_header_data,
-	       mac_header_len);
-	memcpy(skb_put(skb_out, sizeof(struct ipv6hdr)), &ipv6_out,
-	       sizeof(struct ipv6hdr));
-	memcpy(skb_put(skb_out, sizeof(struct icmp6hdr)), &icmp6_out,
-	       sizeof(struct icmp6hdr));
-	memcpy(skb_put(skb_out, sizeof(struct neighbour_advertisement)), &na,
-	       sizeof(struct neighbour_advertisement));
+	skb_put_data(skb_out, mac_header_data, mac_header_len);
+	skb_put_data(skb_out, &ipv6_out, sizeof(struct ipv6hdr));
+	skb_put_data(skb_out, &icmp6_out, sizeof(struct icmp6hdr));
+	skb_put_data(skb_out, &na, sizeof(struct neighbour_advertisement));
 
 	skb_out->protocol = ((struct ethhdr *)mac_header_data)->h_proto;
 	skb_out->dev = skb_in->dev;
@@ -669,8 +662,8 @@ static void gdm_lte_netif_rx(struct net_device *dev, char *buf,
 		return;
 	skb_reserve(skb, NET_IP_ALIGN);
 
-	memcpy(skb_put(skb, mac_header_len), mac_header_data, mac_header_len);
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, mac_header_data, mac_header_len);
+	skb_put_data(skb, buf, len);
 
 	skb->protocol = ((struct ethhdr *)mac_header_data)->h_proto;
 	skb->dev = dev;
diff --git a/drivers/staging/ks7010/ks_hostif.c b/drivers/staging/ks7010/ks_hostif.c
index 49e95426ac30..da801d3e0585 100644
--- a/drivers/staging/ks7010/ks_hostif.c
+++ b/drivers/staging/ks7010/ks_hostif.c
@@ -466,12 +466,12 @@ void hostif_data_indication(struct ks_wlan_private *priv)
 		DPRINTK(4, "SNAP, rx_ind_size = %d\n", rx_ind_size);
 
 		size = ETH_ALEN * 2;
-		memcpy(skb_put(skb, size), priv->rxp, size);
+		skb_put_data(skb, priv->rxp, size);
 
 		/* (SNAP+UI..) skip */
 
 		size = rx_ind_size - (ETH_ALEN * 2);
-		memcpy(skb_put(skb, size), &eth_hdr->h_proto, size);
+		skb_put_data(skb, &eth_hdr->h_proto, size);
 
 		aa1x_hdr = (struct ieee802_1x_hdr *)(priv->rxp + ETHER_HDR_SIZE);
 		break;
@@ -484,14 +484,13 @@ void hostif_data_indication(struct ks_wlan_private *priv)
 		}
 		DPRINTK(3, "NETBEUI/NetBIOS rx_ind_size=%d\n", rx_ind_size);
 
-		memcpy(skb_put(skb, 12), priv->rxp, 12);	/* 8802/FDDI MAC copy */
+		skb_put_data(skb, priv->rxp, 12);	/* 8802/FDDI MAC copy */
 
 		temp[0] = (((rx_ind_size - 12) >> 8) & 0xff);	/* NETBEUI size add */
 		temp[1] = ((rx_ind_size - 12) & 0xff);
-		memcpy(skb_put(skb, 2), temp, 2);
+		skb_put_data(skb, temp, 2);
 
-		memcpy(skb_put(skb, rx_ind_size - 14), priv->rxp + 12,
-		       rx_ind_size - 14);	/* copy after Type */
+		skb_put_data(skb, priv->rxp + 12, rx_ind_size - 14);	/* copy after Type */
 
 		aa1x_hdr = (struct ieee802_1x_hdr *)(priv->rxp + 14);
 		break;
diff --git a/drivers/staging/most/aim-network/networking.c b/drivers/staging/most/aim-network/networking.c
index ce1764cba5f0..995674f25172 100644
--- a/drivers/staging/most/aim-network/networking.c
+++ b/drivers/staging/most/aim-network/networking.c
@@ -486,11 +486,11 @@ static int aim_rx_data(struct mbo *mbo)
 		ether_addr_copy(skb_put(skb, ETH_ALEN), dev->dev_addr);
 
 		/* src */
-		memcpy(skb_put(skb, 4), &zero, 4);
-		memcpy(skb_put(skb, 2), buf + 5, 2);
+		skb_put_data(skb, &zero, 4);
+		skb_put_data(skb, buf + 5, 2);
 
 		/* eth type */
-		memcpy(skb_put(skb, 2), buf + 10, 2);
+		skb_put_data(skb, buf + 10, 2);
 
 		buf += MDP_HDR_LEN;
 		len -= MDP_HDR_LEN;
@@ -499,7 +499,7 @@ static int aim_rx_data(struct mbo *mbo)
 		len -= MEP_HDR_LEN;
 	}
 
-	memcpy(skb_put(skb, len), buf, len);
+	skb_put_data(skb, buf, len);
 	skb->protocol = eth_type_trans(skb, dev);
 	skb_len = skb->len;
 	if (netif_rx(skb) == NET_RX_SUCCESS) {
diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c
index 65a285631994..72baedefa0f1 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -287,8 +287,7 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int budget)
 					else
 						ptr += 6;
 				}
-				memcpy(skb_put(skb, work->word1.len), ptr,
-				       work->word1.len);
+				skb_put_data(skb, ptr, work->word1.len);
 				/* No packet buffers to free */
 			} else {
 				int segments = work->word2.s.bufs;
@@ -323,10 +322,9 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int budget)
 					if (segment_size > len)
 						segment_size = len;
 					/* Copy the data into the packet */
-					memcpy(skb_put(skb, segment_size),
-					       cvmx_phys_to_ptr(
-					       segment_ptr.s.addr),
-					       segment_size);
+					skb_put_data(skb,
+						     cvmx_phys_to_ptr(segment_ptr.s.addr),
+						     segment_size);
 					len -= segment_size;
 					segment_ptr = next_ptr;
 				}
diff --git a/drivers/staging/rtl8188eu/core/rtw_recv.c b/drivers/staging/rtl8188eu/core/rtw_recv.c
index c6c4404e717b..14173cf6e1e7 100644
--- a/drivers/staging/rtl8188eu/core/rtw_recv.c
+++ b/drivers/staging/rtl8188eu/core/rtw_recv.c
@@ -1544,8 +1544,8 @@ static int amsdu_to_msdu(struct adapter *padapter, struct recv_frame *prframe)
 		sub_skb = dev_alloc_skb(nSubframe_Length + 12);
 		if (sub_skb) {
 			skb_reserve(sub_skb, 12);
-			data_ptr = (u8 *)skb_put(sub_skb, nSubframe_Length);
-			memcpy(data_ptr, pdata, nSubframe_Length);
+			data_ptr = skb_put_data(sub_skb, pdata,
+						nSubframe_Length);
 		} else {
 			sub_skb = skb_clone(prframe->pkt, GFP_ATOMIC);
 			if (sub_skb) {
diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c
index 859d0d6051cd..225c23fc69dc 100644
--- a/drivers/staging/rtl8188eu/os_dep/mon.c
+++ b/drivers/staging/rtl8188eu/os_dep/mon.c
@@ -53,7 +53,7 @@ static void mon_recv_decrypted(struct net_device *dev, const u8 *data,
 	skb = netdev_alloc_skb(dev, data_len);
 	if (!skb)
 		return;
-	memcpy(skb_put(skb, data_len), data, data_len);
+	skb_put_data(skb, data, data_len);
 
 	/*
 	 * Frame data is not encrypted. Strip off protection so
diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index 43a77745e6fb..bae98ca0a9b6 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -817,8 +817,7 @@ static u8 parse_subframe(struct rtllib_device *ieee, struct sk_buff *skb,
 		if (!sub_skb)
 			return 0;
 		skb_reserve(sub_skb, 12);
-		data_ptr = (u8 *)skb_put(sub_skb, skb->len);
-		memcpy(data_ptr, skb->data, skb->len);
+		data_ptr = skb_put_data(sub_skb, skb->data, skb->len);
 		sub_skb->dev = ieee->dev;
 
 		rxb->subframes[0] = sub_skb;
@@ -870,8 +869,7 @@ static u8 parse_subframe(struct rtllib_device *ieee, struct sk_buff *skb,
 		if (!sub_skb)
 			return 0;
 		skb_reserve(sub_skb, 12);
-		data_ptr = (u8 *)skb_put(sub_skb, nSubframe_Length);
-		memcpy(data_ptr, skb->data, nSubframe_Length);
+		data_ptr = skb_put_data(sub_skb, skb->data, nSubframe_Length);
 
 		sub_skb->dev = ieee->dev;
 		rxb->subframes[rxb->nr_subframes++] = sub_skb;
@@ -1141,13 +1139,12 @@ static int rtllib_rx_decrypt(struct rtllib_device *ieee, struct sk_buff *skb,
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb
 			 */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_put_data(frag_skb, skb->data, flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb
 			 */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_put_data(frag_skb, skb->data + hdrlen, flen);
 		}
 		dev_kfree_skb_any(skb);
 		skb = NULL;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index eeda17d6409b..60d07d0bb4eb 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1272,8 +1272,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 	hdr->info_element[0].id = MFIE_TYPE_SSID;
 
 	hdr->info_element[0].len = beacon->ssid_len;
-	tag = skb_put(skb, beacon->ssid_len);
-	memcpy(tag, beacon->ssid, beacon->ssid_len);
+	tag = skb_put_data(skb, beacon->ssid, beacon->ssid_len);
 
 	tag = skb_put(skb, rate_len);
 
@@ -1349,8 +1348,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 	}
 
 	if (wpa_ie_len) {
-		tag = skb_put(skb, ieee->wpa_ie_len);
-		memcpy(tag, ieee->wpa_ie, ieee->wpa_ie_len);
+		tag = skb_put_data(skb, ieee->wpa_ie, ieee->wpa_ie_len);
 
 		if (PMKCacheIdx >= 0) {
 			tag = skb_put(skb, 18);
@@ -1366,8 +1364,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 	}
 
 	if (wps_ie_len && ieee->wps_ie) {
-		tag = skb_put(skb, wps_ie_len);
-		memcpy(tag, ieee->wps_ie, wps_ie_len);
+		tag = skb_put_data(skb, ieee->wps_ie, wps_ie_len);
 	}
 
 	tag = skb_put(skb, turbo_info_len);
diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index 78a3ad5b231f..fc88d47dea43 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -624,8 +624,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 
 			txb->encrypted = 0;
 			txb->payload_size = cpu_to_le16(skb->len);
-			memcpy(skb_put(txb->fragments[0], skb->len), skb->data,
-			       skb->len);
+			skb_put_data(txb->fragments[0], skb->data, skb->len);
 
 			goto success;
 		}
@@ -818,9 +817,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 			} else {
 				tcb_desc->bHwSec = 0;
 			}
-			frag_hdr = (struct rtllib_hdr_3addrqos *)
-				   skb_put(skb_frag, hdr_len);
-			memcpy(frag_hdr, &header, hdr_len);
+			frag_hdr = skb_put_data(skb_frag, &header, hdr_len);
 
 			/* If this is not the last fragment, then add the
 			 * MOREFRAGS bit to the frame control
@@ -852,7 +849,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 				bytes -= SNAP_SIZE + sizeof(u16);
 			}
 
-			memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+			skb_put_data(skb_frag, skb->data, bytes);
 
 			/* Advance the SKB... */
 			skb_pull(skb, bytes);
@@ -895,8 +892,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 
 		txb->encrypted = 0;
 		txb->payload_size = cpu_to_le16(skb->len);
-		memcpy(skb_put(txb->fragments[0], skb->len), skb->data,
-		       skb->len);
+		skb_put_data(txb->fragments[0], skb->data, skb->len);
 	}
 
  success:
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
index 7a31510f0524..c0e2f711cb4e 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
@@ -848,8 +848,8 @@ static u8 parse_subframe(struct sk_buff *skb,
 			if (!sub_skb)
 				return 0;
 			skb_reserve(sub_skb, 12);
-			data_ptr = (u8 *)skb_put(sub_skb, nSubframe_Length);
-			memcpy(data_ptr, skb->data, nSubframe_Length);
+			data_ptr = skb_put_data(sub_skb, skb->data,
+						nSubframe_Length);
 #endif
 			rxb->subframes[rxb->nr_subframes++] = sub_skb;
 			if (rxb->nr_subframes >= MAX_SUBFRAME_COUNT) {
@@ -1180,12 +1180,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (frag == 0) {
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_put_data(frag_skb, skb->data, flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_put_data(frag_skb, skb->data + hdrlen, flen);
 		}
 		dev_kfree_skb_any(skb);
 		skb = NULL;
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
index 14aea26804f4..903a1d0269df 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
@@ -1115,8 +1115,7 @@ ieee80211_association_req(struct ieee80211_network *beacon,
 	hdr->info_element[0].id = MFIE_TYPE_SSID;
 
 	hdr->info_element[0].len = beacon->ssid_len;
-	tag = skb_put(skb, beacon->ssid_len);
-	memcpy(tag, beacon->ssid, beacon->ssid_len);
+	tag = skb_put_data(skb, beacon->ssid, beacon->ssid_len);
 
 	tag = skb_put(skb, rate_len);
 
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c
index bdb96a45a9eb..f58971a4a2e3 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c
@@ -794,8 +794,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 			{
 				tcb_desc->bHwSec = 0;
 			}
-			frag_hdr = (struct rtl_80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len);
-			memcpy(frag_hdr, &header, hdr_len);
+			frag_hdr = skb_put_data(skb_frag, &header, hdr_len);
 
 			/* If this is not the last fragment, then add the MOREFRAGS
 			 * bit to the frame control
@@ -826,7 +825,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 				bytes -= SNAP_SIZE + sizeof(u16);
 			}
 
-			memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+			skb_put_data(skb_frag, skb->data, bytes);
 
 			/* Advance the SKB... */
 			skb_pull(skb, bytes);
@@ -869,7 +868,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		txb->encrypted = 0;
 		txb->payload_size = __cpu_to_le16(skb->len);
-		memcpy(skb_put(txb->fragments[0],skb->len), skb->data, skb->len);
+		skb_put_data(txb->fragments[0], skb->data, skb->len);
 	}
 
  success:
diff --git a/drivers/staging/rtl8192u/r819xU_cmdpkt.c b/drivers/staging/rtl8192u/r819xU_cmdpkt.c
index bb6d8bd6c7ac..c3cf01c842a3 100644
--- a/drivers/staging/rtl8192u/r819xU_cmdpkt.c
+++ b/drivers/staging/rtl8192u/r819xU_cmdpkt.c
@@ -45,8 +45,7 @@ rt_status SendTxCommandPacket(struct net_device *dev, void *pData, u32 DataLen)
 	tcb_desc->bCmdOrInit = DESC_PACKET_TYPE_NORMAL;
 	tcb_desc->bLastIniPkt = 0;
 	skb_reserve(skb, USB_HWDESC_HEADER_LEN);
-	ptr_buf = skb_put(skb, DataLen);
-	memcpy(ptr_buf, pData, DataLen);
+	ptr_buf = skb_put_data(skb, pData, DataLen);
 	tcb_desc->txbuf_size = (u16)DataLen;
 
 	if (!priv->ieee80211->check_nic_enough_desc(dev, tcb_desc->queue_index) ||
diff --git a/drivers/staging/rtl8712/rtl8712_recv.c b/drivers/staging/rtl8712/rtl8712_recv.c
index 266ffefd55ed..f96c558b3c6a 100644
--- a/drivers/staging/rtl8712/rtl8712_recv.c
+++ b/drivers/staging/rtl8712/rtl8712_recv.c
@@ -372,8 +372,7 @@ static int amsdu_to_msdu(struct _adapter *padapter, union recv_frame *prframe)
 		if (!sub_skb)
 			break;
 		skb_reserve(sub_skb, 12);
-		data_ptr = (u8 *)skb_put(sub_skb, nSubframe_Length);
-		memcpy(data_ptr, pdata, nSubframe_Length);
+		data_ptr = skb_put_data(sub_skb, pdata, nSubframe_Length);
 		subframes[nr_subframes++] = sub_skb;
 		if (nr_subframes >= MAX_SUBFRAME_COUNT) {
 			netdev_warn(padapter->pnetdev, "r8712u: ParseSubframe(): Too many Subframes! Packets dropped!\n");
diff --git a/drivers/staging/rtl8723bs/os_dep/recv_linux.c b/drivers/staging/rtl8723bs/os_dep/recv_linux.c
index e731ab4e2bd7..1a6443dc3ff0 100644
--- a/drivers/staging/rtl8723bs/os_dep/recv_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/recv_linux.c
@@ -82,8 +82,8 @@ _pkt *rtw_os_alloc_msdu_pkt(union recv_frame *prframe, u16 nSubframe_Length, u8
 	if (sub_skb)
 	{
 		skb_reserve(sub_skb, 12);
-		data_ptr = (u8 *)skb_put(sub_skb, nSubframe_Length);
-		memcpy(data_ptr, (pdata + ETH_HLEN), nSubframe_Length);
+		data_ptr = skb_put_data(sub_skb, (pdata + ETH_HLEN),
+					nSubframe_Length);
 	}
 	else
 	{
diff --git a/drivers/staging/wilc1000/linux_mon.c b/drivers/staging/wilc1000/linux_mon.c
index c9782d452b07..dbc266a37974 100644
--- a/drivers/staging/wilc1000/linux_mon.c
+++ b/drivers/staging/wilc1000/linux_mon.c
@@ -72,7 +72,7 @@ void WILC_WFI_monitor_rx(u8 *buff, u32 size)
 		if (!skb)
 			return;
 
-		memcpy(skb_put(skb, size), buff, size);
+		skb_put_data(skb, buff, size);
 
 		cb_hdr = (struct wilc_wfi_radiotap_cb_hdr *)skb_push(skb, sizeof(*cb_hdr));
 		memset(cb_hdr, 0, sizeof(struct wilc_wfi_radiotap_cb_hdr));
@@ -100,7 +100,7 @@ void WILC_WFI_monitor_rx(u8 *buff, u32 size)
 		if (!skb)
 			return;
 
-		memcpy(skb_put(skb, size), buff, size);
+		skb_put_data(skb, buff, size);
 		hdr = (struct wilc_wfi_radiotap_hdr *)skb_push(skb, sizeof(*hdr));
 		memset(hdr, 0, sizeof(struct wilc_wfi_radiotap_hdr));
 		hdr->hdr.it_version = 0; /* PKTHDR_RADIOTAP_VERSION; */
@@ -200,7 +200,7 @@ static netdev_tx_t WILC_WFI_mon_xmit(struct sk_buff *skb,
 		if (!skb2)
 			return -ENOMEM;
 
-		memcpy(skb_put(skb2, skb->len), skb->data, skb->len);
+		skb_put_data(skb2, skb->data, skb->len);
 
 		cb_hdr = (struct wilc_wfi_radiotap_cb_hdr *)skb_push(skb2, sizeof(*cb_hdr));
 		memset(cb_hdr, 0, sizeof(struct wilc_wfi_radiotap_cb_hdr));
diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c
index d6d803416be2..f36598a89ce0 100644
--- a/drivers/staging/wilc1000/linux_wlan.c
+++ b/drivers/staging/wilc1000/linux_wlan.c
@@ -1160,7 +1160,7 @@ void wilc_frmw_to_linux(struct wilc *wilc, u8 *buff, u32 size, u32 pkt_offset)
 
 		skb->dev = wilc_netdev;
 
-		memcpy(skb_put(skb, frame_len), buff_to_send, frame_len);
+		skb_put_data(skb, buff_to_send, frame_len);
 
 		skb->protocol = eth_type_trans(skb, wilc_netdev);
 		vif->netstats.rx_packets++;
diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c
index a812e55ba1b0..1de67f209f2c 100644
--- a/drivers/staging/wlan-ng/hfa384x_usb.c
+++ b/drivers/staging/wlan-ng/hfa384x_usb.c
@@ -3530,13 +3530,11 @@ static void hfa384x_int_rxmonitor(struct wlandevice *wlandev,
 	/* Copy the 802.11 header to the skb
 	 * (ctl frames may be less than a full header)
 	 */
-	datap = skb_put(skb, hdrlen);
-	memcpy(datap, &rxdesc->frame_control, hdrlen);
+	datap = skb_put_data(skb, &rxdesc->frame_control, hdrlen);
 
 	/* If any, copy the data from the card to the skb */
 	if (datalen > 0) {
-		datap = skb_put(skb, datalen);
-		memcpy(datap, rxfrm->data, datalen);
+		datap = skb_put_data(skb, rxfrm->data, datalen);
 
 		/* check for unencrypted stuff if WEP bit set. */
 		if (*(datap - hdrlen + 1) & 0x40)	/* wep set */
diff --git a/drivers/tty/ipwireless/network.c b/drivers/tty/ipwireless/network.c
index c0dfb642383b..c2f9a3263b37 100644
--- a/drivers/tty/ipwireless/network.c
+++ b/drivers/tty/ipwireless/network.c
@@ -355,7 +355,7 @@ static struct sk_buff *ipw_packet_received_skb(unsigned char *data,
 	if (skb == NULL)
 		return NULL;
 	skb_reserve(skb, 2);
-	memcpy(skb_put(skb, length), data, length);
+	skb_put_data(skb, data, length);
 
 	return skb;
 }
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 2667a205a5ab..da830f833392 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2688,7 +2688,7 @@ static void gsm_mux_rx_netchar(struct gsm_dlci *dlci,
 		return;
 	}
 	skb_reserve(skb, NET_IP_ALIGN);
-	memcpy(skb_put(skb, size), in_buf, size);
+	skb_put_data(skb, in_buf, size);
 
 	skb->dev = net;
 	skb->protocol = htons(ETH_P_IP);
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index a2c308f7d637..3fafc5a1b2e0 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -7960,7 +7960,7 @@ static void hdlcdev_rx(struct mgsl_struct *info, char *buf, int size)
 		return;
 	}
 
-	memcpy(skb_put(skb, size), buf, size);
+	skb_put_data(skb, buf, size);
 
 	skb->protocol = hdlc_type_trans(skb, dev);
 
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 31885f20fc15..7e947ecf15f1 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1755,7 +1755,7 @@ static void hdlcdev_rx(struct slgt_info *info, char *buf, int size)
 		return;
 	}
 
-	memcpy(skb_put(skb, size), buf, size);
+	skb_put_data(skb, buf, size);
 
 	skb->protocol = hdlc_type_trans(skb, dev);
 
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 51e8846cd68f..9b4fb0251c1a 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -1874,7 +1874,7 @@ static void hdlcdev_rx(SLMP_INFO *info, char *buf, int size)
 		return;
 	}
 
-	memcpy(skb_put(skb, size), buf, size);
+	skb_put_data(skb, buf, size);
 
 	skb->protocol = hdlc_type_trans(skb, dev);
 
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 2882c6d3ae66..630616aaa861 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1007,8 +1007,8 @@ static struct sk_buff *package_for_tx(struct f_ncm *ncm)
 	ntb_iter = skb_put_zero(skb2, ndp_pad);
 
 	/* Copy NTB across. */
-	ntb_iter = (void *) skb_put(skb2, ncm->skb_tx_ndp->len);
-	memcpy(ntb_iter, ncm->skb_tx_ndp->data, ncm->skb_tx_ndp->len);
+	ntb_iter = skb_put_data(skb2, ncm->skb_tx_ndp->data,
+				ncm->skb_tx_ndp->len);
 	dev_consume_skb_any(ncm->skb_tx_ndp);
 	ncm->skb_tx_ndp = NULL;
 
@@ -1129,8 +1129,7 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port,
 
 		/* Add the new data to the skb */
 		ntb_data = skb_put_zero(ncm->skb_tx_data, dgram_pad);
-		ntb_data = (void *) skb_put(ncm->skb_tx_data, skb->len);
-		memcpy(ntb_data, skb->data, skb->len);
+		ntb_data = skb_put_data(ncm->skb_tx_data, skb->data, skb->len);
 		dev_consume_skb_any(skb);
 		skb = NULL;
 
@@ -1313,8 +1312,8 @@ static int ncm_unwrap_ntb(struct gether *port,
 							 dg_len - crc_len);
 			if (skb2 == NULL)
 				goto err;
-			memcpy(skb_put(skb2, dg_len - crc_len),
-			       skb->data + index, dg_len - crc_len);
+			skb_put_data(skb2, skb->data + index,
+				     dg_len - crc_len);
 
 			skb_queue_tail(list, skb2);
 
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index 6a1ce6a55158..9c4c58e4a1a2 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -336,7 +336,7 @@ static void pn_rx_complete(struct usb_ep *ep, struct usb_request *req)
 			skb->protocol = htons(ETH_P_PHONET);
 			skb_reset_mac_header(skb);
 			/* Can't use pskb_pull() on page in IRQ */
-			memcpy(skb_put(skb, 1), page_address(page), 1);
+			skb_put_data(skb, page_address(page), 1);
 		}
 
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index ac02c54520e9..a7330eb3ec64 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -554,7 +554,7 @@ _alloc_mISDN_skb(u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask)
 	if (!skb)
 		return NULL;
 	if (len)
-		memcpy(skb_put(skb, len), dp, len);
+		skb_put_data(skb, dp, len);
 	hh = mISDN_HEAD_P(skb);
 	hh->prim = prim;
 	hh->id = id;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 01ea64d0783a..5af5385a0e72 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1913,6 +1913,16 @@ static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
+static inline void *skb_put_data(struct sk_buff *skb, const void *data,
+				 unsigned int len)
+{
+	void *tmp = skb_put(skb, len);
+
+	memcpy(tmp, data, len);
+
+	return tmp;
+}
+
 unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 {
diff --git a/lib/nlattr.c b/lib/nlattr.c
index d09d9746fc5d..ab15a6c095d3 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -616,7 +616,7 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data)
 	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
 		return -EMSGSIZE;
 
-	memcpy(skb_put(skb, attrlen), data, attrlen);
+	skb_put_data(skb, data, attrlen);
 	return 0;
 }
 EXPORT_SYMBOL(nla_append);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index fa8d6b475c06..a3501173e200 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -732,8 +732,8 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
 	unsigned char *skb_buff;
 	unsigned long new_direct_link_flag;
 
-	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
-	memcpy(skb_buff, packet_buff, packet_len);
+	skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff,
+				packet_len);
 	forw_packet_aggr->packet_len += packet_len;
 	forw_packet_aggr->num_packets++;
 
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 03a35c9f456d..1e3dc374bfde 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -166,8 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 		goto reschedule;
 
 	skb_reserve(skb, ETH_HLEN);
-	pkt_buff = skb_put(skb, ogm_buff_len);
-	memcpy(pkt_buff, ogm_buff, ogm_buff_len);
+	pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len);
 
 	ogm_packet = (struct batadv_ogm2_packet *)skb->data;
 	ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -382,8 +381,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
 		goto out;
 
 	skb_reserve(skb, ETH_HLEN);
-	skb_buff = skb_put(skb, packet_len);
-	memcpy(skb_buff, ogm_received, packet_len);
+	skb_buff = skb_put_data(skb, ogm_received, packet_len);
 
 	/* apply forward penalty */
 	ogm_forward = (struct batadv_ogm2_packet *)skb_buff;
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 8f964beaac28..a98cf1104a30 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -296,8 +296,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
 	/* Copy the payload of the each fragment into the last skb */
 	hlist_for_each_entry(entry, chain, list) {
 		size = entry->skb->len - hdr_size;
-		memcpy(skb_put(skb_out, size), entry->skb->data + hdr_size,
-		       size);
+		skb_put_data(skb_out, entry->skb->data + hdr_size, size);
 	}
 
 free:
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 9e59b6654126..f4c64ef01c24 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -122,7 +122,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 	if (skb && (skb->len > 0))
 		skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
 
-	memcpy(skb_put(nskb, count), buf, count);
+	skb_put_data(nskb, buf, count);
 
 	session->reassembly[id] = nskb;
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 93806b959039..d860e3cc23cf 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3266,7 +3266,7 @@ int hci_reset_dev(struct hci_dev *hdev)
 		return -ENOMEM;
 
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
-	memcpy(skb_put(skb, 3), hw_err, 3);
+	skb_put_data(skb, hw_err, 3);
 
 	/* Send Hardware Error to upper stack */
 	return hci_recv_frame(hdev, skb);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b5faff458d8b..4e4105a932bd 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -304,7 +304,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
 	hdr->plen   = plen;
 
 	if (plen)
-		memcpy(skb_put(skb, plen), param, plen);
+		skb_put_data(skb, param, plen);
 
 	BT_DBG("skb len %d", skb->len);
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 638bf0e1a2e3..083e87f26a0f 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -379,7 +379,7 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
 		put_unaligned_le16(event, skb_put(skb, 2));
 
 		if (data)
-			memcpy(skb_put(skb, data_len), data, data_len);
+			skb_put_data(skb, data, data_len);
 
 		skb->tstamp = tstamp;
 
@@ -515,10 +515,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
 
 	put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
 	put_unaligned_le16(format, skb_put(skb, 2));
-	memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver));
+	skb_put_data(skb, ver, sizeof(ver));
 	put_unaligned_le32(flags, skb_put(skb, 4));
 	*skb_put(skb, 1) = TASK_COMM_LEN;
-	memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN);
+	skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN);
 
 	__net_timestamp(skb);
 
@@ -586,7 +586,7 @@ static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index,
 	put_unaligned_le16(opcode, skb_put(skb, 2));
 
 	if (buf)
-		memcpy(skb_put(skb, len), buf, len);
+		skb_put_data(skb, buf, len);
 
 	__net_timestamp(skb);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0bec4588c3c8..9e83713262e8 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -114,7 +114,7 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock,
 
 	*skb_put(skb, 1) = hdr;
 	if (data && size > 0)
-		memcpy(skb_put(skb, size), data, size);
+		skb_put_data(skb, data, size);
 
 	skb_queue_tail(transmit, skb);
 	wake_up_interruptible(sk_sleep(sk));
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index f88ac99528ce..fe6a5529bdf5 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2923,7 +2923,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
 
 	if (dlen) {
 		count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
-		memcpy(skb_put(skb, count), data, count);
+		skb_put_data(skb, data, count);
 		data += count;
 	}
 
@@ -2938,7 +2938,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
 		if (!*frag)
 			goto fail;
 
-		memcpy(skb_put(*frag, count), data, count);
+		skb_put_data(*frag, data, count);
 
 		len  -= count;
 		data += count;
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index c933bd08c1fe..11d0ca64402b 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -44,7 +44,7 @@ static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie,
 	put_unaligned_le16(opcode, skb_put(skb, 2));
 
 	if (buf)
-		memcpy(skb_put(skb, len), buf, len);
+		skb_put_data(skb, buf, len);
 
 	__net_timestamp(skb);
 
@@ -75,7 +75,7 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel,
 	hdr->len = cpu_to_le16(data_len);
 
 	if (data)
-		memcpy(skb_put(skb, data_len), data, data_len);
+		skb_put_data(skb, data, data_len);
 
 	/* Time stamp */
 	__net_timestamp(skb);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 2f2cb5e27cdd..5f3074cb6b4d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -798,7 +798,7 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
 
 		skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
 
-		memcpy(skb_put(skb, size), buf + sent, size);
+		skb_put_data(skb, buf + sent, size);
 
 		rfcomm_dlc_send_noerror(dlc, skb);
 
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index bb6ed8e97580..15bf0c5322ab 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -151,8 +151,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
 	icmph->type     = ICMP_DEST_UNREACH;
 	icmph->code	= code;
 
-	payload = skb_put(nskb, len);
-	memcpy(payload, skb_network_header(oldskb), len);
+	payload = skb_put_data(nskb, skb_network_header(oldskb), len);
 
 	csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0);
 	icmph->checksum = csum_fold(csum);
@@ -278,8 +277,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
 	icmp6h->icmp6_type = ICMPV6_DEST_UNREACH;
 	icmp6h->icmp6_code = code;
 
-	payload = skb_put(nskb, len);
-	memcpy(payload, skb_network_header(oldskb), len);
+	payload = skb_put_data(nskb, skb_network_header(oldskb), len);
 	nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr));
 
 	icmp6h->icmp6_cksum =
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 65432633a250..47a8748d953a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -282,7 +282,7 @@ static void bcm_can_tx(struct bcm_op *op)
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 	can_skb_prv(skb)->skbcnt = 0;
 
-	memcpy(skb_put(skb, op->cfsiz), cf, op->cfsiz);
+	skb_put_data(skb, cf, op->cfsiz);
 
 	/* send with loopback */
 	skb->dev = dev;
@@ -318,13 +318,13 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 	if (!skb)
 		return;
 
-	memcpy(skb_put(skb, sizeof(*head)), head, sizeof(*head));
+	skb_put_data(skb, head, sizeof(*head));
 
 	if (head->nframes) {
 		/* CAN frames starting here */
 		firstframe = (struct canfd_frame *)skb_tail_pointer(skb);
 
-		memcpy(skb_put(skb, datalen), frames, datalen);
+		skb_put_data(skb, frames, datalen);
 
 		/*
 		 * the BCM uses the flags-element of the canfd_frame
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 849805e7af52..b8a558715395 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -533,7 +533,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
 	*skb_put(skb,1) = len;
 
 	if (len > 0)
-		memcpy(skb_put(skb, len), scp->conndata_out.opt_data, len);
+		skb_put_data(skb, scp->conndata_out.opt_data, len);
 
 
 	dn_nsp_send(skb);
@@ -691,22 +691,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	aux = scp->accessdata.acc_userl;
 	*skb_put(skb, 1) = aux;
 	if (aux > 0)
-		memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
+		skb_put_data(skb, scp->accessdata.acc_user, aux);
 
 	aux = scp->accessdata.acc_passl;
 	*skb_put(skb, 1) = aux;
 	if (aux > 0)
-		memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
+		skb_put_data(skb, scp->accessdata.acc_pass, aux);
 
 	aux = scp->accessdata.acc_accl;
 	*skb_put(skb, 1) = aux;
 	if (aux > 0)
-		memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
+		skb_put_data(skb, scp->accessdata.acc_acc, aux);
 
 	aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
 	*skb_put(skb, 1) = aux;
 	if (aux > 0)
-		memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
+		skb_put_data(skb, scp->conndata_out.opt_data, aux);
 
 	scp->persist = dn_nsp_persist(sk);
 	scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index dbb476d7d38f..e6ff5128e61a 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -121,8 +121,7 @@ lowpan_alloc_frag(struct sk_buff *skb, int size,
 		*mac_cb(frag) = *mac_cb(skb);
 
 		if (frag1) {
-			memcpy(skb_put(frag, skb->mac_len),
-			       skb_mac_header(skb), skb->mac_len);
+			skb_put_data(frag, skb_mac_header(skb), skb->mac_len);
 		} else {
 			rc = wpan_dev_hard_header(frag, wdev,
 						  &master_hdr->dest,
@@ -152,8 +151,8 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
 	if (IS_ERR(frag))
 		return PTR_ERR(frag);
 
-	memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen);
-	memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len);
+	skb_put_data(frag, frag_hdr, frag_hdrlen);
+	skb_put_data(frag, skb_network_header(skb) + offset, len);
 
 	raw_dump_table(__func__, " fragment dump", frag->data, frag->len);
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9098429e38bc..b64046ccae69 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1602,7 +1602,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
 
 	ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
 
-	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+	skb_put_data(skb, ra, sizeof(ra));
 
 	skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
 	skb_put(skb, sizeof(*pmr));
@@ -2006,7 +2006,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
 
-	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+	skb_put_data(skb, ra, sizeof(ra));
 
 	hdr = skb_put_zero(skb, sizeof(struct mld_msg));
 	hdr->mld_type = type;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index f6061c4bb0a8..ec157c3419b5 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -690,7 +690,7 @@ static int ircomm_tty_write(struct tty_struct *tty,
 		}
 
 		/* Copy data */
-		memcpy(skb_put(skb,size), buf + len, size);
+		skb_put_data(skb, buf + len, size);
 
 		count -= size;
 		len += size;
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index b936b1251a66..bf56ac7dba96 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -392,8 +392,7 @@ void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s,
 		info[0] = discovery->data.charset;
 
 		len = IRDA_MIN(discovery->name_len, skb_tailroom(tx_skb));
-		info = skb_put(tx_skb, len);
-		memcpy(info, discovery->data.info, len);
+		info = skb_put_data(tx_skb, discovery->data.info, len);
 	}
 	irlap_queue_xmit(self, tx_skb);
 }
@@ -1216,8 +1215,7 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr,
 	frame->control = TEST_RSP | PF_BIT;
 
 	/* Copy info */
-	info = skb_put(tx_skb, cmd->len);
-	memcpy(info, cmd->data, cmd->len);
+	info = skb_put_data(tx_skb, cmd->data, cmd->len);
 
 	/* Return to sender */
 	irlap_wait_min_turn_around(self, &self->qos_tx);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 8ad430edb5b8..3ebb4268973b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1706,8 +1706,7 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
 	if (!skb)
 		return -ENOBUFS;
 
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
-	memcpy(hdr, ihdr, sizeof(struct sadb_msg));
+	hdr = skb_put_data(skb, ihdr, sizeof(struct sadb_msg));
 	hdr->sadb_msg_errno = (uint8_t) 0;
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
 
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 660ac6a426f4..e9c6aa3ed05b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1569,7 +1569,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	skb_reserve(skb, local->tx_headroom);
-	memcpy(skb_put(skb, presp->head_len), presp->head, presp->head_len);
+	skb_put_data(skb, presp->head, presp->head_len);
 
 	memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN);
 	ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index e45c8d94952e..861697f2d75b 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -345,7 +345,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata,
 		data = ifmsh->ie + offset;
 		if (skb_tailroom(skb) < len)
 			return -ENOMEM;
-		memcpy(skb_put(skb, len), data, len);
+		skb_put_data(skb, data, len);
 	}
 
 	return 0;
@@ -369,7 +369,7 @@ int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 
 	if (skb_tailroom(skb) < len)
 		return -ENOMEM;
-	memcpy(skb_put(skb, len), data, len);
+	skb_put_data(skb, data, len);
 
 	return 0;
 }
@@ -1125,8 +1125,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
 		goto out;
 
 	skb_reserve(presp, local->tx_headroom);
-	memcpy(skb_put(presp, bcn->head_len), bcn->head, bcn->head_len);
-	memcpy(skb_put(presp, bcn->tail_len), bcn->tail, bcn->tail_len);
+	skb_put_data(presp, bcn->head, bcn->head_len);
+	skb_put_data(presp, bcn->tail, bcn->tail_len);
 	hdr = (struct ieee80211_mgmt *) presp->data;
 	hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					 IEEE80211_STYPE_PROBE_RESP);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e810334595ff..7be7917e1541 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -796,8 +796,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 						 after_ric,
 						 ARRAY_SIZE(after_ric),
 						 offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		pos = skb_put_data(skb, assoc_data->ie + offset,
+				   noffset - offset);
 		offset = noffset;
 	}
 
@@ -834,8 +834,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
 					     before_vht, ARRAY_SIZE(before_vht),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		pos = skb_put_data(skb, assoc_data->ie + offset,
+				   noffset - offset);
 		offset = noffset;
 	}
 
@@ -848,8 +848,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		noffset = ieee80211_ie_split_vendor(assoc_data->ie,
 						    assoc_data->ie_len,
 						    offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		pos = skb_put_data(skb, assoc_data->ie + offset,
+				   noffset - offset);
 		offset = noffset;
 	}
 
@@ -868,8 +868,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	/* add any remaining custom (i.e. vendor specific here) IEs */
 	if (assoc_data->ie_len) {
 		noffset = assoc_data->ie_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		pos = skb_put_data(skb, assoc_data->ie + offset,
+				   noffset - offset);
 	}
 
 	if (assoc_data->fils_kek_len &&
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index eede5c6db8d5..f8e7a8bbc618 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -885,8 +885,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	data = skb_put(skb, params->len);
-	memcpy(data, params->buf, params->len);
+	data = skb_put_data(skb, params->buf, params->len);
 
 	/* Update CSA counters */
 	if (sdata->vif.csa_active &&
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e1ab1c4af33c..53b00bb52095 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2098,7 +2098,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		}
 	}
 	while ((skb = __skb_dequeue(&entry->skb_list))) {
-		memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len);
+		skb_put_data(rx->skb, skb->data, skb->len);
 		dev_kfree_skb(skb);
 	}
 
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index c379c99cd1d8..86740670102d 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -388,8 +388,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 					     before_ext_cap,
 					     ARRAY_SIZE(before_ext_cap),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
 
@@ -418,8 +417,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 					     before_ht_cap,
 					     ARRAY_SIZE(before_ht_cap),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
 
@@ -490,8 +488,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 					     before_vht_cap,
 					     ARRAY_SIZE(before_vht_cap),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
 
@@ -532,8 +529,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 	/* add any remaining IEs */
 	if (extra_ies_len) {
 		noffset = extra_ies_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 	}
 
 }
@@ -575,8 +571,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
 					     before_qos,
 					     ARRAY_SIZE(before_qos),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
 
@@ -596,8 +591,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
 					     before_ht_op,
 					     ARRAY_SIZE(before_ht_op),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
 
@@ -638,8 +632,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
 	/* add any remaining IEs */
 	if (extra_ies_len) {
 		noffset = extra_ies_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 	}
 }
 
@@ -670,8 +663,7 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata,
 					     before_lnkie,
 					     ARRAY_SIZE(before_lnkie),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
 
@@ -680,8 +672,7 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata,
 	/* add any remaining IEs */
 	if (extra_ies_len) {
 		noffset = extra_ies_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		pos = skb_put_data(skb, extra_ies + offset, noffset - offset);
 	}
 }
 
@@ -696,7 +687,7 @@ ieee80211_tdls_add_chan_switch_resp_ies(struct ieee80211_sub_if_data *sdata,
 		ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
 
 	if (extra_ies_len)
-		memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+		skb_put_data(skb, extra_ies, extra_ies_len);
 }
 
 static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata,
@@ -726,8 +717,7 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata,
 	case WLAN_TDLS_TEARDOWN:
 	case WLAN_TDLS_DISCOVERY_REQUEST:
 		if (extra_ies_len)
-			memcpy(skb_put(skb, extra_ies_len), extra_ies,
-			       extra_ies_len);
+			skb_put_data(skb, extra_ies, extra_ies_len);
 		if (status_code == 0 || action_code == WLAN_TDLS_TEARDOWN)
 			ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
 		break;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1af9ed29a915..18c5d6e6305d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -903,8 +903,8 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
 		tmp->dev = skb->dev;
 
 		/* copy header and data */
-		memcpy(skb_put(tmp, hdrlen), skb->data, hdrlen);
-		memcpy(skb_put(tmp, fraglen), skb->data + pos, fraglen);
+		skb_put_data(tmp, skb->data, hdrlen);
+		skb_put_data(tmp, skb->data + pos, fraglen);
 
 		pos += fraglen;
 	}
@@ -4132,8 +4132,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 				goto out;
 
 			skb_reserve(skb, local->tx_headroom);
-			memcpy(skb_put(skb, beacon->head_len), beacon->head,
-			       beacon->head_len);
+			skb_put_data(skb, beacon->head, beacon->head_len);
 
 			ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
 						 is_template);
@@ -4147,8 +4146,8 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 			}
 
 			if (beacon->tail)
-				memcpy(skb_put(skb, beacon->tail_len),
-				       beacon->tail, beacon->tail_len);
+				skb_put_data(skb, beacon->tail,
+					     beacon->tail_len);
 		} else
 			goto out;
 	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
@@ -4171,8 +4170,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		if (!skb)
 			goto out;
 		skb_reserve(skb, local->tx_headroom);
-		memcpy(skb_put(skb, beacon->head_len), beacon->head,
-		       beacon->head_len);
+		skb_put_data(skb, beacon->head, beacon->head_len);
 
 		hdr = (struct ieee80211_hdr *) skb->data;
 		hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -4207,8 +4205,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		if (!skb)
 			goto out;
 		skb_reserve(skb, local->tx_headroom);
-		memcpy(skb_put(skb, beacon->head_len), beacon->head,
-		       beacon->head_len);
+		skb_put_data(skb, beacon->head, beacon->head_len);
 		ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
 
 		if (offs) {
@@ -4216,8 +4213,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 			offs->tim_length = skb->len - beacon->head_len;
 		}
 
-		memcpy(skb_put(skb, beacon->tail_len), beacon->tail,
-		       beacon->tail_len);
+		skb_put_data(skb, beacon->tail, beacon->tail_len);
 	} else {
 		WARN_ON(1);
 		goto out;
@@ -4337,7 +4333,7 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw,
 	if (!skb)
 		goto out;
 
-	memcpy(skb_put(skb, presp->len), presp->data, presp->len);
+	skb_put_data(skb, presp->data, presp->len);
 
 	hdr = (struct ieee80211_hdr *) skb->data;
 	memset(hdr->addr1, 0, sizeof(hdr->addr1));
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 148c7276869c..259698de569f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1252,7 +1252,7 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
 	mgmt->u.auth.status_code = cpu_to_le16(status);
 	if (extra)
-		memcpy(skb_put(skb, extra_len), extra, extra_len);
+		skb_put_data(skb, extra, extra_len);
 
 	if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
 		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
@@ -1292,8 +1292,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 		skb_reserve(skb, local->hw.extra_tx_headroom);
 
 		/* copy in frame */
-		memcpy(skb_put(skb, IEEE80211_DEAUTH_FRAME_LEN),
-		       mgmt, IEEE80211_DEAUTH_FRAME_LEN);
+		skb_put_data(skb, mgmt, IEEE80211_DEAUTH_FRAME_LEN);
 
 		if (sdata->vif.type != NL80211_IFTYPE_STATION ||
 		    !(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7586d446d7dc..bd24a975fd49 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -170,7 +170,7 @@ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
 	NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
 	NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
 
-	memcpy(skb_put(new, len), skb->data, len);
+	skb_put_data(new, skb->data, len);
 	return new;
 }
 
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index f864ce19e13d..f44f75a2a4d5 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -226,8 +226,7 @@ digital_send_dep_data_prep(struct nfc_digital_dev *ddev, struct sk_buff *skb,
 			return ERR_PTR(-ENOMEM);
 		}
 
-		memcpy(skb_put(new_skb, ddev->remote_payload_max), skb->data,
-		       ddev->remote_payload_max);
+		skb_put_data(new_skb, skb->data, ddev->remote_payload_max);
 		skb_pull(skb, ddev->remote_payload_max);
 
 		ddev->chaining_skb = skb;
@@ -277,8 +276,7 @@ digital_recv_dep_data_gather(struct nfc_digital_dev *ddev, u8 pfb,
 			ddev->chaining_skb = new_skb;
 		}
 
-		memcpy(skb_put(ddev->chaining_skb, resp->len), resp->data,
-		       resp->len);
+		skb_put_data(ddev->chaining_skb, resp->data, resp->len);
 
 		kfree_skb(resp);
 		resp = NULL;
@@ -525,7 +523,7 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev,
 
 	if (gb_len) {
 		atr_req->pp |= DIGITAL_GB_BIT;
-		memcpy(skb_put(skb, gb_len), gb, gb_len);
+		skb_put_data(skb, gb, gb_len);
 	}
 
 	digital_skb_push_dep_sod(ddev, skb);
@@ -1012,8 +1010,7 @@ static int digital_tg_send_ack(struct nfc_digital_dev *ddev,
 	if (ddev->did) {
 		dep_res->pfb |= DIGITAL_NFC_DEP_PFB_DID_BIT;
 
-		memcpy(skb_put(skb, sizeof(ddev->did)), &ddev->did,
-		       sizeof(ddev->did));
+		skb_put_data(skb, &ddev->did, sizeof(ddev->did));
 	}
 
 	ddev->curr_nfc_dep_pni =
@@ -1057,8 +1054,7 @@ static int digital_tg_send_atn(struct nfc_digital_dev *ddev)
 	if (ddev->did) {
 		dep_res->pfb |= DIGITAL_NFC_DEP_PFB_DID_BIT;
 
-		memcpy(skb_put(skb, sizeof(ddev->did)), &ddev->did,
-		       sizeof(ddev->did));
+		skb_put_data(skb, &ddev->did, sizeof(ddev->did));
 	}
 
 	digital_skb_push_dep_sod(ddev, skb);
@@ -1325,8 +1321,7 @@ int digital_tg_send_dep_res(struct nfc_digital_dev *ddev, struct sk_buff *skb)
 	if (ddev->did) {
 		dep_res->pfb |= DIGITAL_NFC_DEP_PFB_DID_BIT;
 
-		memcpy(skb_put(skb, sizeof(ddev->did)), &ddev->did,
-		       sizeof(ddev->did));
+		skb_put_data(skb, &ddev->did, sizeof(ddev->did));
 	}
 
 	ddev->curr_nfc_dep_pni =
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 2b0f0ac498d2..8741ad47a6fb 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -878,9 +878,9 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 
 		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
 			msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
-			memcpy(skb_put(hcp_skb, msg_len),
-			       frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN,
-			       msg_len);
+			skb_put_data(hcp_skb,
+				     frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN,
+				     msg_len);
 		}
 
 		skb_queue_purge(&hdev->rx_hcp_frags);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index c5959ce503e6..367d8c027101 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -298,7 +298,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
 
 	pr_debug("header 0x%x 0x%x\n", header[0], header[1]);
 
-	memcpy(skb_put(pdu, LLCP_HEADER_SIZE), header, LLCP_HEADER_SIZE);
+	skb_put_data(pdu, header, LLCP_HEADER_SIZE);
 
 	return pdu;
 }
@@ -311,7 +311,7 @@ static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv,
 	if (tlv == NULL)
 		return NULL;
 
-	memcpy(skb_put(pdu, tlv_length), tlv, tlv_length);
+	skb_put_data(pdu, tlv, tlv_length);
 
 	return pdu;
 }
@@ -549,7 +549,7 @@ int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
 		return PTR_ERR(skb);
 
 	hlist_for_each_entry_safe(sdp, n, tlv_list, node) {
-		memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len);
+		skb_put_data(skb, sdp->tlv, sdp->tlv_len);
 
 		hlist_del(&sdp->node);
 
@@ -581,8 +581,7 @@ int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
 	hlist_for_each_entry_safe(sdreq, n, tlv_list, node) {
 		pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri);
 
-		memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv,
-		       sdreq->tlv_len);
+		skb_put_data(skb, sdreq->tlv, sdreq->tlv_len);
 
 		hlist_del(&sdreq->node);
 
@@ -622,7 +621,7 @@ int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason)
 
 	skb = llcp_add_header(skb, dsap, ssap, LLCP_PDU_DM);
 
-	memcpy(skb_put(skb, 1), &reason, 1);
+	skb_put_data(skb, &reason, 1);
 
 	skb_queue_head(&local->tx_queue, skb);
 
@@ -693,7 +692,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
 		skb_put(pdu, LLCP_SEQUENCE_SIZE);
 
 		if (likely(frag_len > 0))
-			memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
+			skb_put_data(pdu, msg_ptr, frag_len);
 
 		skb_queue_tail(&sock->tx_queue, pdu);
 
@@ -759,7 +758,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
 		pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);
 
 		if (likely(frag_len > 0))
-			memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
+			skb_put_data(pdu, msg_ptr, frag_len);
 
 		/* No need to check for the peer RW for UI frames */
 		skb_queue_tail(&local->tx_queue, pdu);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index e69786c6804c..02eef5cf3cce 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1390,7 +1390,7 @@ static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb)
 			return;
 		}
 
-		memcpy(skb_put(new_skb, pdu_len), skb->data, pdu_len);
+		skb_put_data(new_skb, skb->data, pdu_len);
 
 		nfc_llcp_rx_skb(local, new_skb);
 
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 61fff422424f..17b9f1ce23db 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -462,7 +462,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
 		return -ENOMEM;
 
 	skb_reserve(skb, NCI_DATA_HDR_SIZE);
-	memcpy(skb_put(skb, data_len), data, data_len);
+	skb_put_data(skb, data, data_len);
 
 	loopback_data.conn_id = conn_id;
 	loopback_data.data = skb;
@@ -1350,7 +1350,7 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
 	nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST);
 
 	if (plen)
-		memcpy(skb_put(skb, plen), payload, plen);
+		skb_put_data(skb, payload, plen);
 
 	skb_queue_tail(&ndev->cmd_q, skb);
 	queue_work(ndev->cmd_wq, &ndev->cmd_work);
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index dbd24254412a..2488d9241f1d 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -138,7 +138,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
 		skb_reserve(skb_frag, NCI_DATA_HDR_SIZE);
 
 		/* first, copy the data */
-		memcpy(skb_put(skb_frag, frag_len), data, frag_len);
+		skb_put_data(skb_frag, data, frag_len);
 
 		/* second, set the header */
 		nci_push_data_hdr(ndev, conn_id, skb_frag,
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index a0ab26d535dc..d4a53ce818c3 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -187,7 +187,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 		*skb_push(skb, 1) = cb;
 
 		if (len > 0)
-			memcpy(skb_put(skb, len), data + i, len);
+			skb_put_data(skb, data + i, len);
 
 		r = nci_send_data(ndev, conn_info->conn_id, skb);
 		if (r < 0)
@@ -476,8 +476,9 @@ void nci_hci_data_received_cb(void *context,
 
 		skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) {
 			msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
-			memcpy(skb_put(hcp_skb, msg_len), frag_skb->data +
-			       NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len);
+			skb_put_data(hcp_skb,
+				     frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN,
+				     msg_len);
 		}
 
 		skb_queue_purge(&ndev->hci_dev->rx_hcp_frags);
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index c468eabd6943..cfa7f352c1c3 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -371,7 +371,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
 		chunk_len = nu->rx_packet_len - nu->rx_skb->len;
 		if (count < chunk_len)
 			chunk_len = count;
-		memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len);
+		skb_put_data(nu->rx_skb, data, chunk_len);
 		data += chunk_len;
 		count -= chunk_len;
 
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 825f97671591..cff679167bdc 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -239,7 +239,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 		return -ENOMEM;
 
 	skb_reset_transport_header(skb);
-	memcpy(skb_put(skb, len), data, len);
+	skb_put_data(skb, data, len);
 
 	skb_queue_tail(&node->rx_queue, skb);
 	schedule_work(&node->work);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index c339c682675a..febcc350cf00 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -469,8 +469,7 @@ merge:
 				auth = (struct sctp_auth_chunk *)
 							skb_tail_pointer(nskb);
 
-			memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data,
-			       chunk->skb->len);
+			skb_put_data(nskb, chunk->skb->data, chunk->skb->len);
 
 			pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
 				 chunk,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index aaac2660aaf7..034e916362cf 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1479,9 +1479,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 	int padlen = SCTP_PAD4(chunklen) - chunklen;
 
 	padding = skb_put_zero(chunk->skb, padlen);
-	target = skb_put(chunk->skb, len);
-
-	memcpy(target, data, len);
+	target = skb_put_data(chunk->skb, data, len);
 
 	/* Adjust the chunk length field.  */
 	chunk->chunk_hdr->length = htons(chunklen + padlen + len);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 18e24793659f..24e2054bfbaf 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -132,12 +132,10 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 		break;
 	}
 
-	t_hdr = skb_put(skb, sizeof(pkt->hdr));
-	memcpy(t_hdr, &pkt->hdr, sizeof(pkt->hdr));
+	t_hdr = skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
 
 	if (pkt->len) {
-		payload = skb_put(skb, pkt->len);
-		memcpy(payload, pkt->buf, pkt->len);
+		payload = skb_put_data(skb, pkt->buf, pkt->len);
 	}
 
 	return skb;
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 6b5af65f491f..eb466ece1730 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -188,17 +188,14 @@ void x25_write_internal(struct sock *sk, int frametype)
 			*dptr++ = X25_CALL_REQUEST;
 			len     = x25_addr_aton(addresses, &x25->dest_addr,
 						&x25->source_addr);
-			dptr    = skb_put(skb, len);
-			memcpy(dptr, addresses, len);
+			dptr = skb_put_data(skb, addresses, len);
 			len     = x25_create_facilities(facilities,
 					&x25->facilities,
 					&x25->dte_facilities,
 					x25->neighbour->global_facil_mask);
-			dptr    = skb_put(skb, len);
-			memcpy(dptr, facilities, len);
-			dptr = skb_put(skb, x25->calluserdata.cudlength);
-			memcpy(dptr, x25->calluserdata.cuddata,
-			       x25->calluserdata.cudlength);
+			dptr = skb_put_data(skb, facilities, len);
+			dptr = skb_put_data(skb, x25->calluserdata.cuddata,
+					    x25->calluserdata.cudlength);
 			x25->calluserdata.cudlength = 0;
 			break;
 
@@ -210,17 +207,15 @@ void x25_write_internal(struct sock *sk, int frametype)
 							&x25->facilities,
 							&x25->dte_facilities,
 							x25->vc_facil_mask);
-			dptr    = skb_put(skb, len);
-			memcpy(dptr, facilities, len);
+			dptr = skb_put_data(skb, facilities, len);
 
 			/* fast select with no restriction on response
 				allows call user data. Userland must
 				ensure it is ours and not theirs */
 			if(x25->facilities.reverse & 0x80) {
-				dptr = skb_put(skb,
-					x25->calluserdata.cudlength);
-				memcpy(dptr, x25->calluserdata.cuddata,
-				       x25->calluserdata.cudlength);
+				dptr = skb_put_data(skb,
+						    x25->calluserdata.cuddata,
+						    x25->calluserdata.cudlength);
 			}
 			x25->calluserdata.cudlength = 0;
 			break;
-- 
cgit v1.2.3


From 4df864c1d9afb46e2461a9f808d9f11a42d31bad Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Jun 2017 14:29:21 +0200
Subject: networking: make skb_put & friends return void pointers

It seems like a historic accident that these return unsigned char *,
and in many places that means casts are required, more often than not.

Make these functions (skb_put, __skb_put and pskb_put) return void *
and remove all the casts across the tree, adding a (u8 *) cast only
where the unsigned char pointer was used directly, all done with the
following spatch:

    @@
    expression SKB, LEN;
    typedef u8;
    identifier fn = { skb_put, __skb_put };
    @@
    - *(fn(SKB, LEN))
    + *(u8 *)fn(SKB, LEN)

    @@
    expression E, SKB, LEN;
    identifier fn = { skb_put, __skb_put };
    type T;
    @@
    - E = ((T *)(fn(SKB, LEN)))
    + E = fn(SKB, LEN)

which actually doesn't cover pskb_put since there are only three
users overall.

A handful of stragglers were converted manually, notably a macro in
drivers/isdn/i4l/isdn_bsdcomp.c and, oddly enough, one of the many
instances in net/bluetooth/hci_sock.c. In the former file, I also
had to fix one whitespace problem spatch introduced.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/atmtcp.c                               |   4 +-
 drivers/atm/solos-pci.c                            |  12 +--
 drivers/bluetooth/bluecard_cs.c                    |   2 +-
 drivers/bluetooth/bt3c_cs.c                        |   2 +-
 drivers/bluetooth/btmrvl_main.c                    |   2 +-
 drivers/bluetooth/btuart_cs.c                      |   2 +-
 drivers/bluetooth/btusb.c                          |  10 +-
 drivers/bluetooth/dtl1_cs.c                        |   4 +-
 drivers/bluetooth/hci_bcm.c                        |   6 +-
 drivers/bluetooth/hci_intel.c                      |   6 +-
 drivers/bluetooth/hci_ll.c                         |   2 +-
 drivers/bluetooth/hci_nokia.c                      |  10 +-
 drivers/bluetooth/hci_qca.c                        |   2 +-
 drivers/bluetooth/hci_vhci.c                       |   4 +-
 drivers/crypto/chelsio/chcr_algo.c                 |  10 +-
 drivers/infiniband/core/addr.c                     |   3 +-
 drivers/infiniband/core/sa_query.c                 |   3 +-
 drivers/infiniband/hw/cxgb3/cxio_hal.c             |   2 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c              |  22 ++--
 drivers/infiniband/hw/cxgb4/cm.c                   |  22 ++--
 drivers/infiniband/hw/cxgb4/cq.c                   |   4 +-
 drivers/infiniband/hw/cxgb4/mem.c                  |   4 +-
 drivers/infiniband/hw/cxgb4/qp.c                   |   8 +-
 drivers/isdn/capi/capi.c                           |   4 +-
 drivers/isdn/gigaset/asyncdata.c                   |  26 ++---
 drivers/isdn/gigaset/isocdata.c                    |   2 +-
 drivers/isdn/i4l/isdn_audio.c                      |   4 +-
 drivers/isdn/i4l/isdn_bsdcomp.c                    |   8 +-
 drivers/isdn/i4l/isdn_x25iface.c                   |   4 +-
 drivers/media/dvb-core/dvb_net.c                   |   2 +-
 drivers/media/radio/wl128x/fmdrv_common.c          |   2 +-
 drivers/net/bonding/bond_3ad.c                     |   4 +-
 drivers/net/can/dev.c                              |   4 +-
 drivers/net/ethernet/allwinner/sun4i-emac.c        |   2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c    |  12 +--
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c |   4 +-
 drivers/net/ethernet/chelsio/cxgb3/l2t.c           |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c  |   4 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   8 +-
 drivers/net/ethernet/chelsio/cxgb4/l2t.c           |   2 +-
 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h  |  10 +-
 drivers/net/ethernet/davicom/dm9000.c              |   2 +-
 drivers/net/ethernet/dnet.c                        |   2 +-
 drivers/net/ethernet/hp/hp100.c                    |   2 +-
 drivers/net/ethernet/intel/i40e/i40e_fcoe.c        |   2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c      |   2 +-
 drivers/net/ethernet/mellanox/mlx4/en_selftest.c   |   4 +-
 .../net/ethernet/mellanox/mlx5/core/en_selftest.c  |   6 +-
 drivers/net/ethernet/micrel/ks8842.c               |   4 +-
 drivers/net/ethernet/sfc/falcon/selftest.c         |   3 +-
 drivers/net/ethernet/sfc/selftest.c                |   3 +-
 drivers/net/hamradio/scc.c                         |   4 +-
 drivers/net/ppp/pppoe.c                            |   2 +-
 drivers/net/usb/cdc_ncm.c                          |   2 +-
 drivers/net/usb/net1080.c                          |   4 +-
 drivers/net/usb/zaurus.c                           |   8 +-
 drivers/net/wan/hdlc_ppp.c                         |   2 +-
 drivers/net/wireless/ath/ath6kl/debug.c            |   2 +-
 drivers/net/wireless/ath/ath6kl/htc_pipe.c         |   6 +-
 drivers/net/wireless/ath/ath9k/htc_hst.c           |   9 +-
 drivers/net/wireless/ath/wil6210/wmi.c             |   2 +-
 drivers/net/wireless/cisco/airo.c                  |   4 +-
 drivers/net/wireless/intel/ipw2x00/ipw2200.c       |   2 +-
 drivers/net/wireless/intel/ipw2x00/libipw_tx.c     |   3 +-
 drivers/net/wireless/intersil/hostap/hostap_ap.c   |   2 +-
 drivers/net/wireless/intersil/p54/fwio.c           |  43 ++++----
 drivers/net/wireless/mac80211_hwsim.c              |   8 +-
 drivers/net/wireless/marvell/mwifiex/11n_aggr.c    |   2 +-
 drivers/net/wireless/marvell/mwifiex/tdls.c        |  38 +++----
 .../net/wireless/quantenna/qtnfmac/qlink_util.h    |  11 +-
 drivers/net/wireless/ralink/rt2x00/rt2x00debug.c   |   2 +-
 .../net/wireless/realtek/rtlwifi/rtl8192se/fw.c    |   2 +-
 drivers/nfc/fdp/i2c.c                              |   2 +-
 drivers/nfc/microread/i2c.c                        |   4 +-
 drivers/nfc/microread/microread.c                  |   4 +-
 drivers/nfc/nfcmrvl/fw_dnld.c                      |   6 +-
 drivers/nfc/pn533/pn533.c                          |  32 +++---
 drivers/nfc/pn544/i2c.c                            |   6 +-
 drivers/nfc/port100.c                              |   4 +-
 drivers/nfc/st21nfca/i2c.c                         |   6 +-
 drivers/nfc/st95hf/core.c                          |   2 +-
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c                  |   2 +-
 drivers/scsi/fcoe/fcoe.c                           |   2 +-
 drivers/scsi/qedf/qedf_main.c                      |   2 +-
 drivers/staging/rtl8192e/rtl819x_BAProc.c          |  10 +-
 drivers/staging/rtl8192e/rtllib_softmac.c          |  34 +++---
 .../staging/rtl8192u/ieee80211/ieee80211_softmac.c |  21 ++--
 .../staging/rtl8192u/ieee80211/rtl819x_BAProc.c    |   8 +-
 drivers/target/iscsi/cxgbit/cxgbit_cm.c            |   8 +-
 drivers/target/iscsi/cxgbit/cxgbit_ddp.c           |   2 +-
 drivers/usb/gadget/function/f_ncm.c                |   5 +-
 include/linux/skbuff.h                             |   8 +-
 lib/nlattr.c                                       |   2 +-
 net/802/garp.c                                     |   6 +-
 net/802/mrp.c                                      |  11 +-
 net/appletalk/ddp.c                                |   2 +-
 net/atm/clip.c                                     |   2 +-
 net/batman-adv/icmp_socket.c                       |   2 +-
 net/batman-adv/tp_meter.c                          |   6 +-
 net/bluetooth/hci_request.c                        |   2 +-
 net/bluetooth/hci_sock.c                           |  12 +--
 net/bluetooth/hidp/core.c                          |   2 +-
 net/bluetooth/l2cap_core.c                         |  14 +--
 net/bluetooth/mgmt_util.c                          |  10 +-
 net/bluetooth/rfcomm/core.c                        |   2 +-
 net/core/pktgen.c                                  |  32 +++---
 net/core/skbuff.c                                  |   6 +-
 net/decnet/dn_dev.c                                |   2 +-
 net/decnet/dn_nsp_out.c                            |  18 ++--
 net/ipv4/arp.c                                     |   2 +-
 net/ipv4/igmp.c                                    |   6 +-
 net/ipv4/ipmr.c                                    |   2 +-
 net/ipv4/netfilter/ipt_SYNPROXY.c                  |  10 +-
 net/ipv4/netfilter/nf_reject_ipv4.c                |   2 +-
 net/ipv6/mcast.c                                   |   4 +-
 net/ipv6/ndisc.c                                   |   8 +-
 net/ipv6/netfilter/ip6t_SYNPROXY.c                 |  10 +-
 net/ipv6/netfilter/nf_reject_ipv6.c                |   2 +-
 net/irda/irlap_frame.c                             |  17 ++-
 net/key/af_key.c                                   | 116 +++++++++------------
 net/mac80211/cfg.c                                 |   4 +-
 net/mac80211/ht.c                                  |   2 +-
 net/mac80211/mesh.c                                |   2 +-
 net/mac80211/mesh_ps.c                             |   2 +-
 net/mac80211/sta_info.c                            |   2 +-
 net/mac80211/tdls.c                                |  10 +-
 net/mac80211/tx.c                                  |   2 +-
 net/mac80211/wpa.c                                 |   6 +-
 net/netfilter/nfnetlink_log.c                      |   2 +-
 net/netfilter/nfnetlink_queue.c                    |   2 +-
 net/netlink/af_netlink.c                           |   2 +-
 net/nfc/digital_core.c                             |   4 +-
 net/nfc/digital_dep.c                              |   2 +-
 net/nfc/digital_technology.c                       |  18 ++--
 net/nfc/hci/core.c                                 |   2 +-
 net/nfc/hci/llc_shdlc.c                            |   4 +-
 net/nfc/nci/core.c                                 |   2 +-
 net/nfc/nci/hci.c                                  |   2 +-
 net/nfc/nci/spi.c                                  |   8 +-
 net/nfc/nci/uart.c                                 |   2 +-
 net/psample/psample.c                              |   2 +-
 net/qrtr/qrtr.c                                    |   2 +-
 net/sctp/sm_make_chunk.c                           |   2 +-
 net/sctp/ulpevent.c                                |  29 ++----
 net/vmw_vsock/virtio_transport_common.c            |   2 +-
 145 files changed, 486 insertions(+), 547 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 3ef6253e1cce..56fa16c85ebf 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -60,7 +60,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type,
 		return -EUNATCH;
 	}
 	atm_force_charge(out_vcc,skb->truesize);
-	new_msg = (struct atmtcp_control *) skb_put(skb,sizeof(*new_msg));
+	new_msg = skb_put(skb, sizeof(*new_msg));
 	*new_msg = *msg;
 	new_msg->hdr.length = ATMTCP_HDR_MAGIC;
 	new_msg->type = type;
@@ -217,7 +217,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 		atomic_inc(&vcc->stats->tx_err);
 		return -ENOBUFS;
 	}
-	hdr = (void *) skb_put(new_skb,sizeof(struct atmtcp_hdr));
+	hdr = skb_put(new_skb, sizeof(struct atmtcp_hdr));
 	hdr->vpi = htons(vcc->vpi);
 	hdr->vci = htons(vcc->vci);
 	hdr->length = htonl(skb->len);
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 077dd15c3a40..4fc99ae1c534 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -205,7 +205,7 @@ static ssize_t solos_param_show(struct device *dev, struct device_attribute *att
 		return -ENOMEM;
 	}
 
-	header = (void *)skb_put(skb, sizeof(*header));
+	header = skb_put(skb, sizeof(*header));
 
 	buflen = snprintf((void *)&header[1], buflen - 1,
 			  "L%05d\n%s\n", current->pid, attr->attr.name);
@@ -261,7 +261,7 @@ static ssize_t solos_param_store(struct device *dev, struct device_attribute *at
 		return -ENOMEM;
 	}
 
-	header = (void *)skb_put(skb, sizeof(*header));
+	header = skb_put(skb, sizeof(*header));
 
 	buflen = snprintf((void *)&header[1], buflen - 1,
 			  "L%05d\n%s\n%s\n", current->pid, attr->attr.name, buf);
@@ -486,7 +486,7 @@ static int send_command(struct solos_card *card, int dev, const char *buf, size_
 		return 0;
 	}
 
-	header = (void *)skb_put(skb, sizeof(*header));
+	header = skb_put(skb, sizeof(*header));
 
 	header->size = cpu_to_le16(size);
 	header->vpi = cpu_to_le16(0);
@@ -945,7 +945,7 @@ static int popen(struct atm_vcc *vcc)
 			dev_warn(&card->dev->dev, "Failed to allocate sk_buff in popen()\n");
 		return -ENOMEM;
 	}
-	header = (void *)skb_put(skb, sizeof(*header));
+	header = skb_put(skb, sizeof(*header));
 
 	header->size = cpu_to_le16(0);
 	header->vpi = cpu_to_le16(vcc->vpi);
@@ -982,7 +982,7 @@ static void pclose(struct atm_vcc *vcc)
 		dev_warn(&card->dev->dev, "Failed to allocate sk_buff in pclose()\n");
 		return;
 	}
-	header = (void *)skb_put(skb, sizeof(*header));
+	header = skb_put(skb, sizeof(*header));
 
 	header->size = cpu_to_le16(0);
 	header->vpi = cpu_to_le16(vcc->vpi);
@@ -1398,7 +1398,7 @@ static int atm_init(struct solos_card *card, struct device *parent)
 			continue;
 		}
 
-		header = (void *)skb_put(skb, sizeof(*header));
+		header = skb_put(skb, sizeof(*header));
 
 		header->size = cpu_to_le16(0);
 		header->vpi = cpu_to_le16(0);
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 1d30c116b2ee..39a05b0c8998 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -448,7 +448,7 @@ static void bluecard_receive(struct bluecard_info *info,
 
 		} else {
 
-			*skb_put(info->rx_skb, 1) = buf[i];
+			*(u8 *)skb_put(info->rx_skb, 1) = buf[i];
 			info->rx_count--;
 
 			if (info->rx_count == 0) {
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 8165ef2fe877..be2d431aa366 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -282,7 +282,7 @@ static void bt3c_receive(struct bt3c_info *info)
 
 			__u8 x = inb(iobase + DATA_L);
 
-			*skb_put(info->rx_skb, 1) = x;
+			*(u8 *)skb_put(info->rx_skb, 1) = x;
 			inb(iobase + DATA_H);
 			info->rx_count--;
 
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index 24a188eab360..8d3d9175d891 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -189,7 +189,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
 		return -ENOMEM;
 	}
 
-	hdr = (struct hci_command_hdr *)skb_put(skb, HCI_COMMAND_HDR_SIZE);
+	hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE);
 	hdr->opcode = cpu_to_le16(opcode);
 	hdr->plen = len;
 
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index 9624b29f8349..80b64e9684a3 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -233,7 +233,7 @@ static void btuart_receive(struct btuart_info *info)
 
 		} else {
 
-			*skb_put(info->rx_skb, 1) = inb(iobase + UART_RX);
+			*(u8 *)skb_put(info->rx_skb, 1) = inb(iobase + UART_RX);
 			info->rx_count--;
 
 			if (info->rx_count == 0) {
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index c7ea398e65c1..ba207c787605 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -1836,15 +1836,15 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
 	if (!skb)
 		return -ENOMEM;
 
-	hdr = (struct hci_event_hdr *)skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	hdr->evt = HCI_EV_CMD_COMPLETE;
 	hdr->plen = sizeof(*evt) + 1;
 
-	evt = (struct hci_ev_cmd_complete *)skb_put(skb, sizeof(*evt));
+	evt = skb_put(skb, sizeof(*evt));
 	evt->ncmd = 0x01;
 	evt->opcode = cpu_to_le16(opcode);
 
-	*skb_put(skb, 1) = 0x00;
+	*(u8 *)skb_put(skb, 1) = 0x00;
 
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 
@@ -2767,8 +2767,8 @@ static struct urb *alloc_diag_urb(struct hci_dev *hdev, bool enable)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	*skb_put(skb, 1) = 0xf0;
-	*skb_put(skb, 1) = enable;
+	*(u8 *)skb_put(skb, 1) = 0xf0;
+	*(u8 *)skb_put(skb, 1) = enable;
 
 	pipe = usb_sndbulkpipe(data->udev, data->diag_tx_ep->bEndpointAddress);
 
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 6317c6f323bf..6c5a3aa566a4 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -226,7 +226,7 @@ static void dtl1_receive(struct dtl1_info *info)
 			}
 		}
 
-		*skb_put(info->rx_skb, 1) = inb(iobase + UART_RX);
+		*(u8 *)skb_put(info->rx_skb, 1) = inb(iobase + UART_RX);
 		nsh = (struct nsh *)info->rx_skb->data;
 
 		info->rx_count--;
@@ -414,7 +414,7 @@ static int dtl1_hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 	skb_reserve(s, NSHL);
 	skb_copy_from_linear_data(skb, skb_put(s, skb->len), skb->len);
 	if (skb->len & 0x0001)
-		*skb_put(s, 1) = 0;	/* PAD */
+		*(u8 *)skb_put(s, 1) = 0;	/* PAD */
 
 	/* Prepend skb with Nokia frame header and queue */
 	memcpy(skb_push(s, NSHL), &nsh, NSHL);
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index e2096c7803b3..c1c4048ee37d 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -262,9 +262,9 @@ static int bcm_set_diag(struct hci_dev *hdev, bool enable)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = BCM_LM_DIAG_PKT;
-	*skb_put(skb, 1) = 0xf0;
-	*skb_put(skb, 1) = enable;
+	*(u8 *)skb_put(skb, 1) = BCM_LM_DIAG_PKT;
+	*(u8 *)skb_put(skb, 1) = 0xf0;
+	*(u8 *)skb_put(skb, 1) = enable;
 
 	skb_queue_tail(&bcm->txq, skb);
 	hci_uart_tx_wakeup(hu);
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index 16e728577cd8..ee97c465e32e 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -462,15 +462,15 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
 	if (!skb)
 		return -ENOMEM;
 
-	hdr = (struct hci_event_hdr *)skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	hdr->evt = HCI_EV_CMD_COMPLETE;
 	hdr->plen = sizeof(*evt) + 1;
 
-	evt = (struct hci_ev_cmd_complete *)skb_put(skb, sizeof(*evt));
+	evt = skb_put(skb, sizeof(*evt));
 	evt->ncmd = 0x01;
 	evt->opcode = cpu_to_le16(opcode);
 
-	*skb_put(skb, 1) = 0x00;
+	*(u8 *)skb_put(skb, 1) = 0x00;
 
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index cc2fa78b434e..c982943f0747 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -120,7 +120,7 @@ static int send_hcill_cmd(u8 cmd, struct hci_uart *hu)
 	}
 
 	/* prepare packet */
-	hcill_packet = (struct hcill_cmd *) skb_put(skb, 1);
+	hcill_packet = skb_put(skb, 1);
 	hcill_packet->cmd = cmd;
 
 	/* send packet */
diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index a7d687d8d456..c1b081725b2c 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -246,9 +246,9 @@ static int nokia_send_alive_packet(struct hci_uart *hu)
 	hci_skb_pkt_type(skb) = HCI_NOKIA_ALIVE_PKT;
 	memset(skb->data, 0x00, len);
 
-	hdr = (struct hci_nokia_alive_hdr *)skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	hdr->dlen = sizeof(*pkt);
-	pkt = (struct hci_nokia_alive_pkt *)skb_put(skb, sizeof(*pkt));
+	pkt = skb_put(skb, sizeof(*pkt));
 	pkt->mid = NOKIA_ALIVE_REQ;
 
 	nokia_enqueue(hu, skb);
@@ -285,10 +285,10 @@ static int nokia_send_negotiation(struct hci_uart *hu)
 
 	hci_skb_pkt_type(skb) = HCI_NOKIA_NEG_PKT;
 
-	neg_hdr = (struct hci_nokia_neg_hdr *)skb_put(skb, sizeof(*neg_hdr));
+	neg_hdr = skb_put(skb, sizeof(*neg_hdr));
 	neg_hdr->dlen = sizeof(*neg_cmd);
 
-	neg_cmd = (struct hci_nokia_neg_cmd *)skb_put(skb, sizeof(*neg_cmd));
+	neg_cmd = skb_put(skb, sizeof(*neg_cmd));
 	neg_cmd->ack = NOKIA_NEG_REQ;
 	neg_cmd->baud = cpu_to_le16(baud);
 	neg_cmd->unused1 = 0x0000;
@@ -532,7 +532,7 @@ static int nokia_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 		err = skb_pad(skb, 1);
 		if (err)
 			return err;
-		*skb_put(skb, 1) = 0x00;
+		*(u8 *)skb_put(skb, 1) = 0x00;
 	}
 
 	skb_queue_tail(&btdev->txq, skb);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index b55f01320631..e2c88515340a 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -215,7 +215,7 @@ static int send_hci_ibs_cmd(u8 cmd, struct hci_uart *hu)
 	}
 
 	/* Assign HCI_IBS type */
-	*skb_put(skb, 1) = cmd;
+	*(u8 *)skb_put(skb, 1) = cmd;
 
 	skb_queue_tail(&qca->txq, skb);
 
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 233e850fdac7..1ef9c427a2d8 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -146,8 +146,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
 
 	hci_skb_pkt_type(skb) = HCI_VENDOR_PKT;
 
-	*skb_put(skb, 1) = 0xff;
-	*skb_put(skb, 1) = opcode;
+	*(u8 *)skb_put(skb, 1) = 0xff;
+	*(u8 *)skb_put(skb, 1) = opcode;
 	put_unaligned_le16(hdev->id, skb_put(skb, 2));
 	skb_queue_tail(&data->readq, skb);
 
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index f00e0d8bd039..92185ab6797d 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -604,7 +604,7 @@ static struct sk_buff
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
-	chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
+	chcr_req = __skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 	chcr_req->sec_cpl.op_ivinsrtofst =
 		FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1);
@@ -881,7 +881,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 		return skb;
 
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
-	chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
+	chcr_req = __skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 
 	chcr_req->sec_cpl.op_ivinsrtofst =
@@ -1447,7 +1447,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
 	/* Write WR */
-	chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len);
+	chcr_req = __skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 
 	stop_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
@@ -1779,7 +1779,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
-	chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len);
+	chcr_req = __skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 
 	fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, op_type, ctx);
@@ -1892,7 +1892,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	/* NIC driver is going to write the sge hdr. */
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
-	chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
+	chcr_req = __skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 02971e239a18..d2957b38575f 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -179,8 +179,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
 	}
 
 	/* Construct the family header first */
-	header = (struct rdma_ls_ip_resolve_header *)
-		skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
 	header->ifindex = dev_addr->bound_dev_if;
 	nla_put(skb, attrtype, size, daddr);
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index fb7aec4047c8..70fa4cabe48e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -759,8 +759,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
 	query->mad_buf->context[1] = NULL;
 
 	/* Construct the family header first */
-	header = (struct rdma_ls_resolve_header *)
-		skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
 	memcpy(header->device_name, query->port->agent->device->name,
 	       LS_DEVICE_NAME_MAX);
 	header->port_num = query->port->port_num;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 97f7f9544e70..3eff6541bd6f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -835,7 +835,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
 	if (!skb)
 		return -ENOMEM;
 	pr_debug("%s rdev_p %p\n", __func__, rdev_p);
-	wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
+	wqe = __skb_put(skb, sizeof(*wqe));
 	wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
 	wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
 					   V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index f4c23a74f18c..9ae518c01bc2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -175,7 +175,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
 	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
 	if (!skb)
 		return;
-	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
 	skb->priority = CPL_PRIORITY_SETUP;
@@ -190,7 +190,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep)
 
 	if (!skb)
 		return -ENOMEM;
-	req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
@@ -211,7 +211,7 @@ int iwch_resume_tid(struct iwch_ep *ep)
 
 	if (!skb)
 		return -ENOMEM;
-	req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
@@ -398,7 +398,7 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
 	}
 	skb->priority = CPL_PRIORITY_DATA;
 	set_arp_failure_handler(skb, arp_failure_discard);
-	req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
 	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
@@ -455,7 +455,7 @@ static int send_connect(struct iwch_ep *ep)
 	skb->priority = CPL_PRIORITY_SETUP;
 	set_arp_failure_handler(skb, act_open_req_arp_failure);
 
-	req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
 	req->local_port = ep->com.local_addr.sin_port;
@@ -546,7 +546,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 		return -ENOMEM;
 	}
 	skb_reserve(skb, sizeof(*req));
-	mpa = (struct mpa_message *) skb_put(skb, mpalen);
+	mpa = skb_put(skb, mpalen);
 	memset(mpa, 0, sizeof(*mpa));
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = MPA_REJECT;
@@ -596,7 +596,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	}
 	skb->priority = CPL_PRIORITY_DATA;
 	skb_reserve(skb, sizeof(*req));
-	mpa = (struct mpa_message *) skb_put(skb, mpalen);
+	mpa = skb_put(skb, mpalen);
 	memset(mpa, 0, sizeof(*mpa));
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
@@ -800,7 +800,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits)
 		return 0;
 	}
 
-	req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
 	req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
@@ -1205,7 +1205,7 @@ static int listen_start(struct iwch_listen_ep *ep)
 		return -ENOMEM;
 	}
 
-	req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
 	req->local_port = ep->com.local_addr.sin_port;
@@ -1246,7 +1246,7 @@ static int listen_stop(struct iwch_listen_ep *ep)
 		pr_err("%s - failed to alloc skb\n", __func__);
 		return -ENOMEM;
 	}
-	req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	req->cpu_idx = 0;
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
@@ -1614,7 +1614,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		goto out;
 	}
 	rpl_skb->priority = CPL_PRIORITY_DATA;
-	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
+	rpl = skb_put(rpl_skb, sizeof(*rpl));
 	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
 	rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 7c32a7c7977d..36ae3023e703 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -597,7 +597,7 @@ static int send_flowc(struct c4iw_ep *ep)
 	else
 		nparams = 9;
 
-	flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
+	flowc = __skb_put(skb, FLOWC_LEN);
 
 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 					   FW_FLOWC_WR_NPARAMS_V(nparams));
@@ -787,18 +787,16 @@ static int send_connect(struct c4iw_ep *ep)
 	if (ep->com.remote_addr.ss_family == AF_INET) {
 		switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 		case CHELSIO_T4:
-			req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
+			req = skb_put(skb, wrlen);
 			INIT_TP_WR(req, 0);
 			break;
 		case CHELSIO_T5:
-			t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
-					wrlen);
+			t5req = skb_put(skb, wrlen);
 			INIT_TP_WR(t5req, 0);
 			req = (struct cpl_act_open_req *)t5req;
 			break;
 		case CHELSIO_T6:
-			t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
-					wrlen);
+			t6req = skb_put(skb, wrlen);
 			INIT_TP_WR(t6req, 0);
 			req = (struct cpl_act_open_req *)t6req;
 			t5req = (struct cpl_t5_act_open_req *)t6req;
@@ -839,18 +837,16 @@ static int send_connect(struct c4iw_ep *ep)
 	} else {
 		switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 		case CHELSIO_T4:
-			req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
+			req6 = skb_put(skb, wrlen);
 			INIT_TP_WR(req6, 0);
 			break;
 		case CHELSIO_T5:
-			t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
-					wrlen);
+			t5req6 = skb_put(skb, wrlen);
 			INIT_TP_WR(t5req6, 0);
 			req6 = (struct cpl_act_open_req6 *)t5req6;
 			break;
 		case CHELSIO_T6:
-			t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
-					wrlen);
+			t6req6 = skb_put(skb, wrlen);
 			INIT_TP_WR(t6req6, 0);
 			req6 = (struct cpl_act_open_req6 *)t6req6;
 			t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
@@ -1904,7 +1900,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
 	int win;
 
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-	req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	memset(req, 0, sizeof(*req));
 	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
 	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
@@ -3807,7 +3803,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
 	req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
 	if (!req_skb)
 		return;
-	req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
+	req = __skb_put(req_skb, sizeof(*req));
 	memset(req, 0, sizeof(*req));
 	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
 	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 14de5bde1b63..394cfe2625fe 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -44,7 +44,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	wr_len = sizeof *res_wr + sizeof *res;
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	res_wr = __skb_put(skb, wr_len);
 	memset(res_wr, 0, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
@@ -114,7 +114,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	res_wr = __skb_put(skb, wr_len);
 	memset(res_wr, 0, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 3ee7f43e419a..ca992e4b66e4 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -81,7 +81,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
+	req = __skb_put(skb, wr_len);
 	memset(req, 0, wr_len);
 	INIT_ULPTX_WR(req, wr_len, 0, 0);
 	req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
@@ -142,7 +142,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
 		}
 		set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-		req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
+		req = __skb_put(skb, wr_len);
 		memset(req, 0, wr_len);
 		INIT_ULPTX_WR(req, wr_len, 0, 0);
 
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 8e4154b4253e..b23a0b057347 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -293,7 +293,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	res_wr = __skb_put(skb, wr_len);
 	memset(res_wr, 0, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
@@ -1228,7 +1228,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
 
 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
 
-	wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+	wqe = __skb_put(skb, sizeof(*wqe));
 	memset(wqe, 0, sizeof *wqe);
 	wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
 	wqe->flowid_len16 = cpu_to_be32(
@@ -1350,7 +1350,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 
-	wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+	wqe = __skb_put(skb, sizeof(*wqe));
 	memset(wqe, 0, sizeof *wqe);
 	wqe->op_compl = cpu_to_be32(
 		FW_WR_OP_V(FW_RI_INIT_WR) |
@@ -1419,7 +1419,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	}
 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
 
-	wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+	wqe = __skb_put(skb, sizeof(*wqe));
 	memset(wqe, 0, sizeof *wqe);
 	wqe->op_compl = cpu_to_be32(
 		FW_WR_OP_V(FW_RI_INIT_WR) |
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 77be17590866..96f586d34d2d 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1082,7 +1082,7 @@ static int capinc_tty_put_char(struct tty_struct *tty, unsigned char ch)
 	skb = mp->outskb;
 	if (skb) {
 		if (skb_tailroom(skb) > 0) {
-			*(skb_put(skb, 1)) = ch;
+			*(u8 *)skb_put(skb, 1) = ch;
 			goto unlock_out;
 		}
 		mp->outskb = NULL;
@@ -1094,7 +1094,7 @@ static int capinc_tty_put_char(struct tty_struct *tty, unsigned char ch)
 	skb = alloc_skb(CAPI_DATA_B3_REQ_LEN + CAPI_MAX_BLKSIZE, GFP_ATOMIC);
 	if (skb) {
 		skb_reserve(skb, CAPI_DATA_B3_REQ_LEN);
-		*(skb_put(skb, 1)) = ch;
+		*(u8 *)skb_put(skb, 1) = ch;
 		mp->outskb = skb;
 	} else {
 		printk(KERN_ERR "capinc_put_char: char %u lost\n", ch);
diff --git a/drivers/isdn/gigaset/asyncdata.c b/drivers/isdn/gigaset/asyncdata.c
index c90dca5abeac..03ac9fbfe318 100644
--- a/drivers/isdn/gigaset/asyncdata.c
+++ b/drivers/isdn/gigaset/asyncdata.c
@@ -264,7 +264,7 @@ byte_stuff:
 				/* skip remainder of packet */
 				bcs->rx_skb = skb = NULL;
 			} else {
-				*__skb_put(skb, 1) = c;
+				*(u8 *)__skb_put(skb, 1) = c;
 				fcs = crc_ccitt_byte(fcs, c);
 			}
 		}
@@ -315,7 +315,7 @@ static unsigned iraw_loop(unsigned numbytes, struct inbuf_t *inbuf)
 
 		/* regular data byte: append to current skb */
 		inputstate |= INS_have_data;
-		*__skb_put(skb, 1) = bitrev8(c);
+		*(u8 *)__skb_put(skb, 1) = bitrev8(c);
 	}
 
 	/* pass data up */
@@ -492,33 +492,33 @@ static struct sk_buff *HDLC_Encode(struct sk_buff *skb)
 	hdlc_skb->mac_len = skb->mac_len;
 
 	/* Add flag sequence in front of everything.. */
-	*(skb_put(hdlc_skb, 1)) = PPP_FLAG;
+	*(u8 *)skb_put(hdlc_skb, 1) = PPP_FLAG;
 
 	/* Perform byte stuffing while copying data. */
 	while (skb->len--) {
 		if (muststuff(*skb->data)) {
-			*(skb_put(hdlc_skb, 1)) = PPP_ESCAPE;
-			*(skb_put(hdlc_skb, 1)) = (*skb->data++) ^ PPP_TRANS;
+			*(u8 *)skb_put(hdlc_skb, 1) = PPP_ESCAPE;
+			*(u8 *)skb_put(hdlc_skb, 1) = (*skb->data++) ^ PPP_TRANS;
 		} else
-			*(skb_put(hdlc_skb, 1)) = *skb->data++;
+			*(u8 *)skb_put(hdlc_skb, 1) = *skb->data++;
 	}
 
 	/* Finally add FCS (byte stuffed) and flag sequence */
 	c = (fcs & 0x00ff);	/* least significant byte first */
 	if (muststuff(c)) {
-		*(skb_put(hdlc_skb, 1)) = PPP_ESCAPE;
+		*(u8 *)skb_put(hdlc_skb, 1) = PPP_ESCAPE;
 		c ^= PPP_TRANS;
 	}
-	*(skb_put(hdlc_skb, 1)) = c;
+	*(u8 *)skb_put(hdlc_skb, 1) = c;
 
 	c = ((fcs >> 8) & 0x00ff);
 	if (muststuff(c)) {
-		*(skb_put(hdlc_skb, 1)) = PPP_ESCAPE;
+		*(u8 *)skb_put(hdlc_skb, 1) = PPP_ESCAPE;
 		c ^= PPP_TRANS;
 	}
-	*(skb_put(hdlc_skb, 1)) = c;
+	*(u8 *)skb_put(hdlc_skb, 1) = c;
 
-	*(skb_put(hdlc_skb, 1)) = PPP_FLAG;
+	*(u8 *)skb_put(hdlc_skb, 1) = PPP_FLAG;
 
 	dev_kfree_skb_any(skb);
 	return hdlc_skb;
@@ -561,8 +561,8 @@ static struct sk_buff *iraw_encode(struct sk_buff *skb)
 	while (len--) {
 		c = bitrev8(*cp++);
 		if (c == DLE_FLAG)
-			*(skb_put(iraw_skb, 1)) = c;
-		*(skb_put(iraw_skb, 1)) = c;
+			*(u8 *)skb_put(iraw_skb, 1) = c;
+		*(u8 *)skb_put(iraw_skb, 1) = c;
 	}
 	dev_kfree_skb_any(skb);
 	return iraw_skb;
diff --git a/drivers/isdn/gigaset/isocdata.c b/drivers/isdn/gigaset/isocdata.c
index bc29f1d52a2f..74e250664ce9 100644
--- a/drivers/isdn/gigaset/isocdata.c
+++ b/drivers/isdn/gigaset/isocdata.c
@@ -511,7 +511,7 @@ static inline void hdlc_putbyte(unsigned char c, struct bc_state *bcs)
 		bcs->rx_skb = NULL;
 		return;
 	}
-	*__skb_put(bcs->rx_skb, 1) = c;
+	*(u8 *)__skb_put(bcs->rx_skb, 1) = c;
 }
 
 /* hdlc_flush
diff --git a/drivers/isdn/i4l/isdn_audio.c b/drivers/isdn/i4l/isdn_audio.c
index 78ce42214713..b6bcd1eca128 100644
--- a/drivers/isdn/i4l/isdn_audio.c
+++ b/drivers/isdn/i4l/isdn_audio.c
@@ -462,7 +462,7 @@ isdn_audio_goertzel(int *sample, modem_info *info)
 		       info->line);
 		return;
 	}
-	result = (int *) skb_put(skb, sizeof(int) * NCOEFF);
+	result = skb_put(skb, sizeof(int) * NCOEFF);
 	for (k = 0; k < NCOEFF; k++) {
 		sk = sk1 = sk2 = 0;
 		for (n = 0; n < DTMF_NPOINTS; n++) {
@@ -672,7 +672,7 @@ isdn_audio_put_dle_code(modem_info *info, u_char code)
 		       info->line);
 		return;
 	}
-	p = (char *) skb_put(skb, 2);
+	p = skb_put(skb, 2);
 	p[0] = 0x10;
 	p[1] = code;
 	ISDN_AUDIO_SKB_DLECOUNT(skb) = 0;
diff --git a/drivers/isdn/i4l/isdn_bsdcomp.c b/drivers/isdn/i4l/isdn_bsdcomp.c
index 8837ac5a492d..6ade0916da4e 100644
--- a/drivers/isdn/i4l/isdn_bsdcomp.c
+++ b/drivers/isdn/i4l/isdn_bsdcomp.c
@@ -472,7 +472,7 @@ static int bsd_compress(void *state, struct sk_buff *skb_in, struct sk_buff *skb
 		accm |= ((ent) << bitno);				\
 		do	{						\
 			if (skb_out && skb_tailroom(skb_out) > 0)	\
-				*(skb_put(skb_out, 1)) = (unsigned char)(accm >> 24); \
+				*(u8 *)skb_put(skb_out, 1) = (u8)(accm >> 24); \
 			accm <<= 8;					\
 			bitno += 8;					\
 		} while (bitno <= 24);					\
@@ -602,7 +602,7 @@ static int bsd_compress(void *state, struct sk_buff *skb_in, struct sk_buff *skb
 	 * Do not emit a completely useless byte of ones.
 	 */
 	if (bitno < 32 && skb_out && skb_tailroom(skb_out) > 0)
-		*(skb_put(skb_out, 1)) = (unsigned char)((accm | (0xff << (bitno - 8))) >> 24);
+		*(u8 *)skb_put(skb_out, 1) = (unsigned char)((accm | (0xff << (bitno - 8))) >> 24);
 
 	/*
 	 * Increase code size if we would have without the packet
@@ -698,7 +698,7 @@ static int bsd_decompress(void *state, struct sk_buff *skb_in, struct sk_buff *s
 	db->bytes_out += ilen;
 
 	if (skb_tailroom(skb_out) > 0)
-		*(skb_put(skb_out, 1)) = 0;
+		*(u8 *)skb_put(skb_out, 1) = 0;
 	else
 		return DECOMP_ERR_NOMEM;
 
@@ -816,7 +816,7 @@ static int bsd_decompress(void *state, struct sk_buff *skb_in, struct sk_buff *s
 #endif
 
 		if (extra)		/* the KwKwK case again */
-			*(skb_put(skb_out, 1)) = finchar;
+			*(u8 *)skb_put(skb_out, 1) = finchar;
 
 		/*
 		 * If not first code in a packet, and
diff --git a/drivers/isdn/i4l/isdn_x25iface.c b/drivers/isdn/i4l/isdn_x25iface.c
index ba60076e0b95..e33fa3073f74 100644
--- a/drivers/isdn/i4l/isdn_x25iface.c
+++ b/drivers/isdn/i4l/isdn_x25iface.c
@@ -224,7 +224,7 @@ static int isdn_x25iface_connect_ind(struct concap_proto *cprot)
 
 	skb = dev_alloc_skb(1);
 	if (skb) {
-		*(skb_put(skb, 1)) = X25_IFACE_CONNECT;
+		*(u8 *)skb_put(skb, 1) = X25_IFACE_CONNECT;
 		skb->protocol = x25_type_trans(skb, cprot->net_dev);
 		netif_rx(skb);
 		return 0;
@@ -253,7 +253,7 @@ static int isdn_x25iface_disconn_ind(struct concap_proto *cprot)
 	*state_p = WAN_DISCONNECTED;
 	skb = dev_alloc_skb(1);
 	if (skb) {
-		*(skb_put(skb, 1)) = X25_IFACE_DISCONNECT;
+		*(u8 *)skb_put(skb, 1) = X25_IFACE_DISCONNECT;
 		skb->protocol = x25_type_trans(skb, cprot->net_dev);
 		netif_rx(skb);
 		return 0;
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index bbaf0a8cae8b..06b0dcc13695 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -963,7 +963,7 @@ static void dvb_net_sec(struct net_device *dev,
 	skb->dev = dev;
 
 	/* copy L3 payload */
-	eth = (u8 *) skb_put(skb, pkt_len - 12 - 4 + 14 - snap);
+	eth = skb_put(skb, pkt_len - 12 - 4 + 14 - snap);
 	memcpy(eth + 14, pkt + 12 + snap, pkt_len - 12 - 4 - snap);
 
 	/* create ethernet header: */
diff --git a/drivers/media/radio/wl128x/fmdrv_common.c b/drivers/media/radio/wl128x/fmdrv_common.c
index c67e055a12c9..ab3428bf63fe 100644
--- a/drivers/media/radio/wl128x/fmdrv_common.c
+++ b/drivers/media/radio/wl128x/fmdrv_common.c
@@ -416,7 +416,7 @@ static int fm_send_cmd(struct fmdev *fmdev, u8 fm_op, u16 type,	void *payload,
 	if (!test_bit(FM_FW_DW_INPROGRESS, &fmdev->flag) ||
 			test_bit(FM_INTTASK_RUNNING, &fmdev->flag)) {
 		/* Fill command header info */
-		hdr = (struct fm_cmd_msg_hdr *)skb_put(skb, FM_CMD_MSG_HDR_SIZE);
+		hdr = skb_put(skb, FM_CMD_MSG_HDR_SIZE);
 		hdr->hdr = FM_PKT_LOGICAL_CHAN_NUMBER;	/* 0x08 */
 
 		/* 3 (fm_opcode,rd_wr,dlen) + payload len) */
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 5427032aa05e..f43fb2f958a5 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -857,7 +857,7 @@ static int ad_lacpdu_send(struct port *port)
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
 
-	lacpdu_header = (struct lacpdu_header *)skb_put(skb, length);
+	lacpdu_header = skb_put(skb, length);
 
 	ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr);
 	/* Note: source address is set to be the member's PERMANENT address,
@@ -899,7 +899,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
 	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
-	marker_header = (struct bond_marker_header *)skb_put(skb, length);
+	marker_header = skb_put(skb, length);
 
 	ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr);
 	/* Note: source address is set to be the member's PERMANENT address,
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index ae4ed03dc642..a3011c001080 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -648,7 +648,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 	can_skb_prv(skb)->skbcnt = 0;
 
-	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	*cf = skb_put(skb, sizeof(struct can_frame));
 	memset(*cf, 0, sizeof(struct can_frame));
 
 	return skb;
@@ -677,7 +677,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 	can_skb_prv(skb)->skbcnt = 0;
 
-	*cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame));
+	*cfd = skb_put(skb, sizeof(struct canfd_frame));
 	memset(*cfd, 0, sizeof(struct canfd_frame));
 
 	return skb;
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index c8f4d26fc9d4..3143de45baaa 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -633,7 +633,7 @@ static void emac_rx(struct net_device *dev)
 			if (!skb)
 				continue;
 			skb_reserve(skb, 2);
-			rdptr = (u8 *) skb_put(skb, rxlen - 4);
+			rdptr = skb_put(skb, rxlen - 4);
 
 			/* Read received packet from RX SRAM */
 			if (netif_msg_rx_status(db))
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 2ff6bd139c96..e1a50c87c9a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -471,7 +471,7 @@ static int init_tp_parity(struct adapter *adap)
 		if (!skb)
 			goto alloc_skb_fail;
 
-		req = (struct cpl_smt_write_req *)__skb_put(skb, sizeof(*req));
+		req = __skb_put(skb, sizeof(*req));
 		memset(req, 0, sizeof(*req));
 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
@@ -495,7 +495,7 @@ static int init_tp_parity(struct adapter *adap)
 		if (!skb)
 			goto alloc_skb_fail;
 
-		req = (struct cpl_l2t_write_req *)__skb_put(skb, sizeof(*req));
+		req = __skb_put(skb, sizeof(*req));
 		memset(req, 0, sizeof(*req));
 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
@@ -518,7 +518,7 @@ static int init_tp_parity(struct adapter *adap)
 		if (!skb)
 			goto alloc_skb_fail;
 
-		req = (struct cpl_rte_write_req *)__skb_put(skb, sizeof(*req));
+		req = __skb_put(skb, sizeof(*req));
 		memset(req, 0, sizeof(*req));
 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
@@ -538,7 +538,7 @@ static int init_tp_parity(struct adapter *adap)
 	if (!skb)
 		goto alloc_skb_fail;
 
-	greq = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*greq));
+	greq = __skb_put(skb, sizeof(*greq));
 	memset(greq, 0, sizeof(*greq));
 	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
@@ -909,7 +909,7 @@ static int write_smt_entry(struct adapter *adapter, int idx)
 	if (!skb)
 		return -ENOMEM;
 
-	req = (struct cpl_smt_write_req *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
 	req->mtu_idx = NMTUS - 1;	/* should be 0 but there's a T3 bug */
@@ -952,7 +952,7 @@ static int send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
 	if (!skb)
 		return -ENOMEM;
 
-	req = (struct mngt_pktsched_wr *)skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
 	req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
 	req->sched = sched;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index fa81445e334c..50cd660732c5 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -552,7 +552,7 @@ static inline void mk_tid_release(struct sk_buff *skb, unsigned int tid)
 	struct cpl_tid_release *req;
 
 	skb->priority = CPL_PRIORITY_SETUP;
-	req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
 }
@@ -1096,7 +1096,7 @@ static void set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
 		return;
 	}
 	skb->priority = CPL_PRIORITY_CONTROL;
-	req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req));
+	req = skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
 	req->reply = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
index 26264125865f..248e40c6966c 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
@@ -96,7 +96,7 @@ static int setup_l2e_send_pending(struct t3cdev *dev, struct sk_buff *skb,
 			return -ENOMEM;
 	}
 
-	req = (struct cpl_l2t_write_req *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
 	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 10736738ff30..a0fab65e80e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -190,7 +190,7 @@ static int del_filter_wr(struct adapter *adapter, int fidx)
 	if (!skb)
 		return -ENOMEM;
 
-	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
+	fwr = __skb_put(skb, len);
 	t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
 
 	/* Mark the filter as "pending" and ship off the Filter Work Request.
@@ -231,7 +231,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
 		}
 	}
 
-	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
+	fwr = __skb_put(skb, sizeof(*fwr));
 	memset(fwr, 0, sizeof(*fwr));
 
 	/* It would be nice to put most of the following in t4_hw.c but most
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 2c6de769f4e6..15fb284eafc0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1175,7 +1175,7 @@ static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
 	struct cpl_tid_release *req;
 
 	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
-	req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	INIT_TP_WR(req, tid);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
 }
@@ -1359,7 +1359,7 @@ int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
 		return -ENOMEM;
 
 	adap = netdev2adap(dev);
-	req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
 	req->local_port = sport;
@@ -1400,7 +1400,7 @@ int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
 		return -ENOMEM;
 
 	adap = netdev2adap(dev);
-	req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
 	req->local_port = sport;
@@ -1432,7 +1432,7 @@ int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
 	if (!skb)
 		return -ENOMEM;
 
-	req = (struct cpl_close_listsvr_req *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
 	req->reply_ctrl = htons(NO_REPLY_V(0) | (ipv6 ? LISTSVR_IPV6_V(1) :
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 6f3692db29af..f7ef8871dd0b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -146,7 +146,7 @@ static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync)
 	if (!skb)
 		return -ENOMEM;
 
-	req = (struct cpl_l2t_write_req *)__skb_put(skb, sizeof(*req));
+	req = __skb_put(skb, sizeof(*req));
 	INIT_TP_WR(req, 0);
 
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ,
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
index 515b94ff9080..4b5aacc09cab 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
@@ -90,7 +90,7 @@ cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
 {
 	struct cpl_tid_release *req;
 
-	req = (struct cpl_tid_release *)__skb_put(skb, len);
+	req = __skb_put(skb, len);
 	memset(req, 0, len);
 
 	INIT_TP_WR(req, tid);
@@ -104,7 +104,7 @@ cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
 {
 	struct cpl_close_con_req *req;
 
-	req = (struct cpl_close_con_req *)__skb_put(skb, len);
+	req = __skb_put(skb, len);
 	memset(req, 0, len);
 
 	INIT_TP_WR(req, tid);
@@ -119,7 +119,7 @@ cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
 {
 	struct cpl_abort_req *req;
 
-	req = (struct cpl_abort_req *)__skb_put(skb, len);
+	req = __skb_put(skb, len);
 	memset(req, 0, len);
 
 	INIT_TP_WR(req, tid);
@@ -134,7 +134,7 @@ cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
 {
 	struct cpl_abort_rpl *rpl;
 
-	rpl = (struct cpl_abort_rpl *)__skb_put(skb, len);
+	rpl = __skb_put(skb, len);
 	memset(rpl, 0, len);
 
 	INIT_TP_WR(rpl, tid);
@@ -149,7 +149,7 @@ cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
 {
 	struct cpl_rx_data_ack *req;
 
-	req = (struct cpl_rx_data_ack *)__skb_put(skb, len);
+	req = __skb_put(skb, len);
 	memset(req, 0, len);
 
 	INIT_TP_WR(req, tid);
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 008dc8161775..16fe776ddbe5 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1171,7 +1171,7 @@ dm9000_rx(struct net_device *dev)
 		if (GoodPacket &&
 		    ((skb = netdev_alloc_skb(dev, RxLen + 4)) != NULL)) {
 			skb_reserve(skb, 2);
-			rdptr = (u8 *) skb_put(skb, RxLen - 4);
+			rdptr = skb_put(skb, RxLen - 4);
 
 			/* Read received packet from RX SRAM */
 
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 3e77dd863175..5a847941c46b 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -399,7 +399,7 @@ static int dnet_poll(struct napi_struct *napi, int budget)
 			 * 'skb_put()' points to the start of sk_buff
 			 * data area.
 			 */
-			data_ptr = (unsigned int *)skb_put(skb, pkt_len);
+			data_ptr = skb_put(skb, pkt_len);
 			for (i = 0; i < (pkt_len + 3) >> 2; i++)
 				*data_ptr++ = dnet_readl(bp, RX_DATA_FIFO);
 			skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 5673b071e39d..c6164a98f257 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -1281,7 +1281,7 @@ static int hp100_build_rx_pdl(hp100_ring_t * ringptr,
 		 */
 		skb_reserve(ringptr->skb, 2);
 
-		ringptr->skb->data = (u_char *) skb_put(ringptr->skb, MAX_ETHER_SIZE);
+		ringptr->skb->data = skb_put(ringptr->skb, MAX_ETHER_SIZE);
 
 		/* ringptr->pdl points to the beginning of the PDL, i.e. the PDH */
 		/* Note: 1st Fragment is used for the 4 byte packet status
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index b077ef8b00fa..2d1253c5b7a1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -762,7 +762,7 @@ int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring,
 		    (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA)) {
 			struct fcoe_crc_eof *crc = NULL;
 
-			crc = (struct fcoe_crc_eof *)skb_put(skb, sizeof(*crc));
+			crc = skb_put(skb, sizeof(*crc));
 			crc->fcoe_eof = FC_EOF_T;
 		} else {
 			/* otherwise, drop the header only frame */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 2a653ec954f5..a23c2b5411a0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -491,7 +491,7 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
 	if ((fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA) &&
 	    (fctl & FC_FC_END_SEQ)) {
 		skb_linearize(skb);
-		crc = (struct fcoe_crc_eof *)skb_put(skb, sizeof(*crc));
+		crc = skb_put(skb, sizeof(*crc));
 		crc->fcoe_eof = FC_EOF_T;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
index 17112faafbcc..88699b181946 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -63,8 +63,8 @@ static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv)
 
 	skb_reserve(skb, NET_IP_ALIGN);
 
-	ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr));
-	packet	= (unsigned char *)skb_put(skb, packet_size);
+	ethh = skb_put(skb, sizeof(struct ethhdr));
+	packet = skb_put(skb, packet_size);
 	memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN);
 	eth_zero_addr(ethh->h_source);
 	ethh->h_proto = htons(ETH_P_ARP);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 601abf240d63..c456ca07b562 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -136,10 +136,10 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	skb_reset_mac_header(skb);
 
 	skb_set_network_header(skb, skb->len);
-	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
+	iph = skb_put(skb, sizeof(struct iphdr));
 
 	skb_set_transport_header(skb, skb->len);
-	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+	udph = skb_put(skb, sizeof(struct udphdr));
 
 	/* Fill ETH header */
 	ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr);
@@ -167,7 +167,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	ip_send_check(iph);
 
 	/* Fill test header and data */
-	mlxh = (struct mlx5ehdr *)skb_put(skb, sizeof(*mlxh));
+	mlxh = skb_put(skb, sizeof(*mlxh));
 	mlxh->version = 0;
 	mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
 	strlcpy(mlxh->text, mlx5e_test_text, sizeof(mlxh->text));
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index cb0102dd7f70..e3d7c74d47bb 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -669,7 +669,7 @@ static void ks8842_rx_frame(struct net_device *netdev,
 			ks8842_update_rx_counters(netdev, status, len);
 
 			if (adapter->conf_flags & KS884X_16BIT) {
-				u16 *data16 = (u16 *)skb_put(skb, len);
+				u16 *data16 = skb_put(skb, len);
 				ks8842_select_bank(adapter, 17);
 				while (len > 0) {
 					*data16++ = ioread16(adapter->hw_addr +
@@ -679,7 +679,7 @@ static void ks8842_rx_frame(struct net_device *netdev,
 					len -= sizeof(u32);
 				}
 			} else {
-				u32 *data = (u32 *)skb_put(skb, len);
+				u32 *data = skb_put(skb, len);
 
 				ks8842_select_bank(adapter, 17);
 				while (len > 0) {
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c
index 92bc34c91547..55c0fbbc4fb8 100644
--- a/drivers/net/ethernet/sfc/falcon/selftest.c
+++ b/drivers/net/ethernet/sfc/falcon/selftest.c
@@ -431,8 +431,7 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
 
 		/* Copy the payload in, incrementing the source address to
 		 * exercise the rss vectors */
-		payload = ((struct ef4_loopback_payload *)
-			   skb_put(skb, sizeof(state->payload)));
+		payload = skb_put(skb, sizeof(state->payload));
 		memcpy(payload, &state->payload, sizeof(state->payload));
 		payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
 
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index dab286a337a6..f6936949fc85 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -431,8 +431,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
 
 		/* Copy the payload in, incrementing the source address to
 		 * exercise the rss vectors */
-		payload = ((struct efx_loopback_payload *)
-			   skb_put(skb, sizeof(state->payload)));
+		payload = skb_put(skb, sizeof(state->payload));
 		memcpy(payload, &state->payload, sizeof(state->payload));
 		payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
 
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 6754cd01c605..140a209f22ab 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -540,7 +540,7 @@ static inline void scc_rxint(struct scc_channel *scc)
 		}
 		
 		scc->rx_buff = skb;
-		*(skb_put(skb, 1)) = 0;	/* KISS data */
+		*(u8 *)skb_put(skb, 1) = 0;	/* KISS data */
 	}
 	
 	if (skb->len >= scc->stat.bufsize)
@@ -555,7 +555,7 @@ static inline void scc_rxint(struct scc_channel *scc)
 		return;
 	}
 
-	*(skb_put(skb, 1)) = Inb(scc->data);
+	*(u8 *)skb_put(skb, 1) = Inb(scc->data);
 }
 
 
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index d7e405268983..4e1da1645b15 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -877,7 +877,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
 	skb->priority = sk->sk_priority;
 	skb->protocol = cpu_to_be16(ETH_P_PPP_SES);
 
-	ph = (struct pppoe_hdr *)skb_put(skb, total_len + sizeof(struct pppoe_hdr));
+	ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr));
 	start = (char *)&ph->tag[0];
 
 	error = memcpy_from_msg(start, m, total_len);
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 8a4c8a1b9dd3..4d4837a0645b 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1250,7 +1250,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		skb_put_zero(skb_out, padding_count);
 	} else if (skb_out->len < ctx->tx_max &&
 		   (skb_out->len % dev->maxpacket) == 0) {
-		*skb_put(skb_out, 1) = 0;	/* force short packet */
+		*(u8 *)skb_put(skb_out, 1) = 0;	/* force short packet */
 	}
 
 	/* set final frame length */
diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c
index 3202c19df83d..861ff45f0b09 100644
--- a/drivers/net/usb/net1080.c
+++ b/drivers/net/usb/net1080.c
@@ -473,8 +473,8 @@ encapsulate:
 
 	/* maybe pad; then trailer */
 	if (!((skb->len + sizeof *trailer) & 0x01))
-		*skb_put(skb, 1) = PAD_BYTE;
-	trailer = (struct nc_trailer *) skb_put(skb, sizeof *trailer);
+		*(u8 *)skb_put(skb, 1) = PAD_BYTE;
+	trailer = skb_put(skb, sizeof *trailer);
 	put_unaligned(header->packet_id, &trailer->packet_id);
 #if 0
 	netdev_dbg(dev->net, "frame >tx h %d p %d id %d\n",
diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c
index 6aaa6eb9df72..dc3cd03763af 100644
--- a/drivers/net/usb/zaurus.c
+++ b/drivers/net/usb/zaurus.c
@@ -74,10 +74,10 @@ done:
 		fcs = crc32_le(~0, skb->data, skb->len);
 		fcs = ~fcs;
 
-		*skb_put (skb, 1) = fcs       & 0xff;
-		*skb_put (skb, 1) = (fcs>> 8) & 0xff;
-		*skb_put (skb, 1) = (fcs>>16) & 0xff;
-		*skb_put (skb, 1) = (fcs>>24) & 0xff;
+		*(u8 *)skb_put(skb, 1) = fcs       & 0xff;
+		*(u8 *)skb_put(skb, 1) = (fcs>> 8) & 0xff;
+		*(u8 *)skb_put(skb, 1) = (fcs>>16) & 0xff;
+		*(u8 *)skb_put(skb, 1) = (fcs>>24) & 0xff;
 	}
 	return skb;
 }
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index f5b4ad45831a..fa3460a0dbbe 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -228,7 +228,7 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code,
 	}
 	skb_reserve(skb, sizeof(struct hdlc_header));
 
-	cp = (struct cp_header *)skb_put(skb, sizeof(struct cp_header));
+	cp = skb_put(skb, sizeof(struct cp_header));
 	cp->code = code;
 	cp->id = id;
 	cp->len = htons(sizeof(struct cp_header) + magic_len + len);
diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index e2b7809d7886..1eea6c23976f 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c
@@ -348,7 +348,7 @@ void ath6kl_debug_fwlog_event(struct ath6kl *ar, const void *buf, size_t len)
 	if (!skb)
 		return;
 
-	slot = (struct ath6kl_fwlog_slot *) skb_put(skb, slot_len);
+	slot = skb_put(skb, slot_len);
 	slot->timestamp = cpu_to_le32(jiffies);
 	slot->length = cpu_to_le32(len);
 	memcpy(slot->payload, buf, len);
diff --git a/drivers/net/wireless/ath/ath6kl/htc_pipe.c b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
index d127a08d60df..b13d61111072 100644
--- a/drivers/net/wireless/ath/ath6kl/htc_pipe.c
+++ b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
@@ -1274,8 +1274,7 @@ static int ath6kl_htc_pipe_conn_service(struct htc_target *target,
 		length = sizeof(struct htc_conn_service_msg);
 
 		/* assemble connect service message */
-		conn_msg = (struct htc_conn_service_msg *) skb_put(skb,
-								   length);
+		conn_msg = skb_put(skb, length);
 		if (conn_msg == NULL) {
 			WARN_ON_ONCE(1);
 			status = -EINVAL;
@@ -1504,8 +1503,7 @@ static int ath6kl_htc_pipe_start(struct htc_target *target)
 	skb = packet->skb;
 
 	/* assemble setup complete message */
-	setup = (struct htc_setup_comp_ext_msg *) skb_put(skb,
-							  sizeof(*setup));
+	setup = skb_put(skb, sizeof(*setup));
 	memset(setup, 0, sizeof(struct htc_setup_comp_ext_msg));
 	setup->msg_id = cpu_to_le16(HTC_MSG_SETUP_COMPLETE_EX_ID);
 
diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c
index 8e6dae23669b..9fa8970a1f7d 100644
--- a/drivers/net/wireless/ath/ath9k/htc_hst.c
+++ b/drivers/net/wireless/ath/ath9k/htc_hst.c
@@ -156,8 +156,7 @@ static int htc_config_pipe_credits(struct htc_target *target)
 	}
 	skb_reserve(skb, sizeof(struct htc_frame_hdr));
 
-	cp_msg = (struct htc_config_pipe_msg *)
-		skb_put(skb, sizeof(struct htc_config_pipe_msg));
+	cp_msg = skb_put(skb, sizeof(struct htc_config_pipe_msg));
 
 	cp_msg->message_id = cpu_to_be16(HTC_MSG_CONFIG_PIPE_ID);
 	cp_msg->pipe_id = USB_WLAN_TX_PIPE;
@@ -195,8 +194,7 @@ static int htc_setup_complete(struct htc_target *target)
 	}
 	skb_reserve(skb, sizeof(struct htc_frame_hdr));
 
-	comp_msg = (struct htc_comp_msg *)
-		skb_put(skb, sizeof(struct htc_comp_msg));
+	comp_msg = skb_put(skb, sizeof(struct htc_comp_msg));
 	comp_msg->msg_id = cpu_to_be16(HTC_MSG_SETUP_COMPLETE_ID);
 
 	target->htc_flags |= HTC_OP_START_WAIT;
@@ -265,8 +263,7 @@ int htc_connect_service(struct htc_target *target,
 
 	skb_reserve(skb, sizeof(struct htc_frame_hdr));
 
-	conn_msg = (struct htc_conn_svc_msg *)
-			skb_put(skb, sizeof(struct htc_conn_svc_msg));
+	conn_msg = skb_put(skb, sizeof(struct htc_conn_svc_msg));
 	conn_msg->service_id = cpu_to_be16(service_connreq->service_id);
 	conn_msg->msg_id = cpu_to_be16(HTC_MSG_CONNECT_SERVICE_ID);
 	conn_msg->con_flags = cpu_to_be16(service_connreq->con_flags);
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index cff9c585972f..0a5020f31de1 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -677,7 +677,7 @@ static void wmi_evt_eapol_rx(struct wil6210_priv *wil, int id,
 		return;
 	}
 
-	eth = (struct ethhdr *)skb_put(skb, ETH_HLEN);
+	eth = skb_put(skb, ETH_HLEN);
 	ether_addr_copy(eth->h_dest, ndev->dev_addr);
 	ether_addr_copy(eth->h_source, evt->src_mac);
 	eth->h_proto = cpu_to_be16(ETH_P_PAE);
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 1b7e125a28e2..4623155ec36e 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -3330,7 +3330,7 @@ static void airo_handle_rx(struct airo_info *ai)
 	}
 
 	skb_reserve(skb, 2); /* This way the IP header is aligned */
-	buffer = (__le16 *) skb_put(skb, len + hdrlen);
+	buffer = skb_put(skb, len + hdrlen);
 	if (test_bit(FLAG_802_11, &ai->flags)) {
 		buffer[0] = fc;
 		bap_read(ai, buffer + 1, hdrlen - 2, BAP0);
@@ -3734,7 +3734,7 @@ static void mpi_receive_802_11(struct airo_info *ai)
 		ai->dev->stats.rx_dropped++;
 		goto badrx;
 	}
-	buffer = (u16*)skb_put (skb, len + hdrlen);
+	buffer = skb_put(skb, len + hdrlen);
 	memcpy ((char *)buffer, ptr, hdrlen);
 	ptr += hdrlen;
 	if (hdrlen == 24)
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index e0c690b48d4e..5e4ce4abd62e 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -10371,7 +10371,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
 		if (!dst)
 			continue;
 
-		rt_hdr = (void *)skb_put(dst, sizeof(*rt_hdr));
+		rt_hdr = skb_put(dst, sizeof(*rt_hdr));
 
 		rt_hdr->it_version = PKTHDR_RADIOTAP_VERSION;
 		rt_hdr->it_pad = 0;
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
index 5339d1eeb2f7..84205aa508df 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
@@ -439,8 +439,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (rts_required) {
 		skb_frag = txb->fragments[0];
-		frag_hdr =
-		    (struct libipw_hdr_3addrqos *)skb_put(skb_frag, hdr_len);
+		frag_hdr = skb_put(skb_frag, hdr_len);
 
 		/*
 		 * Set header frame_ctl to the RTS.
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index 91757defb9be..eb9cd6fa9c4d 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -2361,7 +2361,7 @@ static void schedule_packet_send(local_info_t *local, struct sta_info *sta)
 		return;
 	}
 
-	hdr = (struct ieee80211_hdr *) skb_put(skb, 16);
+	hdr = skb_put(skb, 16);
 
 	/* Generate a fake pspoll frame to start packet delivery */
 	hdr->frame_control = cpu_to_le16(
diff --git a/drivers/net/wireless/intersil/p54/fwio.c b/drivers/net/wireless/intersil/p54/fwio.c
index 3076f646c829..52c095c7765f 100644
--- a/drivers/net/wireless/intersil/p54/fwio.c
+++ b/drivers/net/wireless/intersil/p54/fwio.c
@@ -206,7 +206,7 @@ static struct sk_buff *p54_alloc_skb(struct p54_common *priv, u16 hdr_flags,
 		return NULL;
 	skb_reserve(skb, priv->tx_hdr_len);
 
-	hdr = (struct p54_hdr *) skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	hdr->flags = cpu_to_le16(hdr_flags);
 	hdr->len = cpu_to_le16(payload_len);
 	hdr->type = cpu_to_le16(type);
@@ -236,8 +236,7 @@ int p54_download_eeprom(struct p54_common *priv, void *buf,
 
 	mutex_lock(&priv->eeprom_mutex);
 	priv->eeprom = buf;
-	eeprom_hdr = (struct p54_eeprom_lm86 *) skb_put(skb,
-		eeprom_hdr_size + len);
+	eeprom_hdr = skb_put(skb, eeprom_hdr_size + len);
 
 	if (priv->fw_var < 0x509) {
 		eeprom_hdr->v1.offset = cpu_to_le16(offset);
@@ -273,7 +272,7 @@ int p54_update_beacon_tim(struct p54_common *priv, u16 aid, bool set)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	tim = (struct p54_tim *) skb_put(skb, sizeof(*tim));
+	tim = skb_put(skb, sizeof(*tim));
 	tim->count = 1;
 	tim->entry[0] = cpu_to_le16(set ? (aid | 0x8000) : aid);
 	p54_tx(priv, skb);
@@ -290,7 +289,7 @@ int p54_sta_unlock(struct p54_common *priv, u8 *addr)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	sta = (struct p54_sta_unlock *)skb_put(skb, sizeof(*sta));
+	sta = skb_put(skb, sizeof(*sta));
 	memcpy(sta->addr, addr, ETH_ALEN);
 	p54_tx(priv, skb);
 	return 0;
@@ -310,7 +309,7 @@ int p54_tx_cancel(struct p54_common *priv, __le32 req_id)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	cancel = (struct p54_txcancel *)skb_put(skb, sizeof(*cancel));
+	cancel = skb_put(skb, sizeof(*cancel));
 	cancel->req_id = req_id;
 	p54_tx(priv, skb);
 	return 0;
@@ -327,7 +326,7 @@ int p54_setup_mac(struct p54_common *priv)
 	if (!skb)
 		return -ENOMEM;
 
-	setup = (struct p54_setup_mac *) skb_put(skb, sizeof(*setup));
+	setup = skb_put(skb, sizeof(*setup));
 	if (!(priv->hw->conf.flags & IEEE80211_CONF_IDLE)) {
 		switch (priv->mode) {
 		case NL80211_IFTYPE_STATION:
@@ -413,18 +412,18 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell)
 	if (!skb)
 		return -ENOMEM;
 
-	head = (struct p54_scan_head *) skb_put(skb, sizeof(*head));
+	head = skb_put(skb, sizeof(*head));
 	memset(head->scan_params, 0, sizeof(head->scan_params));
 	head->mode = cpu_to_le16(mode);
 	head->dwell = cpu_to_le16(dwell);
 	head->freq = freq;
 
 	if (priv->rxhw == PDR_SYNTH_FRONTEND_LONGBOW) {
-		__le16 *pa_power_points = (__le16 *) skb_put(skb, 2);
+		__le16 *pa_power_points = skb_put(skb, 2);
 		*pa_power_points = cpu_to_le16(0x0c);
 	}
 
-	iq_autocal = (void *) skb_put(skb, sizeof(*iq_autocal));
+	iq_autocal = skb_put(skb, sizeof(*iq_autocal));
 	for (i = 0; i < priv->iq_autocal_len; i++) {
 		if (priv->iq_autocal[i].freq != freq)
 			continue;
@@ -437,9 +436,9 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell)
 		goto err;
 
 	if (priv->rxhw == PDR_SYNTH_FRONTEND_LONGBOW)
-		body = (void *) skb_put(skb, sizeof(body->longbow));
+		body = skb_put(skb, sizeof(body->longbow));
 	else
-		body = (void *) skb_put(skb, sizeof(body->normal));
+		body = skb_put(skb, sizeof(body->normal));
 
 	for (i = 0; i < priv->output_limit->entries; i++) {
 		__le16 *entry_freq = (void *) (priv->output_limit->data +
@@ -500,25 +499,25 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell)
 		goto err;
 
 	if ((priv->fw_var >= 0x500) && (priv->fw_var < 0x509)) {
-		rate = (void *) skb_put(skb, sizeof(*rate));
+		rate = skb_put(skb, sizeof(*rate));
 		rate->basic_rate_mask = cpu_to_le32(priv->basic_rate_mask);
 		for (i = 0; i < sizeof(rate->rts_rates); i++)
 			rate->rts_rates[i] = i;
 	}
 
-	rssi = (struct pda_rssi_cal_entry *) skb_put(skb, sizeof(*rssi));
+	rssi = skb_put(skb, sizeof(*rssi));
 	rssi_data = p54_rssi_find(priv, le16_to_cpu(freq));
 	rssi->mul = cpu_to_le16(rssi_data->mul);
 	rssi->add = cpu_to_le16(rssi_data->add);
 	if (priv->rxhw == PDR_SYNTH_FRONTEND_LONGBOW) {
 		/* Longbow frontend needs ever more */
-		rssi = (void *) skb_put(skb, sizeof(*rssi));
+		rssi = skb_put(skb, sizeof(*rssi));
 		rssi->mul = cpu_to_le16(rssi_data->longbow_unkn);
 		rssi->add = cpu_to_le16(rssi_data->longbow_unk2);
 	}
 
 	if (priv->fw_var >= 0x509) {
-		rate = (void *) skb_put(skb, sizeof(*rate));
+		rate = skb_put(skb, sizeof(*rate));
 		rate->basic_rate_mask = cpu_to_le32(priv->basic_rate_mask);
 		for (i = 0; i < sizeof(rate->rts_rates); i++)
 			rate->rts_rates[i] = i;
@@ -550,7 +549,7 @@ int p54_set_leds(struct p54_common *priv)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	led = (struct p54_led *) skb_put(skb, sizeof(*led));
+	led = skb_put(skb, sizeof(*led));
 	led->flags = cpu_to_le16(0x0003);
 	led->mask[0] = led->mask[1] = cpu_to_le16(priv->softled_state);
 	led->delay[0] = cpu_to_le16(1);
@@ -570,7 +569,7 @@ int p54_set_edcf(struct p54_common *priv)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	edcf = (struct p54_edcf *)skb_put(skb, sizeof(*edcf));
+	edcf = skb_put(skb, sizeof(*edcf));
 	if (priv->use_short_slot) {
 		edcf->slottime = 9;
 		edcf->sifs = 0x10;
@@ -615,7 +614,7 @@ int p54_set_ps(struct p54_common *priv)
 	if (!skb)
 		return -ENOMEM;
 
-	psm = (struct p54_psm *)skb_put(skb, sizeof(*psm));
+	psm = skb_put(skb, sizeof(*psm));
 	psm->mode = cpu_to_le16(mode);
 	psm->aid = cpu_to_le16(priv->aid);
 	for (i = 0; i < ARRAY_SIZE(psm->intervals); i++) {
@@ -644,7 +643,7 @@ int p54_init_xbow_synth(struct p54_common *priv)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	xbow = (struct p54_xbow_synth *)skb_put(skb, sizeof(*xbow));
+	xbow = skb_put(skb, sizeof(*xbow));
 	xbow->magic1 = cpu_to_le16(0x1);
 	xbow->magic2 = cpu_to_le16(0x2);
 	xbow->freq = cpu_to_le16(5390);
@@ -664,7 +663,7 @@ int p54_upload_key(struct p54_common *priv, u8 algo, int slot, u8 idx, u8 len,
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	rxkey = (struct p54_keycache *)skb_put(skb, sizeof(*rxkey));
+	rxkey = skb_put(skb, sizeof(*rxkey));
 	rxkey->entry = slot;
 	rxkey->key_id = idx;
 	rxkey->key_type = algo;
@@ -744,7 +743,7 @@ int p54_set_groupfilter(struct p54_common *priv)
 	if (!skb)
 		return -ENOMEM;
 
-	grp = (struct p54_group_address_table *)skb_put(skb, sizeof(*grp));
+	grp = skb_put(skb, sizeof(*grp));
 
 	on = !(priv->filter_flags & FIF_ALLMULTI) &&
 	     (priv->mc_maclist_num > 0 &&
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 1d6e180052b8..7418088e296f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -650,7 +650,7 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif)
 	skb = dev_alloc_skb(sizeof(*pspoll));
 	if (!skb)
 		return;
-	pspoll = (void *) skb_put(skb, sizeof(*pspoll));
+	pspoll = skb_put(skb, sizeof(*pspoll));
 	pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
 					    IEEE80211_STYPE_PSPOLL |
 					    IEEE80211_FCTL_PM);
@@ -681,7 +681,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
 	skb = dev_alloc_skb(sizeof(*hdr));
 	if (!skb)
 		return;
-	hdr = (void *) skb_put(skb, sizeof(*hdr) - ETH_ALEN);
+	hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
 	hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
 					 IEEE80211_STYPE_NULLFUNC |
 					 (ps ? IEEE80211_FCTL_PM : 0));
@@ -892,7 +892,7 @@ static void mac80211_hwsim_monitor_ack(struct ieee80211_channel *chan,
 	if (skb == NULL)
 		return;
 
-	hdr = (struct hwsim_radiotap_ack_hdr *) skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	hdr->hdr.it_version = PKTHDR_RADIOTAP_VERSION;
 	hdr->hdr.it_pad = 0;
 	hdr->hdr.it_len = cpu_to_le16(sizeof(*hdr));
@@ -904,7 +904,7 @@ static void mac80211_hwsim_monitor_ack(struct ieee80211_channel *chan,
 	flags = IEEE80211_CHAN_2GHZ;
 	hdr->rt_chbitmask = cpu_to_le16(flags);
 
-	hdr11 = (struct ieee80211_hdr *) skb_put(skb, 10);
+	hdr11 = skb_put(skb, 10);
 	hdr11->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
 					   IEEE80211_STYPE_ACK);
 	hdr11->duration_id = cpu_to_le16(0);
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
index bc12c37e7501..042a1d07f686 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
@@ -62,7 +62,7 @@ mwifiex_11n_form_amsdu_pkt(struct sk_buff *skb_aggr,
 	};
 	struct tx_packet_hdr *tx_header;
 
-	tx_header = (void *)skb_put(skb_aggr, sizeof(*tx_header));
+	tx_header = skb_put(skb_aggr, sizeof(*tx_header));
 
 	/* Copy DA and SA */
 	dt_offset = 2 * ETH_ALEN;
diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c
index d38555fe4284..39cd677d4159 100644
--- a/drivers/net/wireless/marvell/mwifiex/tdls.c
+++ b/drivers/net/wireless/marvell/mwifiex/tdls.c
@@ -158,7 +158,7 @@ static void mwifiex_tdls_add_aid(struct mwifiex_private *priv,
 	u8 *pos;
 
 	assoc_rsp = (struct ieee_types_assoc_rsp *)&priv->assoc_rsp_buf;
-	pos = (void *)skb_put(skb, 4);
+	pos = skb_put(skb, 4);
 	*pos++ = WLAN_EID_AID;
 	*pos++ = 2;
 	memcpy(pos, &assoc_rsp->a_id, sizeof(assoc_rsp->a_id));
@@ -172,7 +172,7 @@ static int mwifiex_tdls_add_vht_capab(struct mwifiex_private *priv,
 	struct ieee80211_vht_cap vht_cap;
 	u8 *pos;
 
-	pos = (void *)skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
+	pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
 	*pos++ = WLAN_EID_VHT_CAPABILITY;
 	*pos++ = sizeof(struct ieee80211_vht_cap);
 
@@ -207,7 +207,7 @@ mwifiex_tdls_add_ht_oper(struct mwifiex_private *priv, const u8 *mac,
 		return 0;
 	}
 
-	pos = (void *)skb_put(skb, sizeof(struct ieee80211_ht_operation) + 2);
+	pos = skb_put(skb, sizeof(struct ieee80211_ht_operation) + 2);
 	*pos++ = WLAN_EID_HT_OPERATION;
 	*pos++ = sizeof(struct ieee80211_ht_operation);
 	ht_oper = (void *)pos;
@@ -272,7 +272,7 @@ static int mwifiex_tdls_add_vht_oper(struct mwifiex_private *priv,
 		ap_vht_cap = bss_desc->bcn_vht_cap;
 	}
 
-	pos = (void *)skb_put(skb, sizeof(struct ieee80211_vht_operation) + 2);
+	pos = skb_put(skb, sizeof(struct ieee80211_vht_operation) + 2);
 	*pos++ = WLAN_EID_VHT_OPERATION;
 	*pos++ = sizeof(struct ieee80211_vht_operation);
 	vht_oper = (struct ieee80211_vht_operation *)pos;
@@ -359,7 +359,7 @@ static void mwifiex_tdls_add_ext_capab(struct mwifiex_private *priv,
 {
 	struct ieee_types_extcap *extcap;
 
-	extcap = (void *)skb_put(skb, sizeof(struct ieee_types_extcap));
+	extcap = skb_put(skb, sizeof(struct ieee_types_extcap));
 	extcap->ieee_hdr.element_id = WLAN_EID_EXT_CAPABILITY;
 	extcap->ieee_hdr.len = 8;
 	memset(extcap->ext_capab, 0, 8);
@@ -372,7 +372,7 @@ static void mwifiex_tdls_add_ext_capab(struct mwifiex_private *priv,
 
 static void mwifiex_tdls_add_qos_capab(struct sk_buff *skb)
 {
-	u8 *pos = (void *)skb_put(skb, 3);
+	u8 *pos = skb_put(skb, 3);
 
 	*pos++ = WLAN_EID_QOS_CAPA;
 	*pos++ = 1;
@@ -413,8 +413,8 @@ mwifiex_add_wmm_info_ie(struct mwifiex_private *priv, struct sk_buff *skb,
 {
 	u8 *buf;
 
-	buf = (void *)skb_put(skb, MWIFIEX_TDLS_WMM_INFO_SIZE +
-			      sizeof(struct ieee_types_header));
+	buf = skb_put(skb,
+		      MWIFIEX_TDLS_WMM_INFO_SIZE + sizeof(struct ieee_types_header));
 
 	*buf++ = WLAN_EID_VENDOR_SPECIFIC;
 	*buf++ = 7; /* len */
@@ -431,7 +431,7 @@ static void mwifiex_tdls_add_bss_co_2040(struct sk_buff *skb)
 {
 	struct ieee_types_bss_co_2040 *bssco;
 
-	bssco = (void *)skb_put(skb, sizeof(struct ieee_types_bss_co_2040));
+	bssco = skb_put(skb, sizeof(struct ieee_types_bss_co_2040));
 	bssco->ieee_hdr.element_id = WLAN_EID_BSS_COEX_2040;
 	bssco->ieee_hdr.len = sizeof(struct ieee_types_bss_co_2040) -
 			      sizeof(struct ieee_types_header);
@@ -443,8 +443,8 @@ static void mwifiex_tdls_add_supported_chan(struct sk_buff *skb)
 	struct ieee_types_generic *supp_chan;
 	u8 chan_supp[] = {1, 11};
 
-	supp_chan = (void *)skb_put(skb, (sizeof(struct ieee_types_header) +
-					  sizeof(chan_supp)));
+	supp_chan = skb_put(skb,
+			    (sizeof(struct ieee_types_header) + sizeof(chan_supp)));
 	supp_chan->ieee_hdr.element_id = WLAN_EID_SUPPORTED_CHANNELS;
 	supp_chan->ieee_hdr.len = sizeof(chan_supp);
 	memcpy(supp_chan->data, chan_supp, sizeof(chan_supp));
@@ -455,8 +455,8 @@ static void mwifiex_tdls_add_oper_class(struct sk_buff *skb)
 	struct ieee_types_generic *reg_class;
 	u8 rc_list[] = {1,
 		1, 2, 3, 4, 12, 22, 23, 24, 25, 27, 28, 29, 30, 32, 33};
-	reg_class = (void *)skb_put(skb, (sizeof(struct ieee_types_header) +
-					  sizeof(rc_list)));
+	reg_class = skb_put(skb,
+			    (sizeof(struct ieee_types_header) + sizeof(rc_list)));
 	reg_class->ieee_hdr.element_id = WLAN_EID_SUPPORTED_REGULATORY_CLASSES;
 	reg_class->ieee_hdr.len = sizeof(rc_list);
 	memcpy(reg_class->data, rc_list, sizeof(rc_list));
@@ -475,7 +475,7 @@ static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv,
 
 	capab = priv->curr_bss_params.bss_descriptor.cap_info_bitmap;
 
-	tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
+	tf = skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
 	memcpy(tf->da, peer, ETH_ALEN);
 	memcpy(tf->sa, priv->curr_addr, ETH_ALEN);
 	tf->ether_type = cpu_to_be16(ETH_P_TDLS);
@@ -494,7 +494,7 @@ static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv,
 			return ret;
 		}
 
-		pos = (void *)skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
+		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
 		*pos++ = WLAN_EID_HT_CAPABILITY;
 		*pos++ = sizeof(struct ieee80211_ht_cap);
 		ht_cap = (void *)pos;
@@ -534,7 +534,7 @@ static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv,
 			return ret;
 		}
 
-		pos = (void *)skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
+		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
 		*pos++ = WLAN_EID_HT_CAPABILITY;
 		*pos++ = sizeof(struct ieee80211_ht_cap);
 		ht_cap = (void *)pos;
@@ -616,7 +616,7 @@ mwifiex_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
 {
 	struct ieee80211_tdls_lnkie *lnkid;
 
-	lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
+	lnkid = skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
 	lnkid->ie_type = WLAN_EID_LINK_ID;
 	lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) -
 			sizeof(struct ieee_types_header);
@@ -741,7 +741,7 @@ mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv,
 
 	capab = priv->curr_bss_params.bss_descriptor.cap_info_bitmap;
 
-	mgmt = (void *)skb_put(skb, offsetof(struct ieee80211_mgmt, u));
+	mgmt = skb_put(skb, offsetof(struct ieee80211_mgmt, u));
 
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, peer, ETH_ALEN);
@@ -775,7 +775,7 @@ mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv,
 			return ret;
 		}
 
-		pos = (void *)skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
+		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
 		*pos++ = WLAN_EID_HT_CAPABILITY;
 		*pos++ = sizeof(struct ieee80211_ht_cap);
 		ht_cap = (void *)pos;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
index 9844ff0add2b..f6ac39973b5d 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
@@ -26,7 +26,7 @@ static inline void qtnf_cmd_skb_put_action(struct sk_buff *skb, u16 action)
 {
 	__le16 *buf_ptr;
 
-	buf_ptr = (__le16 *)skb_put(skb, sizeof(action));
+	buf_ptr = skb_put(skb, sizeof(action));
 	*buf_ptr = cpu_to_le16(action);
 }
 
@@ -42,8 +42,7 @@ static inline void qtnf_cmd_skb_put_tlv_arr(struct sk_buff *skb,
 					    u16 tlv_id, const u8 arr[],
 					    size_t arr_len)
 {
-	struct qlink_tlv_hdr *hdr =
-			(void *)skb_put(skb, sizeof(*hdr) + arr_len);
+	struct qlink_tlv_hdr *hdr = skb_put(skb, sizeof(*hdr) + arr_len);
 
 	hdr->type = cpu_to_le16(tlv_id);
 	hdr->len = cpu_to_le16(arr_len);
@@ -53,8 +52,7 @@ static inline void qtnf_cmd_skb_put_tlv_arr(struct sk_buff *skb,
 static inline void qtnf_cmd_skb_put_tlv_u8(struct sk_buff *skb, u16 tlv_id,
 					   u8 value)
 {
-	struct qlink_tlv_hdr *hdr =
-			(void *)skb_put(skb, sizeof(*hdr) + sizeof(value));
+	struct qlink_tlv_hdr *hdr = skb_put(skb, sizeof(*hdr) + sizeof(value));
 
 	hdr->type = cpu_to_le16(tlv_id);
 	hdr->len = cpu_to_le16(sizeof(value));
@@ -64,8 +62,7 @@ static inline void qtnf_cmd_skb_put_tlv_u8(struct sk_buff *skb, u16 tlv_id,
 static inline void qtnf_cmd_skb_put_tlv_u16(struct sk_buff *skb,
 					    u16 tlv_id, u16 value)
 {
-	struct qlink_tlv_hdr *hdr =
-			(void *)skb_put(skb, sizeof(*hdr) + sizeof(value));
+	struct qlink_tlv_hdr *hdr = skb_put(skb, sizeof(*hdr) + sizeof(value));
 	__le16 tmp = cpu_to_le16(value);
 
 	hdr->type = cpu_to_le16(tlv_id);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
index b70985e126bf..51520a0e2138 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
@@ -188,7 +188,7 @@ void rt2x00debug_dump_frame(struct rt2x00_dev *rt2x00dev,
 		return;
 	}
 
-	dump_hdr = (struct rt2x00dump_hdr *)skb_put(skbcopy, sizeof(*dump_hdr));
+	dump_hdr = skb_put(skbcopy, sizeof(*dump_hdr));
 	dump_hdr->version = cpu_to_le32(DUMP_HEADER_VERSION);
 	dump_hdr->header_length = cpu_to_le32(sizeof(*dump_hdr));
 	dump_hdr->desc_length = cpu_to_le32(skbdesc->desc_len);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
index dd3ba4810e7d..e7b1d7c0f542 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
@@ -462,7 +462,7 @@ static u32 _rtl92s_fill_h2c_cmd(struct sk_buff *skb, u32 h2cbufferlen,
 			break;
 
 		/* Clear content */
-		ph2c_buffer = (u8 *)skb_put(skb, (u32)len);
+		ph2c_buffer = skb_put(skb, (u32)len);
 		memset((ph2c_buffer + totallen + tx_desclen), 0, len);
 
 		/* CMD len */
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 0877e2283f35..97f003e84381 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -86,7 +86,7 @@ static void fdp_nci_i2c_add_len_lrc(struct sk_buff *skb)
 	for (i = 0; i < len + 2; i++)
 		lrc ^= skb->data[i];
 
-	*skb_put(skb, 1) = lrc;
+	*(u8 *)skb_put(skb, 1) = lrc;
 }
 
 static void fdp_nci_i2c_remove_len_lrc(struct sk_buff *skb)
diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c
index e0e8afd27849..8e328c36a816 100644
--- a/drivers/nfc/microread/i2c.c
+++ b/drivers/nfc/microread/i2c.c
@@ -75,7 +75,7 @@ static void microread_i2c_add_len_crc(struct sk_buff *skb)
 	for (i = 0; i < skb->len; i++)
 		crc = crc ^ skb->data[i];
 
-	*skb_put(skb, 1) = crc;
+	*(u8 *)skb_put(skb, 1) = crc;
 }
 
 static void microread_i2c_remove_len_crc(struct sk_buff *skb)
@@ -173,7 +173,7 @@ static int microread_i2c_read(struct microread_i2c_phy *phy,
 		goto flush;
 	}
 
-	*skb_put(*skb, 1) = len;
+	*(u8 *)skb_put(*skb, 1) = len;
 
 	r = i2c_master_recv(client, skb_put(*skb, len), len);
 	if (r != len) {
diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
index f454dc68cc03..9d0dd1be0923 100644
--- a/drivers/nfc/microread/microread.c
+++ b/drivers/nfc/microread/microread.c
@@ -441,8 +441,8 @@ static int microread_im_transceive(struct nfc_hci_dev *hdev,
 
 		crc = crc_ccitt(0xffff, skb->data, skb->len);
 		crc = ~crc;
-		*skb_put(skb, 1) = crc & 0xff;
-		*skb_put(skb, 1) = crc >> 8;
+		*(u8 *)skb_put(skb, 1) = crc & 0xff;
+		*(u8 *)skb_put(skb, 1) = crc >> 8;
 		break;
 	case MICROREAD_GATE_ID_MREAD_NFC_T3:
 		control_bits = 0xDB;
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index 7c710458568e..788599de9d8a 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -92,7 +92,7 @@ static struct sk_buff *alloc_lc_skb(struct nfcmrvl_private *priv, uint8_t plen)
 		return NULL;
 	}
 
-	hdr = (struct nci_data_hdr *) skb_put(skb, NCI_DATA_HDR_SIZE);
+	hdr = skb_put(skb, NCI_DATA_HDR_SIZE);
 	hdr->conn_id = NCI_CORE_LC_CONNID_PROP_FW_DL;
 	hdr->rfu = 0;
 	hdr->plen = plen;
@@ -292,7 +292,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
 			out_skb = alloc_lc_skb(priv, 1);
 			if (!out_skb)
 				return -ENOMEM;
-			*skb_put(out_skb, 1) = 0xBF;
+			*(u8 *)skb_put(out_skb, 1) = 0xBF;
 			nci_send_frame(priv->ndev, out_skb);
 			priv->fw_dnld.substate = SUBSTATE_WAIT_NACK_CREDIT;
 			return 0;
@@ -301,7 +301,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
 		out_skb = alloc_lc_skb(priv, 1);
 		if (!out_skb)
 			return -ENOMEM;
-		*skb_put(out_skb, 1) = HELPER_ACK_PACKET_FORMAT;
+		*(u8 *)skb_put(out_skb, 1) = HELPER_ACK_PACKET_FORMAT;
 		nci_send_frame(priv->ndev, out_skb);
 		priv->fw_dnld.substate = SUBSTATE_WAIT_ACK_CREDIT;
 		break;
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 9200bb308e42..68a3cd0287f6 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -1032,7 +1032,7 @@ static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev)
 		return NULL;
 
 	/* DEP support only */
-	*skb_put(skb, 1) = PN533_INIT_TARGET_DEP;
+	*(u8 *)skb_put(skb, 1) = PN533_INIT_TARGET_DEP;
 
 	/* MIFARE params */
 	skb_put_data(skb, mifare_params, 6);
@@ -1046,12 +1046,12 @@ static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev)
 	memcpy(nfcid3, felica, 8);
 
 	/* General bytes */
-	*skb_put(skb, 1) = gbytes_len;
+	*(u8 *)skb_put(skb, 1) = gbytes_len;
 
 	gb = skb_put_data(skb, gbytes, gbytes_len);
 
 	/* Len Tk */
-	*skb_put(skb, 1) = 0;
+	*(u8 *)skb_put(skb, 1) = 0;
 
 	return skb;
 }
@@ -1280,8 +1280,8 @@ static void pn533_wq_rf(struct work_struct *work)
 	if (!skb)
 		return;
 
-	*skb_put(skb, 1) = PN533_CFGITEM_RF_FIELD;
-	*skb_put(skb, 1) = PN533_CFGITEM_RF_FIELD_AUTO_RFCA;
+	*(u8 *)skb_put(skb, 1) = PN533_CFGITEM_RF_FIELD;
+	*(u8 *)skb_put(skb, 1) = PN533_CFGITEM_RF_FIELD_AUTO_RFCA;
 
 	rc = pn533_send_cmd_async(dev, PN533_CMD_RF_CONFIGURATION, skb,
 				  pn533_rf_complete, NULL);
@@ -1375,8 +1375,8 @@ static int pn533_poll_dep(struct nfc_dev *nfc_dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = 0x01;  /* Active */
-	*skb_put(skb, 1) = 0x02;  /* 424 kbps */
+	*(u8 *)skb_put(skb, 1) = 0x01;  /* Active */
+	*(u8 *)skb_put(skb, 1) = 0x02;  /* 424 kbps */
 
 	next = skb_put(skb, 1);  /* Next */
 	*next = 0;
@@ -1620,8 +1620,8 @@ static int pn533_activate_target_nfcdep(struct pn533 *dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, sizeof(u8)) = 1; /* TG */
-	*skb_put(skb, sizeof(u8)) = 0; /* Next */
+	*(u8 *)skb_put(skb, sizeof(u8)) = 1; /* TG */
+	*(u8 *)skb_put(skb, sizeof(u8)) = 0; /* Next */
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_IN_ATR, skb);
 	if (IS_ERR(resp))
@@ -1737,7 +1737,7 @@ static void pn533_deactivate_target(struct nfc_dev *nfc_dev,
 	if (!skb)
 		return;
 
-	*skb_put(skb, 1) = 1; /* TG*/
+	*(u8 *)skb_put(skb, 1) = 1; /* TG*/
 
 	rc = pn533_send_cmd_async(dev, PN533_CMD_IN_RELEASE, skb,
 				  pn533_deactivate_target_complete, NULL);
@@ -1848,8 +1848,8 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = !comm_mode;  /* ActPass */
-	*skb_put(skb, 1) = 0x02;  /* 424 kbps */
+	*(u8 *)skb_put(skb, 1) = !comm_mode;  /* ActPass */
+	*(u8 *)skb_put(skb, 1) = 0x02;  /* 424 kbps */
 
 	next = skb_put(skb, 1);  /* Next */
 	*next = 0;
@@ -2274,7 +2274,7 @@ static void pn533_wq_mi_recv(struct work_struct *work)
 			break;
 		}
 	default:
-		*skb_put(skb, sizeof(u8)) =  1; /*TG*/
+		*(u8 *)skb_put(skb, sizeof(u8)) =  1; /*TG*/
 
 		rc = pn533_send_cmd_direct_async(dev,
 						 PN533_CMD_IN_DATA_EXCHANGE,
@@ -2370,7 +2370,7 @@ static int pn533_set_configuration(struct pn533 *dev, u8 cfgitem, u8 *cfgdata,
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, sizeof(cfgitem)) = cfgitem;
+	*(u8 *)skb_put(skb, sizeof(cfgitem)) = cfgitem;
 	skb_put_data(skb, cfgdata, cfgdata_len);
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_RF_CONFIGURATION, skb);
@@ -2415,7 +2415,7 @@ static int pn533_pasori_fw_reset(struct pn533 *dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, sizeof(u8)) = 0x1;
+	*(u8 *)skb_put(skb, sizeof(u8)) = 0x1;
 
 	resp = pn533_send_cmd_sync(dev, 0x18, skb);
 	if (IS_ERR(resp))
@@ -2454,7 +2454,7 @@ static int pn532_sam_configuration(struct nfc_dev *nfc_dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = 0x01;
+	*(u8 *)skb_put(skb, 1) = 0x01;
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_SAM_CONFIGURATION, skb);
 	if (IS_ERR(resp))
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index 71ac0836c9f4..dc1e3768cee6 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -287,8 +287,8 @@ static void pn544_hci_i2c_add_len_crc(struct sk_buff *skb)
 
 	crc = crc_ccitt(0xffff, skb->data, skb->len);
 	crc = ~crc;
-	*skb_put(skb, 1) = crc & 0xff;
-	*skb_put(skb, 1) = crc >> 8;
+	*(u8 *)skb_put(skb, 1) = crc & 0xff;
+	*(u8 *)skb_put(skb, 1) = crc >> 8;
 }
 
 static void pn544_hci_i2c_remove_len_crc(struct sk_buff *skb)
@@ -391,7 +391,7 @@ static int pn544_hci_i2c_read(struct pn544_i2c_phy *phy, struct sk_buff **skb)
 		goto flush;
 	}
 
-	*skb_put(*skb, 1) = len;
+	*(u8 *)skb_put(*skb, 1) = len;
 
 	r = i2c_master_recv(client, skb_put(*skb, len), len);
 	if (r != len) {
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index e1260da73d45..5fa3cf0fabd6 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -991,7 +991,7 @@ static int port100_set_command_type(struct port100 *dev, u8 command_type)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, sizeof(u8)) = command_type;
+	*(u8 *)skb_put(skb, sizeof(u8)) = command_type;
 
 	resp = port100_send_cmd_sync(dev, PORT100_CMD_SET_COMMAND_TYPE, skb);
 	if (IS_ERR(resp))
@@ -1059,7 +1059,7 @@ static int port100_switch_rf(struct nfc_digital_dev *ddev, bool on)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = on ? 1 : 0;
+	*(u8 *)skb_put(skb, 1) = on ? 1 : 0;
 
 	/* Cancel the last command if the device is being switched off */
 	if (!on)
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 94d0b913b627..c36f0e0afdfd 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -177,10 +177,10 @@ static void st21nfca_hci_add_len_crc(struct sk_buff *skb)
 	crc = ~crc;
 
 	tmp = crc & 0x00ff;
-	*skb_put(skb, 1) = tmp;
+	*(u8 *)skb_put(skb, 1) = tmp;
 
 	tmp = (crc >> 8) & 0x00ff;
-	*skb_put(skb, 1) = tmp;
+	*(u8 *)skb_put(skb, 1) = tmp;
 }
 
 static void st21nfca_hci_remove_len_crc(struct sk_buff *skb)
@@ -214,7 +214,7 @@ static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb)
 	st21nfca_hci_add_len_crc(skb);
 
 	/* add ST21NFCA_SOF_EOF on tail */
-	*skb_put(skb, 1) = ST21NFCA_SOF_EOF;
+	*(u8 *)skb_put(skb, 1) = ST21NFCA_SOF_EOF;
 	/* add ST21NFCA_SOF_EOF on head */
 	*skb_push(skb, 1) = ST21NFCA_SOF_EOF;
 
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index c2840e412962..168adcc46cb8 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -949,7 +949,7 @@ static int st95hf_in_send_cmd(struct nfc_digital_dev *ddev,
 	switch (stcontext->current_rf_tech) {
 	case NFC_DIGITAL_RF_TECH_106A:
 		len_data_to_tag = skb->len + 1;
-		*skb_put(skb, 1) = stcontext->sendrcv_trflag;
+		*(u8 *)skb_put(skb, 1) = stcontext->sendrcv_trflag;
 		break;
 	case NFC_DIGITAL_RF_TECH_106B:
 	case NFC_DIGITAL_RF_TECH_ISO15693:
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 902722dc4ce3..b025ee5da1ba 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -351,7 +351,7 @@ static int bnx2fc_xmit(struct fc_lport *lport, struct fc_frame *fp)
 		frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
 		cp = kmap_atomic(skb_frag_page(frag)) + frag->page_offset;
 	} else {
-		cp = (struct fcoe_crc_eof *)skb_put(skb, tlen);
+		cp = skb_put(skb, tlen);
 	}
 
 	memset(cp, 0, sizeof(*cp));
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 90939f66bc0d..539e23ec0e59 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1543,7 +1543,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp)
 		cp = kmap_atomic(skb_frag_page(frag))
 			+ frag->page_offset;
 	} else {
-		cp = (struct fcoe_crc_eof *)skb_put(skb, tlen);
+		cp = skb_put(skb, tlen);
 	}
 
 	memset(cp, 0, sizeof(*cp));
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index b97405ed6cae..da0fcce6f842 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -874,7 +874,7 @@ static int qedf_xmit(struct fc_lport *lport, struct fc_frame *fp)
 		frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
 		cp = kmap_atomic(skb_frag_page(frag)) + frag->page_offset;
 	} else {
-		cp = (struct fcoe_crc_eof *)skb_put(skb, tlen);
+		cp = skb_put(skb, tlen);
 	}
 
 	memset(cp, 0, sizeof(*cp));
diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index 1d3963136295..1720e1b6ae04 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -95,8 +95,7 @@ static struct sk_buff *rtllib_ADDBA(struct rtllib_device *ieee, u8 *Dst,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	BAReq = (struct rtllib_hdr_3addr *)skb_put(skb,
-		 sizeof(struct rtllib_hdr_3addr));
+	BAReq = skb_put(skb, sizeof(struct rtllib_hdr_3addr));
 
 	ether_addr_copy(BAReq->addr1, Dst);
 	ether_addr_copy(BAReq->addr2, ieee->dev->dev_addr);
@@ -104,7 +103,7 @@ static struct sk_buff *rtllib_ADDBA(struct rtllib_device *ieee, u8 *Dst,
 	ether_addr_copy(BAReq->addr3, ieee->current_network.bssid);
 	BAReq->frame_ctl = cpu_to_le16(RTLLIB_STYPE_MANAGE_ACT);
 
-	tag = (u8 *)skb_put(skb, 9);
+	tag = skb_put(skb, 9);
 	*tag++ = ACT_CAT_BA;
 	*tag++ = type;
 	*tag++ = pBA->DialogToken;
@@ -159,15 +158,14 @@ static struct sk_buff *rtllib_DELBA(struct rtllib_device *ieee, u8 *dst,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	Delba = (struct rtllib_hdr_3addr *) skb_put(skb,
-		 sizeof(struct rtllib_hdr_3addr));
+	Delba = skb_put(skb, sizeof(struct rtllib_hdr_3addr));
 
 	ether_addr_copy(Delba->addr1, dst);
 	ether_addr_copy(Delba->addr2, ieee->dev->dev_addr);
 	ether_addr_copy(Delba->addr3, ieee->current_network.bssid);
 	Delba->frame_ctl = cpu_to_le16(RTLLIB_STYPE_MANAGE_ACT);
 
-	tag = (u8 *)skb_put(skb, 6);
+	tag = skb_put(skb, 6);
 
 	*tag++ = ACT_CAT_BA;
 	*tag++ = ACT_DELBA;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 60d07d0bb4eb..5f2751d4d464 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -351,8 +351,7 @@ static inline struct sk_buff *rtllib_probe_req(struct rtllib_device *ieee)
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	req = (struct rtllib_probe_request *) skb_put(skb,
-	      sizeof(struct rtllib_probe_request));
+	req = skb_put(skb, sizeof(struct rtllib_probe_request));
 	req->header.frame_ctl = cpu_to_le16(RTLLIB_STYPE_PROBE_REQ);
 	req->header.duration_id = 0;
 
@@ -360,7 +359,7 @@ static inline struct sk_buff *rtllib_probe_req(struct rtllib_device *ieee)
 	ether_addr_copy(req->header.addr2, ieee->dev->dev_addr);
 	eth_broadcast_addr(req->header.addr3);
 
-	tag = (u8 *) skb_put(skb, len + 2 + rate_len);
+	tag = skb_put(skb, len + 2 + rate_len);
 
 	*tag++ = MFIE_TYPE_SSID;
 	*tag++ = len;
@@ -789,8 +788,7 @@ rtllib_authentication_req(struct rtllib_network *beacon,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	auth = (struct rtllib_authentication *)
-		skb_put(skb, sizeof(struct rtllib_authentication));
+	auth = skb_put(skb, sizeof(struct rtllib_authentication));
 
 	auth->header.frame_ctl = cpu_to_le16(RTLLIB_STYPE_AUTH);
 	if (challengelen)
@@ -889,8 +887,7 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	beacon_buf = (struct rtllib_probe_response *) skb_put(skb,
-		     (beacon_size - ieee->tx_headroom));
+	beacon_buf = skb_put(skb, (beacon_size - ieee->tx_headroom));
 	ether_addr_copy(beacon_buf->header.addr1, dest);
 	ether_addr_copy(beacon_buf->header.addr2, ieee->dev->dev_addr);
 	ether_addr_copy(beacon_buf->header.addr3, ieee->current_network.bssid);
@@ -984,8 +981,7 @@ static struct sk_buff *rtllib_assoc_resp(struct rtllib_device *ieee, u8 *dest)
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	assoc = (struct rtllib_assoc_response_frame *)
-		skb_put(skb, sizeof(struct rtllib_assoc_response_frame));
+	assoc = skb_put(skb, sizeof(struct rtllib_assoc_response_frame));
 
 	assoc->header.frame_ctl = cpu_to_le16(RTLLIB_STYPE_ASSOC_RESP);
 	ether_addr_copy(assoc->header.addr1, dest);
@@ -1016,7 +1012,7 @@ static struct sk_buff *rtllib_assoc_resp(struct rtllib_device *ieee, u8 *dest)
 	else
 		ieee->assoc_id++;
 
-	tag = (u8 *) skb_put(skb, rate_len);
+	tag = skb_put(skb, rate_len);
 	rtllib_MFIE_Brate(ieee, &tag);
 	rtllib_MFIE_Grate(ieee, &tag);
 
@@ -1038,8 +1034,7 @@ static struct sk_buff *rtllib_auth_resp(struct rtllib_device *ieee, int status,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	auth = (struct rtllib_authentication *)
-		skb_put(skb, sizeof(struct rtllib_authentication));
+	auth = skb_put(skb, sizeof(struct rtllib_authentication));
 
 	auth->status = cpu_to_le16(status);
 	auth->transaction = cpu_to_le16(2);
@@ -1065,8 +1060,7 @@ static struct sk_buff *rtllib_null_func(struct rtllib_device *ieee, short pwr)
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	hdr = (struct rtllib_hdr_3addr *)skb_put(skb,
-	      sizeof(struct rtllib_hdr_3addr));
+	hdr = skb_put(skb, sizeof(struct rtllib_hdr_3addr));
 
 	ether_addr_copy(hdr->addr1, ieee->current_network.bssid);
 	ether_addr_copy(hdr->addr2, ieee->dev->dev_addr);
@@ -1092,8 +1086,7 @@ static struct sk_buff *rtllib_pspoll_func(struct rtllib_device *ieee)
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	hdr = (struct rtllib_pspoll_hdr *)skb_put(skb,
-	      sizeof(struct rtllib_pspoll_hdr));
+	hdr = skb_put(skb, sizeof(struct rtllib_pspoll_hdr));
 
 	ether_addr_copy(hdr->bssid, ieee->current_network.bssid);
 	ether_addr_copy(hdr->ta, ieee->dev->dev_addr);
@@ -1243,8 +1236,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	hdr = (struct rtllib_assoc_request_frame *)
-		skb_put(skb, sizeof(struct rtllib_assoc_request_frame) + 2);
+	hdr = skb_put(skb, sizeof(struct rtllib_assoc_request_frame) + 2);
 
 
 	hdr->header.frame_ctl = cpu_to_le16(RTLLIB_STYPE_ASSOC_REQ);
@@ -3414,8 +3406,7 @@ rtllib_disauth_skb(struct rtllib_network *beacon,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	disauth = (struct rtllib_disauth *) skb_put(skb,
-		  sizeof(struct rtllib_disauth));
+	disauth = skb_put(skb, sizeof(struct rtllib_disauth));
 	disauth->header.frame_ctl = cpu_to_le16(RTLLIB_STYPE_DEAUTH);
 	disauth->header.duration_id = 0;
 
@@ -3442,8 +3433,7 @@ rtllib_disassociate_skb(struct rtllib_network *beacon,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	disass = (struct rtllib_disassoc *) skb_put(skb,
-					 sizeof(struct rtllib_disassoc));
+	disass = skb_put(skb, sizeof(struct rtllib_disassoc));
 	disass->header.frame_ctl = cpu_to_le16(RTLLIB_STYPE_DISASSOC);
 	disass->header.duration_id = 0;
 
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
index 903a1d0269df..107069180ed2 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
@@ -341,7 +341,7 @@ static inline struct sk_buff *ieee80211_probe_req(struct ieee80211_device *ieee)
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	req = (struct ieee80211_probe_request *) skb_put(skb,sizeof(struct ieee80211_probe_request));
+	req = skb_put(skb, sizeof(struct ieee80211_probe_request));
 	req->header.frame_ctl = cpu_to_le16(IEEE80211_STYPE_PROBE_REQ);
 	req->header.duration_id = 0; /* FIXME: is this OK? */
 
@@ -349,7 +349,7 @@ static inline struct sk_buff *ieee80211_probe_req(struct ieee80211_device *ieee)
 	memcpy(req->header.addr2, ieee->dev->dev_addr, ETH_ALEN);
 	eth_broadcast_addr(req->header.addr3);
 
-	tag = (u8 *) skb_put(skb,len+2+rate_len);
+	tag = skb_put(skb, len + 2 + rate_len);
 
 	*tag++ = MFIE_TYPE_SSID;
 	*tag++ = len;
@@ -659,8 +659,7 @@ ieee80211_authentication_req(struct ieee80211_network *beacon,
 	if (!skb) return NULL;
 
 	skb_reserve(skb, ieee->tx_headroom);
-	auth = (struct ieee80211_authentication *)
-		skb_put(skb, sizeof(struct ieee80211_authentication));
+	auth = skb_put(skb, sizeof(struct ieee80211_authentication));
 
 	if (challengelen)
 		auth->header.frame_ctl = cpu_to_le16(IEEE80211_STYPE_AUTH
@@ -768,7 +767,7 @@ static struct sk_buff *ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
 	if (!skb)
 		return NULL;
 	skb_reserve(skb, ieee->tx_headroom);
-	beacon_buf = (struct ieee80211_probe_response *) skb_put(skb, (beacon_size - ieee->tx_headroom));
+	beacon_buf = skb_put(skb, (beacon_size - ieee->tx_headroom));
 	memcpy (beacon_buf->header.addr1, dest,ETH_ALEN);
 	memcpy (beacon_buf->header.addr2, ieee->dev->dev_addr, ETH_ALEN);
 	memcpy (beacon_buf->header.addr3, ieee->current_network.bssid, ETH_ALEN);
@@ -864,8 +863,7 @@ static struct sk_buff *ieee80211_assoc_resp(struct ieee80211_device *ieee,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	assoc = (struct ieee80211_assoc_response_frame *)
-		skb_put(skb, sizeof(struct ieee80211_assoc_response_frame));
+	assoc = skb_put(skb, sizeof(struct ieee80211_assoc_response_frame));
 
 	assoc->header.frame_ctl = cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP);
 	memcpy(assoc->header.addr1, dest,ETH_ALEN);
@@ -892,7 +890,7 @@ static struct sk_buff *ieee80211_assoc_resp(struct ieee80211_device *ieee,
 	if (ieee->assoc_id == 0x2007) ieee->assoc_id=0;
 	else ieee->assoc_id++;
 
-	tag = (u8 *) skb_put(skb, rate_len);
+	tag = skb_put(skb, rate_len);
 
 	ieee80211_MFIE_Brate(ieee, &tag);
 	ieee80211_MFIE_Grate(ieee, &tag);
@@ -940,7 +938,7 @@ static struct sk_buff *ieee80211_null_func(struct ieee80211_device *ieee,
 	if (!skb)
 		return NULL;
 
-	hdr = (struct rtl_80211_hdr_3addr *)skb_put(skb,sizeof(struct rtl_80211_hdr_3addr));
+	hdr = skb_put(skb, sizeof(struct rtl_80211_hdr_3addr));
 
 	memcpy(hdr->addr1, ieee->current_network.bssid, ETH_ALEN);
 	memcpy(hdr->addr2, ieee->dev->dev_addr, ETH_ALEN);
@@ -1086,8 +1084,7 @@ ieee80211_association_req(struct ieee80211_network *beacon,
 
 	skb_reserve(skb, ieee->tx_headroom);
 
-	hdr = (struct ieee80211_assoc_request_frame *)
-		skb_put(skb, sizeof(struct ieee80211_assoc_request_frame)+2);
+	hdr = skb_put(skb, sizeof(struct ieee80211_assoc_request_frame) + 2);
 
 
 	hdr->header.frame_ctl = IEEE80211_STYPE_ASSOC_REQ;
@@ -3110,7 +3107,7 @@ static inline struct sk_buff *ieee80211_disassociate_skb(
 	if (!skb)
 		return NULL;
 
-	disass = (struct ieee80211_disassoc *) skb_put(skb, sizeof(struct ieee80211_disassoc));
+	disass = skb_put(skb, sizeof(struct ieee80211_disassoc));
 	disass->header.frame_ctl = cpu_to_le16(IEEE80211_STYPE_DISASSOC);
 	disass->header.duration_id = 0;
 
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c b/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c
index e82b5073c3f1..8aa38dcf0dfd 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c
@@ -125,7 +125,7 @@ static struct sk_buff *ieee80211_ADDBA(struct ieee80211_device *ieee, u8 *Dst, P
 	memset(skb->data, 0, sizeof( struct rtl_80211_hdr_3addr));	//I wonder whether it's necessary. Apparently kernel will not do it when alloc a skb.
 	skb_reserve(skb, ieee->tx_headroom);
 
-	BAReq = ( struct rtl_80211_hdr_3addr *) skb_put(skb,sizeof( struct rtl_80211_hdr_3addr));
+	BAReq = skb_put(skb, sizeof(struct rtl_80211_hdr_3addr));
 
 	memcpy(BAReq->addr1, Dst, ETH_ALEN);
 	memcpy(BAReq->addr2, ieee->dev->dev_addr, ETH_ALEN);
@@ -135,7 +135,7 @@ static struct sk_buff *ieee80211_ADDBA(struct ieee80211_device *ieee, u8 *Dst, P
 	BAReq->frame_ctl = cpu_to_le16(IEEE80211_STYPE_MANAGE_ACT); //action frame
 
 	//tag += sizeof( struct rtl_80211_hdr_3addr); //move to action field
-	tag = (u8 *)skb_put(skb, 9);
+	tag = skb_put(skb, 9);
 	*tag ++= ACT_CAT_BA;
 	*tag ++= type;
 	// Dialog Token
@@ -209,14 +209,14 @@ static struct sk_buff *ieee80211_DELBA(
 //	memset(skb->data, 0, len+sizeof( struct rtl_80211_hdr_3addr));
 	skb_reserve(skb, ieee->tx_headroom);
 
-	Delba = ( struct rtl_80211_hdr_3addr *) skb_put(skb,sizeof( struct rtl_80211_hdr_3addr));
+	Delba = skb_put(skb, sizeof(struct rtl_80211_hdr_3addr));
 
 	memcpy(Delba->addr1, dst, ETH_ALEN);
 	memcpy(Delba->addr2, ieee->dev->dev_addr, ETH_ALEN);
 	memcpy(Delba->addr3, ieee->current_network.bssid, ETH_ALEN);
 	Delba->frame_ctl = cpu_to_le16(IEEE80211_STYPE_MANAGE_ACT); //action frame
 
-	tag = (u8 *)skb_put(skb, 6);
+	tag = skb_put(skb, 6);
 
 	*tag ++= ACT_CAT_BA;
 	*tag ++= ACT_DELBA;
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
index 939c6ec51e4d..15cd1e33b16b 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
@@ -1085,7 +1085,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req)
 		return;
 	}
 
-	rpl5 = (struct cpl_t5_pass_accept_rpl *)__skb_put(skb, len);
+	rpl5 = __skb_put(skb, len);
 	memset(rpl5, 0, len);
 
 	INIT_TP_WR(rpl5, csk->tid);
@@ -1367,7 +1367,7 @@ u32 cxgbit_send_tx_flowc_wr(struct cxgbit_sock *csk)
 	flowclen16 = cxgbit_tx_flowc_wr_credits(csk, &nparams, &flowclen);
 
 	skb = __skb_dequeue(&csk->skbq);
-	flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
+	flowc = __skb_put(skb, flowclen);
 	memset(flowc, 0, flowclen);
 
 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
@@ -1439,7 +1439,7 @@ int cxgbit_setup_conn_digest(struct cxgbit_sock *csk)
 		return -ENOMEM;
 
 	/*  set up ulp submode */
-	req = (struct cpl_set_tcb_field *)__skb_put(skb, len);
+	req = __skb_put(skb, len);
 	memset(req, 0, len);
 
 	INIT_TP_WR(req, csk->tid);
@@ -1476,7 +1476,7 @@ int cxgbit_setup_conn_pgidx(struct cxgbit_sock *csk, u32 pg_idx)
 	if (!skb)
 		return -ENOMEM;
 
-	req = (struct cpl_set_tcb_field *)__skb_put(skb, len);
+	req = __skb_put(skb, len);
 	memset(req, 0, len);
 
 	INIT_TP_WR(req, csk->tid);
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
index 5d78bdb7fc64..5fdb57cac968 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
@@ -79,7 +79,7 @@ cxgbit_ppod_init_idata(struct cxgbit_device *cdev, struct cxgbi_ppm *ppm,
 	if (!skb)
 		return NULL;
 
-	req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
+	req = __skb_put(skb, wr_len);
 	INIT_ULPTX_WR(req, wr_len, 0, tid);
 	req->wr.wr_hi = htonl(FW_WR_OP_V(FW_ULPTX_WR) |
 		FW_WR_ATOMIC_V(0));
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 630616aaa861..a9c28c72c1c7 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1047,7 +1047,7 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port,
 			crc = ~crc32_le(~0,
 					skb->data,
 					skb->len);
-			crc_pos = (void *) skb_put(skb, sizeof(uint32_t));
+			crc_pos = skb_put(skb, sizeof(uint32_t));
 			put_unaligned_le32(crc, crc_pos);
 		}
 
@@ -1097,8 +1097,7 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port,
 				goto err;
 
 			ncm->skb_tx_ndp->dev = ncm->netdev;
-			ntb_ndp = (void *) skb_put(ncm->skb_tx_ndp,
-						    opts->ndp_size);
+			ntb_ndp = skb_put(ncm->skb_tx_ndp, opts->ndp_size);
 			memset(ntb_ndp, 0, ncb_len);
 			/* dwSignature */
 			put_unaligned_le32(ncm->ndp_sign, ntb_ndp);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5af5385a0e72..454ea37dddbb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1893,11 +1893,11 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 /*
  *	Add data to an sk_buff
  */
-unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
-unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
-static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
+void *skb_put(struct sk_buff *skb, unsigned int len);
+static inline void *__skb_put(struct sk_buff *skb, unsigned int len)
 {
-	unsigned char *tmp = skb_tail_pointer(skb);
+	void *tmp = skb_tail_pointer(skb);
 	SKB_LINEAR_ASSERT(skb);
 	skb->tail += len;
 	skb->len  += len;
diff --git a/lib/nlattr.c b/lib/nlattr.c
index ab15a6c095d3..a0c738aa6a79 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -352,7 +352,7 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 {
 	struct nlattr *nla;
 
-	nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
+	nla = skb_put(skb, nla_total_size(attrlen));
 	nla->nla_type = attrtype;
 	nla->nla_len = nla_attr_size(attrlen);
 
diff --git a/net/802/garp.c b/net/802/garp.c
index b38ee6dcba45..a9a266569293 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -221,7 +221,7 @@ static int garp_pdu_init(struct garp_applicant *app)
 	skb->protocol = htons(ETH_P_802_2);
 	skb_reserve(skb, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE);
 
-	gp = (struct garp_pdu_hdr *)__skb_put(skb, sizeof(*gp));
+	gp = __skb_put(skb, sizeof(*gp));
 	put_unaligned(htons(GARP_PROTOCOL_ID), &gp->protocol);
 
 	app->pdu = skb;
@@ -268,7 +268,7 @@ static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype)
 
 	if (skb_tailroom(app->pdu) < sizeof(*gm))
 		return -1;
-	gm = (struct garp_msg_hdr *)__skb_put(app->pdu, sizeof(*gm));
+	gm = __skb_put(app->pdu, sizeof(*gm));
 	gm->attrtype = attrtype;
 	garp_cb(app->pdu)->cur_type = attrtype;
 	return 0;
@@ -299,7 +299,7 @@ again:
 	len = sizeof(*ga) + attr->dlen;
 	if (skb_tailroom(app->pdu) < len)
 		goto queue;
-	ga = (struct garp_attr_hdr *)__skb_put(app->pdu, len);
+	ga = __skb_put(app->pdu, len);
 	ga->len   = len;
 	ga->event = event;
 	memcpy(ga->data, attr->data, attr->dlen);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 72db2785ef2c..be4dd3165347 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -311,7 +311,7 @@ static int mrp_pdu_init(struct mrp_applicant *app)
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 
-	ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph));
+	ph = __skb_put(skb, sizeof(*ph));
 	ph->version = app->app->version;
 
 	app->pdu = skb;
@@ -324,7 +324,7 @@ static int mrp_pdu_append_end_mark(struct mrp_applicant *app)
 
 	if (skb_tailroom(app->pdu) < sizeof(*endmark))
 		return -1;
-	endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark));
+	endmark = __skb_put(app->pdu, sizeof(*endmark));
 	put_unaligned(MRP_END_MARK, endmark);
 	return 0;
 }
@@ -368,7 +368,7 @@ static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app,
 
 	if (skb_tailroom(app->pdu) < sizeof(*mh))
 		return -1;
-	mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh));
+	mh = __skb_put(app->pdu, sizeof(*mh));
 	mh->attrtype = attrtype;
 	mh->attrlen = attrlen;
 	mrp_cb(app->pdu)->mh = mh;
@@ -382,8 +382,7 @@ static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app,
 
 	if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen)
 		return -1;
-	vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu,
-						  sizeof(*vah) + attrlen);
+	vah = __skb_put(app->pdu, sizeof(*vah) + attrlen);
 	put_unaligned(0, &vah->lenflags);
 	memcpy(vah->firstattrvalue, firstattrvalue, attrlen);
 	mrp_cb(app->pdu)->vah = vah;
@@ -435,7 +434,7 @@ again:
 	if (!pos) {
 		if (skb_tailroom(app->pdu) < sizeof(u8))
 			goto queue;
-		vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8));
+		vaevents = __skb_put(app->pdu, sizeof(u8));
 	} else {
 		vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8));
 	}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 465cc24b41e5..c7af6dc70fa2 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1647,7 +1647,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
 	SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
 
-	ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr));
+	ddp = skb_put(skb, sizeof(struct ddpehdr));
 	ddp->deh_len_hops  = htons(len + sizeof(*ddp));
 	ddp->deh_dnet  = usat->sat_addr.s_net;
 	ddp->deh_snet  = at->src_net;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ec527b62f79d..a7e4018370b4 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -60,7 +60,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
 	skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
-	ctrl = (struct atmarp_ctrl *)skb_put(skb, sizeof(struct atmarp_ctrl));
+	ctrl = skb_put(skb, sizeof(struct atmarp_ctrl));
 	ctrl->type = type;
 	ctrl->itf_num = itf;
 	ctrl->ip = ip;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 6308c9f0fd96..8ead292886d1 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -207,7 +207,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
 
 	skb->priority = TC_PRIO_CONTROL;
 	skb_reserve(skb, ETH_HLEN);
-	icmp_header = (struct batadv_icmp_header *)skb_put(skb, packet_len);
+	icmp_header = skb_put(skb, packet_len);
 
 	if (copy_from_user(icmp_header, buff, packet_len)) {
 		len = -EFAULT;
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index e3e2585d0977..bfe8effe9238 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -595,7 +595,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
 		return BATADV_TP_REASON_MEMORY_ERROR;
 
 	skb_reserve(skb, ETH_HLEN);
-	icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+	icmp = skb_put(skb, sizeof(*icmp));
 
 	/* fill the icmp header */
 	ether_addr_copy(icmp->dst, orig_node->orig);
@@ -612,7 +612,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
 	icmp->timestamp = htonl(timestamp);
 
 	data_len = len - sizeof(*icmp);
-	data = (u8 *)skb_put(skb, data_len);
+	data = skb_put(skb, data_len);
 	batadv_tp_fill_prerandom(tp_vars, data, data_len);
 
 	r = batadv_send_skb_to_orig(skb, orig_node, NULL);
@@ -1190,7 +1190,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
 	}
 
 	skb_reserve(skb, ETH_HLEN);
-	icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+	icmp = skb_put(skb, sizeof(*icmp));
 	icmp->packet_type = BATADV_ICMP;
 	icmp->version = BATADV_COMPAT_VERSION;
 	icmp->ttl = BATADV_TTL;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 4e4105a932bd..b73ac149de34 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -299,7 +299,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
 	if (!skb)
 		return NULL;
 
-	hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
+	hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE);
 	hdr->opcode = cpu_to_le16(opcode);
 	hdr->plen   = plen;
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 083e87f26a0f..1301a8786d8d 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -410,7 +410,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 		if (!skb)
 			return NULL;
 
-		ni = (void *)skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
+		ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
 		ni->type = hdev->dev_type;
 		ni->bus = hdev->bus;
 		bacpy(&ni->bdaddr, &hdev->bdaddr);
@@ -438,7 +438,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 		if (!skb)
 			return NULL;
 
-		ii = (void *)skb_put(skb, HCI_MON_INDEX_INFO_SIZE);
+		ii = skb_put(skb, HCI_MON_INDEX_INFO_SIZE);
 		bacpy(&ii->bdaddr, &hdev->bdaddr);
 		ii->manufacturer = cpu_to_le16(hdev->manufacturer);
 
@@ -517,7 +517,7 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
 	put_unaligned_le16(format, skb_put(skb, 2));
 	skb_put_data(skb, ver, sizeof(ver));
 	put_unaligned_le32(flags, skb_put(skb, 4));
-	*skb_put(skb, 1) = TASK_COMM_LEN;
+	*(u8 *)skb_put(skb, 1) = TASK_COMM_LEN;
 	skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN);
 
 	__net_timestamp(skb);
@@ -616,7 +616,7 @@ send_monitor_note(struct sock *sk, const char *fmt, ...)
 
 	va_start(args, fmt);
 	vsprintf(skb_put(skb, len), fmt, args);
-	*skb_put(skb, 1) = 0;
+	*(u8 *)skb_put(skb, 1) = 0;
 	va_end(args);
 
 	__net_timestamp(skb);
@@ -703,11 +703,11 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
 	if (!skb)
 		return;
 
-	hdr = (void *)skb_put(skb, HCI_EVENT_HDR_SIZE);
+	hdr = skb_put(skb, HCI_EVENT_HDR_SIZE);
 	hdr->evt  = HCI_EV_STACK_INTERNAL;
 	hdr->plen = sizeof(*ev) + dlen;
 
-	ev  = (void *)skb_put(skb, sizeof(*ev) + dlen);
+	ev = skb_put(skb, sizeof(*ev) + dlen);
 	ev->type = type;
 	memcpy(ev->data, data, dlen);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 9e83713262e8..c0d0832a023d 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -112,7 +112,7 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock,
 		return -ENOMEM;
 	}
 
-	*skb_put(skb, 1) = hdr;
+	*(u8 *)skb_put(skb, 1) = hdr;
 	if (data && size > 0)
 		skb_put_data(skb, data, size);
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fe6a5529bdf5..303c779bfe38 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1048,7 +1048,7 @@ static struct sk_buff *l2cap_create_sframe_pdu(struct l2cap_chan *chan,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh = skb_put(skb, L2CAP_HDR_SIZE);
 	lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE);
 	lh->cid = cpu_to_le16(chan->dcid);
 
@@ -2182,7 +2182,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan,
 		return skb;
 
 	/* Create L2CAP header */
-	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh = skb_put(skb, L2CAP_HDR_SIZE);
 	lh->cid = cpu_to_le16(chan->dcid);
 	lh->len = cpu_to_le16(len + L2CAP_PSMLEN_SIZE);
 	put_unaligned(chan->psm, (__le16 *) skb_put(skb, L2CAP_PSMLEN_SIZE));
@@ -2213,7 +2213,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan,
 		return skb;
 
 	/* Create L2CAP header */
-	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh = skb_put(skb, L2CAP_HDR_SIZE);
 	lh->cid = cpu_to_le16(chan->dcid);
 	lh->len = cpu_to_le16(len);
 
@@ -2255,7 +2255,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan,
 		return skb;
 
 	/* Create L2CAP header */
-	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh = skb_put(skb, L2CAP_HDR_SIZE);
 	lh->cid = cpu_to_le16(chan->dcid);
 	lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
 
@@ -2373,7 +2373,7 @@ static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan,
 		return skb;
 
 	/* Create L2CAP header */
-	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh = skb_put(skb, L2CAP_HDR_SIZE);
 	lh->cid = cpu_to_le16(chan->dcid);
 	lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
 
@@ -2908,7 +2908,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
 	if (!skb)
 		return NULL;
 
-	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh = skb_put(skb, L2CAP_HDR_SIZE);
 	lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
 
 	if (conn->hcon->type == LE_LINK)
@@ -2916,7 +2916,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
 	else
 		lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
 
-	cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
+	cmd = skb_put(skb, L2CAP_CMD_HDR_SIZE);
 	cmd->code  = code;
 	cmd->ident = ident;
 	cmd->len   = cpu_to_le16(dlen);
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 11d0ca64402b..d057113e0d4b 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -66,7 +66,7 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel,
 	if (!skb)
 		return -ENOMEM;
 
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	hdr->opcode = cpu_to_le16(event);
 	if (hdev)
 		hdr->index = cpu_to_le16(hdev->id);
@@ -103,13 +103,13 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
 	if (!skb)
 		return -ENOMEM;
 
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 
 	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
 	hdr->index = cpu_to_le16(index);
 	hdr->len = cpu_to_le16(sizeof(*ev));
 
-	ev = (void *) skb_put(skb, sizeof(*ev));
+	ev = skb_put(skb, sizeof(*ev));
 	ev->status = status;
 	ev->opcode = cpu_to_le16(cmd);
 
@@ -147,13 +147,13 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
 	if (!skb)
 		return -ENOMEM;
 
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 
 	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
 	hdr->index = cpu_to_le16(index);
 	hdr->len = cpu_to_le16(sizeof(*ev) + rp_len);
 
-	ev = (void *) skb_put(skb, sizeof(*ev) + rp_len);
+	ev = skb_put(skb, sizeof(*ev) + rp_len);
 	ev->opcode = cpu_to_le16(cmd);
 	ev->status = status;
 
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 8ebca9033d60..1a9b906c5a35 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -863,7 +863,7 @@ static int rfcomm_queue_disc(struct rfcomm_dlc *d)
 	if (!skb)
 		return -ENOMEM;
 
-	cmd = (void *) __skb_put(skb, sizeof(*cmd));
+	cmd = __skb_put(skb, sizeof(*cmd));
 	cmd->addr = d->addr;
 	cmd->ctrl = __ctrl(RFCOMM_DISC, 1);
 	cmd->len  = __len8(0);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8860ad985d68..b8bcf9021329 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2714,7 +2714,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
 	struct timeval timestamp;
 	struct pktgen_hdr *pgh;
 
-	pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+	pgh = skb_put(skb, sizeof(*pgh));
 	datalen -= sizeof(*pgh);
 
 	if (pkt_dev->nfrags <= 0) {
@@ -2845,33 +2845,34 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
 	/*  Reserve for ethernet and IP header  */
 	eth = (__u8 *) skb_push(skb, 14);
-	mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32));
+	mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32));
 	if (pkt_dev->nr_labels)
 		mpls_push(mpls, pkt_dev);
 
 	if (pkt_dev->vlan_id != 0xffff) {
 		if (pkt_dev->svlan_id != 0xffff) {
-			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+			svlan_tci = skb_put(skb, sizeof(__be16));
 			*svlan_tci = build_tci(pkt_dev->svlan_id,
 					       pkt_dev->svlan_cfi,
 					       pkt_dev->svlan_p);
-			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+			svlan_encapsulated_proto = skb_put(skb,
+							   sizeof(__be16));
 			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
 		}
-		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+		vlan_tci = skb_put(skb, sizeof(__be16));
 		*vlan_tci = build_tci(pkt_dev->vlan_id,
 				      pkt_dev->vlan_cfi,
 				      pkt_dev->vlan_p);
-		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+		vlan_encapsulated_proto = skb_put(skb, sizeof(__be16));
 		*vlan_encapsulated_proto = htons(ETH_P_IP);
 	}
 
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb->len);
-	iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
+	iph = skb_put(skb, sizeof(struct iphdr));
 
 	skb_set_transport_header(skb, skb->len);
-	udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
+	udph = skb_put(skb, sizeof(struct udphdr));
 	skb_set_queue_mapping(skb, queue_map);
 	skb->priority = pkt_dev->skb_priority;
 
@@ -2972,33 +2973,34 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 
 	/*  Reserve for ethernet and IP header  */
 	eth = (__u8 *) skb_push(skb, 14);
-	mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32));
+	mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32));
 	if (pkt_dev->nr_labels)
 		mpls_push(mpls, pkt_dev);
 
 	if (pkt_dev->vlan_id != 0xffff) {
 		if (pkt_dev->svlan_id != 0xffff) {
-			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+			svlan_tci = skb_put(skb, sizeof(__be16));
 			*svlan_tci = build_tci(pkt_dev->svlan_id,
 					       pkt_dev->svlan_cfi,
 					       pkt_dev->svlan_p);
-			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+			svlan_encapsulated_proto = skb_put(skb,
+							   sizeof(__be16));
 			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
 		}
-		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+		vlan_tci = skb_put(skb, sizeof(__be16));
 		*vlan_tci = build_tci(pkt_dev->vlan_id,
 				      pkt_dev->vlan_cfi,
 				      pkt_dev->vlan_p);
-		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+		vlan_encapsulated_proto = skb_put(skb, sizeof(__be16));
 		*vlan_encapsulated_proto = htons(ETH_P_IPV6);
 	}
 
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb->len);
-	iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+	iph = skb_put(skb, sizeof(struct ipv6hdr));
 
 	skb_set_transport_header(skb, skb->len);
-	udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
+	udph = skb_put(skb, sizeof(struct udphdr));
 	skb_set_queue_mapping(skb, queue_map);
 	skb->priority = pkt_dev->skb_priority;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c4d2c1f824bb..0baa7f2dd8ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1421,7 +1421,7 @@ EXPORT_SYMBOL(skb_pad);
  *	returned.
  */
 
-unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
 {
 	if (tail != skb) {
 		skb->data_len += len;
@@ -1440,9 +1440,9 @@ EXPORT_SYMBOL_GPL(pskb_put);
  *	exceed the total buffer size the kernel will panic. A pointer to the
  *	first byte of the extra data is returned.
  */
-unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+void *skb_put(struct sk_buff *skb, unsigned int len)
 {
-	unsigned char *tmp = skb_tail_pointer(skb);
+	void *tmp = skb_tail_pointer(skb);
 	SKB_LINEAR_ASSERT(skb);
 	skb->tail += len;
 	skb->len  += len;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 9017a9a73ab5..1d84f6dae315 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -846,7 +846,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 
 	skb->dev = dev;
 
-	msg = (struct endnode_hello_message *)skb_put(skb,sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 
 	msg->msgflg  = 0x0D;
 	memcpy(msg->tiver, dn_eco_version, 3);
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index b8a558715395..7e054b2f270a 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -484,7 +484,7 @@ void dn_send_conn_ack (struct sock *sk)
 	if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL)
 		return;
 
-	msg = (struct nsp_conn_ack_msg *)skb_put(skb, 3);
+	msg = skb_put(skb, 3);
 	msg->msgflg = 0x24;
 	msg->dstaddr = scp->addrrem;
 
@@ -522,7 +522,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
 	if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
 		return;
 
-	msg = (struct nsp_conn_init_msg *)skb_put(skb, sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 	msg->msgflg = 0x28;
 	msg->dstaddr = scp->addrrem;
 	msg->srcaddr = scp->addrloc;
@@ -530,7 +530,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
 	msg->info = scp->info_loc;
 	msg->segsize = cpu_to_le16(scp->segsize_loc);
 
-	*skb_put(skb,1) = len;
+	*(u8 *)skb_put(skb, 1) = len;
 
 	if (len > 0)
 		skb_put_data(skb, scp->conndata_out.opt_data, len);
@@ -662,7 +662,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 		return;
 
 	cb  = DN_SKB_CB(skb);
-	msg = (struct nsp_conn_init_msg *)skb_put(skb,sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 
 	msg->msgflg	= msgflg;
 	msg->dstaddr	= 0x0000;		/* Remote Node will assign it*/
@@ -686,25 +686,25 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	if (scp->peer.sdn_flags & SDF_UICPROXY)
 		menuver |= DN_MENUVER_UIC;
 
-	*skb_put(skb, 1) = menuver;	/* Menu Version		*/
+	*(u8 *)skb_put(skb, 1) = menuver;	/* Menu Version		*/
 
 	aux = scp->accessdata.acc_userl;
-	*skb_put(skb, 1) = aux;
+	*(u8 *)skb_put(skb, 1) = aux;
 	if (aux > 0)
 		skb_put_data(skb, scp->accessdata.acc_user, aux);
 
 	aux = scp->accessdata.acc_passl;
-	*skb_put(skb, 1) = aux;
+	*(u8 *)skb_put(skb, 1) = aux;
 	if (aux > 0)
 		skb_put_data(skb, scp->accessdata.acc_pass, aux);
 
 	aux = scp->accessdata.acc_accl;
-	*skb_put(skb, 1) = aux;
+	*(u8 *)skb_put(skb, 1) = aux;
 	if (aux > 0)
 		skb_put_data(skb, scp->accessdata.acc_acc, aux);
 
 	aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
-	*skb_put(skb, 1) = aux;
+	*(u8 *)skb_put(skb, 1) = aux;
 	if (aux > 0)
 		skb_put_data(skb, scp->conndata_out.opt_data, aux);
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a651c53260ec..8b52179ddc6e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -539,7 +539,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
-	arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
+	arp = skb_put(skb, arp_hdr_len(dev));
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_ARP);
 	if (!src_hw)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8f6b5bbcbf69..2202edf31884 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -414,7 +414,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
 		skb = igmpv3_newpack(dev, dev->mtu);
 	if (!skb)
 		return NULL;
-	pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
+	pgr = skb_put(skb, sizeof(struct igmpv3_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
@@ -508,7 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 		if (!skb)
 			return NULL;
-		psrc = (__be32 *)skb_put(skb, sizeof(__be32));
+		psrc = skb_put(skb, sizeof(__be32));
 		*psrc = psf->sf_inaddr;
 		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
@@ -742,7 +742,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	((u8 *)&iph[1])[2] = 0;
 	((u8 *)&iph[1])[3] = 0;
 
-	ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
+	ih = skb_put(skb, sizeof(struct igmphdr));
 	ih->type = type;
 	ih->code = 0;
 	ih->csum = 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1199895b8a6..abbd7c992960 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1044,7 +1044,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		msg->im_vif = vifi;
 		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 		/* Add our header */
-		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
+		igmp = skb_put(skb, sizeof(struct igmphdr));
 		igmp->type = assert;
 		msg->im_msgtype = assert;
 		igmp->code = 0;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index af2b69b6895f..f1528f7175a8 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -24,7 +24,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
 	struct iphdr *iph;
 
 	skb_reset_network_header(skb);
-	iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+	iph = skb_put(skb, sizeof(*iph));
 	iph->version	= 4;
 	iph->ihl	= sizeof(*iph) / 4;
 	iph->tos	= 0;
@@ -91,7 +91,7 @@ synproxy_send_client_synack(struct net *net,
 	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->dest;
 	nth->dest	= th->source;
 	nth->seq	= htonl(__cookie_v4_init_sequence(iph, th, &mss));
@@ -133,7 +133,7 @@ synproxy_send_server_syn(struct net *net,
 	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->source;
 	nth->dest	= th->dest;
 	nth->seq	= htonl(recv_seq - 1);
@@ -178,7 +178,7 @@ synproxy_send_server_ack(struct net *net,
 	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->dest;
 	nth->dest	= th->source;
 	nth->seq	= htonl(ntohl(th->ack_seq));
@@ -216,7 +216,7 @@ synproxy_send_client_ack(struct net *net,
 	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->source;
 	nth->dest	= th->dest;
 	nth->seq	= htonl(ntohl(th->seq) + 1);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 52b7dcc5aaf3..eeacbdaf7cdf 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -51,7 +51,7 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
 	struct iphdr *niph, *oiph = ip_hdr(oldskb);
 
 	skb_reset_network_header(nskb);
-	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+	niph = skb_put(nskb, sizeof(struct iphdr));
 	niph->version	= 4;
 	niph->ihl	= sizeof(struct iphdr) / 4;
 	niph->tos	= 0;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b64046ccae69..e2221135858b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1692,7 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		skb = mld_newpack(pmc->idev, dev->mtu);
 	if (!skb)
 		return NULL;
-	pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
+	pgr = skb_put(skb, sizeof(struct mld2_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
@@ -1784,7 +1784,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 		if (!skb)
 			return NULL;
-		psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
+		psrc = skb_put(skb, sizeof(*psrc));
 		*psrc = psf->sf_addr;
 		scount++; stotal++;
 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d310dc41209a..0327c1f2e6fc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -528,7 +528,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
 	if (!skb)
 		return;
 
-	msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 	*msg = (struct nd_msg) {
 		.icmph = {
 			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
@@ -597,7 +597,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
 	if (!skb)
 		return;
 
-	msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 	*msg = (struct nd_msg) {
 		.icmph = {
 			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
@@ -657,7 +657,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 	if (!skb)
 		return;
 
-	msg = (struct rs_msg *)skb_put(skb, sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 	*msg = (struct rs_msg) {
 		.icmph = {
 			.icmp6_type = NDISC_ROUTER_SOLICITATION,
@@ -1633,7 +1633,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	if (!buff)
 		goto release;
 
-	msg = (struct rd_msg *)skb_put(buff, sizeof(*msg));
+	msg = skb_put(buff, sizeof(*msg));
 	*msg = (struct rd_msg) {
 		.icmph = {
 			.icmp6_type = NDISC_REDIRECT,
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index d3c4daa708b9..ce203dd729e0 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -27,7 +27,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb,
 	struct ipv6hdr *iph;
 
 	skb_reset_network_header(skb);
-	iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
+	iph = skb_put(skb, sizeof(*iph));
 	ip6_flow_hdr(iph, 0, 0);
 	iph->hop_limit	= net->ipv6.devconf_all->hop_limit;
 	iph->nexthdr	= IPPROTO_TCP;
@@ -105,7 +105,7 @@ synproxy_send_client_synack(struct net *net,
 	niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->dest;
 	nth->dest	= th->source;
 	nth->seq	= htonl(__cookie_v6_init_sequence(iph, th, &mss));
@@ -147,7 +147,7 @@ synproxy_send_server_syn(struct net *net,
 	niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->source;
 	nth->dest	= th->dest;
 	nth->seq	= htonl(recv_seq - 1);
@@ -192,7 +192,7 @@ synproxy_send_server_ack(struct net *net,
 	niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->dest;
 	nth->dest	= th->source;
 	nth->seq	= htonl(ntohl(th->ack_seq));
@@ -230,7 +230,7 @@ synproxy_send_client_ack(struct net *net,
 	niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr);
 
 	skb_reset_transport_header(nskb);
-	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth = skb_put(nskb, tcp_hdr_size);
 	nth->source	= th->source;
 	nth->dest	= th->dest;
 	nth->seq	= htonl(ntohl(th->seq) + 1);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index f63b18e05c69..24858402e374 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -95,7 +95,7 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
 	int needs_ack;
 
 	skb_reset_transport_header(nskb);
-	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+	tcph = skb_put(nskb, sizeof(struct tcphdr));
 	/* Truncate to length (no data) */
 	tcph->doff = sizeof(struct tcphdr)/4;
 	tcph->source = oth->dest;
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index bf56ac7dba96..82e71e5622c2 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -133,7 +133,7 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
 	if (!tx_skb)
 		return;
 
-	frame = (struct snrm_frame *) skb_put(tx_skb, 2);
+	frame = skb_put(tx_skb, 2);
 
 	/* Insert connection address field */
 	if (qos)
@@ -228,7 +228,7 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos)
 	if (!tx_skb)
 		return;
 
-	frame = (struct ua_frame *) skb_put(tx_skb, 10);
+	frame = skb_put(tx_skb, 10);
 
 	/* Build UA response */
 	frame->caddr = self->caddr;
@@ -268,7 +268,7 @@ void irlap_send_dm_frame( struct irlap_cb *self)
 	if (!tx_skb)
 		return;
 
-	frame = (struct dm_frame *)skb_put(tx_skb, 2);
+	frame = skb_put(tx_skb, 2);
 
 	if (self->state == LAP_NDM)
 		frame->caddr = CBROADCAST;
@@ -298,7 +298,7 @@ void irlap_send_disc_frame(struct irlap_cb *self)
 	if (!tx_skb)
 		return;
 
-	frame = (struct disc_frame *)skb_put(tx_skb, 2);
+	frame = skb_put(tx_skb, 2);
 
 	frame->caddr = self->caddr | CMD_FRAME;
 	frame->control = DISC_CMD | PF_BIT;
@@ -587,7 +587,7 @@ void irlap_send_rr_frame(struct irlap_cb *self, int command)
 	if (!tx_skb)
 		return;
 
-	frame = (struct rr_frame *)skb_put(tx_skb, 2);
+	frame = skb_put(tx_skb, 2);
 
 	frame->caddr = self->caddr;
 	frame->caddr |= (command) ? CMD_FRAME : 0;
@@ -612,7 +612,7 @@ void irlap_send_rd_frame(struct irlap_cb *self)
 	if (!tx_skb)
 		return;
 
-	frame = (struct rd_frame *)skb_put(tx_skb, 2);
+	frame = skb_put(tx_skb, 2);
 
 	frame->caddr = self->caddr;
 	frame->control = RD_RSP | PF_BIT;
@@ -1202,14 +1202,13 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr,
 
 	/* Broadcast frames must include saddr and daddr fields */
 	if (caddr == CBROADCAST) {
-		frame = (struct test_frame *)
-			skb_put(tx_skb, sizeof(struct test_frame));
+		frame = skb_put(tx_skb, sizeof(struct test_frame));
 
 		/* Insert the swapped addresses */
 		frame->saddr = cpu_to_le32(self->saddr);
 		frame->daddr = cpu_to_le32(daddr);
 	} else
-		frame = (struct test_frame *) skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
+		frame = skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
 
 	frame->caddr = caddr;
 	frame->control = TEST_RSP | PF_BIT;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3ebb4268973b..daa4e90dc4db 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -349,7 +349,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
 		err = EINVAL;
 	BUG_ON(err <= 0 || err >= 256);
 
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	pfkey_hdr_dup(hdr, orig);
 	hdr->sadb_msg_errno = (uint8_t) err;
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
@@ -810,12 +810,12 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 		return ERR_PTR(-ENOBUFS);
 
 	/* call should fill header later */
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	memset(hdr, 0, size);	/* XXX do we need this ? */
 	hdr->sadb_msg_len = size / sizeof(uint64_t);
 
 	/* sa */
-	sa = (struct sadb_sa *)  skb_put(skb, sizeof(struct sadb_sa));
+	sa = skb_put(skb, sizeof(struct sadb_sa));
 	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
 	sa->sadb_sa_exttype = SADB_EXT_SA;
 	sa->sadb_sa_spi = x->id.spi;
@@ -862,8 +862,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 
 	/* hard time */
 	if (hsc & 2) {
-		lifetime = (struct sadb_lifetime *)  skb_put(skb,
-							     sizeof(struct sadb_lifetime));
+		lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
 		lifetime->sadb_lifetime_len =
 			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
 		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
@@ -874,8 +873,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	}
 	/* soft time */
 	if (hsc & 1) {
-		lifetime = (struct sadb_lifetime *)  skb_put(skb,
-							     sizeof(struct sadb_lifetime));
+		lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
 		lifetime->sadb_lifetime_len =
 			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
 		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
@@ -885,8 +883,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 		lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds;
 	}
 	/* current time */
-	lifetime = (struct sadb_lifetime *)  skb_put(skb,
-						     sizeof(struct sadb_lifetime));
+	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
 	lifetime->sadb_lifetime_len =
 		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
 	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
@@ -895,8 +892,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	lifetime->sadb_lifetime_addtime = x->curlft.add_time;
 	lifetime->sadb_lifetime_usetime = x->curlft.use_time;
 	/* src address */
-	addr = (struct sadb_address*) skb_put(skb,
-					      sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -915,8 +911,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 		BUG();
 
 	/* dst address */
-	addr = (struct sadb_address*) skb_put(skb,
-					      sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -933,8 +928,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 
 	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr,
 			     x->props.family)) {
-		addr = (struct sadb_address*) skb_put(skb,
-			sizeof(struct sadb_address)+sockaddr_size);
+		addr = skb_put(skb,
+			       sizeof(struct sadb_address) + sockaddr_size);
 		addr->sadb_address_len =
 			(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -951,8 +946,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 
 	/* auth key */
 	if (add_keys && auth_key_size) {
-		key = (struct sadb_key *) skb_put(skb,
-						  sizeof(struct sadb_key)+auth_key_size);
+		key = skb_put(skb, sizeof(struct sadb_key) + auth_key_size);
 		key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) /
 			sizeof(uint64_t);
 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
@@ -962,8 +956,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	}
 	/* encrypt key */
 	if (add_keys && encrypt_key_size) {
-		key = (struct sadb_key *) skb_put(skb,
-						  sizeof(struct sadb_key)+encrypt_key_size);
+		key = skb_put(skb, sizeof(struct sadb_key) + encrypt_key_size);
 		key->sadb_key_len = (sizeof(struct sadb_key) +
 				     encrypt_key_size) / sizeof(uint64_t);
 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
@@ -974,7 +967,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	}
 
 	/* sa */
-	sa2 = (struct sadb_x_sa2 *)  skb_put(skb, sizeof(struct sadb_x_sa2));
+	sa2 = skb_put(skb, sizeof(struct sadb_x_sa2));
 	sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
 	sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
 	if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) {
@@ -992,7 +985,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 		struct sadb_x_nat_t_port *n_port;
 
 		/* type */
-		n_type = (struct sadb_x_nat_t_type*) skb_put(skb, sizeof(*n_type));
+		n_type = skb_put(skb, sizeof(*n_type));
 		n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t);
 		n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
 		n_type->sadb_x_nat_t_type_type = natt->encap_type;
@@ -1001,14 +994,14 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 		n_type->sadb_x_nat_t_type_reserved[2] = 0;
 
 		/* source port */
-		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+		n_port = skb_put(skb, sizeof(*n_port));
 		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
 		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
 		n_port->sadb_x_nat_t_port_port = natt->encap_sport;
 		n_port->sadb_x_nat_t_port_reserved = 0;
 
 		/* dest port */
-		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+		n_port = skb_put(skb, sizeof(*n_port));
 		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
 		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
 		n_port->sadb_x_nat_t_port_port = natt->encap_dport;
@@ -1017,8 +1010,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 
 	/* security context */
 	if (xfrm_ctx) {
-		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
-				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx = skb_put(skb,
+				  sizeof(struct sadb_x_sec_ctx) + ctx_size);
 		sec_ctx->sadb_x_sec_len =
 		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
 		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
@@ -1617,7 +1610,7 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 	if (!skb)
 		goto out_put_algs;
 
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 	pfkey_hdr_dup(hdr, orig);
 	hdr->sadb_msg_errno = 0;
 	hdr->sadb_msg_len = len / sizeof(uint64_t);
@@ -1626,7 +1619,7 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 		struct sadb_supported *sp;
 		struct sadb_alg *ap;
 
-		sp = (struct sadb_supported *) skb_put(skb, auth_len);
+		sp = skb_put(skb, auth_len);
 		ap = (struct sadb_alg *) (sp + 1);
 
 		sp->sadb_supported_len = auth_len / sizeof(uint64_t);
@@ -1647,7 +1640,7 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 		struct sadb_supported *sp;
 		struct sadb_alg *ap;
 
-		sp = (struct sadb_supported *) skb_put(skb, enc_len);
+		sp = skb_put(skb, enc_len);
 		ap = (struct sadb_alg *) (sp + 1);
 
 		sp->sadb_supported_len = enc_len / sizeof(uint64_t);
@@ -1721,7 +1714,7 @@ static int key_notify_sa_flush(const struct km_event *c)
 	skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
 	if (!skb)
 		return -ENOBUFS;
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
 	hdr->sadb_msg_type = SADB_FLUSH;
 	hdr->sadb_msg_seq = c->seq;
@@ -2046,12 +2039,11 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 	size = pfkey_xfrm_policy2msg_size(xp);
 
 	/* call should fill header later */
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	memset(hdr, 0, size);	/* XXX do we need this ? */
 
 	/* src address */
-	addr = (struct sadb_address*) skb_put(skb,
-					      sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -2066,8 +2058,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 		BUG();
 
 	/* dst address */
-	addr = (struct sadb_address*) skb_put(skb,
-					      sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -2081,8 +2072,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 			    xp->family);
 
 	/* hard time */
-	lifetime = (struct sadb_lifetime *)  skb_put(skb,
-						     sizeof(struct sadb_lifetime));
+	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
 	lifetime->sadb_lifetime_len =
 		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
 	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
@@ -2091,8 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 	lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds;
 	lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds;
 	/* soft time */
-	lifetime = (struct sadb_lifetime *)  skb_put(skb,
-						     sizeof(struct sadb_lifetime));
+	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
 	lifetime->sadb_lifetime_len =
 		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
 	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
@@ -2101,8 +2090,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 	lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds;
 	lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds;
 	/* current time */
-	lifetime = (struct sadb_lifetime *)  skb_put(skb,
-						     sizeof(struct sadb_lifetime));
+	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
 	lifetime->sadb_lifetime_len =
 		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
 	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
@@ -2111,7 +2099,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 	lifetime->sadb_lifetime_addtime = xp->curlft.add_time;
 	lifetime->sadb_lifetime_usetime = xp->curlft.use_time;
 
-	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
+	pol = skb_put(skb, sizeof(struct sadb_x_policy));
 	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
 	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
 	pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD;
@@ -2139,7 +2127,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 		} else {
 			size -= 2*socklen;
 		}
-		rq = (void*)skb_put(skb, req_size);
+		rq = skb_put(skb, req_size);
 		pol->sadb_x_policy_len += req_size/8;
 		memset(rq, 0, sizeof(*rq));
 		rq->sadb_x_ipsecrequest_len = req_size;
@@ -2169,7 +2157,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
 	if ((xfrm_ctx = xp->security)) {
 		int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp);
 
-		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size);
+		sec_ctx = skb_put(skb, ctx_size);
 		sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t);
 		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
 		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
@@ -2733,7 +2721,7 @@ static int key_notify_policy_flush(const struct km_event *c)
 	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
 	if (!skb_out)
 		return -ENOBUFS;
-	hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
+	hdr = skb_put(skb_out, sizeof(struct sadb_msg));
 	hdr->sadb_msg_type = SADB_X_SPDFLUSH;
 	hdr->sadb_msg_seq = c->seq;
 	hdr->sadb_msg_pid = c->portid;
@@ -2917,7 +2905,7 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 	struct sadb_prop *p;
 	int i;
 
-	p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop));
+	p = skb_put(skb, sizeof(struct sadb_prop));
 	p->sadb_prop_len = sizeof(struct sadb_prop)/8;
 	p->sadb_prop_exttype = SADB_EXT_PROPOSAL;
 	p->sadb_prop_replay = 32;
@@ -2951,7 +2939,7 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 	struct sadb_prop *p;
 	int i, k;
 
-	p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop));
+	p = skb_put(skb, sizeof(struct sadb_prop));
 	p->sadb_prop_len = sizeof(struct sadb_prop)/8;
 	p->sadb_prop_exttype = SADB_EXT_PROPOSAL;
 	p->sadb_prop_replay = 32;
@@ -2977,7 +2965,7 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 				continue;
 			if (!(aalg_tmpl_set(t, aalg) && aalg->available))
 				continue;
-			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
+			c = skb_put(skb, sizeof(struct sadb_comb));
 			memset(c, 0, sizeof(*c));
 			p->sadb_prop_len += sizeof(struct sadb_comb)/8;
 			c->sadb_comb_auth = aalg->desc.sadb_alg_id;
@@ -3144,7 +3132,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	if (skb == NULL)
 		return -ENOMEM;
 
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	hdr->sadb_msg_version = PF_KEY_V2;
 	hdr->sadb_msg_type = SADB_ACQUIRE;
 	hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
@@ -3155,8 +3143,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	hdr->sadb_msg_pid = 0;
 
 	/* src address */
-	addr = (struct sadb_address*) skb_put(skb,
-					      sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -3171,8 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 		BUG();
 
 	/* dst address */
-	addr = (struct sadb_address*) skb_put(skb,
-					      sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -3186,7 +3172,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	if (!addr->sadb_address_prefixlen)
 		BUG();
 
-	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
+	pol = skb_put(skb, sizeof(struct sadb_x_policy));
 	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
 	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
 	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
@@ -3203,8 +3189,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 
 	/* security context */
 	if (xfrm_ctx) {
-		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
-				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx = skb_put(skb,
+				  sizeof(struct sadb_x_sec_ctx) + ctx_size);
 		sec_ctx->sadb_x_sec_len =
 		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
 		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
@@ -3346,7 +3332,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	if (skb == NULL)
 		return -ENOMEM;
 
-	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	hdr->sadb_msg_version = PF_KEY_V2;
 	hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING;
 	hdr->sadb_msg_satype = satype;
@@ -3357,7 +3343,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	hdr->sadb_msg_pid = 0;
 
 	/* SA */
-	sa = (struct sadb_sa *) skb_put(skb, sizeof(struct sadb_sa));
+	sa = skb_put(skb, sizeof(struct sadb_sa));
 	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
 	sa->sadb_sa_exttype = SADB_EXT_SA;
 	sa->sadb_sa_spi = x->id.spi;
@@ -3368,8 +3354,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	sa->sadb_sa_flags = 0;
 
 	/* ADDRESS_SRC (old addr) */
-	addr = (struct sadb_address*)
-		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -3384,15 +3369,14 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 		BUG();
 
 	/* NAT_T_SPORT (old port) */
-	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+	n_port = skb_put(skb, sizeof(*n_port));
 	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
 	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
 	n_port->sadb_x_nat_t_port_port = natt->encap_sport;
 	n_port->sadb_x_nat_t_port_reserved = 0;
 
 	/* ADDRESS_DST (new addr) */
-	addr = (struct sadb_address*)
-		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
 	addr->sadb_address_len =
 		(sizeof(struct sadb_address)+sockaddr_size)/
 			sizeof(uint64_t);
@@ -3407,7 +3391,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 		BUG();
 
 	/* NAT_T_DPORT (new port) */
-	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+	n_port = skb_put(skb, sizeof(*n_port));
 	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
 	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
 	n_port->sadb_x_nat_t_port_port = sport;
@@ -3421,7 +3405,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
 			    const struct xfrm_selector *sel)
 {
 	struct sadb_address *addr;
-	addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize);
+	addr = skb_put(skb, sizeof(struct sadb_address) + sasize);
 	addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8;
 	addr->sadb_address_exttype = type;
 	addr->sadb_address_proto = sel->proto;
@@ -3553,7 +3537,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 	if (skb == NULL)
 		return -ENOMEM;
 
-	hdr = (struct sadb_msg *)skb_put(skb, sizeof(struct sadb_msg));
+	hdr = skb_put(skb, sizeof(struct sadb_msg));
 	hdr->sadb_msg_version = PF_KEY_V2;
 	hdr->sadb_msg_type = SADB_X_MIGRATE;
 	hdr->sadb_msg_satype = pfkey_proto2satype(m->proto);
@@ -3574,7 +3558,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 	set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel);
 
 	/* policy information */
-	pol = (struct sadb_x_policy *)skb_put(skb, sizeof(struct sadb_x_policy));
+	pol = skb_put(skb, sizeof(struct sadb_x_policy));
 	pol->sadb_x_policy_len = size_pol / 8;
 	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
 	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f9eb2486d550..a354f1939e49 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1107,7 +1107,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta)
 	skb = dev_alloc_skb(sizeof(*msg));
 	if (!skb)
 		return;
-	msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg));
+	msg = skb_put(skb, sizeof(*msg));
 
 	/* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
 	 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
@@ -3414,7 +3414,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (void *) skb_put(skb, size);
+	nullfunc = skb_put(skb, size);
 	nullfunc->frame_control = fc;
 	nullfunc->duration_id = 0;
 	memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 927215d4dd8f..c92df492e898 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -459,7 +459,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 		return -ENOMEM;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	action_frame = (void *)skb_put(skb, 27);
+	action_frame = skb_put(skb, 27);
 	memcpy(action_frame->da, da, ETH_ALEN);
 	memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(action_frame->bssid, bssid, ETH_ALEN);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 861697f2d75b..a550c707cd8a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1265,7 +1265,7 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
 	if (!skb)
 		return -ENOMEM;
 	skb_reserve(skb, local->tx_headroom);
-	mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len);
+	mgmt_fwd = skb_put(skb, len);
 
 	/* offset_ttl is based on whether the secondary channel
 	 * offset is available or not. Subtract 1 from the mesh TTL
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 96c987e641b3..d8cd91424175 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -30,7 +30,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
 		return NULL;
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (struct ieee80211_hdr *) skb_put(skb, size);
+	nullfunc = skb_put(skb, size);
 	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
 	ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr,
 				      sdata->vif.addr);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 46e1809356f6..69615016d5bf 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1312,7 +1312,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (void *) skb_put(skb, size);
+	nullfunc = skb_put(skb, size);
 	nullfunc->frame_control = fc;
 	nullfunc->duration_id = 0;
 	memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 86740670102d..709ef02fe67e 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -49,7 +49,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
 			  !ifmgd->tdls_wider_bw_prohibited;
 	struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata);
 	bool vht = sband && sband->vht_cap.vht_supported;
-	u8 *pos = (void *)skb_put(skb, 10);
+	u8 *pos = skb_put(skb, 10);
 
 	*pos++ = WLAN_EID_EXT_CAPABILITY;
 	*pos++ = 8; /* len */
@@ -168,7 +168,7 @@ static void ieee80211_tdls_add_oper_classes(struct ieee80211_sub_if_data *sdata,
 
 static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb)
 {
-	u8 *pos = (void *)skb_put(skb, 3);
+	u8 *pos = skb_put(skb, 3);
 
 	*pos++ = WLAN_EID_BSS_COEX_2040;
 	*pos++ = 1; /* len */
@@ -209,7 +209,7 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
 		rsp_addr = sdata->vif.addr;
 	}
 
-	lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
+	lnkid = skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
 
 	lnkid->ie_type = WLAN_EID_LINK_ID;
 	lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
@@ -223,7 +223,7 @@ static void
 ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	u8 *pos = (void *)skb_put(skb, 4);
+	u8 *pos = skb_put(skb, 4);
 
 	*pos++ = WLAN_EID_AID;
 	*pos++ = 2; /* len */
@@ -745,7 +745,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_tdls_data *tf;
 
-	tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
+	tf = skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
 
 	memcpy(tf->da, peer, ETH_ALEN);
 	memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 18c5d6e6305d..ec5a9a72797e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3874,7 +3874,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 			ps->dtim_count--;
 	}
 
-	tim = pos = (u8 *) skb_put(skb, 6);
+	tim = pos = skb_put(skb, 6);
 	*pos++ = WLAN_EID_TIM;
 	*pos++ = 4;
 	*pos++ = ps->dtim_count;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index cc19614ff4e6..0d722ea98a1b 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -949,7 +949,7 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
 		return TX_DROP;
 
-	mmie = (struct ieee80211_mmie *) skb_put(skb, sizeof(*mmie));
+	mmie = skb_put(skb, sizeof(*mmie));
 	mmie->element_id = WLAN_EID_MMIE;
 	mmie->length = sizeof(*mmie) - 2;
 	mmie->key_id = cpu_to_le16(key->conf.keyidx);
@@ -993,7 +993,7 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
 		return TX_DROP;
 
-	mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie));
+	mmie = skb_put(skb, sizeof(*mmie));
 	mmie->element_id = WLAN_EID_MMIE;
 	mmie->length = sizeof(*mmie) - 2;
 	mmie->key_id = cpu_to_le16(key->conf.keyidx);
@@ -1138,7 +1138,7 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
 		return TX_DROP;
 
-	mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie));
+	mmie = skb_put(skb, sizeof(*mmie));
 	mmie->element_id = WLAN_EID_MMIE;
 	mmie->length = sizeof(*mmie) - 2;
 	mmie->key_id = cpu_to_le16(key->conf.keyidx);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index da9704971a83..94ec0d0765a8 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -590,7 +590,7 @@ __build_packet_message(struct nfnl_log_net *log,
 		if (skb_tailroom(inst->skb) < nla_total_size(data_len))
 			goto nla_put_failure;
 
-		nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len));
+		nla = skb_put(inst->skb, nla_total_size(data_len));
 		nla->nla_type = NFULA_PAYLOAD;
 		nla->nla_len = size;
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8a0f218b7938..1b17a1b445a3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -589,7 +589,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		if (skb_tailroom(skb) < sizeof(*nla) + hlen)
 			goto nla_put_failure;
 
-		nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
+		nla = skb_put(skb, sizeof(*nla));
 		nla->nla_type = NFQA_PAYLOAD;
 		nla->nla_len = nla_attr_size(data_len);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bd24a975fd49..a88745e4b7df 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2104,7 +2104,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
 	struct nlmsghdr *nlh;
 	int size = nlmsg_msg_size(len);
 
-	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
+	nlh = skb_put(skb, NLMSG_ALIGN(size));
 	nlh->nlmsg_type = type;
 	nlh->nlmsg_len = size;
 	nlh->nlmsg_flags = flags;
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 0fd5518bf252..fec47a7d0092 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -74,8 +74,8 @@ void digital_skb_add_crc(struct sk_buff *skb, crc_func_t crc_func, u16 init,
 	if (msb_first)
 		crc = __fswab16(crc);
 
-	*skb_put(skb, 1) = crc & 0xFF;
-	*skb_put(skb, 1) = (crc >> 8) & 0xFF;
+	*(u8 *)skb_put(skb, 1) = crc & 0xFF;
+	*(u8 *)skb_put(skb, 1) = (crc >> 8) & 0xFF;
 }
 
 int digital_skb_check_crc(struct sk_buff *skb, crc_func_t crc_func,
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index f44f75a2a4d5..82471af5553e 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -654,7 +654,7 @@ static int digital_in_send_rtox(struct nfc_digital_dev *ddev,
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = rtox;
+	*(u8 *)skb_put(skb, 1) = rtox;
 
 	skb_push(skb, sizeof(struct digital_dep_req_res));
 
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index d9080dec5d27..fae6d31b377c 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -266,8 +266,8 @@ static int digital_in_send_rats(struct nfc_digital_dev *ddev,
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = DIGITAL_RATS_BYTE1;
-	*skb_put(skb, 1) = DIGITAL_RATS_PARAM;
+	*(u8 *)skb_put(skb, 1) = DIGITAL_RATS_BYTE1;
+	*(u8 *)skb_put(skb, 1) = DIGITAL_RATS_PARAM;
 
 	rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_ats,
 				 target);
@@ -470,8 +470,8 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
 	else
 		sel_cmd = DIGITAL_CMD_SEL_REQ_CL3;
 
-	*skb_put(skb, sizeof(u8)) = sel_cmd;
-	*skb_put(skb, sizeof(u8)) = DIGITAL_SDD_REQ_SEL_PAR;
+	*(u8 *)skb_put(skb, sizeof(u8)) = sel_cmd;
+	*(u8 *)skb_put(skb, sizeof(u8)) = DIGITAL_SDD_REQ_SEL_PAR;
 
 	return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
 				   target);
@@ -541,7 +541,7 @@ int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, sizeof(u8)) = DIGITAL_CMD_SENS_REQ;
+	*(u8 *)skb_put(skb, sizeof(u8)) = DIGITAL_CMD_SENS_REQ;
 
 	rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sens_res, NULL);
 	if (rc)
@@ -625,8 +625,7 @@ static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev,
 	if (!skb)
 		return -ENOMEM;
 
-	attrib_req = (struct digital_attrib_req *)skb_put(skb,
-							  sizeof(*attrib_req));
+	attrib_req = skb_put(skb, sizeof(*attrib_req));
 
 	attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ;
 	memcpy(attrib_req->nfcid0, sensb_res->nfcid0,
@@ -730,8 +729,7 @@ int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech)
 	if (!skb)
 		return -ENOMEM;
 
-	sensb_req = (struct digital_sensb_req *)skb_put(skb,
-							sizeof(*sensb_req));
+	sensb_req = skb_put(skb, sizeof(*sensb_req));
 
 	sensb_req->cmd = DIGITAL_CMD_SENSB_REQ;
 	sensb_req->afi = 0x00; /* All families and sub-families */
@@ -939,7 +937,7 @@ static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev)
 	if (!skb)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = DIGITAL_SEL_RES_NFC_DEP;
+	*(u8 *)skb_put(skb, 1) = DIGITAL_SEL_RES_NFC_DEP;
 
 	if (!DIGITAL_DRV_CAPS_TG_CRC(ddev))
 		digital_skb_add_crc_a(skb);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 8741ad47a6fb..3a0c94590411 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -874,7 +874,7 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 			return;
 		}
 
-		*skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
+		*(u8 *)skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
 
 		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
 			msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 401c7e255273..9ab4a05f086f 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -382,8 +382,8 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
 	if (skb == NULL)
 		return -ENOMEM;
 
-	*skb_put(skb, 1) = SHDLC_MAX_WINDOW;
-	*skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0;
+	*(u8 *)skb_put(skb, 1) = SHDLC_MAX_WINDOW;
+	*(u8 *)skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0;
 
 	return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
 }
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 17b9f1ce23db..a3dac34cf790 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1341,7 +1341,7 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
 		return -ENOMEM;
 	}
 
-	hdr = (struct nci_ctrl_hdr *) skb_put(skb, NCI_CTRL_HDR_SIZE);
+	hdr = skb_put(skb, NCI_CTRL_HDR_SIZE);
 	hdr->gid = nci_opcode_gid(opcode);
 	hdr->oid = nci_opcode_oid(opcode);
 	hdr->plen = plen;
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index d4a53ce818c3..d1119bb35f24 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -472,7 +472,7 @@ void nci_hci_data_received_cb(void *context,
 			return;
 		}
 
-		*skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe;
+		*(u8 *)skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe;
 
 		skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) {
 			msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index d904cd2f1442..a502a334918a 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -86,8 +86,8 @@ int nci_spi_send(struct nci_spi *nspi,
 		u16 crc;
 
 		crc = crc_ccitt(CRC_INIT, skb->data, skb->len);
-		*skb_put(skb, 1) = crc >> 8;
-		*skb_put(skb, 1) = crc & 0xFF;
+		*(u8 *)skb_put(skb, 1) = crc >> 8;
+		*(u8 *)skb_put(skb, 1) = crc & 0xFF;
 	}
 
 	if (write_handshake_completion)	{
@@ -172,8 +172,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge)
 	hdr[3] = 0;
 
 	crc = crc_ccitt(CRC_INIT, skb->data, skb->len);
-	*skb_put(skb, 1) = crc >> 8;
-	*skb_put(skb, 1) = crc & 0xFF;
+	*(u8 *)skb_put(skb, 1) = crc >> 8;
+	*(u8 *)skb_put(skb, 1) = crc & 0xFF;
 
 	ret = __nci_spi_send(nspi, skb, 0);
 
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index cfa7f352c1c3..442f8eadfc76 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -355,7 +355,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
 
 		/* Eat byte after byte till full packet header is received */
 		if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) {
-			*skb_put(nu->rx_skb, 1) = *data++;
+			*(u8 *)skb_put(nu->rx_skb, 1) = *data++;
 			--count;
 			continue;
 		}
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 8aa58a918783..3a6ad0f438dc 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -264,7 +264,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 		int nla_len = nla_total_size(data_len);
 		struct nlattr *nla;
 
-		nla = (struct nlattr *)skb_put(nl_skb, nla_len);
+		nla = skb_put(nl_skb, nla_len);
 		nla->nla_type = PSAMPLE_ATTR_DATA;
 		nla->nla_len = nla_attr_size(data_len);
 
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index cff679167bdc..5586609afa27 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -259,7 +259,7 @@ static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
 		return NULL;
 	skb_reset_transport_header(skb);
 
-	hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE);
+	hdr = skb_put(skb, QRTR_HDR_SIZE);
 	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
 	hdr->type = cpu_to_le32(type);
 	hdr->src_node_id = cpu_to_le32(src_node);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 034e916362cf..2c196b3e9cd3 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1389,7 +1389,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 		goto nodata;
 
 	/* Make room for the chunk header.  */
-	chunk_hdr = (sctp_chunkhdr_t *)skb_put(skb, sizeof(sctp_chunkhdr_t));
+	chunk_hdr = skb_put(skb, sizeof(sctp_chunkhdr_t));
 	chunk_hdr->type	  = type;
 	chunk_hdr->flags  = flags;
 	chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index ec2b3e013c2f..e361f0b57fb6 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -167,8 +167,7 @@ struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
 			goto fail;
 
 		skb = sctp_event2skb(event);
-		sac = (struct sctp_assoc_change *) skb_put(skb,
-					sizeof(struct sctp_assoc_change));
+		sac = skb_put(skb, sizeof(struct sctp_assoc_change));
 	}
 
 	/* Socket Extensions for SCTP
@@ -270,8 +269,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
 		goto fail;
 
 	skb = sctp_event2skb(event);
-	spc = (struct sctp_paddr_change *)
-		skb_put(skb, sizeof(struct sctp_paddr_change));
+	spc = skb_put(skb, sizeof(struct sctp_paddr_change));
 
 	/* Sockets API Extensions for SCTP
 	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
@@ -549,8 +547,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
 		goto fail;
 
 	skb = sctp_event2skb(event);
-	sse = (struct sctp_shutdown_event *)
-		skb_put(skb, sizeof(struct sctp_shutdown_event));
+	sse = skb_put(skb, sizeof(struct sctp_shutdown_event));
 
 	/* Socket Extensions for SCTP
 	 * 5.3.1.5 SCTP_SHUTDOWN_EVENT
@@ -612,8 +609,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication(
 		goto fail;
 
 	skb = sctp_event2skb(event);
-	sai = (struct sctp_adaptation_event *)
-		skb_put(skb, sizeof(struct sctp_adaptation_event));
+	sai = skb_put(skb, sizeof(struct sctp_adaptation_event));
 
 	sai->sai_type = SCTP_ADAPTATION_INDICATION;
 	sai->sai_flags = 0;
@@ -751,8 +747,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
 		goto fail;
 
 	skb = sctp_event2skb(event);
-	pd = (struct sctp_pdapi_event *)
-		skb_put(skb, sizeof(struct sctp_pdapi_event));
+	pd = skb_put(skb, sizeof(struct sctp_pdapi_event));
 
 	/* pdapi_type
 	 *   It should be SCTP_PARTIAL_DELIVERY_EVENT
@@ -803,8 +798,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_authkey(
 		goto fail;
 
 	skb = sctp_event2skb(event);
-	ak = (struct sctp_authkey_event *)
-		skb_put(skb, sizeof(struct sctp_authkey_event));
+	ak = skb_put(skb, sizeof(struct sctp_authkey_event));
 
 	ak->auth_type = SCTP_AUTHENTICATION_EVENT;
 	ak->auth_flags = 0;
@@ -842,8 +836,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
 		return NULL;
 
 	skb = sctp_event2skb(event);
-	sdry = (struct sctp_sender_dry_event *)
-		skb_put(skb, sizeof(struct sctp_sender_dry_event));
+	sdry = skb_put(skb, sizeof(struct sctp_sender_dry_event));
 
 	sdry->sender_dry_type = SCTP_SENDER_DRY_EVENT;
 	sdry->sender_dry_flags = 0;
@@ -869,7 +862,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
 		return NULL;
 
 	skb = sctp_event2skb(event);
-	sreset = (struct sctp_stream_reset_event *)skb_put(skb, length);
+	sreset = skb_put(skb, length);
 
 	sreset->strreset_type = SCTP_STREAM_RESET_EVENT;
 	sreset->strreset_flags = flags;
@@ -897,8 +890,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event(
 		return NULL;
 
 	skb = sctp_event2skb(event);
-	areset = (struct sctp_assoc_reset_event *)
-		skb_put(skb, sizeof(struct sctp_assoc_reset_event));
+	areset = skb_put(skb, sizeof(struct sctp_assoc_reset_event));
 
 	areset->assocreset_type = SCTP_ASSOC_RESET_EVENT;
 	areset->assocreset_flags = flags;
@@ -925,8 +917,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
 		return NULL;
 
 	skb = sctp_event2skb(event);
-	schange = (struct sctp_stream_change_event *)
-		skb_put(skb, sizeof(struct sctp_stream_change_event));
+	schange = skb_put(skb, sizeof(struct sctp_stream_change_event));
 
 	schange->strchange_type = SCTP_STREAM_CHANGE_EVENT;
 	schange->strchange_flags = flags;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 24e2054bfbaf..7d6ee03f2762 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -99,7 +99,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 	if (!skb)
 		return NULL;
 
-	hdr = (struct af_vsockmon_hdr *)skb_put(skb, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
 
 	/* pkt->hdr is little-endian so no need to byteswap here */
 	hdr->src_cid = pkt->hdr.src_cid;
-- 
cgit v1.2.3


From af72868b9070d1b843c829f0d0d0b22c04a20815 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Jun 2017 14:29:22 +0200
Subject: networking: make skb_pull & friends return void pointers

It seems like a historic accident that these return unsigned char *,
and in many places that means casts are required, more often than not.

Make these functions return void * and remove all the casts across
the tree, adding a (u8 *) cast only where the unsigned char pointer
was used directly, all done with the following spatch:

    @@
    expression SKB, LEN;
    typedef u8;
    identifier fn = {
            skb_pull,
            __skb_pull,
            skb_pull_inline,
            __pskb_pull_tail,
            __pskb_pull,
            pskb_pull
    };
    @@
    - *(fn(SKB, LEN))
    + *(u8 *)fn(SKB, LEN)

    @@
    expression E, SKB, LEN;
    identifier fn = {
            skb_pull,
            __skb_pull,
            skb_pull_inline,
            __pskb_pull_tail,
            __pskb_pull,
            pskb_pull
    };
    type T;
    @@
    - E = ((T *)(fn(SKB, LEN)))
    + E = fn(SKB, LEN)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bluetooth/hci_nokia.c  |  4 ++--
 drivers/isdn/i4l/isdn_ppp.c    |  2 +-
 drivers/net/wan/hdlc_ppp.c     |  2 +-
 drivers/nfc/nxp-nci/firmware.c |  3 +--
 drivers/scsi/fnic/fnic_fcs.c   |  2 +-
 drivers/scsi/qedf/qedf_main.c  |  2 +-
 include/linux/skbuff.h         | 14 +++++++-------
 net/bluetooth/a2mp.c           |  4 ++--
 net/core/skbuff.c              |  6 +++---
 net/ipv4/ipmr.c                |  6 ++++--
 net/ipv4/xfrm4_mode_beet.c     |  3 +--
 net/ipv6/ip6mr.c               |  6 ++++--
 net/ipv6/xfrm6_mode_beet.c     |  2 +-
 13 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index c1b081725b2c..072a77a61e67 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -557,7 +557,7 @@ static int nokia_recv_negotiation_packet(struct hci_dev *hdev,
 		goto finish_neg;
 	}
 
-	evt = (struct hci_nokia_neg_evt *)skb_pull(skb, sizeof(*hdr));
+	evt = skb_pull(skb, sizeof(*hdr));
 
 	if (evt->ack != NOKIA_NEG_ACK) {
 		dev_err(dev, "Negotiation received: wrong reply");
@@ -595,7 +595,7 @@ static int nokia_recv_alive_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		goto finish_alive;
 	}
 
-	pkt = (struct hci_nokia_alive_pkt *)skb_pull(skb, sizeof(*hdr));
+	pkt = skb_pull(skb, sizeof(*hdr));
 
 	if (pkt->mid != NOKIA_ALIVE_RESP) {
 		dev_err(dev, "Alive received: invalid response: 0x%02x!",
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 9ce23cf3d7d2..e26cae9baf17 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1509,7 +1509,7 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
 	 * temporarily remove part of the fake header stuck on
 	 * earlier.
 	 */
-	*skb_pull(skb, IPPP_MAX_HEADER - 4) = 1; /* indicate outbound */
+	*(u8 *)skb_pull(skb, IPPP_MAX_HEADER - 4) = 1; /* indicate outbound */
 
 	{
 		__be16 *p = (__be16 *)skb->data;
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index fa3460a0dbbe..0d2e00ece804 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -448,7 +448,7 @@ static int ppp_rx(struct sk_buff *skb)
 	/* Check HDLC header */
 	if (skb->len < sizeof(struct hdlc_header))
 		goto rx_error;
-	cp = (struct cp_header*)skb_pull(skb, sizeof(struct hdlc_header));
+	cp = skb_pull(skb, sizeof(struct hdlc_header));
 	if (hdr->address != HDLC_ADDR_ALLSTATIONS ||
 	    hdr->control != HDLC_CTRL_UI)
 		goto rx_error;
diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c
index 99ffee1dfd1e..e50c6f67bb39 100644
--- a/drivers/nfc/nxp-nci/firmware.c
+++ b/drivers/nfc/nxp-nci/firmware.c
@@ -311,8 +311,7 @@ void nxp_nci_fw_recv_frame(struct nci_dev *ndev, struct sk_buff *skb)
 		if (nxp_nci_fw_check_crc(skb) != 0x00)
 			fw_info->cmd_result = -EBADMSG;
 		else
-			fw_info->cmd_result = nxp_nci_fw_read_status(
-					*skb_pull(skb, NXP_NCI_FW_HDR_LEN));
+			fw_info->cmd_result = nxp_nci_fw_read_status(*(u8 *)skb_pull(skb, NXP_NCI_FW_HDR_LEN));
 		kfree_skb(skb);
 	} else {
 		fw_info->cmd_result = -EIO;
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
index 245dcd95e11f..e3b964b7235a 100644
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -640,7 +640,7 @@ static inline int fnic_import_rq_eth_pkt(struct fnic *fnic, struct sk_buff *skb)
 	eh = (struct ethhdr *)skb->data;
 	if (eh->h_proto == htons(ETH_P_8021Q)) {
 		memmove((u8 *)eh + VLAN_HLEN, eh, ETH_ALEN * 2);
-		eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
+		eh = skb_pull(skb, VLAN_HLEN);
 		skb_reset_mac_header(skb);
 	}
 	if (eh->h_proto == htons(ETH_P_FIP)) {
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index da0fcce6f842..542a6e75c2bb 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2117,7 +2117,7 @@ static void qedf_ll2_process_skb(struct work_struct *work)
 	/* Undo VLAN encapsulation */
 	if (eh->h_proto == htons(ETH_P_8021Q)) {
 		memmove((u8 *)eh + VLAN_HLEN, eh, ETH_ALEN * 2);
-		eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
+		eh = skb_pull(skb, VLAN_HLEN);
 		skb_reset_mac_header(skb);
 	}
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 454ea37dddbb..ac9d10dadd1a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1931,22 +1931,22 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 	return skb->data;
 }
 
-unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
-static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
+void *skb_pull(struct sk_buff *skb, unsigned int len);
+static inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
 {
 	skb->len -= len;
 	BUG_ON(skb->len < skb->data_len);
 	return skb->data += len;
 }
 
-static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
+static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len)
 {
 	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
 }
 
-unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+void *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
-static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len)
 {
 	if (len > skb_headlen(skb) &&
 	    !__pskb_pull_tail(skb, len - skb_headlen(skb)))
@@ -1955,7 +1955,7 @@ static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
 	return skb->data += len;
 }
 
-static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
+static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
 {
 	return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
 }
@@ -2938,7 +2938,7 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb,
 	__skb_postpush_rcsum(skb, start, len, 0);
 }
 
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
 
 /**
  *	skb_push_rcsum - push skb and update receive checksum
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index f0095fd79818..aad994edd3bb 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -239,7 +239,7 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 		}
 
 		len -= sizeof(*cl);
-		cl = (void *) skb_pull(skb, sizeof(*cl));
+		cl = skb_pull(skb, sizeof(*cl));
 	}
 
 	/* Fall back to L2CAP init sequence */
@@ -279,7 +279,7 @@ static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb,
 	while (skb->len >= sizeof(*cl)) {
 		BT_DBG("Controller id %d type %d status %d", cl->id, cl->type,
 		       cl->status);
-		cl = (struct a2mp_cl *) skb_pull(skb, sizeof(*cl));
+		cl = skb_pull(skb, sizeof(*cl));
 	}
 
 	/* TODO send A2MP_CHANGE_RSP */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0baa7f2dd8ef..9a1639f7d61a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1481,7 +1481,7 @@ EXPORT_SYMBOL(skb_push);
  *	is returned. Once the data has been pulled future pushes will overwrite
  *	the old data.
  */
-unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+void *skb_pull(struct sk_buff *skb, unsigned int len)
 {
 	return skb_pull_inline(skb, len);
 }
@@ -1616,7 +1616,7 @@ EXPORT_SYMBOL(___pskb_trim);
  *
  * It is pretty complicated. Luckily, it is called only in exceptional cases.
  */
-unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 {
 	/* If skb has not enough free space at tail, get new one
 	 * plus 128 bytes for future expansions. If we have enough
@@ -3065,7 +3065,7 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags);
  *	that the checksum difference is zero (e.g., a valid IP header)
  *	or you are setting ip_summed to CHECKSUM_NONE.
  */
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 {
 	unsigned char *data = skb->data;
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index abbd7c992960..3e7454aa49e8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -669,7 +669,8 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 
 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
+			struct nlmsghdr *nlh = skb_pull(skb,
+							sizeof(struct iphdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 			skb_trim(skb, nlh->nlmsg_len);
@@ -972,7 +973,8 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 	/* Play the pending entries through our router */
 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
+			struct nlmsghdr *nlh = skb_pull(skb,
+							sizeof(struct iphdr));
 
 			if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) -
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 71acd0014f2d..856d2dfdb44b 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -57,8 +57,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	xfrm4_beet_make_header(skb);
 
-	ph = (struct ip_beet_phdr *)
-		__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
+	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
 
 	top_iph = ip_hdr(skb);
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2ecb39b943b5..b0e2bf1f4212 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -846,7 +846,8 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 
 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+			struct nlmsghdr *nlh = skb_pull(skb,
+							sizeof(struct ipv6hdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 			skb_trim(skb, nlh->nlmsg_len);
@@ -1106,7 +1107,8 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 
 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (ipv6_hdr(skb)->version == 0) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+			struct nlmsghdr *nlh = skb_pull(skb,
+							sizeof(struct ipv6hdr));
 
 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 1e205c3253ac..57fd314ec2b8 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -54,7 +54,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb->mac_header = skb->network_header +
 			  offsetof(struct ipv6hdr, nexthdr);
 	skb->transport_header = skb->network_header + sizeof(*top_iph);
-	ph = (struct ip_beet_phdr *)__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl-hdr_len);
+	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);
 
 	xfrm6_beet_make_header(skb);
 
-- 
cgit v1.2.3


From d58ff35122847a83ba55394e2ae3a1527b6febf5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Jun 2017 14:29:23 +0200
Subject: networking: make skb_push & __skb_push return void pointers

It seems like a historic accident that these return unsigned char *,
and in many places that means casts are required, more often than not.

Make these functions return void * and remove all the casts across
the tree, adding a (u8 *) cast only where the unsigned char pointer
was used directly, all done with the following spatch:

    @@
    expression SKB, LEN;
    typedef u8;
    identifier fn = { skb_push, __skb_push, skb_push_rcsum };
    @@
    - *(fn(SKB, LEN))
    + *(u8 *)fn(SKB, LEN)

    @@
    expression E, SKB, LEN;
    identifier fn = { skb_push, __skb_push, skb_push_rcsum };
    type T;
    @@
    - E = ((T *)(fn(SKB, LEN)))
    + E = fn(SKB, LEN)

    @@
    expression SKB, LEN;
    identifier fn = { skb_push, __skb_push, skb_push_rcsum };
    @@
    - fn(SKB, LEN)[0]
    + *(u8 *)fn(SKB, LEN)

Note that the last part there converts from push(...)[0] to the
more idiomatic *(u8 *)push(...).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/solos-pci.c                            |  2 +-
 drivers/bluetooth/bpa10x.c                         |  2 +-
 drivers/firewire/net.c                             |  8 +++---
 drivers/infiniband/hw/cxgb3/iwch_cm.c              |  6 ++---
 drivers/infiniband/hw/cxgb4/cm.c                   |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c          |  4 +--
 drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c   |  2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c  |  2 +-
 drivers/isdn/i4l/isdn_ppp.c                        |  2 +-
 drivers/net/arcnet/arc-rawmode.c                   |  2 +-
 drivers/net/arcnet/capmode.c                       |  2 +-
 drivers/net/arcnet/rfc1051.c                       |  2 +-
 drivers/net/arcnet/rfc1201.c                       |  2 +-
 drivers/net/ethernet/broadcom/bcmsysport.c         |  2 +-
 drivers/net/ethernet/chelsio/cxgb/sge.c            |  4 +--
 drivers/net/ethernet/freescale/gianfar.c           |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_selftest.c  |  2 +-
 drivers/net/ethernet/sun/niu.c                     |  2 +-
 drivers/net/ethernet/toshiba/ps3_gelic_net.c       |  2 +-
 drivers/net/geneve.c                               |  3 +--
 drivers/net/gtp.c                                  |  4 +--
 drivers/net/hippi/rrunner.c                        |  2 +-
 drivers/net/macsec.c                               |  2 +-
 drivers/net/ppp/ppp_async.c                        |  2 +-
 drivers/net/ppp/ppp_generic.c                      |  6 ++---
 drivers/net/ppp/ppp_synctty.c                      |  2 +-
 drivers/net/ppp/pptp.c                             |  2 +-
 drivers/net/usb/gl620a.c                           |  2 +-
 drivers/net/usb/int51x1.c                          |  2 +-
 drivers/net/usb/kaweth.c                           |  2 +-
 drivers/net/usb/lg-vl600.c                         |  2 +-
 drivers/net/usb/net1080.c                          |  2 +-
 drivers/net/usb/qmi_wwan.c                         |  2 +-
 drivers/net/usb/rndis_host.c                       |  2 +-
 drivers/net/vrf.c                                  |  2 +-
 drivers/net/vxlan.c                                |  2 +-
 drivers/net/wimax/i2400m/netdev.c                  |  2 +-
 drivers/net/wireless/admtek/adm8211.c              |  2 +-
 drivers/net/wireless/ath/ar5523/ar5523.c           |  4 +--
 drivers/net/wireless/ath/ath6kl/htc_pipe.c         |  3 +--
 drivers/net/wireless/ath/ath9k/hif_usb.c           |  2 +-
 drivers/net/wireless/ath/ath9k/htc_hst.c           |  3 +--
 drivers/net/wireless/ath/ath9k/wmi.c               |  2 +-
 drivers/net/wireless/ath/carl9170/tx.c             |  2 +-
 drivers/net/wireless/ath/wil6210/txrx.c            |  2 +-
 .../net/wireless/intersil/hostap/hostap_80211_rx.c |  8 +++---
 drivers/net/wireless/intersil/orinoco/main.c       |  7 +++--
 drivers/net/wireless/intersil/p54/txrx.c           |  4 +--
 drivers/net/wireless/intersil/prism54/islpci_eth.c |  5 +---
 drivers/net/wireless/mac80211_hwsim.c              |  4 +--
 drivers/net/wireless/marvell/libertas/rx.c         |  2 +-
 drivers/net/wireless/marvell/libertas_tf/main.c    |  2 +-
 drivers/net/wireless/mediatek/mt7601u/tx.c         |  2 +-
 drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c |  6 ++---
 .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c  |  2 +-
 .../net/wireless/realtek/rtlwifi/rtl8192cu/trx.c   |  2 +-
 drivers/net/wireless/st/cw1200/txrx.c              |  2 +-
 drivers/net/wireless/ti/wl1251/tx.c                |  3 +--
 drivers/net/wireless/ti/wlcore/cmd.c               |  2 +-
 drivers/net/wireless/ti/wlcore/tx.c                |  3 +--
 drivers/net/wireless/zydas/zd1211rw/zd_mac.c       |  3 +--
 drivers/nfc/fdp/i2c.c                              |  4 +--
 drivers/nfc/microread/i2c.c                        |  2 +-
 drivers/nfc/microread/microread.c                  |  4 +--
 drivers/nfc/nfcmrvl/main.c                         |  2 +-
 drivers/nfc/pn533/pn533.c                          |  8 +++---
 drivers/nfc/pn544/i2c.c                            |  2 +-
 drivers/nfc/pn544/pn544.c                          |  8 +++---
 drivers/nfc/st-nci/ndlc.c                          |  2 +-
 drivers/nfc/st21nfca/core.c                        |  6 ++---
 drivers/nfc/st21nfca/dep.c                         | 30 +++++++++++-----------
 drivers/nfc/st21nfca/i2c.c                         |  4 +--
 drivers/s390/net/qeth_l2_main.c                    |  3 +--
 drivers/s390/net/qeth_l3_main.c                    |  6 ++---
 drivers/scsi/cxgbi/cxgb3i/cxgb3i.c                 |  2 +-
 drivers/scsi/cxgbi/cxgb4i/cxgb4i.c                 |  2 +-
 drivers/scsi/fcoe/fcoe_ctlr.c                      |  2 +-
 drivers/scsi/fnic/fnic_fcs.c                       |  7 +++--
 drivers/scsi/qedf/qedf_fip.c                       |  3 +--
 drivers/staging/wilc1000/linux_mon.c               |  6 ++---
 drivers/staging/wlan-ng/p80211conv.c               | 14 ++++------
 drivers/target/iscsi/cxgbit/cxgbit_target.c        |  5 ++--
 drivers/usb/gadget/function/rndis.c                |  2 +-
 include/linux/if_vlan.h                            |  2 +-
 include/linux/skbuff.h                             |  7 +++--
 net/802/fc.c                                       |  4 +--
 net/802/fddi.c                                     |  2 +-
 net/802/hippi.c                                    |  2 +-
 net/8021q/vlan_dev.c                               |  2 +-
 net/appletalk/ddp.c                                |  2 +-
 net/ax25/af_ax25.c                                 |  2 +-
 net/bluetooth/hci_sock.c                           | 12 ++++-----
 net/bluetooth/mgmt_util.c                          |  2 +-
 net/bluetooth/rfcomm/core.c                        |  4 +--
 net/bridge/netfilter/nft_reject_bridge.c           |  2 +-
 net/core/netpoll.c                                 |  4 +--
 net/core/pktgen.c                                  |  6 ++---
 net/core/skbuff.c                                  |  2 +-
 net/dccp/options.c                                 |  2 +-
 net/decnet/dn_dev.c                                |  4 +--
 net/ethernet/eth.c                                 |  2 +-
 net/ipv4/esp4.c                                    |  2 +-
 net/ipv4/ip_gre.c                                  |  2 +-
 net/ipv6/esp6.c                                    |  2 +-
 net/ipv6/exthdrs.c                                 |  6 ++---
 net/ipv6/ip6_gre.c                                 |  2 +-
 net/ipv6/ip6_output.c                              |  4 +--
 net/ipv6/tcp_ipv6.c                                |  2 +-
 net/irda/irnet/irnet_irda.c                        |  2 +-
 net/iucv/af_iucv.c                                 |  3 +--
 net/mac80211/rx.c                                  |  2 +-
 net/mac80211/status.c                              |  2 +-
 net/mac80211/tx.c                                  |  4 +--
 net/ncsi/ncsi-cmd.c                                |  2 +-
 net/nfc/digital_dep.c                              |  2 +-
 net/nfc/digital_technology.c                       |  4 +--
 net/nfc/hci/core.c                                 |  2 +-
 net/nfc/hci/llc_shdlc.c                            |  8 +++---
 net/nfc/nci/data.c                                 |  2 +-
 net/nfc/nci/hci.c                                  |  4 +--
 net/nfc/nci/spi.c                                  |  4 +--
 net/nfc/rawsock.c                                  |  2 +-
 net/sctp/output.c                                  |  2 +-
 net/sctp/sm_statefuns.c                            |  4 +--
 net/sctp/ulpevent.c                                |  8 +++---
 net/wireless/util.c                                |  2 +-
 126 files changed, 204 insertions(+), 234 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 4fc99ae1c534..c8f2ca6d8b29 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -1174,7 +1174,7 @@ static int psend(struct atm_vcc *vcc, struct sk_buff *skb)
 		}
 	}
 
-	header = (void *)skb_push(skb, sizeof(*header));
+	header = skb_push(skb, sizeof(*header));
 
 	/* This does _not_ include the size of the header */
 	header->size = cpu_to_le16(pktlen);
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index a9932fe57d92..48d10cb5c9a1 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -297,7 +297,7 @@ static int bpa10x_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 		return -ENOMEM;
 
 	/* Prepend skb with frame type */
-	*skb_push(skb, 1) = hci_skb_pkt_type(skb);
+	*(u8 *)skb_push(skb, 1) = hci_skb_pkt_type(skb);
 
 	switch (hci_skb_pkt_type(skb)) {
 	case HCI_COMMAND_PKT:
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index d5040bbd34e8..242359c2d1f1 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -219,7 +219,7 @@ static int fwnet_header_create(struct sk_buff *skb, struct net_device *net,
 {
 	struct fwnet_header *h;
 
-	h = (struct fwnet_header *)skb_push(skb, sizeof(*h));
+	h = skb_push(skb, sizeof(*h));
 	put_unaligned_be16(type, &h->h_proto);
 
 	if (net->flags & (IFF_LOOPBACK | IFF_NOARP)) {
@@ -961,16 +961,14 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 	tx_len = ptask->max_payload;
 	switch (fwnet_get_hdr_lf(&ptask->hdr)) {
 	case RFC2374_HDR_UNFRAG:
-		bufhdr = (struct rfc2734_header *)
-				skb_push(ptask->skb, RFC2374_UNFRAG_HDR_SIZE);
+		bufhdr = skb_push(ptask->skb, RFC2374_UNFRAG_HDR_SIZE);
 		put_unaligned_be32(ptask->hdr.w0, &bufhdr->w0);
 		break;
 
 	case RFC2374_HDR_FIRSTFRAG:
 	case RFC2374_HDR_INTFRAG:
 	case RFC2374_HDR_LASTFRAG:
-		bufhdr = (struct rfc2734_header *)
-				skb_push(ptask->skb, RFC2374_FRAG_HDR_SIZE);
+		bufhdr = skb_push(ptask->skb, RFC2374_FRAG_HDR_SIZE);
 		put_unaligned_be32(ptask->hdr.w0, &bufhdr->w0);
 		put_unaligned_be32(ptask->hdr.w1, &bufhdr->w1);
 		break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 9ae518c01bc2..86975370a4c0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -513,7 +513,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	set_arp_failure_handler(skb, arp_failure_discard);
 	skb_reset_transport_header(skb);
 	len = skb->len;
-	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+	req = skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(len);
@@ -564,7 +564,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	skb->priority = CPL_PRIORITY_DATA;
 	set_arp_failure_handler(skb, arp_failure_discard);
 	skb_reset_transport_header(skb);
-	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+	req = skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(mpalen);
@@ -615,7 +615,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	set_arp_failure_handler(skb, arp_failure_discard);
 	skb_reset_transport_header(skb);
 	len = skb->len;
-	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+	req = skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(len);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 36ae3023e703..76fb39415e18 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3751,7 +3751,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
 	tcp_clear_options(&tmp_opt);
 	tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
 
-	req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
+	req = __skb_push(skb, sizeof(*req));
 	memset(req, 0, sizeof(*req));
 	req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
 			 SYN_MAC_IDX_V(RX_MACIDX_G(
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index a115c0b7a310..5da6f2e9f22e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -681,7 +681,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
 {
 	struct ipoib_pseudo_header *phdr;
 
-	phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr));
+	phdr = skb_push(skb, sizeof(*phdr));
 	memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
 }
 
@@ -1129,7 +1129,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
 {
 	struct ipoib_header *header;
 
-	header = (struct ipoib_header *) skb_push(skb, sizeof *header);
+	header = skb_push(skb, sizeof *header);
 
 	header->proto = htons(type);
 	header->reserved = 0;
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index 2e8fee982436..afa938bd26d6 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -460,7 +460,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
 	sc = opa_vnic_get_sc(info, skb);
 	l4_hdr = info->vesw.vesw_id;
 
-	mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+	mdata = skb_push(skb, sizeof(*mdata));
 	mdata->vl = opa_vnic_get_vl(adapter, skb);
 	mdata->entropy = entropy;
 	mdata->flags = 0;
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index 905f39dda5aa..fcf75323d62a 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -103,7 +103,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
 	int rc;
 
 	/* pass entropy and vl as metadata in skb */
-	mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+	mdata = skb_push(skb, sizeof(*mdata));
 	mdata->entropy =  opa_vnic_calc_entropy(adapter, skb);
 	mdata->vl = opa_vnic_get_vl(adapter, skb);
 	rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index e26cae9baf17..b7e3f1cde683 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1312,7 +1312,7 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
 	/* check if we should pass this packet
 	 * the filter instructions are constructed assuming
 	 * a four-byte PPP header on each packet */
-	*skb_push(skb, 4) = 1; /* indicate outbound */
+	*(u8 *)skb_push(skb, 4) = 1; /* indicate outbound */
 
 	{
 		__be16 *p = (__be16 *)skb->data;
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index d78f30186642..8c651fdee039 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -85,7 +85,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev,
 			unsigned short type, uint8_t daddr)
 {
 	int hdr_size = ARC_HDR_SIZE;
-	struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size);
+	struct archdr *pkt = skb_push(skb, hdr_size);
 
 	/* Set the source hardware address.
 	 *
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 2056878fb087..a80f4eb9262d 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -101,7 +101,7 @@ static int build_header(struct sk_buff *skb,
 			uint8_t daddr)
 {
 	int hdr_size = ARC_HDR_SIZE;
-	struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size);
+	struct archdr *pkt = skb_push(skb, hdr_size);
 
 	arc_printk(D_PROTO, dev, "Preparing header for cap packet %x.\n",
 		   *((int *)&pkt->soft.cap.cookie[0]));
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c
index 4b1a75469cb1..a7752a5b647f 100644
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -162,7 +162,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev,
 			unsigned short type, uint8_t daddr)
 {
 	int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE;
-	struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size);
+	struct archdr *pkt = skb_push(skb, hdr_size);
 	struct arc_rfc1051 *soft = &pkt->soft.rfc1051;
 
 	/* set the protocol ID according to RFC1051 */
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c
index 566da5ecdc9d..a4c856282674 100644
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -379,7 +379,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev,
 {
 	struct arcnet_local *lp = netdev_priv(dev);
 	int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE;
-	struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size);
+	struct archdr *pkt = skb_push(skb, hdr_size);
 	struct arc_rfc1201 *soft = &pkt->soft.rfc1201;
 
 	/* set the protocol ID according to RFC1201 */
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 5274501428e4..5333601f855f 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1099,7 +1099,7 @@ static struct sk_buff *bcm_sysport_insert_tsb(struct sk_buff *skb,
 		skb = nskb;
 	}
 
-	tsb = (struct bcm_tsb *)skb_push(skb, sizeof(*tsb));
+	tsb = skb_push(skb, sizeof(*tsb));
 	/* Zero-out TSB by default */
 	memset(tsb, 0, sizeof(*tsb));
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index d56142b98534..0f13a7f7c1d3 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1801,7 +1801,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 			CPL_ETH_II : CPL_ETH_II_VLAN;
 
-		hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr));
+		hdr = skb_push(skb, sizeof(*hdr));
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
 		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
@@ -1849,7 +1849,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			}
 		}
 
-		cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl));
+		cpl = __skb_push(skb, sizeof(*cpl));
 		cpl->opcode = CPL_TX_PKT;
 		cpl->ip_csum_dis = 1;    /* SW calculates IP csum */
 		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 0ff166ec3e7e..a79e257bc338 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2250,7 +2250,7 @@ static int gfar_enet_open(struct net_device *dev)
 
 static inline struct txfcb *gfar_add_fcb(struct sk_buff *skb)
 {
-	struct txfcb *fcb = (struct txfcb *)skb_push(skb, GMAC_FCB_LEN);
+	struct txfcb *fcb = skb_push(skb, GMAC_FCB_LEN);
 
 	memset(fcb, 0, GMAC_FCB_LEN);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index c456ca07b562..898759fcf9ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -132,7 +132,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	skb_reserve(skb, NET_IP_ALIGN);
 
 	/*  Reserve for ethernet and IP header  */
-	ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+	ethh = skb_push(skb, ETH_HLEN);
 	skb_reset_mac_header(skb);
 
 	skb_set_network_header(skb, skb->len);
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 2dcca249eb9c..46cb7f8955a2 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6667,7 +6667,7 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
 	headroom = align + sizeof(struct tx_pkt_hdr);
 
 	ehdr = (struct ethhdr *) skb->data;
-	tp = (struct tx_pkt_hdr *) skb_push(skb, headroom);
+	tp = skb_push(skb, headroom);
 
 	len = skb->len - sizeof(struct tx_pkt_hdr);
 	tp->flags = cpu_to_le64(niu_compute_tx_flags(skb, ehdr, align, len));
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index fa6a06571187..88d74aef218a 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -754,7 +754,7 @@ static struct sk_buff *gelic_put_vlan_tag(struct sk_buff *skb,
 			return NULL;
 		dev_kfree_skb_any(sk_tmp);
 	}
-	veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);
+	veth = skb_push(skb, VLAN_HLEN);
 
 	/* Move the mac addresses to the top of buffer */
 	memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 7bcf1b52020e..d586ad93aaff 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -687,8 +687,7 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
 	if (err)
 		goto free_dst;
 
-	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) +
-						   info->options_len);
+	gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
 	geneve_build_header(gnvh, info);
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 	return 0;
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index ca110cd2a4e4..8e333a8a2295 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -398,7 +398,7 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
 	int payload_len = skb->len;
 	struct gtp0_header *gtp0;
 
-	gtp0 = (struct gtp0_header *) skb_push(skb, sizeof(*gtp0));
+	gtp0 = skb_push(skb, sizeof(*gtp0));
 
 	gtp0->flags	= 0x1e; /* v0, GTP-non-prime. */
 	gtp0->type	= GTP_TPDU;
@@ -415,7 +415,7 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
 	int payload_len = skb->len;
 	struct gtp1_header *gtp1;
 
-	gtp1 = (struct gtp1_header *) skb_push(skb, sizeof(*gtp1));
+	gtp1 = skb_push(skb, sizeof(*gtp1));
 
 	/* Bits    8  7  6  5  4  3  2	1
 	 *	  +--+--+--+--+--+--+--+--+
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 7683fd544344..71ddadbf2368 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -1422,7 +1422,7 @@ static netdev_tx_t rr_start_xmit(struct sk_buff *skb,
 		skb = new_skb;
 	}
 
-	ifield = (u32 *)skb_push(skb, 8);
+	ifield = skb_push(skb, 8);
 
 	ifield[0] = 0;
 	ifield[1] = hcb->ifield;
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 2067dcc71535..e370d7c894cb 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -697,7 +697,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 	unprotected_len = skb->len;
 	eth = eth_hdr(skb);
 	sci_present = send_sci(secy);
-	hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(sci_present));
+	hh = skb_push(skb, macsec_extra_len(sci_present));
 	memmove(hh, eth, 2 * ETH_ALEN);
 
 	pn = tx_sa_update_pn(tx_sa, secy);
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 32c72db654e2..814fd8fae67d 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -802,7 +802,7 @@ process_input_packet(struct asyncppp *ap)
 	proto = p[0];
 	if (proto & 1) {
 		/* protocol is compressed */
-		skb_push(skb, 1)[0] = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
 	} else {
 		if (skb->len < 2)
 			goto err;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index bbded33120fe..d42091f11eb8 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1490,7 +1490,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		/* check if we should pass this packet */
 		/* the filter instructions are constructed assuming
 		   a four-byte PPP header on each packet */
-		*skb_push(skb, 2) = 1;
+		*(u8 *)skb_push(skb, 2) = 1;
 		if (ppp->pass_filter &&
 		    BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
 			if (ppp->debug & 1)
@@ -2133,7 +2133,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 			if (skb_unclone(skb, GFP_ATOMIC))
 				goto err;
 
-			*skb_push(skb, 2) = 0;
+			*(u8 *)skb_push(skb, 2) = 0;
 			if (ppp->pass_filter &&
 			    BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
 				if (ppp->debug & 1)
@@ -2267,7 +2267,7 @@ ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
 	 * Do protocol ID decompression on the first fragment of each packet.
 	 */
 	if ((PPP_MP_CB(skb)->BEbits & B) && (skb->data[0] & 1))
-		*skb_push(skb, 1) = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
 
 	/*
 	 * Expand sequence number to 32 bits, making it as close
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index ce2300c0bcbf..ef08590db873 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -711,7 +711,7 @@ ppp_sync_input(struct syncppp *ap, const unsigned char *buf,
 	/* decompress protocol field if compressed */
 	if (p[0] & 1) {
 		/* protocol is compressed */
-		skb_push(skb, 1)[0] = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
 	} else if (skb->len < 2)
 		goto err;
 
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 1951b1085cb8..2f22e318a67f 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -328,7 +328,7 @@ allow_packet:
 
 		if ((*skb->data) & 1) {
 			/* protocol is compressed */
-			skb_push(skb, 1)[0] = 0;
+			*(u8 *)skb_push(skb, 1) = 0;
 		}
 
 		skb->ip_summed = CHECKSUM_NONE;
diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c
index 29276e54bb8b..ba1ce1006c4f 100644
--- a/drivers/net/usb/gl620a.c
+++ b/drivers/net/usb/gl620a.c
@@ -174,7 +174,7 @@ genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 	}
 
 	// attach the packet count to the header
-	packet_count = (__le32 *) skb_push(skb, (4 + 4*1));
+	packet_count = skb_push(skb, (4 + 4 * 1));
 	packet_len = packet_count + 1;
 
 	*packet_count = cpu_to_le32(1);
diff --git a/drivers/net/usb/int51x1.c b/drivers/net/usb/int51x1.c
index 5a43b77a6b9c..be63a829b8fe 100644
--- a/drivers/net/usb/int51x1.c
+++ b/drivers/net/usb/int51x1.c
@@ -106,7 +106,7 @@ static struct sk_buff *int51x1_tx_fixup(struct usbnet *dev,
 	pack_len += need_tail;
 	pack_len &= 0x07ff;
 
-	len = (__le16 *) __skb_push(skb, INT51X1_HEADER_SIZE);
+	len = __skb_push(skb, INT51X1_HEADER_SIZE);
 	*len = cpu_to_le16(pack_len);
 
 	if(need_tail)
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 37fb621fde86..92e4fd29ae44 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -809,7 +809,7 @@ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb,
 		return NETDEV_TX_OK;
 	}
 
-	private_header = (__le16 *)__skb_push(skb, 2);
+	private_header = __skb_push(skb, 2);
 	*private_header = cpu_to_le16(skb->len-2);
 	kaweth->tx_skb = skb;
 
diff --git a/drivers/net/usb/lg-vl600.c b/drivers/net/usb/lg-vl600.c
index d633492bf9eb..dbabd7ca5268 100644
--- a/drivers/net/usb/lg-vl600.c
+++ b/drivers/net/usb/lg-vl600.c
@@ -304,7 +304,7 @@ encapsulate:
 	memset(&packet->dummy, 0, sizeof(packet->dummy));
 	packet->len = cpu_to_le32(orig_len);
 
-	frame = (struct vl600_frame_hdr *) skb_push(skb, sizeof(*frame));
+	frame = skb_push(skb, sizeof(*frame));
 	memset(frame, 0, sizeof(*frame));
 	frame->len = cpu_to_le32(full_len);
 	frame->serial = cpu_to_le32(serial++);
diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c
index 861ff45f0b09..be53ff30b7b5 100644
--- a/drivers/net/usb/net1080.c
+++ b/drivers/net/usb/net1080.c
@@ -466,7 +466,7 @@ net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 
 encapsulate:
 	/* header first */
-	header = (struct nc_header *) skb_push(skb, sizeof *header);
+	header = skb_push(skb, sizeof *header);
 	header->hdr_len = cpu_to_le16(sizeof (*header));
 	header->packet_len = cpu_to_le16(len);
 	header->packet_id = cpu_to_le16((u16)dev->xid++);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index ffd229ec8352..5894e3c9468f 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -101,7 +101,7 @@ static netdev_tx_t qmimux_start_xmit(struct sk_buff *skb, struct net_device *dev
 	unsigned int len = skb->len;
 	struct qmimux_hdr *hdr;
 
-	hdr = (struct qmimux_hdr *)skb_push(skb, sizeof(struct qmimux_hdr));
+	hdr = skb_push(skb, sizeof(struct qmimux_hdr));
 	hdr->pad = 0;
 	hdr->mux_id = priv->mux_id;
 	hdr->pkt_len = cpu_to_be16(len);
diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index e96e2e5673d7..a151f267aebb 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -578,7 +578,7 @@ rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 	 * packets; Linux minimizes wasted bandwidth through tx queues.
 	 */
 fill:
-	hdr = (void *) __skb_push(skb, sizeof *hdr);
+	hdr = __skb_push(skb, sizeof *hdr);
 	memset(hdr, 0, sizeof *hdr);
 	hdr->msg_type = cpu_to_le32(RNDIS_MSG_PACKET);
 	hdr->msg_len = cpu_to_le32(skb->len);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 022c0b5f9844..c6c0595d267b 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -383,7 +383,7 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
 
 	if (!list_empty(&vrf_dev->ptype_all) &&
 	    likely(skb_headroom(skb) >= ETH_HLEN)) {
-		struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+		struct ethhdr *eth = skb_push(skb, ETH_HLEN);
 
 		ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
 		eth_zero_addr(eth->h_dest);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4e1d427d340c..94ce98229828 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1827,7 +1827,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 	if (err)
 		return err;
 
-	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
+	vxh = __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vni);
 
diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index dd7f3168c07d..a654687b5fa2 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c
@@ -221,7 +221,7 @@ void i2400m_tx_prep_header(struct sk_buff *skb)
 {
 	struct i2400m_pl_data_hdr *pl_hdr;
 	skb_pull(skb, ETH_HLEN);
-	pl_hdr = (struct i2400m_pl_data_hdr *) skb_push(skb, sizeof(*pl_hdr));
+	pl_hdr = skb_push(skb, sizeof(*pl_hdr));
 	pl_hdr->reserved = 0;
 }
 
diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c
index 5f64f3928c35..3b0802fc5bf5 100644
--- a/drivers/net/wireless/admtek/adm8211.c
+++ b/drivers/net/wireless/admtek/adm8211.c
@@ -1700,7 +1700,7 @@ static void adm8211_tx(struct ieee80211_hw *dev,
 	skb_pull(skb, hdrlen);
 	payload_len = skb->len;
 
-	txhdr = (struct adm8211_tx_hdr *) skb_push(skb, sizeof(*txhdr));
+	txhdr = skb_push(skb, sizeof(*txhdr));
 	memset(txhdr, 0, sizeof(*txhdr));
 	memcpy(txhdr->da, ieee80211_get_DA(hdr), ETH_ALEN);
 	txhdr->signal = plcp_signal;
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index f2f4ccfdf8da..106d6f8d471a 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -829,8 +829,8 @@ static void ar5523_tx_work_locked(struct ar5523 *ar)
 		data->ar = ar;
 		data->urb = urb;
 
-		desc = (struct ar5523_tx_desc *)skb_push(skb, sizeof(*desc));
-		chunk = (struct ar5523_chunk *)skb_push(skb, sizeof(*chunk));
+		desc = skb_push(skb, sizeof(*desc));
+		chunk = skb_push(skb, sizeof(*chunk));
 
 		chunk->seqnum = 0;
 		chunk->flags = UATH_CFLAGS_FINAL;
diff --git a/drivers/net/wireless/ath/ath6kl/htc_pipe.c b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
index b13d61111072..5c0ba83a44aa 100644
--- a/drivers/net/wireless/ath/ath6kl/htc_pipe.c
+++ b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
@@ -228,8 +228,7 @@ static int htc_issue_packets(struct htc_target *target,
 		payload_len = packet->act_len;
 
 		/* setup HTC frame header */
-		htc_hdr = (struct htc_frame_hdr *) skb_push(skb,
-							    sizeof(*htc_hdr));
+		htc_hdr = skb_push(skb, sizeof(*htc_hdr));
 		if (!htc_hdr) {
 			WARN_ON_ONCE(1);
 			status = -EINVAL;
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 12aa8abbcba4..0d9687a2aa98 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -199,7 +199,7 @@ static int hif_usb_send_mgmt(struct hif_device_usb *hif_dev,
 	cmd->skb = skb;
 	cmd->hif_dev = hif_dev;
 
-	hdr = (__le16 *) skb_push(skb, 4);
+	hdr = skb_push(skb, 4);
 	*hdr++ = cpu_to_le16(skb->len - 4);
 	*hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG);
 
diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c
index 9fa8970a1f7d..1bf63a4efb4c 100644
--- a/drivers/net/wireless/ath/ath9k/htc_hst.c
+++ b/drivers/net/wireless/ath/ath9k/htc_hst.c
@@ -26,8 +26,7 @@ static int htc_issue_send(struct htc_target *target, struct sk_buff* skb,
 	struct htc_endpoint *endpoint = &target->endpoint[epid];
 	int status;
 
-	hdr = (struct htc_frame_hdr *)
-		skb_push(skb, sizeof(struct htc_frame_hdr));
+	hdr = skb_push(skb, sizeof(struct htc_frame_hdr));
 	hdr->endpoint_id = epid;
 	hdr->flags = flags;
 	hdr->payload_len = cpu_to_be16(len);
diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
index c51c69b1ad96..85d09fdef8dc 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.c
+++ b/drivers/net/wireless/ath/ath9k/wmi.c
@@ -277,7 +277,7 @@ static int ath9k_wmi_cmd_issue(struct wmi *wmi,
 	struct wmi_cmd_hdr *hdr;
 	unsigned long flags;
 
-	hdr = (struct wmi_cmd_hdr *) skb_push(skb, sizeof(struct wmi_cmd_hdr));
+	hdr = skb_push(skb, sizeof(struct wmi_cmd_hdr));
 	hdr->command_id = cpu_to_be16(cmd);
 	hdr->seq_no = cpu_to_be16(++wmi->tx_seq_id);
 
diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c
index 2bf04c9edc98..0cb5b58925dc 100644
--- a/drivers/net/wireless/ath/carl9170/tx.c
+++ b/drivers/net/wireless/ath/carl9170/tx.c
@@ -991,7 +991,7 @@ static int carl9170_tx_prepare(struct ar9170 *ar,
 	else
 		cvif = NULL;
 
-	txc = (void *)skb_push(skb, sizeof(*txc));
+	txc = skb_push(skb, sizeof(*txc));
 	memset(txc, 0, sizeof(*txc));
 
 	SET_VAL(CARL9170_TX_SUPER_MISC_QUEUE, txc->s.misc, hw_queue);
diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index edab4c0a900f..84d91606e6f3 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -363,7 +363,7 @@ static void wil_rx_add_radiotap_header(struct wil6210_priv *wil,
 		return;
 	}
 
-	rtap_vendor = (void *)skb_push(skb, rtap_len);
+	rtap_vendor = skb_push(skb, rtap_len);
 	memset(rtap_vendor, 0, rtap_len);
 
 	rtap_vendor->rtap.rthdr.it_version = PKTHDR_RADIOTAP_VERSION;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_80211_rx.c b/drivers/net/wireless/intersil/hostap/hostap_80211_rx.c
index 34dbddbf3f9b..6d8b64ca1a63 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_80211_rx.c
@@ -131,8 +131,7 @@ int prism2_rx_80211(struct net_device *dev, struct sk_buff *skb,
 
 	if (prism_header == 1) {
 		struct linux_wlan_ng_prism_hdr *hdr;
-		hdr = (struct linux_wlan_ng_prism_hdr *)
-			skb_push(skb, phdrlen);
+		hdr = skb_push(skb, phdrlen);
 		memset(hdr, 0, phdrlen);
 		hdr->msgcode = LWNG_CAP_DID_BASE;
 		hdr->msglen = sizeof(*hdr);
@@ -153,8 +152,7 @@ hdr->f.status = s; hdr->f.len = l; hdr->f.data = d
 #undef LWNG_SETVAL
 	} else if (prism_header == 2) {
 		struct linux_wlan_ng_cap_hdr *hdr;
-		hdr = (struct linux_wlan_ng_cap_hdr *)
-			skb_push(skb, phdrlen);
+		hdr = skb_push(skb, phdrlen);
 		memset(hdr, 0, phdrlen);
 		hdr->version    = htonl(LWNG_CAPHDR_VERSION);
 		hdr->length     = htonl(phdrlen);
@@ -172,7 +170,7 @@ hdr->f.status = s; hdr->f.len = l; hdr->f.data = d
 		hdr->encoding   = htonl(1); /* cck */
 	} else if (prism_header == 3) {
 		struct hostap_radiotap_rx *hdr;
-		hdr = (struct hostap_radiotap_rx *)skb_push(skb, phdrlen);
+		hdr = skb_push(skb, phdrlen);
 		memset(hdr, 0, phdrlen);
 		hdr->hdr.it_len = cpu_to_le16(phdrlen);
 		hdr->hdr.it_present =
diff --git a/drivers/net/wireless/intersil/orinoco/main.c b/drivers/net/wireless/intersil/orinoco/main.c
index f7abc439fb92..28dac36d7c4c 100644
--- a/drivers/net/wireless/intersil/orinoco/main.c
+++ b/drivers/net/wireless/intersil/orinoco/main.c
@@ -396,7 +396,7 @@ int orinoco_process_xmit_skb(struct sk_buff *skb,
 		memcpy(hdr.encap, encaps_hdr, sizeof(encaps_hdr));
 
 		/* Make room for the new header, and copy it in */
-		eh = (struct ethhdr *) skb_push(skb, ENCAPS_OVERHEAD);
+		eh = skb_push(skb, ENCAPS_OVERHEAD);
 		memcpy(eh, &hdr, sizeof(hdr));
 	}
 
@@ -1029,11 +1029,10 @@ static void orinoco_rx(struct net_device *dev,
 		/* These indicate a SNAP within 802.2 LLC within
 		   802.11 frame which we'll need to de-encapsulate to
 		   the original EthernetII frame. */
-		hdr = (struct ethhdr *)skb_push(skb,
-						ETH_HLEN - ENCAPS_OVERHEAD);
+		hdr = skb_push(skb, ETH_HLEN - ENCAPS_OVERHEAD);
 	} else {
 		/* 802.3 frame - prepend 802.3 header as is */
-		hdr = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+		hdr = skb_push(skb, ETH_HLEN);
 		hdr->h_proto = htons(length);
 	}
 	memcpy(hdr->h_dest, desc->addr1, ETH_ALEN);
diff --git a/drivers/net/wireless/intersil/p54/txrx.c b/drivers/net/wireless/intersil/p54/txrx.c
index b00c07d72f95..3a4214d362ff 100644
--- a/drivers/net/wireless/intersil/p54/txrx.c
+++ b/drivers/net/wireless/intersil/p54/txrx.c
@@ -815,8 +815,8 @@ void p54_tx_80211(struct ieee80211_hw *dev,
 		}
 	}
 
-	txhdr = (struct p54_tx_data *) skb_push(skb, sizeof(*txhdr) + padding);
-	hdr = (struct p54_hdr *) skb_push(skb, sizeof(*hdr));
+	txhdr = skb_push(skb, sizeof(*txhdr) + padding);
+	hdr = skb_push(skb, sizeof(*hdr));
 
 	if (padding)
 		hdr_flags |= P54_HDR_FLAG_DATA_ALIGN;
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.c b/drivers/net/wireless/intersil/prism54/islpci_eth.c
index d83f6332019e..9b0ded733294 100644
--- a/drivers/net/wireless/intersil/prism54/islpci_eth.c
+++ b/drivers/net/wireless/intersil/prism54/islpci_eth.c
@@ -276,10 +276,7 @@ islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb)
 		}
 
 		/* make room for the new header and fill it. */
-		avs =
-		    (struct avs_80211_1_header *) skb_push(*skb,
-							   sizeof (struct
-								   avs_80211_1_header));
+		avs = skb_push(*skb, sizeof(struct avs_80211_1_header));
 
 		avs->version = cpu_to_be32(P80211CAPTURE_VERSION);
 		avs->length = cpu_to_be32(sizeof (struct avs_80211_1_header));
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 7418088e296f..c8852acc1462 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -848,7 +848,7 @@ static void mac80211_hwsim_monitor_rx(struct ieee80211_hw *hw,
 	if (skb == NULL)
 		return;
 
-	hdr = (struct hwsim_radiotap_hdr *) skb_push(skb, sizeof(*hdr));
+	hdr = skb_push(skb, sizeof(*hdr));
 	hdr->hdr.it_version = PKTHDR_RADIOTAP_VERSION;
 	hdr->hdr.it_pad = 0;
 	hdr->hdr.it_len = cpu_to_le16(sizeof(*hdr));
@@ -1146,7 +1146,7 @@ static void mac80211_hwsim_add_vendor_rtap(struct sk_buff *skb)
 	 * Note that this code requires the headroom in the SKB
 	 * that was allocated earlier.
 	 */
-	rtap = (void *)skb_push(skb, sizeof(*rtap) + 8 + 4);
+	rtap = skb_push(skb, sizeof(*rtap) + 8 + 4);
 	rtap->oui[0] = HWSIM_RADIOTAP_OUI[0];
 	rtap->oui[1] = HWSIM_RADIOTAP_OUI[1];
 	rtap->oui[2] = HWSIM_RADIOTAP_OUI[2];
diff --git a/drivers/net/wireless/marvell/libertas/rx.c b/drivers/net/wireless/marvell/libertas/rx.c
index a18bb7a9889c..7586ff681b23 100644
--- a/drivers/net/wireless/marvell/libertas/rx.c
+++ b/drivers/net/wireless/marvell/libertas/rx.c
@@ -257,7 +257,7 @@ static int process_rxed_802_11_packet(struct lbs_private *priv,
 		goto done;
 	}
 
-	pradiotap_hdr = (void *)skb_push(skb, sizeof(struct rx_radiotap_hdr));
+	pradiotap_hdr = skb_push(skb, sizeof(struct rx_radiotap_hdr));
 	memcpy(pradiotap_hdr, &radiotap_hdr, sizeof(struct rx_radiotap_hdr));
 
 	priv->cur_rate = lbs_fw_index_to_data_rate(prxpd->rx_rate);
diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c
index d80333117989..81228bf73043 100644
--- a/drivers/net/wireless/marvell/libertas_tf/main.c
+++ b/drivers/net/wireless/marvell/libertas_tf/main.c
@@ -260,7 +260,7 @@ static void lbtf_tx_work(struct work_struct *work)
 
 	len = skb->len;
 	info  = IEEE80211_SKB_CB(skb);
-	txpd = (struct txpd *)  skb_push(skb, sizeof(struct txpd));
+	txpd = skb_push(skb, sizeof(struct txpd));
 
 	if (priv->surpriseremoved) {
 		dev_kfree_skb_any(skb);
diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c
index ad77bec1ba0f..3600e911a63e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/tx.c
+++ b/drivers/net/wireless/mediatek/mt7601u/tx.c
@@ -148,7 +148,7 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb,
 	u16 rate_ctl;
 	u8 nss;
 
-	txwi = (struct mt76_txwi *)skb_push(skb, sizeof(struct mt76_txwi));
+	txwi = skb_push(skb, sizeof(struct mt76_txwi));
 	memset(txwi, 0, sizeof(*txwi));
 
 	if (!wcid->tx_rate_set)
diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
index 35fe991dcc56..55198ac2b755 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
@@ -278,8 +278,7 @@ static void rtl8187_tx(struct ieee80211_hw *dev,
 	}
 
 	if (!priv->is_rtl8187b) {
-		struct rtl8187_tx_hdr *hdr =
-			(struct rtl8187_tx_hdr *)skb_push(skb, sizeof(*hdr));
+		struct rtl8187_tx_hdr *hdr = skb_push(skb, sizeof(*hdr));
 		hdr->flags = cpu_to_le32(flags);
 		hdr->len = 0;
 		hdr->rts_duration = rts_dur;
@@ -292,8 +291,7 @@ static void rtl8187_tx(struct ieee80211_hw *dev,
 		unsigned int epmap[4] = { 6, 7, 5, 4 };
 		u16 fc = le16_to_cpu(tx_hdr->frame_control);
 
-		struct rtl8187b_tx_hdr *hdr =
-			(struct rtl8187b_tx_hdr *)skb_push(skb, sizeof(*hdr));
+		struct rtl8187b_tx_hdr *hdr = skb_push(skb, sizeof(*hdr));
 		struct ieee80211_rate *txrate =
 			ieee80211_get_tx_rate(dev, info);
 		memset(hdr, 0, sizeof(*hdr));
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 39d56313bc94..21e5ef021260 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -4952,7 +4952,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
 	if (control && control->sta)
 		sta = control->sta;
 
-	tx_desc = (struct rtl8xxxu_txdesc32 *)skb_push(skb, tx_desc_size);
+	tx_desc = skb_push(skb, tx_desc_size);
 
 	memset(tx_desc, 0, tx_desc_size);
 	tx_desc->pkt_size = cpu_to_le16(pktlen);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
index 41422e4da8b7..de6c3428f7c6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
@@ -512,7 +512,7 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
 
 	seq_number = (le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4;
 	rtl_get_tcb_desc(hw, info, sta, skb, tcb_desc);
-	txdesc = (u8 *)skb_push(skb, RTL_TX_HEADER_SIZE);
+	txdesc = skb_push(skb, RTL_TX_HEADER_SIZE);
 	memset(txdesc, 0, RTL_TX_HEADER_SIZE);
 	SET_TX_DESC_PKT_SIZE(txdesc, pktlen);
 	SET_TX_DESC_LINIP(txdesc, 0);
diff --git a/drivers/net/wireless/st/cw1200/txrx.c b/drivers/net/wireless/st/cw1200/txrx.c
index cd63ffef025a..e9050b41157a 100644
--- a/drivers/net/wireless/st/cw1200/txrx.c
+++ b/drivers/net/wireless/st/cw1200/txrx.c
@@ -574,7 +574,7 @@ cw1200_tx_h_wsm(struct cw1200_common *priv,
 		return NULL;
 	}
 
-	wsm = (struct wsm_tx *)skb_push(t->skb, sizeof(struct wsm_tx));
+	wsm = skb_push(t->skb, sizeof(struct wsm_tx));
 	t->txpriv.offset += sizeof(struct wsm_tx);
 	memset(wsm, 0, sizeof(*wsm));
 	wsm->hdr.len = __cpu_to_le16(t->skb->len);
diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c
index 81de83c6fcf6..de2fa6705574 100644
--- a/drivers/net/wireless/ti/wl1251/tx.c
+++ b/drivers/net/wireless/ti/wl1251/tx.c
@@ -161,8 +161,7 @@ static int wl1251_tx_fill_hdr(struct wl1251 *wl, struct sk_buff *skb,
 		return id;
 
 	fc = *(u16 *)skb->data;
-	tx_hdr = (struct tx_double_buffer_desc *) skb_push(skb,
-							   sizeof(*tx_hdr));
+	tx_hdr = skb_push(skb, sizeof(*tx_hdr));
 
 	tx_hdr->length = cpu_to_le16(skb->len - sizeof(*tx_hdr));
 	rate = ieee80211_get_tx_rate(wl->hw, control);
diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c
index 229f4d01f239..2bfc12fdc929 100644
--- a/drivers/net/wireless/ti/wlcore/cmd.c
+++ b/drivers/net/wireless/ti/wlcore/cmd.c
@@ -1282,7 +1282,7 @@ int wl1271_cmd_build_arp_rsp(struct wl1271 *wl, struct wl12xx_vif *wlvif)
 		memset(skb_push(skb, sizeof(__le16)), 0, sizeof(__le16));
 
 	/* mac80211 header */
-	hdr = (struct ieee80211_hdr_3addr *)skb_push(skb, sizeof(*hdr));
+	hdr = skb_push(skb, sizeof(*hdr));
 	memset(hdr, 0, sizeof(*hdr));
 	fc = IEEE80211_FTYPE_DATA | IEEE80211_FCTL_TODS;
 	if (wlvif->sta.qos)
diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
index c1b8e4e9d70b..a3f5e9ca492a 100644
--- a/drivers/net/wireless/ti/wlcore/tx.c
+++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -223,8 +223,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct wl12xx_vif *wlvif,
 	total_blocks = wlcore_hw_calc_tx_blocks(wl, total_len, spare_blocks);
 
 	if (total_blocks <= wl->tx_blocks_available) {
-		desc = (struct wl1271_tx_hw_descr *)skb_push(
-			skb, total_len - skb->len);
+		desc = skb_push(skb, total_len - skb->len);
 
 		wlcore_hw_set_tx_desc_blocks(wl, desc, total_blocks,
 					     spare_blocks);
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
index 2d929d2edb00..b785742bfd9e 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
@@ -868,8 +868,7 @@ static int fill_ctrlset(struct zd_mac *mac,
 	unsigned int frag_len = skb->len + FCS_LEN;
 	unsigned int packet_length;
 	struct ieee80211_rate *txrate;
-	struct zd_ctrlset *cs = (struct zd_ctrlset *)
-		skb_push(skb, sizeof(struct zd_ctrlset));
+	struct zd_ctrlset *cs = skb_push(skb, sizeof(struct zd_ctrlset));
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	ZD_ASSERT(frag_len <= 0xffff);
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 97f003e84381..d5781aa0f791 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -79,8 +79,8 @@ static void fdp_nci_i2c_add_len_lrc(struct sk_buff *skb)
 
 	/* Add length header */
 	len = skb->len;
-	*skb_push(skb, 1) = len & 0xff;
-	*skb_push(skb, 1) = len >> 8;
+	*(u8 *)skb_push(skb, 1) = len & 0xff;
+	*(u8 *)skb_push(skb, 1) = len >> 8;
 
 	/* Compute and add lrc */
 	for (i = 0; i < len + 2; i++)
diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c
index 8e328c36a816..386cc61d95b9 100644
--- a/drivers/nfc/microread/i2c.c
+++ b/drivers/nfc/microread/i2c.c
@@ -70,7 +70,7 @@ static void microread_i2c_add_len_crc(struct sk_buff *skb)
 	int len;
 
 	len = skb->len;
-	*skb_push(skb, 1) = len;
+	*(u8 *)skb_push(skb, 1) = len;
 
 	for (i = 0; i < skb->len; i++)
 		crc = crc ^ skb->data[i];
diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
index 9d0dd1be0923..38a979eacc29 100644
--- a/drivers/nfc/microread/microread.c
+++ b/drivers/nfc/microread/microread.c
@@ -419,7 +419,7 @@ static int microread_im_transceive(struct nfc_hci_dev *hdev,
 	pr_info("data exchange to gate 0x%x\n", target->hci_reader_gate);
 
 	if (target->hci_reader_gate == MICROREAD_GATE_ID_P2P_INITIATOR) {
-		*skb_push(skb, 1) = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
 
 		return nfc_hci_send_event(hdev, target->hci_reader_gate,
 				     MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF,
@@ -453,7 +453,7 @@ static int microread_im_transceive(struct nfc_hci_dev *hdev,
 		return 1;
 	}
 
-	*skb_push(skb, 1) = control_bits;
+	*(u8 *)skb_push(skb, 1) = control_bits;
 
 	info->async_cb_type = MICROREAD_CB_TYPE_READER_ALL;
 	info->async_cb = cb;
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index 51c8240a1672..c5038e6447bd 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -68,7 +68,7 @@ static int nfcmrvl_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
 		unsigned char *hdr;
 		unsigned char len = skb->len;
 
-		hdr = (char *) skb_push(skb, NFCMRVL_HCI_EVENT_HEADER_SIZE);
+		hdr = skb_push(skb, NFCMRVL_HCI_EVENT_HEADER_SIZE);
 		hdr[0] = NFCMRVL_HCI_COMMAND_CODE;
 		hdr[1] = NFCMRVL_HCI_OGF;
 		hdr[2] = NFCMRVL_HCI_OCF;
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 68a3cd0287f6..6a711b5b9490 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -2090,10 +2090,10 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb)
 
 			/* MI + TG */
 			if (frag_size  == PN533_CMD_DATAFRAME_MAXLEN)
-				*skb_push(frag, sizeof(u8)) =
-							(PN533_CMD_MI_MASK | 1);
+				*(u8 *)skb_push(frag, sizeof(u8)) =
+						(PN533_CMD_MI_MASK | 1);
 			else
-				*skb_push(frag, sizeof(u8)) =  1; /* TG */
+				*(u8 *)skb_push(frag, sizeof(u8)) =  1; /* TG */
 		}
 
 		skb_put_data(frag, skb->data, frag_size);
@@ -2160,7 +2160,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev,
 				goto error;
 			}
 		} else {
-			*skb_push(skb, sizeof(u8)) =  1; /* TG */
+			*(u8 *)skb_push(skb, sizeof(u8)) =  1; /* TG */
 		}
 
 		rc = pn533_send_data_async(dev, PN533_CMD_IN_DATA_EXCHANGE,
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index dc1e3768cee6..b7be6c25b7e6 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -283,7 +283,7 @@ static void pn544_hci_i2c_add_len_crc(struct sk_buff *skb)
 	int len;
 
 	len = skb->len + 2;
-	*skb_push(skb, 1) = len;
+	*(u8 *)skb_push(skb, 1) = len;
 
 	crc = crc_ccitt(0xffff, skb->data, skb->len);
 	crc = ~crc;
diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c
index 12e819ddf17a..70e898e38b16 100644
--- a/drivers/nfc/pn544/pn544.c
+++ b/drivers/nfc/pn544/pn544.c
@@ -649,8 +649,8 @@ static int pn544_hci_im_transceive(struct nfc_hci_dev *hdev,
 		} else
 			return 1;
 	case PN544_RF_READER_F_GATE:
-		*skb_push(skb, 1) = 0;
-		*skb_push(skb, 1) = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
 
 		info->async_cb_type = PN544_CB_TYPE_READER_F;
 		info->async_cb = cb;
@@ -665,7 +665,7 @@ static int pn544_hci_im_transceive(struct nfc_hci_dev *hdev,
 					      PN544_JEWEL_RAW_CMD, skb->data,
 					      skb->len, cb, cb_context);
 	case PN544_RF_READER_NFCIP1_INITIATOR_GATE:
-		*skb_push(skb, 1) = 0;
+		*(u8 *)skb_push(skb, 1) = 0;
 
 		return nfc_hci_send_event(hdev, target->hci_reader_gate,
 					PN544_HCI_EVT_SND_DATA, skb->data,
@@ -680,7 +680,7 @@ static int pn544_hci_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 	int r;
 
 	/* Set default false for multiple information chaining */
-	*skb_push(skb, 1) = 0;
+	*(u8 *)skb_push(skb, 1) = 0;
 
 	r = nfc_hci_send_event(hdev, PN544_RF_READER_NFCIP1_TARGET_GATE,
 			       PN544_HCI_EVT_SND_DATA, skb->data, skb->len);
diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c
index 50880d747b02..9477994cf975 100644
--- a/drivers/nfc/st-nci/ndlc.c
+++ b/drivers/nfc/st-nci/ndlc.c
@@ -87,7 +87,7 @@ int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb)
 	u8 pcb = PCB_TYPE_DATAFRAME | PCB_DATAFRAME_RETRANSMIT_NO |
 		PCB_FRAME_CRC_INFO_NOTPRESENT;
 
-	*skb_push(skb, 1) = pcb;
+	*(u8 *)skb_push(skb, 1) = pcb;
 	skb_queue_tail(&ndlc->send_q, skb);
 
 	schedule_work(&ndlc->sm_work);
diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c
index 50be3b788f1c..e803fdfa9189 100644
--- a/drivers/nfc/st21nfca/core.c
+++ b/drivers/nfc/st21nfca/core.c
@@ -782,12 +782,12 @@ static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev,
 		if (target->supported_protocols == NFC_PROTO_NFC_DEP_MASK)
 			return st21nfca_im_send_dep_req(hdev, skb);
 
-		*skb_push(skb, 1) = 0x1a;
+		*(u8 *)skb_push(skb, 1) = 0x1a;
 		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
 					      ST21NFCA_WR_XCHG_DATA, skb->data,
 					      skb->len, cb, cb_context);
 	case ST21NFCA_RF_READER_14443_3_A_GATE:
-		*skb_push(skb, 1) = 0x1a;	/* CTR, see spec:10.2.2.1 */
+		*(u8 *)skb_push(skb, 1) = 0x1a;	/* CTR, see spec:10.2.2.1 */
 
 		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
 					      ST21NFCA_WR_XCHG_DATA, skb->data,
@@ -797,7 +797,7 @@ static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev,
 		info->async_cb = cb;
 		info->async_cb_context = cb_context;
 
-		*skb_push(skb, 1) = 0x17;
+		*(u8 *)skb_push(skb, 1) = 0x17;
 
 		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
 					      ST21NFCA_WR_XCHG_DATA, skb->data,
diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c
index ada7b114b6c1..fd08be2917e6 100644
--- a/drivers/nfc/st21nfca/dep.c
+++ b/drivers/nfc/st21nfca/dep.c
@@ -315,10 +315,10 @@ int st21nfca_tm_send_dep_res(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 	int r;
 	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
 
-	*skb_push(skb, 1) = info->dep_info.curr_nfc_dep_pni;
-	*skb_push(skb, 1) = ST21NFCA_NFCIP1_DEP_RES;
-	*skb_push(skb, 1) = ST21NFCA_NFCIP1_RES;
-	*skb_push(skb, 1) = skb->len;
+	*(u8 *)skb_push(skb, 1) = info->dep_info.curr_nfc_dep_pni;
+	*(u8 *)skb_push(skb, 1) = ST21NFCA_NFCIP1_DEP_RES;
+	*(u8 *)skb_push(skb, 1) = ST21NFCA_NFCIP1_RES;
+	*(u8 *)skb_push(skb, 1) = skb->len;
 
 	r = nfc_hci_send_event(hdev, ST21NFCA_RF_CARD_F_GATE,
 			ST21NFCA_EVT_SEND_DATA, skb->data, skb->len);
@@ -466,7 +466,7 @@ static void st21nfca_im_send_psl_req(struct nfc_hci_dev *hdev, u8 did, u8 bsi,
 	psl_req->brs = (0x30 & bsi << 4) | (bri & 0x03);
 	psl_req->fsl = lri;
 
-	*skb_push(skb, 1) = info->dep_info.to | 0x10;
+	*(u8 *)skb_push(skb, 1) = info->dep_info.to | 0x10;
 
 	st21nfca_im_send_pdu(info, skb);
 }
@@ -568,7 +568,7 @@ int st21nfca_im_send_atr_req(struct nfc_hci_dev *hdev, u8 *gb, size_t gb_len)
 	}
 	atr_req->length = sizeof(struct st21nfca_atr_req) + hdev->gb_len;
 
-	*skb_push(skb, 1) = info->dep_info.to | 0x10; /* timeout */
+	*(u8 *)skb_push(skb, 1) = info->dep_info.to | 0x10; /* timeout */
 
 	info->async_cb_type = ST21NFCA_CB_TYPE_READER_F;
 	info->async_cb_context = info;
@@ -629,10 +629,10 @@ static void st21nfca_im_recv_dep_res_cb(void *context, struct sk_buff *skb,
 		case ST21NFCA_NFC_DEP_PFB_SUPERVISOR_PDU:
 			pr_err("Received a SUPERVISOR PDU\n");
 			skb_pull(skb, size);
-			*skb_push(skb, 1) = ST21NFCA_NFCIP1_DEP_REQ;
-			*skb_push(skb, 1) = ST21NFCA_NFCIP1_REQ;
-			*skb_push(skb, 1) = skb->len;
-			*skb_push(skb, 1) = info->dep_info.to | 0x10;
+			*(u8 *)skb_push(skb, 1) = ST21NFCA_NFCIP1_DEP_REQ;
+			*(u8 *)skb_push(skb, 1) = ST21NFCA_NFCIP1_REQ;
+			*(u8 *)skb_push(skb, 1) = skb->len;
+			*(u8 *)skb_push(skb, 1) = info->dep_info.to | 0x10;
 
 			st21nfca_im_send_pdu(info, skb);
 			break;
@@ -655,12 +655,12 @@ int st21nfca_im_send_dep_req(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 	info->async_cb_context = info;
 	info->async_cb = st21nfca_im_recv_dep_res_cb;
 
-	*skb_push(skb, 1) = info->dep_info.curr_nfc_dep_pni;
-	*skb_push(skb, 1) = ST21NFCA_NFCIP1_DEP_REQ;
-	*skb_push(skb, 1) = ST21NFCA_NFCIP1_REQ;
-	*skb_push(skb, 1) = skb->len;
+	*(u8 *)skb_push(skb, 1) = info->dep_info.curr_nfc_dep_pni;
+	*(u8 *)skb_push(skb, 1) = ST21NFCA_NFCIP1_DEP_REQ;
+	*(u8 *)skb_push(skb, 1) = ST21NFCA_NFCIP1_REQ;
+	*(u8 *)skb_push(skb, 1) = skb->len;
 
-	*skb_push(skb, 1) = info->dep_info.to | 0x10;
+	*(u8 *)skb_push(skb, 1) = info->dep_info.to | 0x10;
 
 	return nfc_hci_send_cmd_async(hdev, ST21NFCA_RF_READER_F_GATE,
 				      ST21NFCA_WR_XCHG_DATA,
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index c36f0e0afdfd..396cdafb3e36 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -171,7 +171,7 @@ static void st21nfca_hci_add_len_crc(struct sk_buff *skb)
 	u16 crc;
 	u8 tmp;
 
-	*skb_push(skb, 1) = 0;
+	*(u8 *)skb_push(skb, 1) = 0;
 
 	crc = crc_ccitt(0xffff, skb->data, skb->len);
 	crc = ~crc;
@@ -216,7 +216,7 @@ static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb)
 	/* add ST21NFCA_SOF_EOF on tail */
 	*(u8 *)skb_put(skb, 1) = ST21NFCA_SOF_EOF;
 	/* add ST21NFCA_SOF_EOF on head */
-	*skb_push(skb, 1) = ST21NFCA_SOF_EOF;
+	*(u8 *)skb_push(skb, 1) = ST21NFCA_SOF_EOF;
 
 	/*
 	 * Compute byte stuffing
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 70b633f951ea..c6bc63b8b295 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -759,8 +759,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
 						sizeof(struct qeth_hdr));
 			if (!new_skb)
 				goto tx_drop;
-			hdr = (struct qeth_hdr *)skb_push(new_skb,
-						sizeof(struct qeth_hdr));
+			hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
 			skb_set_mac_header(new_skb, sizeof(struct qeth_hdr));
 			qeth_l2_fill_header(card, hdr, new_skb, cast_type);
 			if (new_skb->ip_summed == CHECKSUM_PARTIAL)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 37b594231b76..3062cde33a3d 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2729,16 +2729,14 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 	}
 
 	if (use_tso) {
-		hdr = (struct qeth_hdr *)skb_push(new_skb,
-						sizeof(struct qeth_hdr_tso));
+		hdr = skb_push(new_skb, sizeof(struct qeth_hdr_tso));
 		memset(hdr, 0, sizeof(struct qeth_hdr_tso));
 		qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type);
 		qeth_tso_fill_header(card, hdr, new_skb);
 		hdr_elements++;
 	} else {
 		if (data_offset < 0) {
-			hdr = (struct qeth_hdr *)skb_push(new_skb,
-						sizeof(struct qeth_hdr));
+			hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
 			qeth_l3_fill_header(card, hdr, new_skb, ipv,
 						cast_type);
 		} else {
diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
index 1880eb6c68f7..7b09e7ddf35e 100644
--- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
+++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
@@ -354,7 +354,7 @@ static inline void make_tx_data_wr(struct cxgbi_sock *csk, struct sk_buff *skb,
 	struct l2t_entry *l2t = csk->l2t;
 
 	skb_reset_transport_header(skb);
-	req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
+	req = __skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA) |
 			(req_completion ? F_WR_COMPL : 0));
 	req->wr_lo = htonl(V_WR_TID(csk->tid));
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 397094b8bad6..5485d68f286a 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -644,7 +644,7 @@ static inline void make_tx_data_wr(struct cxgbi_sock *csk, struct sk_buff *skb,
 	unsigned int wr_ulp_mode = 0, val;
 	bool imm = is_ofld_imm(skb);
 
-	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
+	req = __skb_push(skb, sizeof(*req));
 
 	if (imm) {
 		req->op_to_immdlen = htonl(FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index e17bdb3adf9e..fff6f1851dc1 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -626,7 +626,7 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, struct fc_lport *lport,
 	fh = (struct fc_frame_header *)skb->data;
 	op = *(u8 *)(fh + 1);
 	dlen = sizeof(struct fip_encaps) + skb->len;	/* len before push */
-	cap = (struct fip_encaps_head *)skb_push(skb, sizeof(*cap));
+	cap = skb_push(skb, sizeof(*cap));
 	memset(cap, 0, sizeof(*cap));
 
 	if (lport->point_to_multipoint) {
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
index e3b964b7235a..e72becaad8a5 100644
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -1000,8 +1000,7 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
 
 	if (!fnic->vlan_hw_insert) {
 		eth_hdr = (struct ethhdr *)skb_mac_header(skb);
-		vlan_hdr = (struct vlan_ethhdr *)skb_push(skb,
-				sizeof(*vlan_hdr) - sizeof(*eth_hdr));
+		vlan_hdr = skb_push(skb, sizeof(*vlan_hdr) - sizeof(*eth_hdr));
 		memcpy(vlan_hdr, eth_hdr, 2 * ETH_ALEN);
 		vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
 		vlan_hdr->h_vlan_encapsulated_proto = eth_hdr->h_proto;
@@ -1067,7 +1066,7 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
 
 	if (!fnic->vlan_hw_insert) {
 		eth_hdr_len = sizeof(*vlan_hdr) + sizeof(*fcoe_hdr);
-		vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, eth_hdr_len);
+		vlan_hdr = skb_push(skb, eth_hdr_len);
 		eth_hdr = (struct ethhdr *)vlan_hdr;
 		vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
 		vlan_hdr->h_vlan_encapsulated_proto = htons(ETH_P_FCOE);
@@ -1075,7 +1074,7 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
 		fcoe_hdr = (struct fcoe_hdr *)(vlan_hdr + 1);
 	} else {
 		eth_hdr_len = sizeof(*eth_hdr) + sizeof(*fcoe_hdr);
-		eth_hdr = (struct ethhdr *)skb_push(skb, eth_hdr_len);
+		eth_hdr = skb_push(skb, eth_hdr_len);
 		eth_hdr->h_proto = htons(ETH_P_FCOE);
 		fcoe_hdr = (struct fcoe_hdr *)(eth_hdr + 1);
 	}
diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c
index e10b91cc3c62..0d4bf70803ae 100644
--- a/drivers/scsi/qedf/qedf_fip.c
+++ b/drivers/scsi/qedf/qedf_fip.c
@@ -125,8 +125,7 @@ void qedf_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	sub = fiph->fip_subcode;
 
 	if (!qedf->vlan_hw_insert) {
-		vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, sizeof(*vlan_hdr)
-		    - sizeof(*eth_hdr));
+		vlan_hdr = skb_push(skb, sizeof(*vlan_hdr) - sizeof(*eth_hdr));
 		memcpy(vlan_hdr, eth_hdr, 2 * ETH_ALEN);
 		vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
 		vlan_hdr->h_vlan_encapsulated_proto = eth_hdr->h_proto;
diff --git a/drivers/staging/wilc1000/linux_mon.c b/drivers/staging/wilc1000/linux_mon.c
index dbc266a37974..01efa80b4f88 100644
--- a/drivers/staging/wilc1000/linux_mon.c
+++ b/drivers/staging/wilc1000/linux_mon.c
@@ -74,7 +74,7 @@ void WILC_WFI_monitor_rx(u8 *buff, u32 size)
 
 		skb_put_data(skb, buff, size);
 
-		cb_hdr = (struct wilc_wfi_radiotap_cb_hdr *)skb_push(skb, sizeof(*cb_hdr));
+		cb_hdr = skb_push(skb, sizeof(*cb_hdr));
 		memset(cb_hdr, 0, sizeof(struct wilc_wfi_radiotap_cb_hdr));
 
 		cb_hdr->hdr.it_version = 0; /* PKTHDR_RADIOTAP_VERSION; */
@@ -101,7 +101,7 @@ void WILC_WFI_monitor_rx(u8 *buff, u32 size)
 			return;
 
 		skb_put_data(skb, buff, size);
-		hdr = (struct wilc_wfi_radiotap_hdr *)skb_push(skb, sizeof(*hdr));
+		hdr = skb_push(skb, sizeof(*hdr));
 		memset(hdr, 0, sizeof(struct wilc_wfi_radiotap_hdr));
 		hdr->hdr.it_version = 0; /* PKTHDR_RADIOTAP_VERSION; */
 		hdr->hdr.it_len = cpu_to_le16(sizeof(struct wilc_wfi_radiotap_hdr));
@@ -202,7 +202,7 @@ static netdev_tx_t WILC_WFI_mon_xmit(struct sk_buff *skb,
 
 		skb_put_data(skb2, skb->data, skb->len);
 
-		cb_hdr = (struct wilc_wfi_radiotap_cb_hdr *)skb_push(skb2, sizeof(*cb_hdr));
+		cb_hdr = skb_push(skb2, sizeof(*cb_hdr));
 		memset(cb_hdr, 0, sizeof(struct wilc_wfi_radiotap_cb_hdr));
 
 		cb_hdr->hdr.it_version = 0; /* PKTHDR_RADIOTAP_VERSION; */
diff --git a/drivers/staging/wlan-ng/p80211conv.c b/drivers/staging/wlan-ng/p80211conv.c
index a062e80361ef..fc8ad33ade9f 100644
--- a/drivers/staging/wlan-ng/p80211conv.c
+++ b/drivers/staging/wlan-ng/p80211conv.c
@@ -148,9 +148,7 @@ int skb_ether_to_p80211(struct wlandevice *wlandev, u32 ethconv,
 			skb_pull(skb, ETH_HLEN);
 
 			/* tack on SNAP */
-			e_snap =
-			    (struct wlan_snap *)skb_push(skb,
-				sizeof(struct wlan_snap));
+			e_snap = skb_push(skb, sizeof(struct wlan_snap));
 			e_snap->type = htons(proto);
 			if (ethconv == WLAN_ETHCONV_8021h &&
 			    p80211_stt_findproto(proto)) {
@@ -162,9 +160,7 @@ int skb_ether_to_p80211(struct wlandevice *wlandev, u32 ethconv,
 			}
 
 			/* tack on llc */
-			e_llc =
-			    (struct wlan_llc *)skb_push(skb,
-				sizeof(struct wlan_llc));
+			e_llc = skb_push(skb, sizeof(struct wlan_llc));
 			e_llc->dsap = 0xAA;	/* SNAP, see IEEE 802 */
 			e_llc->ssap = 0xAA;
 			e_llc->ctl = 0x03;
@@ -407,7 +403,7 @@ int skb_p80211_to_ether(struct wlandevice *wlandev, u32 ethconv,
 		skb_pull(skb, payload_offset);
 
 		/* create 802.3 header at beginning of skb. */
-		e_hdr = (struct wlan_ethhdr *)skb_push(skb, ETH_HLEN);
+		e_hdr = skb_push(skb, ETH_HLEN);
 		ether_addr_copy(e_hdr->daddr, daddr);
 		ether_addr_copy(e_hdr->saddr, saddr);
 		e_hdr->type = htons(payload_length);
@@ -448,7 +444,7 @@ int skb_p80211_to_ether(struct wlandevice *wlandev, u32 ethconv,
 		skb_pull(skb, sizeof(struct wlan_snap));
 
 		/* create 802.3 header at beginning of skb. */
-		e_hdr = (struct wlan_ethhdr *)skb_push(skb, ETH_HLEN);
+		e_hdr = skb_push(skb, ETH_HLEN);
 		e_hdr->type = e_snap->type;
 		ether_addr_copy(e_hdr->daddr, daddr);
 		ether_addr_copy(e_hdr->saddr, saddr);
@@ -475,7 +471,7 @@ int skb_p80211_to_ether(struct wlandevice *wlandev, u32 ethconv,
 		skb_pull(skb, payload_offset);
 
 		/* create 802.3 header at beginning of skb. */
-		e_hdr = (struct wlan_ethhdr *)skb_push(skb, ETH_HLEN);
+		e_hdr = skb_push(skb, ETH_HLEN);
 		ether_addr_copy(e_hdr->daddr, daddr);
 		ether_addr_copy(e_hdr->saddr, saddr);
 		e_hdr->type = htons(payload_length);
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c
index bdcc8b4c522a..dda13f1af38e 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_target.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c
@@ -136,7 +136,7 @@ cxgbit_cpl_tx_data_iso(struct sk_buff *skb, struct cxgbit_iso_info *iso_info)
 	unsigned int fslice = !!(iso_info->flags & CXGBIT_ISO_FSLICE);
 	unsigned int lslice = !!(iso_info->flags & CXGBIT_ISO_LSLICE);
 
-	cpl = (struct cpl_tx_data_iso *)__skb_push(skb, sizeof(*cpl));
+	cpl = __skb_push(skb, sizeof(*cpl));
 
 	cpl->op_to_scsi = htonl(CPL_TX_DATA_ISO_OP_V(CPL_TX_DATA_ISO) |
 			CPL_TX_DATA_ISO_FIRST_V(fslice) |
@@ -183,8 +183,7 @@ cxgbit_tx_data_wr(struct cxgbit_sock *csk, struct sk_buff *skb, u32 dlen,
 	if (cxgbit_is_ofld_imm(skb))
 		immlen += dlen;
 
-	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb,
-							hdr_size);
+	req = __skb_push(skb, hdr_size);
 	req->op_to_immdlen = cpu_to_be32(FW_WR_OP_V(opcode) |
 					FW_WR_COMPL_V(compl) |
 					FW_WR_IMMDLEN_V(immlen));
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index a3b5e468b116..d6341045c631 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -999,7 +999,7 @@ void rndis_add_hdr(struct sk_buff *skb)
 
 	if (!skb)
 		return;
-	header = (void *)skb_push(skb, sizeof(*header));
+	header = skb_push(skb, sizeof(*header));
 	memset(header, 0, sizeof *header);
 	header->MessageType = cpu_to_le32(RNDIS_MSG_PACKET);
 	header->MessageLength = cpu_to_le32(skb->len);
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 283dc2f5364d..5e6a2d4dc366 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -318,7 +318,7 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
 	if (skb_cow_head(skb, VLAN_HLEN) < 0)
 		return -ENOMEM;
 
-	veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);
+	veth = skb_push(skb, VLAN_HLEN);
 
 	/* Move the mac addresses to the beginning of the new header. */
 	memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac9d10dadd1a..46bd514e719c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1923,8 +1923,8 @@ static inline void *skb_put_data(struct sk_buff *skb, const void *data,
 	return tmp;
 }
 
-unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
-static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+void *skb_push(struct sk_buff *skb, unsigned int len);
+static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
 {
 	skb->data -= len;
 	skb->len  += len;
@@ -2951,8 +2951,7 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
  *	that the checksum difference is zero (e.g., a valid IP header)
  *	or you are setting ip_summed to CHECKSUM_NONE.
  */
-static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
-					    unsigned int len)
+static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len)
 {
 	skb_push(skb, len);
 	skb_postpush_rcsum(skb, skb->data, len);
diff --git a/net/802/fc.c b/net/802/fc.c
index 1bb496ea997e..058a9f708918 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -49,7 +49,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 		struct fcllc *fcllc;
 
 		hdr_len = sizeof(struct fch_hdr) + sizeof(struct fcllc);
-		fch = (struct fch_hdr *)skb_push(skb, hdr_len);
+		fch = skb_push(skb, hdr_len);
 		fcllc = (struct fcllc *)(fch+1);
 		fcllc->dsap = fcllc->ssap = EXTENDED_SAP;
 		fcllc->llc = UI_CMD;
@@ -59,7 +59,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 	else
 	{
 		hdr_len = sizeof(struct fch_hdr);
-		fch = (struct fch_hdr *)skb_push(skb, hdr_len);
+		fch = skb_push(skb, hdr_len);
 	}
 
 	if(saddr)
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 6356623fc238..90f1416567a1 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -58,7 +58,7 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
 
 	if(type != ETH_P_IP && type != ETH_P_IPV6 && type != ETH_P_ARP)
 		hl=FDDI_K_8022_HLEN-3;
-	fddi = (struct fddihdr *)skb_push(skb, hl);
+	fddi = skb_push(skb, hl);
 	fddi->fc			 = FDDI_FC_K_ASYNC_LLC_DEF;
 	if(type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
 	{
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 4460606e9c36..690308b9b94a 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -47,7 +47,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 			unsigned short type,
 			const void *daddr, const void *saddr, unsigned int len)
 {
-	struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
+	struct hippi_hdr *hip = skb_push(skb, HIPPI_HLEN);
 	struct hippi_cb *hcb = (struct hippi_cb *) skb->cb;
 
 	if (!len){
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c1742322f7d2..f7e83f6d2e64 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -58,7 +58,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	int rc;
 
 	if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
-		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
+		vhdr = skb_push(skb, VLAN_HLEN);
 
 		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c7af6dc70fa2..5d035c1f1156 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1529,7 +1529,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		 * The push leaves us with a ddephdr not an shdr, and
 		 * handily the port bytes in the right place preset.
 		 */
-		ddp = (struct ddpehdr *) skb_push(skb, sizeof(*ddp) - 4);
+		ddp = skb_push(skb, sizeof(*ddp) - 4);
 
 		/* Now fill in the long header */
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index b7c486752b3a..0c92ba0cbe0b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1562,7 +1562,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
 	/* Add the PID if one is not supplied by the user in the skb */
 	if (!ax25->pidincl)
-		*skb_push(skb, 1) = sk->sk_protocol;
+		*(u8 *)skb_push(skb, 1) = sk->sk_protocol;
 
 	if (sk->sk_type == SOCK_SEQPACKET) {
 		/* Connected mode sockets go via the LAPB machine */
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1301a8786d8d..cdb5c1a7481e 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -332,7 +332,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 
 	/* Put header before the data */
-	hdr = (void *)skb_push(skb_copy, HCI_MON_HDR_SIZE);
+	hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE);
 	hdr->opcode = opcode;
 	hdr->index = cpu_to_le16(hdev->id);
 	hdr->len = cpu_to_le16(skb->len);
@@ -383,7 +383,7 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
 
 		skb->tstamp = tstamp;
 
-		hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+		hdr = skb_push(skb, HCI_MON_HDR_SIZE);
 		hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT);
 		hdr->index = index;
 		hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
@@ -467,7 +467,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 
 	__net_timestamp(skb);
 
-	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr = skb_push(skb, HCI_MON_HDR_SIZE);
 	hdr->opcode = opcode;
 	hdr->index = cpu_to_le16(hdev->id);
 	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
@@ -522,7 +522,7 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
 
 	__net_timestamp(skb);
 
-	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr = skb_push(skb, HCI_MON_HDR_SIZE);
 	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN);
 	if (hci_pi(sk)->hdev)
 		hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id);
@@ -560,7 +560,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk)
 
 	__net_timestamp(skb);
 
-	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr = skb_push(skb, HCI_MON_HDR_SIZE);
 	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE);
 	if (hci_pi(sk)->hdev)
 		hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id);
@@ -590,7 +590,7 @@ static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index,
 
 	__net_timestamp(skb);
 
-	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr = skb_push(skb, HCI_MON_HDR_SIZE);
 	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND);
 	hdr->index = cpu_to_le16(index);
 	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index d057113e0d4b..0d0a6d77b9e8 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -48,7 +48,7 @@ static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie,
 
 	__net_timestamp(skb);
 
-	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr = skb_push(skb, HCI_MON_HDR_SIZE);
 	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT);
 	hdr->index = index;
 	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 1a9b906c5a35..4a0b41d75c84 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1149,10 +1149,10 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
 	u8 *crc;
 
 	if (len > 127) {
-		hdr = (void *) skb_push(skb, 4);
+		hdr = skb_push(skb, 4);
 		put_unaligned(cpu_to_le16(__len16(len)), (__le16 *) &hdr->len);
 	} else {
-		hdr = (void *) skb_push(skb, 3);
+		hdr = skb_push(skb, 3);
 		hdr->len = __len8(len);
 	}
 	hdr->addr = addr;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 15bf0c5322ab..a05775afa44b 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -28,7 +28,7 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
 {
 	struct ethhdr *eth;
 
-	eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN);
+	eth = skb_push(nskb, ETH_HLEN);
 	skb_reset_mac_header(nskb);
 	ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest);
 	ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 29be2466970c..37c1e34ddd85 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -441,7 +441,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 		ip6h->saddr = np->local_ip.in6;
 		ip6h->daddr = np->remote_ip.in6;
 
-		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+		eth = skb_push(skb, ETH_HLEN);
 		skb_reset_mac_header(skb);
 		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
 	} else {
@@ -470,7 +470,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 		put_unaligned(np->remote_ip.ip, &(iph->daddr));
 		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
-		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+		eth = skb_push(skb, ETH_HLEN);
 		skb_reset_mac_header(skb);
 		skb->protocol = eth->h_proto = htons(ETH_P_IP);
 	}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8bcf9021329..2dd42c5b0366 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2675,7 +2675,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
 				goto err;
 			}
 			/* restore ll */
-			eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+			eth = skb_push(skb, ETH_HLEN);
 			memcpy(eth, pkt_dev->hh, 2 * ETH_ALEN);
 			eth->h_proto = protocol;
 
@@ -2844,7 +2844,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb_reserve(skb, 16);
 
 	/*  Reserve for ethernet and IP header  */
-	eth = (__u8 *) skb_push(skb, 14);
+	eth = skb_push(skb, 14);
 	mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32));
 	if (pkt_dev->nr_labels)
 		mpls_push(mpls, pkt_dev);
@@ -2972,7 +2972,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb_reserve(skb, 16);
 
 	/*  Reserve for ethernet and IP header  */
-	eth = (__u8 *) skb_push(skb, 14);
+	eth = skb_push(skb, 14);
 	mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32));
 	if (pkt_dev->nr_labels)
 		mpls_push(mpls, pkt_dev);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9a1639f7d61a..f75897a33fa4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1461,7 +1461,7 @@ EXPORT_SYMBOL(skb_put);
  *	start. If this would exceed the total buffer headroom the kernel will
  *	panic. A pointer to the first byte of the extra data is returned.
  */
-unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+void *skb_push(struct sk_buff *skb, unsigned int len)
 {
 	skb->data -= len;
 	skb->len  += len;
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 74d29c56c367..51cdfc3bd8ca 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -484,7 +484,7 @@ int dccp_insert_option_mandatory(struct sk_buff *skb)
 		return -1;
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len++;
-	*skb_push(skb, 1) = DCCPO_MANDATORY;
+	*(u8 *)skb_push(skb, 1) = DCCPO_MANDATORY;
 	return 0;
 }
 
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 1d84f6dae315..fa0110b57ca1 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -867,7 +867,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	msg->datalen = 0x02;
 	memset(msg->data, 0xAA, 2);
 
-	pktlen = (__le16 *)skb_push(skb,2);
+	pktlen = skb_push(skb, 2);
 	*pktlen = cpu_to_le16(skb->len - 2);
 
 	skb_reset_network_header(skb);
@@ -959,7 +959,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 
 	skb_trim(skb, (27 + *i2));
 
-	pktlen = (__le16 *)skb_push(skb, 2);
+	pktlen = skb_push(skb, 2);
 	*pktlen = cpu_to_le16(skb->len - 2);
 
 	skb_reset_network_header(skb);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 1446810047f5..eaeba9b99a73 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -83,7 +83,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev,
 	       unsigned short type,
 	       const void *daddr, const void *saddr, unsigned int len)
 {
-	struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+	struct ethhdr *eth = skb_push(skb, ETH_HLEN);
 
 	if (type != ETH_P_802_3 && type != ETH_P_802_2)
 		eth->h_proto = htons(type);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d815d1755473..1f18b4650253 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -609,7 +609,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 	 * decryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)skb_push(skb, 4);
+		esph = skb_push(skb, 4);
 		*seqhi = esph->spi;
 		esph->spi = esph->seq_no;
 		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e90c80a548ad..41394a4b9af9 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -592,7 +592,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 	struct iphdr *iph;
 	struct gre_base_hdr *greh;
 
-	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
+	iph = skb_push(skb, t->hlen + sizeof(*iph));
 	greh = (struct gre_base_hdr *)(iph+1);
 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
 	greh->protocol = htons(type);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2ede4e459c4e..d8b40ff4b2e6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -538,7 +538,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 	 * decryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)skb_push(skb, 4);
+		esph = skb_push(skb, 4);
 		*seqhi = esph->spi;
 		esph->spi = esph->seq_no;
 		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index b636f1da9aec..0460af226011 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -847,7 +847,7 @@ static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
 
 	ihdr = (struct rt0_hdr *) opt;
 
-	phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
+	phdr = skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
 	memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
 
 	hops = ihdr->rt_hdr.hdrlen >> 1;
@@ -873,7 +873,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
 	sr_ihdr = (struct ipv6_sr_hdr *)opt;
 	plen = (sr_ihdr->hdrlen + 1) << 3;
 
-	sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen);
+	sr_phdr = skb_push(skb, plen);
 	memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr));
 
 	hops = sr_ihdr->first_segment + 1;
@@ -923,7 +923,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 
 static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
 {
-	struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
+	struct ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt));
 
 	memcpy(h, opt, ipv6_optlen(opt));
 	h->nexthdr = *proto;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 64eea3962733..e0e726c338a7 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -942,7 +942,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
 			const void *daddr, const void *saddr, unsigned int len)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+	struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
 	__be16 *p = (__be16 *)(ipv6h+1);
 
 	ip6_flow_hdr(ipv6h, 0,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0d6f3b6345de..8b8efb0e55bf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -682,7 +682,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		skb_frag_list_init(skb);
 
 		__skb_pull(skb, hlen);
-		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
+		fh = __skb_push(skb, sizeof(struct frag_hdr));
 		__skb_push(skb, hlen);
 		skb_reset_network_header(skb);
 		memcpy(skb_network_header(skb), tmp_hdr, hlen);
@@ -706,7 +706,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
 				skb_reset_transport_header(frag);
-				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
+				fh = __skb_push(frag, sizeof(struct frag_hdr));
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
 				memcpy(skb_network_header(frag), tmp_hdr,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 84ad50218255..6264917fe4c7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -789,7 +789,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 
 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
 
-	t1 = (struct tcphdr *) skb_push(buff, tot_len);
+	t1 = skb_push(buff, tot_len);
 	skb_reset_transport_header(buff);
 
 	/* Swap the send and the receive. */
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index 7f17a8020e8a..e390bceeb2f8 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -1065,7 +1065,7 @@ irnet_data_indication(void *	instance,
   if(p[0] & 1)
     {
       /* protocol is compressed */
-      skb_push(skb, 1)[0] = 0;
+      *(u8 *)skb_push(skb, 1) = 0;
     }
   else
     if(skb->len < 2)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 84de7b6326dc..2cf9d59f1b72 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -322,8 +322,7 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 	int err, confirm_recv = 0;
 
 	memset(skb->head, 0, ETH_HLEN);
-	phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb,
-					sizeof(struct af_iucv_trans_hdr));
+	phs_hdr = skb_push(skb, sizeof(struct af_iucv_trans_hdr));
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	skb_push(skb, ETH_HLEN);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 53b00bb52095..70e9d2ca8bbe 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -273,7 +273,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)))
 		mpdulen += FCS_LEN;
 
-	rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
+	rthdr = skb_push(skb, rtap_len);
 	memset(rthdr, 0, rtap_len - rtap.len - rtap.pad);
 	it_present = &rthdr->it_present;
 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index a9fa6ee57e8f..da7427a41529 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -288,7 +288,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
 	unsigned char *pos;
 	u16 txflags;
 
-	rthdr = (struct ieee80211_radiotap_header *) skb_push(skb, rtap_len);
+	rthdr = skb_push(skb, rtap_len);
 
 	memset(rthdr, 0, rtap_len);
 	rthdr->it_len = cpu_to_le16(rtap_len);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ec5a9a72797e..8858f4f185e9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2708,7 +2708,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	if (ieee80211_is_data_qos(fc)) {
 		__le16 *qos_control;
 
-		qos_control = (__le16 *) skb_push(skb, 2);
+		qos_control = skb_push(skb, 2);
 		memcpy(skb_push(skb, hdrlen - 2), &hdr, hdrlen - 2);
 		/*
 		 * Maybe we could actually set some fields here, for now just
@@ -3347,7 +3347,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
 	}
 
 	memcpy(&eth, skb->data, ETH_HLEN - 2);
-	hdr = (void *)skb_push(skb, extra_head);
+	hdr = skb_push(skb, extra_head);
 	memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len);
 	memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
 	memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index b010ae94175b..5e03ed190e18 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -331,7 +331,7 @@ int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
 	}
 
 	/* Fill the ethernet header */
-	eh = (struct ethhdr *)skb_push(nr->cmd, sizeof(*eh));
+	eh = skb_push(nr->cmd, sizeof(*eh));
 	eh->h_proto = htons(ETH_P_NCSI);
 	eth_broadcast_addr(eh->h_dest);
 	eth_broadcast_addr(eh->h_source);
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 82471af5553e..f948fc2099d2 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -185,7 +185,7 @@ static void digital_skb_push_dep_sod(struct nfc_digital_dev *ddev,
 	skb->data[0] = skb->len;
 
 	if (ddev->curr_rf_tech == NFC_DIGITAL_RF_TECH_106A)
-		*skb_push(skb, sizeof(u8)) = DIGITAL_NFC_DEP_NFCA_SOD_SB;
+		*(u8 *)skb_push(skb, sizeof(u8)) = DIGITAL_NFC_DEP_NFCA_SOD_SB;
 }
 
 static int digital_skb_pull_dep_sod(struct nfc_digital_dev *ddev,
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index fae6d31b377c..492204e440ec 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -828,7 +828,7 @@ int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech)
 	sensf_req->rc = 0;
 	sensf_req->tsn = 0;
 
-	*skb_push(skb, 1) = size + 1;
+	*(u8 *)skb_push(skb, 1) = size + 1;
 
 	if (!DIGITAL_DRV_CAPS_IN_CRC(ddev))
 		digital_skb_add_crc_f(skb);
@@ -1161,7 +1161,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
 		break;
 	}
 
-	*skb_push(skb, sizeof(u8)) = size + 1;
+	*(u8 *)skb_push(skb, sizeof(u8)) = size + 1;
 
 	if (!DIGITAL_DRV_CAPS_TG_CRC(ddev))
 		digital_skb_add_crc_f(skb);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 3a0c94590411..7b2bdda1514c 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -727,7 +727,7 @@ static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
 				break;
 		}
 
-		*skb_push(skb, 1) = 0;	/* CTR, see spec:10.2.2.1 */
+		*(u8 *)skb_push(skb, 1) = 0;	/* CTR, see spec:10.2.2.1 */
 
 		hdev->async_cb_type = HCI_CB_TYPE_TRANSCEIVE;
 		hdev->async_cb = cb;
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 9ab4a05f086f..5bd4529279f5 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -160,7 +160,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
 	if (skb == NULL)
 		return -ENOMEM;
 
-	*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr;
+	*(u8 *)skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr;
 
 	r = shdlc->xmit_to_drv(shdlc->hdev, skb);
 
@@ -178,7 +178,7 @@ static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
 
 	pr_debug("uframe_modifier=%d\n", uframe_modifier);
 
-	*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier;
+	*(u8 *)skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier;
 
 	r = shdlc->xmit_to_drv(shdlc->hdev, skb);
 
@@ -551,8 +551,8 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
 
 		skb = skb_dequeue(&shdlc->send_q);
 
-		*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) |
-				    shdlc->nr;
+		*(u8 *)skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) |
+					shdlc->nr;
 
 		pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns,
 			 shdlc->nr);
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 2488d9241f1d..908f25e3773e 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -81,7 +81,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
 	struct nci_data_hdr *hdr;
 	int plen = skb->len;
 
-	hdr = (struct nci_data_hdr *) skb_push(skb, NCI_DATA_HDR_SIZE);
+	hdr = skb_push(skb, NCI_DATA_HDR_SIZE);
 	hdr->conn_id = conn_id;
 	hdr->rfu = 0;
 	hdr->plen = plen;
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index d1119bb35f24..3f93df58d9f1 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -170,7 +170,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 		return -ENOMEM;
 
 	skb_reserve(skb, NCI_DATA_HDR_SIZE + 2);
-	*skb_push(skb, 1) = data_type;
+	*(u8 *)skb_push(skb, 1) = data_type;
 
 	do {
 		len = conn_info->max_pkt_payload_len;
@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 			len = conn_info->max_pkt_payload_len - skb->len - 1;
 		}
 
-		*skb_push(skb, 1) = cb;
+		*(u8 *)skb_push(skb, 1) = cb;
 
 		if (len > 0)
 			skb_put_data(skb, data + i, len);
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index a502a334918a..3b4512731a2f 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -238,8 +238,8 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi)
 		goto receive_error;
 
 	if (nspi->acknowledge_mode == NCI_SPI_CRC_ENABLED) {
-		*skb_push(skb, 1) = resp_hdr[1];
-		*skb_push(skb, 1) = resp_hdr[0];
+		*(u8 *)skb_push(skb, 1) = resp_hdr[1];
+		*(u8 *)skb_push(skb, 1) = resp_hdr[0];
 	}
 
 	return skb;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e386e6c90b17..e2188deb08dc 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -142,7 +142,7 @@ error:
 
 static int rawsock_add_header(struct sk_buff *skb)
 {
-	*skb_push(skb, NFC_HEADER_SIZE) = 0;
+	*(u8 *)skb_push(skb, NFC_HEADER_SIZE) = 0;
 
 	return 0;
 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index febcc350cf00..89cee1482d35 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -585,7 +585,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 	sctp_packet_set_owner_w(head, sk);
 
 	/* set sctp header */
-	sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+	sh = skb_push(head, sizeof(struct sctphdr));
 	skb_reset_transport_header(head);
 	sh->source = htons(packet->source_port);
 	sh->dest = htons(packet->destination_port);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index df73190da761..8feff96a5bef 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -770,8 +770,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 		auth.skb = chunk->auth_chunk;
 		auth.asoc = chunk->asoc;
 		auth.sctp_hdr = chunk->sctp_hdr;
-		auth.chunk_hdr = (sctp_chunkhdr_t *)skb_push(chunk->auth_chunk,
-					    sizeof(sctp_chunkhdr_t));
+		auth.chunk_hdr = skb_push(chunk->auth_chunk,
+					  sizeof(sctp_chunkhdr_t));
 		skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
 		auth.transport = chunk->transport;
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index e361f0b57fb6..17854fb0e512 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -153,8 +153,7 @@ struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
 		sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
 
 		/* Include the notification structure */
-		sac = (struct sctp_assoc_change *)
-			skb_push(skb, sizeof(struct sctp_assoc_change));
+		sac = skb_push(skb, sizeof(struct sctp_assoc_change));
 
 		/* Trim the buffer to the right length.  */
 		skb_trim(skb, sizeof(struct sctp_assoc_change) +
@@ -400,7 +399,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
 	event = sctp_skb2event(skb);
 	sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
 
-	sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre));
+	sre = skb_push(skb, sizeof(*sre));
 
 	/* Trim the buffer to the right length.  */
 	skb_trim(skb, sizeof(*sre) + elen);
@@ -451,8 +450,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
 	event = sctp_skb2event(skb);
 	sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
 
-	ssf = (struct sctp_send_failed *)
-		skb_push(skb, sizeof(struct sctp_send_failed));
+	ssf = skb_push(skb, sizeof(struct sctp_send_failed));
 
 	/* Socket Extensions for SCTP
 	 * 5.3.1.4 SCTP_SEND_FAILED
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 96613fe2c6b1..bcb1284c3415 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -522,7 +522,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	pskb_pull(skb, hdrlen);
 
 	if (!ehdr)
-		ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
+		ehdr = skb_push(skb, sizeof(struct ethhdr));
 	memcpy(ehdr, &tmp, sizeof(tmp));
 
 	return 0;
-- 
cgit v1.2.3


From 634fef61076d644b989b86abc2f560d81a089a31 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Jun 2017 14:29:24 +0200
Subject: networking: add and use skb_put_u8()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Joe and Bjørn suggested that it'd be nicer to not have the
cast in the fairly common case of doing
	*(u8 *)skb_put(skb, 1) = c;

Add skb_put_u8() for this case, and use it across the code,
using the following spatch:

    @@
    expression SKB, C, S;
    typedef u8;
    identifier fn = {skb_put};
    fresh identifier fn2 = fn ## "_u8";
    @@
    - *(u8 *)fn(SKB, S) = C;
    + fn2(SKB, C);

Note that due to the "S", the spatch isn't perfect, it should
have checked that S is 1, but there's also places that use a
sizeof expression like sizeof(var) or sizeof(u8) etc. Turns
out that nobody ever did something like
	*(u8 *)skb_put(skb, 2) = c;

which would be wrong anyway since the second byte wouldn't be
initialized.

Suggested-by: Joe Perches <joe@perches.com>
Suggested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bluetooth/bluecard_cs.c   |  2 +-
 drivers/bluetooth/bt3c_cs.c       |  2 +-
 drivers/bluetooth/btuart_cs.c     |  2 +-
 drivers/bluetooth/btusb.c         |  6 +++---
 drivers/bluetooth/dtl1_cs.c       |  4 ++--
 drivers/bluetooth/hci_bcm.c       |  6 +++---
 drivers/bluetooth/hci_intel.c     |  2 +-
 drivers/bluetooth/hci_nokia.c     |  2 +-
 drivers/bluetooth/hci_qca.c       |  2 +-
 drivers/bluetooth/hci_vhci.c      |  4 ++--
 drivers/isdn/capi/capi.c          |  4 ++--
 drivers/isdn/gigaset/asyncdata.c  | 22 +++++++++++-----------
 drivers/isdn/i4l/isdn_bsdcomp.c   |  7 ++++---
 drivers/isdn/i4l/isdn_x25iface.c  |  4 ++--
 drivers/net/hamradio/scc.c        |  4 ++--
 drivers/net/usb/cdc_ncm.c         |  2 +-
 drivers/net/usb/net1080.c         |  2 +-
 drivers/net/usb/zaurus.c          |  8 ++++----
 drivers/nfc/fdp/i2c.c             |  2 +-
 drivers/nfc/microread/i2c.c       |  4 ++--
 drivers/nfc/microread/microread.c |  4 ++--
 drivers/nfc/nfcmrvl/fw_dnld.c     |  4 ++--
 drivers/nfc/pn533/pn533.c         | 32 ++++++++++++++++----------------
 drivers/nfc/pn544/i2c.c           |  6 +++---
 drivers/nfc/port100.c             |  4 ++--
 drivers/nfc/st21nfca/i2c.c        |  6 +++---
 drivers/nfc/st95hf/core.c         |  2 +-
 include/linux/skbuff.h            |  5 +++++
 net/bluetooth/hci_sock.c          |  2 +-
 net/bluetooth/hidp/core.c         |  2 +-
 net/decnet/dn_nsp_out.c           | 12 ++++++------
 net/nfc/digital_core.c            |  4 ++--
 net/nfc/digital_dep.c             |  2 +-
 net/nfc/digital_technology.c      | 12 ++++++------
 net/nfc/hci/core.c                |  2 +-
 net/nfc/hci/llc_shdlc.c           |  4 ++--
 net/nfc/nci/hci.c                 |  2 +-
 net/nfc/nci/spi.c                 |  8 ++++----
 net/nfc/nci/uart.c                |  2 +-
 39 files changed, 106 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 39a05b0c8998..d4b0b655dde6 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -448,7 +448,7 @@ static void bluecard_receive(struct bluecard_info *info,
 
 		} else {
 
-			*(u8 *)skb_put(info->rx_skb, 1) = buf[i];
+			skb_put_u8(info->rx_skb, buf[i]);
 			info->rx_count--;
 
 			if (info->rx_count == 0) {
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index be2d431aa366..32dcac017395 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -282,7 +282,7 @@ static void bt3c_receive(struct bt3c_info *info)
 
 			__u8 x = inb(iobase + DATA_L);
 
-			*(u8 *)skb_put(info->rx_skb, 1) = x;
+			skb_put_u8(info->rx_skb, x);
 			inb(iobase + DATA_H);
 			info->rx_count--;
 
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index 80b64e9684a3..7df79bb12350 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -233,7 +233,7 @@ static void btuart_receive(struct btuart_info *info)
 
 		} else {
 
-			*(u8 *)skb_put(info->rx_skb, 1) = inb(iobase + UART_RX);
+			skb_put_u8(info->rx_skb, inb(iobase + UART_RX));
 			info->rx_count--;
 
 			if (info->rx_count == 0) {
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index ba207c787605..fa24d693af24 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -1844,7 +1844,7 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
 	evt->ncmd = 0x01;
 	evt->opcode = cpu_to_le16(opcode);
 
-	*(u8 *)skb_put(skb, 1) = 0x00;
+	skb_put_u8(skb, 0x00);
 
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 
@@ -2767,8 +2767,8 @@ static struct urb *alloc_diag_urb(struct hci_dev *hdev, bool enable)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	*(u8 *)skb_put(skb, 1) = 0xf0;
-	*(u8 *)skb_put(skb, 1) = enable;
+	skb_put_u8(skb, 0xf0);
+	skb_put_u8(skb, enable);
 
 	pipe = usb_sndbulkpipe(data->udev, data->diag_tx_ep->bEndpointAddress);
 
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 6c5a3aa566a4..2adfe4fade76 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -226,7 +226,7 @@ static void dtl1_receive(struct dtl1_info *info)
 			}
 		}
 
-		*(u8 *)skb_put(info->rx_skb, 1) = inb(iobase + UART_RX);
+		skb_put_u8(info->rx_skb, inb(iobase + UART_RX));
 		nsh = (struct nsh *)info->rx_skb->data;
 
 		info->rx_count--;
@@ -414,7 +414,7 @@ static int dtl1_hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 	skb_reserve(s, NSHL);
 	skb_copy_from_linear_data(skb, skb_put(s, skb->len), skb->len);
 	if (skb->len & 0x0001)
-		*(u8 *)skb_put(s, 1) = 0;	/* PAD */
+		skb_put_u8(s, 0);	/* PAD */
 
 	/* Prepend skb with Nokia frame header and queue */
 	memcpy(skb_push(s, NSHL), &nsh, NSHL);
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index c1c4048ee37d..d2e9e2d1b014 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -262,9 +262,9 @@ static int bcm_set_diag(struct hci_dev *hdev, bool enable)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = BCM_LM_DIAG_PKT;
-	*(u8 *)skb_put(skb, 1) = 0xf0;
-	*(u8 *)skb_put(skb, 1) = enable;
+	skb_put_u8(skb, BCM_LM_DIAG_PKT);
+	skb_put_u8(skb, 0xf0);
+	skb_put_u8(skb, enable);
 
 	skb_queue_tail(&bcm->txq, skb);
 	hci_uart_tx_wakeup(hu);
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index ee97c465e32e..aad07e40ea4f 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -470,7 +470,7 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
 	evt->ncmd = 0x01;
 	evt->opcode = cpu_to_le16(opcode);
 
-	*(u8 *)skb_put(skb, 1) = 0x00;
+	skb_put_u8(skb, 0x00);
 
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 
diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index 072a77a61e67..181a15b549e5 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -532,7 +532,7 @@ static int nokia_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 		err = skb_pad(skb, 1);
 		if (err)
 			return err;
-		*(u8 *)skb_put(skb, 1) = 0x00;
+		skb_put_u8(skb, 0x00);
 	}
 
 	skb_queue_tail(&btdev->txq, skb);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e2c88515340a..392f412b4575 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -215,7 +215,7 @@ static int send_hci_ibs_cmd(u8 cmd, struct hci_uart *hu)
 	}
 
 	/* Assign HCI_IBS type */
-	*(u8 *)skb_put(skb, 1) = cmd;
+	skb_put_u8(skb, cmd);
 
 	skb_queue_tail(&qca->txq, skb);
 
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 1ef9c427a2d8..e6f6dbc04131 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -146,8 +146,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
 
 	hci_skb_pkt_type(skb) = HCI_VENDOR_PKT;
 
-	*(u8 *)skb_put(skb, 1) = 0xff;
-	*(u8 *)skb_put(skb, 1) = opcode;
+	skb_put_u8(skb, 0xff);
+	skb_put_u8(skb, opcode);
 	put_unaligned_le16(hdev->id, skb_put(skb, 2));
 	skb_queue_tail(&data->readq, skb);
 
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 96f586d34d2d..dde8f46bc254 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1082,7 +1082,7 @@ static int capinc_tty_put_char(struct tty_struct *tty, unsigned char ch)
 	skb = mp->outskb;
 	if (skb) {
 		if (skb_tailroom(skb) > 0) {
-			*(u8 *)skb_put(skb, 1) = ch;
+			skb_put_u8(skb, ch);
 			goto unlock_out;
 		}
 		mp->outskb = NULL;
@@ -1094,7 +1094,7 @@ static int capinc_tty_put_char(struct tty_struct *tty, unsigned char ch)
 	skb = alloc_skb(CAPI_DATA_B3_REQ_LEN + CAPI_MAX_BLKSIZE, GFP_ATOMIC);
 	if (skb) {
 		skb_reserve(skb, CAPI_DATA_B3_REQ_LEN);
-		*(u8 *)skb_put(skb, 1) = ch;
+		skb_put_u8(skb, ch);
 		mp->outskb = skb;
 	} else {
 		printk(KERN_ERR "capinc_put_char: char %u lost\n", ch);
diff --git a/drivers/isdn/gigaset/asyncdata.c b/drivers/isdn/gigaset/asyncdata.c
index 03ac9fbfe318..4caecdcc6f29 100644
--- a/drivers/isdn/gigaset/asyncdata.c
+++ b/drivers/isdn/gigaset/asyncdata.c
@@ -492,33 +492,33 @@ static struct sk_buff *HDLC_Encode(struct sk_buff *skb)
 	hdlc_skb->mac_len = skb->mac_len;
 
 	/* Add flag sequence in front of everything.. */
-	*(u8 *)skb_put(hdlc_skb, 1) = PPP_FLAG;
+	skb_put_u8(hdlc_skb, PPP_FLAG);
 
 	/* Perform byte stuffing while copying data. */
 	while (skb->len--) {
 		if (muststuff(*skb->data)) {
-			*(u8 *)skb_put(hdlc_skb, 1) = PPP_ESCAPE;
-			*(u8 *)skb_put(hdlc_skb, 1) = (*skb->data++) ^ PPP_TRANS;
+			skb_put_u8(hdlc_skb, PPP_ESCAPE);
+			skb_put_u8(hdlc_skb, (*skb->data++) ^ PPP_TRANS);
 		} else
-			*(u8 *)skb_put(hdlc_skb, 1) = *skb->data++;
+			skb_put_u8(hdlc_skb, *skb->data++);
 	}
 
 	/* Finally add FCS (byte stuffed) and flag sequence */
 	c = (fcs & 0x00ff);	/* least significant byte first */
 	if (muststuff(c)) {
-		*(u8 *)skb_put(hdlc_skb, 1) = PPP_ESCAPE;
+		skb_put_u8(hdlc_skb, PPP_ESCAPE);
 		c ^= PPP_TRANS;
 	}
-	*(u8 *)skb_put(hdlc_skb, 1) = c;
+	skb_put_u8(hdlc_skb, c);
 
 	c = ((fcs >> 8) & 0x00ff);
 	if (muststuff(c)) {
-		*(u8 *)skb_put(hdlc_skb, 1) = PPP_ESCAPE;
+		skb_put_u8(hdlc_skb, PPP_ESCAPE);
 		c ^= PPP_TRANS;
 	}
-	*(u8 *)skb_put(hdlc_skb, 1) = c;
+	skb_put_u8(hdlc_skb, c);
 
-	*(u8 *)skb_put(hdlc_skb, 1) = PPP_FLAG;
+	skb_put_u8(hdlc_skb, PPP_FLAG);
 
 	dev_kfree_skb_any(skb);
 	return hdlc_skb;
@@ -561,8 +561,8 @@ static struct sk_buff *iraw_encode(struct sk_buff *skb)
 	while (len--) {
 		c = bitrev8(*cp++);
 		if (c == DLE_FLAG)
-			*(u8 *)skb_put(iraw_skb, 1) = c;
-		*(u8 *)skb_put(iraw_skb, 1) = c;
+			skb_put_u8(iraw_skb, c);
+		skb_put_u8(iraw_skb, c);
 	}
 	dev_kfree_skb_any(skb);
 	return iraw_skb;
diff --git a/drivers/isdn/i4l/isdn_bsdcomp.c b/drivers/isdn/i4l/isdn_bsdcomp.c
index 6ade0916da4e..3035210a6119 100644
--- a/drivers/isdn/i4l/isdn_bsdcomp.c
+++ b/drivers/isdn/i4l/isdn_bsdcomp.c
@@ -602,7 +602,8 @@ static int bsd_compress(void *state, struct sk_buff *skb_in, struct sk_buff *skb
 	 * Do not emit a completely useless byte of ones.
 	 */
 	if (bitno < 32 && skb_out && skb_tailroom(skb_out) > 0)
-		*(u8 *)skb_put(skb_out, 1) = (unsigned char)((accm | (0xff << (bitno - 8))) >> 24);
+		skb_put_u8(skb_out,
+			   (unsigned char)((accm | (0xff << (bitno - 8))) >> 24));
 
 	/*
 	 * Increase code size if we would have without the packet
@@ -698,7 +699,7 @@ static int bsd_decompress(void *state, struct sk_buff *skb_in, struct sk_buff *s
 	db->bytes_out += ilen;
 
 	if (skb_tailroom(skb_out) > 0)
-		*(u8 *)skb_put(skb_out, 1) = 0;
+		skb_put_u8(skb_out, 0);
 	else
 		return DECOMP_ERR_NOMEM;
 
@@ -816,7 +817,7 @@ static int bsd_decompress(void *state, struct sk_buff *skb_in, struct sk_buff *s
 #endif
 
 		if (extra)		/* the KwKwK case again */
-			*(u8 *)skb_put(skb_out, 1) = finchar;
+			skb_put_u8(skb_out, finchar);
 
 		/*
 		 * If not first code in a packet, and
diff --git a/drivers/isdn/i4l/isdn_x25iface.c b/drivers/isdn/i4l/isdn_x25iface.c
index e33fa3073f74..48bfbcb4a09d 100644
--- a/drivers/isdn/i4l/isdn_x25iface.c
+++ b/drivers/isdn/i4l/isdn_x25iface.c
@@ -224,7 +224,7 @@ static int isdn_x25iface_connect_ind(struct concap_proto *cprot)
 
 	skb = dev_alloc_skb(1);
 	if (skb) {
-		*(u8 *)skb_put(skb, 1) = X25_IFACE_CONNECT;
+		skb_put_u8(skb, X25_IFACE_CONNECT);
 		skb->protocol = x25_type_trans(skb, cprot->net_dev);
 		netif_rx(skb);
 		return 0;
@@ -253,7 +253,7 @@ static int isdn_x25iface_disconn_ind(struct concap_proto *cprot)
 	*state_p = WAN_DISCONNECTED;
 	skb = dev_alloc_skb(1);
 	if (skb) {
-		*(u8 *)skb_put(skb, 1) = X25_IFACE_DISCONNECT;
+		skb_put_u8(skb, X25_IFACE_DISCONNECT);
 		skb->protocol = x25_type_trans(skb, cprot->net_dev);
 		netif_rx(skb);
 		return 0;
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 140a209f22ab..295f267b73ea 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -540,7 +540,7 @@ static inline void scc_rxint(struct scc_channel *scc)
 		}
 		
 		scc->rx_buff = skb;
-		*(u8 *)skb_put(skb, 1) = 0;	/* KISS data */
+		skb_put_u8(skb, 0);	/* KISS data */
 	}
 	
 	if (skb->len >= scc->stat.bufsize)
@@ -555,7 +555,7 @@ static inline void scc_rxint(struct scc_channel *scc)
 		return;
 	}
 
-	*(u8 *)skb_put(skb, 1) = Inb(scc->data);
+	skb_put_u8(skb, Inb(scc->data));
 }
 
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 4d4837a0645b..bcb974707118 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1250,7 +1250,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		skb_put_zero(skb_out, padding_count);
 	} else if (skb_out->len < ctx->tx_max &&
 		   (skb_out->len % dev->maxpacket) == 0) {
-		*(u8 *)skb_put(skb_out, 1) = 0;	/* force short packet */
+		skb_put_u8(skb_out, 0);	/* force short packet */
 	}
 
 	/* set final frame length */
diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c
index be53ff30b7b5..18a13aa5fcbb 100644
--- a/drivers/net/usb/net1080.c
+++ b/drivers/net/usb/net1080.c
@@ -473,7 +473,7 @@ encapsulate:
 
 	/* maybe pad; then trailer */
 	if (!((skb->len + sizeof *trailer) & 0x01))
-		*(u8 *)skb_put(skb, 1) = PAD_BYTE;
+		skb_put_u8(skb, PAD_BYTE);
 	trailer = skb_put(skb, sizeof *trailer);
 	put_unaligned(header->packet_id, &trailer->packet_id);
 #if 0
diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c
index dc3cd03763af..9c2196c3fd11 100644
--- a/drivers/net/usb/zaurus.c
+++ b/drivers/net/usb/zaurus.c
@@ -74,10 +74,10 @@ done:
 		fcs = crc32_le(~0, skb->data, skb->len);
 		fcs = ~fcs;
 
-		*(u8 *)skb_put(skb, 1) = fcs       & 0xff;
-		*(u8 *)skb_put(skb, 1) = (fcs>> 8) & 0xff;
-		*(u8 *)skb_put(skb, 1) = (fcs>>16) & 0xff;
-		*(u8 *)skb_put(skb, 1) = (fcs>>24) & 0xff;
+		skb_put_u8(skb, fcs & 0xff);
+		skb_put_u8(skb, (fcs >> 8) & 0xff);
+		skb_put_u8(skb, (fcs >> 16) & 0xff);
+		skb_put_u8(skb, (fcs >> 24) & 0xff);
 	}
 	return skb;
 }
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index d5781aa0f791..e0baec848ff2 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -86,7 +86,7 @@ static void fdp_nci_i2c_add_len_lrc(struct sk_buff *skb)
 	for (i = 0; i < len + 2; i++)
 		lrc ^= skb->data[i];
 
-	*(u8 *)skb_put(skb, 1) = lrc;
+	skb_put_u8(skb, lrc);
 }
 
 static void fdp_nci_i2c_remove_len_lrc(struct sk_buff *skb)
diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c
index 386cc61d95b9..b668b7b9a61e 100644
--- a/drivers/nfc/microread/i2c.c
+++ b/drivers/nfc/microread/i2c.c
@@ -75,7 +75,7 @@ static void microread_i2c_add_len_crc(struct sk_buff *skb)
 	for (i = 0; i < skb->len; i++)
 		crc = crc ^ skb->data[i];
 
-	*(u8 *)skb_put(skb, 1) = crc;
+	skb_put_u8(skb, crc);
 }
 
 static void microread_i2c_remove_len_crc(struct sk_buff *skb)
@@ -173,7 +173,7 @@ static int microread_i2c_read(struct microread_i2c_phy *phy,
 		goto flush;
 	}
 
-	*(u8 *)skb_put(*skb, 1) = len;
+	skb_put_u8(*skb, len);
 
 	r = i2c_master_recv(client, skb_put(*skb, len), len);
 	if (r != len) {
diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
index 38a979eacc29..e5d5d2d97409 100644
--- a/drivers/nfc/microread/microread.c
+++ b/drivers/nfc/microread/microread.c
@@ -441,8 +441,8 @@ static int microread_im_transceive(struct nfc_hci_dev *hdev,
 
 		crc = crc_ccitt(0xffff, skb->data, skb->len);
 		crc = ~crc;
-		*(u8 *)skb_put(skb, 1) = crc & 0xff;
-		*(u8 *)skb_put(skb, 1) = crc >> 8;
+		skb_put_u8(skb, crc & 0xff);
+		skb_put_u8(skb, crc >> 8);
 		break;
 	case MICROREAD_GATE_ID_MREAD_NFC_T3:
 		control_bits = 0xDB;
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index 788599de9d8a..f9f000c546d1 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -292,7 +292,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
 			out_skb = alloc_lc_skb(priv, 1);
 			if (!out_skb)
 				return -ENOMEM;
-			*(u8 *)skb_put(out_skb, 1) = 0xBF;
+			skb_put_u8(out_skb, 0xBF);
 			nci_send_frame(priv->ndev, out_skb);
 			priv->fw_dnld.substate = SUBSTATE_WAIT_NACK_CREDIT;
 			return 0;
@@ -301,7 +301,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
 		out_skb = alloc_lc_skb(priv, 1);
 		if (!out_skb)
 			return -ENOMEM;
-		*(u8 *)skb_put(out_skb, 1) = HELPER_ACK_PACKET_FORMAT;
+		skb_put_u8(out_skb, HELPER_ACK_PACKET_FORMAT);
 		nci_send_frame(priv->ndev, out_skb);
 		priv->fw_dnld.substate = SUBSTATE_WAIT_ACK_CREDIT;
 		break;
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 6a711b5b9490..c8a8f5badb5b 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -1032,7 +1032,7 @@ static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev)
 		return NULL;
 
 	/* DEP support only */
-	*(u8 *)skb_put(skb, 1) = PN533_INIT_TARGET_DEP;
+	skb_put_u8(skb, PN533_INIT_TARGET_DEP);
 
 	/* MIFARE params */
 	skb_put_data(skb, mifare_params, 6);
@@ -1046,12 +1046,12 @@ static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev)
 	memcpy(nfcid3, felica, 8);
 
 	/* General bytes */
-	*(u8 *)skb_put(skb, 1) = gbytes_len;
+	skb_put_u8(skb, gbytes_len);
 
 	gb = skb_put_data(skb, gbytes, gbytes_len);
 
 	/* Len Tk */
-	*(u8 *)skb_put(skb, 1) = 0;
+	skb_put_u8(skb, 0);
 
 	return skb;
 }
@@ -1280,8 +1280,8 @@ static void pn533_wq_rf(struct work_struct *work)
 	if (!skb)
 		return;
 
-	*(u8 *)skb_put(skb, 1) = PN533_CFGITEM_RF_FIELD;
-	*(u8 *)skb_put(skb, 1) = PN533_CFGITEM_RF_FIELD_AUTO_RFCA;
+	skb_put_u8(skb, PN533_CFGITEM_RF_FIELD);
+	skb_put_u8(skb, PN533_CFGITEM_RF_FIELD_AUTO_RFCA);
 
 	rc = pn533_send_cmd_async(dev, PN533_CMD_RF_CONFIGURATION, skb,
 				  pn533_rf_complete, NULL);
@@ -1375,8 +1375,8 @@ static int pn533_poll_dep(struct nfc_dev *nfc_dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = 0x01;  /* Active */
-	*(u8 *)skb_put(skb, 1) = 0x02;  /* 424 kbps */
+	skb_put_u8(skb, 0x01);  /* Active */
+	skb_put_u8(skb, 0x02);  /* 424 kbps */
 
 	next = skb_put(skb, 1);  /* Next */
 	*next = 0;
@@ -1620,8 +1620,8 @@ static int pn533_activate_target_nfcdep(struct pn533 *dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, sizeof(u8)) = 1; /* TG */
-	*(u8 *)skb_put(skb, sizeof(u8)) = 0; /* Next */
+	skb_put_u8(skb, 1); /* TG */
+	skb_put_u8(skb, 0); /* Next */
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_IN_ATR, skb);
 	if (IS_ERR(resp))
@@ -1737,7 +1737,7 @@ static void pn533_deactivate_target(struct nfc_dev *nfc_dev,
 	if (!skb)
 		return;
 
-	*(u8 *)skb_put(skb, 1) = 1; /* TG*/
+	skb_put_u8(skb, 1); /* TG*/
 
 	rc = pn533_send_cmd_async(dev, PN533_CMD_IN_RELEASE, skb,
 				  pn533_deactivate_target_complete, NULL);
@@ -1848,8 +1848,8 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = !comm_mode;  /* ActPass */
-	*(u8 *)skb_put(skb, 1) = 0x02;  /* 424 kbps */
+	skb_put_u8(skb, !comm_mode);  /* ActPass */
+	skb_put_u8(skb, 0x02);  /* 424 kbps */
 
 	next = skb_put(skb, 1);  /* Next */
 	*next = 0;
@@ -2274,7 +2274,7 @@ static void pn533_wq_mi_recv(struct work_struct *work)
 			break;
 		}
 	default:
-		*(u8 *)skb_put(skb, sizeof(u8)) =  1; /*TG*/
+		skb_put_u8(skb, 1); /*TG*/
 
 		rc = pn533_send_cmd_direct_async(dev,
 						 PN533_CMD_IN_DATA_EXCHANGE,
@@ -2370,7 +2370,7 @@ static int pn533_set_configuration(struct pn533 *dev, u8 cfgitem, u8 *cfgdata,
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, sizeof(cfgitem)) = cfgitem;
+	skb_put_u8(skb, cfgitem);
 	skb_put_data(skb, cfgdata, cfgdata_len);
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_RF_CONFIGURATION, skb);
@@ -2415,7 +2415,7 @@ static int pn533_pasori_fw_reset(struct pn533 *dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, sizeof(u8)) = 0x1;
+	skb_put_u8(skb, 0x1);
 
 	resp = pn533_send_cmd_sync(dev, 0x18, skb);
 	if (IS_ERR(resp))
@@ -2454,7 +2454,7 @@ static int pn532_sam_configuration(struct nfc_dev *nfc_dev)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = 0x01;
+	skb_put_u8(skb, 0x01);
 
 	resp = pn533_send_cmd_sync(dev, PN533_CMD_SAM_CONFIGURATION, skb);
 	if (IS_ERR(resp))
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index b7be6c25b7e6..fedde9d46ab6 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -287,8 +287,8 @@ static void pn544_hci_i2c_add_len_crc(struct sk_buff *skb)
 
 	crc = crc_ccitt(0xffff, skb->data, skb->len);
 	crc = ~crc;
-	*(u8 *)skb_put(skb, 1) = crc & 0xff;
-	*(u8 *)skb_put(skb, 1) = crc >> 8;
+	skb_put_u8(skb, crc & 0xff);
+	skb_put_u8(skb, crc >> 8);
 }
 
 static void pn544_hci_i2c_remove_len_crc(struct sk_buff *skb)
@@ -391,7 +391,7 @@ static int pn544_hci_i2c_read(struct pn544_i2c_phy *phy, struct sk_buff **skb)
 		goto flush;
 	}
 
-	*(u8 *)skb_put(*skb, 1) = len;
+	skb_put_u8(*skb, len);
 
 	r = i2c_master_recv(client, skb_put(*skb, len), len);
 	if (r != len) {
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index 5fa3cf0fabd6..bb43cebda9dc 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -991,7 +991,7 @@ static int port100_set_command_type(struct port100 *dev, u8 command_type)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, sizeof(u8)) = command_type;
+	skb_put_u8(skb, command_type);
 
 	resp = port100_send_cmd_sync(dev, PORT100_CMD_SET_COMMAND_TYPE, skb);
 	if (IS_ERR(resp))
@@ -1059,7 +1059,7 @@ static int port100_switch_rf(struct nfc_digital_dev *ddev, bool on)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = on ? 1 : 0;
+	skb_put_u8(skb, on ? 1 : 0);
 
 	/* Cancel the last command if the device is being switched off */
 	if (!on)
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 396cdafb3e36..4bff76baa341 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -177,10 +177,10 @@ static void st21nfca_hci_add_len_crc(struct sk_buff *skb)
 	crc = ~crc;
 
 	tmp = crc & 0x00ff;
-	*(u8 *)skb_put(skb, 1) = tmp;
+	skb_put_u8(skb, tmp);
 
 	tmp = (crc >> 8) & 0x00ff;
-	*(u8 *)skb_put(skb, 1) = tmp;
+	skb_put_u8(skb, tmp);
 }
 
 static void st21nfca_hci_remove_len_crc(struct sk_buff *skb)
@@ -214,7 +214,7 @@ static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb)
 	st21nfca_hci_add_len_crc(skb);
 
 	/* add ST21NFCA_SOF_EOF on tail */
-	*(u8 *)skb_put(skb, 1) = ST21NFCA_SOF_EOF;
+	skb_put_u8(skb, ST21NFCA_SOF_EOF);
 	/* add ST21NFCA_SOF_EOF on head */
 	*(u8 *)skb_push(skb, 1) = ST21NFCA_SOF_EOF;
 
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 168adcc46cb8..2b26f762fbc3 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -949,7 +949,7 @@ static int st95hf_in_send_cmd(struct nfc_digital_dev *ddev,
 	switch (stcontext->current_rf_tech) {
 	case NFC_DIGITAL_RF_TECH_106A:
 		len_data_to_tag = skb->len + 1;
-		*(u8 *)skb_put(skb, 1) = stcontext->sendrcv_trflag;
+		skb_put_u8(skb, stcontext->sendrcv_trflag);
 		break;
 	case NFC_DIGITAL_RF_TECH_106B:
 	case NFC_DIGITAL_RF_TECH_ISO15693:
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 46bd514e719c..852feacf4bbf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1923,6 +1923,11 @@ static inline void *skb_put_data(struct sk_buff *skb, const void *data,
 	return tmp;
 }
 
+static inline void skb_put_u8(struct sk_buff *skb, u8 val)
+{
+	*(u8 *)skb_put(skb, 1) = val;
+}
+
 void *skb_push(struct sk_buff *skb, unsigned int len);
 static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
 {
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index cdb5c1a7481e..65d734c165bd 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -517,7 +517,7 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
 	put_unaligned_le16(format, skb_put(skb, 2));
 	skb_put_data(skb, ver, sizeof(ver));
 	put_unaligned_le32(flags, skb_put(skb, 4));
-	*(u8 *)skb_put(skb, 1) = TASK_COMM_LEN;
+	skb_put_u8(skb, TASK_COMM_LEN);
 	skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN);
 
 	__net_timestamp(skb);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index c0d0832a023d..961f7f53e178 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -112,7 +112,7 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock,
 		return -ENOMEM;
 	}
 
-	*(u8 *)skb_put(skb, 1) = hdr;
+	skb_put_u8(skb, hdr);
 	if (data && size > 0)
 		skb_put_data(skb, data, size);
 
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 7e054b2f270a..66f035e476ea 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -530,7 +530,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
 	msg->info = scp->info_loc;
 	msg->segsize = cpu_to_le16(scp->segsize_loc);
 
-	*(u8 *)skb_put(skb, 1) = len;
+	skb_put_u8(skb, len);
 
 	if (len > 0)
 		skb_put_data(skb, scp->conndata_out.opt_data, len);
@@ -686,25 +686,25 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	if (scp->peer.sdn_flags & SDF_UICPROXY)
 		menuver |= DN_MENUVER_UIC;
 
-	*(u8 *)skb_put(skb, 1) = menuver;	/* Menu Version		*/
+	skb_put_u8(skb, menuver);	/* Menu Version		*/
 
 	aux = scp->accessdata.acc_userl;
-	*(u8 *)skb_put(skb, 1) = aux;
+	skb_put_u8(skb, aux);
 	if (aux > 0)
 		skb_put_data(skb, scp->accessdata.acc_user, aux);
 
 	aux = scp->accessdata.acc_passl;
-	*(u8 *)skb_put(skb, 1) = aux;
+	skb_put_u8(skb, aux);
 	if (aux > 0)
 		skb_put_data(skb, scp->accessdata.acc_pass, aux);
 
 	aux = scp->accessdata.acc_accl;
-	*(u8 *)skb_put(skb, 1) = aux;
+	skb_put_u8(skb, aux);
 	if (aux > 0)
 		skb_put_data(skb, scp->accessdata.acc_acc, aux);
 
 	aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
-	*(u8 *)skb_put(skb, 1) = aux;
+	skb_put_u8(skb, aux);
 	if (aux > 0)
 		skb_put_data(skb, scp->conndata_out.opt_data, aux);
 
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index fec47a7d0092..ebeace7a8278 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -74,8 +74,8 @@ void digital_skb_add_crc(struct sk_buff *skb, crc_func_t crc_func, u16 init,
 	if (msb_first)
 		crc = __fswab16(crc);
 
-	*(u8 *)skb_put(skb, 1) = crc & 0xFF;
-	*(u8 *)skb_put(skb, 1) = (crc >> 8) & 0xFF;
+	skb_put_u8(skb, crc & 0xFF);
+	skb_put_u8(skb, (crc >> 8) & 0xFF);
 }
 
 int digital_skb_check_crc(struct sk_buff *skb, crc_func_t crc_func,
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index f948fc2099d2..74ccc2dd79d0 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -654,7 +654,7 @@ static int digital_in_send_rtox(struct nfc_digital_dev *ddev,
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = rtox;
+	skb_put_u8(skb, rtox);
 
 	skb_push(skb, sizeof(struct digital_dep_req_res));
 
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 492204e440ec..3cc3448da524 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -266,8 +266,8 @@ static int digital_in_send_rats(struct nfc_digital_dev *ddev,
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = DIGITAL_RATS_BYTE1;
-	*(u8 *)skb_put(skb, 1) = DIGITAL_RATS_PARAM;
+	skb_put_u8(skb, DIGITAL_RATS_BYTE1);
+	skb_put_u8(skb, DIGITAL_RATS_PARAM);
 
 	rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_ats,
 				 target);
@@ -470,8 +470,8 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
 	else
 		sel_cmd = DIGITAL_CMD_SEL_REQ_CL3;
 
-	*(u8 *)skb_put(skb, sizeof(u8)) = sel_cmd;
-	*(u8 *)skb_put(skb, sizeof(u8)) = DIGITAL_SDD_REQ_SEL_PAR;
+	skb_put_u8(skb, sel_cmd);
+	skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR);
 
 	return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
 				   target);
@@ -541,7 +541,7 @@ int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, sizeof(u8)) = DIGITAL_CMD_SENS_REQ;
+	skb_put_u8(skb, DIGITAL_CMD_SENS_REQ);
 
 	rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sens_res, NULL);
 	if (rc)
@@ -937,7 +937,7 @@ static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev)
 	if (!skb)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = DIGITAL_SEL_RES_NFC_DEP;
+	skb_put_u8(skb, DIGITAL_SEL_RES_NFC_DEP);
 
 	if (!DIGITAL_DRV_CAPS_TG_CRC(ddev))
 		digital_skb_add_crc_a(skb);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 7b2bdda1514c..b740fef0acc5 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -874,7 +874,7 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 			return;
 		}
 
-		*(u8 *)skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
+		skb_put_u8(hcp_skb, pipe);
 
 		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
 			msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 5bd4529279f5..17e59a009ce6 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -382,8 +382,8 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
 	if (skb == NULL)
 		return -ENOMEM;
 
-	*(u8 *)skb_put(skb, 1) = SHDLC_MAX_WINDOW;
-	*(u8 *)skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0;
+	skb_put_u8(skb, SHDLC_MAX_WINDOW);
+	skb_put_u8(skb, SHDLC_SREJ_SUPPORT ? 1 : 0);
 
 	return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
 }
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 3f93df58d9f1..ddfc52ac1f9b 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -472,7 +472,7 @@ void nci_hci_data_received_cb(void *context,
 			return;
 		}
 
-		*(u8 *)skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe;
+		skb_put_u8(hcp_skb, pipe);
 
 		skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) {
 			msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 3b4512731a2f..452f4c16b7a9 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -86,8 +86,8 @@ int nci_spi_send(struct nci_spi *nspi,
 		u16 crc;
 
 		crc = crc_ccitt(CRC_INIT, skb->data, skb->len);
-		*(u8 *)skb_put(skb, 1) = crc >> 8;
-		*(u8 *)skb_put(skb, 1) = crc & 0xFF;
+		skb_put_u8(skb, crc >> 8);
+		skb_put_u8(skb, crc & 0xFF);
 	}
 
 	if (write_handshake_completion)	{
@@ -172,8 +172,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge)
 	hdr[3] = 0;
 
 	crc = crc_ccitt(CRC_INIT, skb->data, skb->len);
-	*(u8 *)skb_put(skb, 1) = crc >> 8;
-	*(u8 *)skb_put(skb, 1) = crc & 0xFF;
+	skb_put_u8(skb, crc >> 8);
+	skb_put_u8(skb, crc & 0xFF);
 
 	ret = __nci_spi_send(nspi, skb, 0);
 
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 442f8eadfc76..8d104c1db628 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -355,7 +355,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
 
 		/* Eat byte after byte till full packet header is received */
 		if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) {
-			*(u8 *)skb_put(nu->rx_skb, 1) = *data++;
+			skb_put_u8(nu->rx_skb, *data++);
 			--count;
 			continue;
 		}
-- 
cgit v1.2.3


From 58038695e62b4473e4d70e1503933579c640cd52 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 15 Jun 2017 17:29:09 -0700
Subject: net: Add IFLA_XDP_PROG_ID

Expose prog_id through IFLA_XDP_PROG_ID.  This patch
makes modification to generic_xdp.  The later patches will
modify other xdp-supported drivers.

prog_id is added to struct net_dev_xdp.

iproute2 patch will be followed. Here is how the 'ip link'
will look like:
> ip link show eth0
3: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 xdp(prog_id:1) qdisc fq_codel state UP mode DEFAULT group default qlen 1000

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    |  7 +++++--
 include/uapi/linux/if_link.h |  1 +
 net/core/dev.c               | 19 +++++++++++--------
 net/core/rtnetlink.c         | 27 +++++++++++++++++++++------
 4 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ad98a83f1332..7c7118b3bd69 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -824,7 +824,10 @@ struct netdev_xdp {
 			struct netlink_ext_ack *extack;
 		};
 		/* XDP_QUERY_PROG */
-		bool prog_attached;
+		struct {
+			bool prog_attached;
+			u32 prog_id;
+		};
 	};
 };
 
@@ -3302,7 +3305,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
-bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op);
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8ed679fe603f..dd88375a6580 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -907,6 +907,7 @@ enum {
 	IFLA_XDP_FD,
 	IFLA_XDP_ATTACHED,
 	IFLA_XDP_FLAGS,
+	IFLA_XDP_PROG_ID,
 	__IFLA_XDP_MAX,
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 8658074ecad6..b8d6dd9e8b5c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4342,13 +4342,12 @@ static struct static_key generic_xdp_needed __read_mostly;
 
 static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
 {
+	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
 	struct bpf_prog *new = xdp->prog;
 	int ret = 0;
 
 	switch (xdp->command) {
-	case XDP_SETUP_PROG: {
-		struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
-
+	case XDP_SETUP_PROG:
 		rcu_assign_pointer(dev->xdp_prog, new);
 		if (old)
 			bpf_prog_put(old);
@@ -4360,10 +4359,10 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
 			dev_disable_lro(dev);
 		}
 		break;
-	}
 
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+		xdp->prog_attached = !!old;
+		xdp->prog_id = old ? old->aux->id : 0;
 		break;
 
 	default:
@@ -6937,7 +6936,8 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op,
+			u32 *prog_id)
 {
 	struct netdev_xdp xdp;
 
@@ -6946,6 +6946,9 @@ bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
 
 	/* Query must always succeed. */
 	WARN_ON(xdp_op(dev, &xdp) < 0);
+	if (prog_id)
+		*prog_id = xdp.prog_id;
+
 	return xdp.prog_attached;
 }
 
@@ -6991,10 +6994,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		xdp_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (xdp_chk && __dev_xdp_attached(dev, xdp_chk))
+		if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
 			return -EEXIST;
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_attached(dev, xdp_op))
+		    __dev_xdp_attached(dev, xdp_op, NULL))
 			return -EBUSY;
 
 		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2769ad9834d1..3aa57848a895 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -39,6 +39,7 @@
 #include <linux/if_vlan.h>
 #include <linux/pci.h>
 #include <linux/etherdevice.h>
+#include <linux/bpf.h>
 
 #include <linux/uaccess.h>
 
@@ -899,7 +900,8 @@ static size_t rtnl_port_size(const struct net_device *dev,
 static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
-			  nla_total_size(1);	/* XDP_ATTACHED */
+			  nla_total_size(1) +	/* XDP_ATTACHED */
+			  nla_total_size(4);	/* XDP_PROG_ID */
 
 	return xdp_size;
 }
@@ -1248,15 +1250,20 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static u8 rtnl_xdp_attached_mode(struct net_device *dev)
+static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	const struct bpf_prog *generic_xdp_prog;
 
 	ASSERT_RTNL();
 
-	if (rcu_access_pointer(dev->xdp_prog))
+	*prog_id = 0;
+	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
+	if (generic_xdp_prog) {
+		*prog_id = generic_xdp_prog->aux->id;
 		return XDP_ATTACHED_SKB;
-	if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp))
+	}
+	if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp, prog_id))
 		return XDP_ATTACHED_DRV;
 
 	return XDP_ATTACHED_NONE;
@@ -1265,6 +1272,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev)
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	struct nlattr *xdp;
+	u32 prog_id;
 	int err;
 
 	xdp = nla_nest_start(skb, IFLA_XDP);
@@ -1272,10 +1280,16 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 		return -EMSGSIZE;
 
 	err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
-			 rtnl_xdp_attached_mode(dev));
+			 rtnl_xdp_attached_mode(dev, &prog_id));
 	if (err)
 		goto err_cancel;
 
+	if (prog_id) {
+		err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
+		if (err)
+			goto err_cancel;
+	}
+
 	nla_nest_end(skb, xdp);
 	return 0;
 
@@ -1553,6 +1567,7 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
 	[IFLA_XDP_FD]		= { .type = NLA_S32 },
 	[IFLA_XDP_ATTACHED]	= { .type = NLA_U8 },
 	[IFLA_XDP_FLAGS]	= { .type = NLA_U32 },
+	[IFLA_XDP_PROG_ID]	= { .type = NLA_U32 },
 };
 
 static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
@@ -2225,7 +2240,7 @@ static int do_setlink(const struct sk_buff *skb,
 		if (err < 0)
 			goto errout;
 
-		if (xdp[IFLA_XDP_ATTACHED]) {
+		if (xdp[IFLA_XDP_ATTACHED] || xdp[IFLA_XDP_PROG_ID]) {
 			err = -EINVAL;
 			goto errout;
 		}
-- 
cgit v1.2.3


From 68c35ea25bdd4ad10445c4c02f7d48b3dccab8cc Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: mfd: cros_ec: Add helper for event notifier.

Add cros_ec_get_event() entry point to retrieve event within functions
called by the notifier.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_proto.c | 20 ++++++++++++++++++++
 include/linux/mfd/cros_ec.h             | 10 ++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index ed5dee744c74..7428c2b965bb 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -494,3 +494,23 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev)
 		return get_keyboard_state_event(ec_dev);
 }
 EXPORT_SYMBOL(cros_ec_get_next_event);
+
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev)
+{
+	u32 host_event;
+
+	BUG_ON(!ec_dev->mkbp_event_supported);
+
+	if (ec_dev->event_data.event_type != EC_MKBP_EVENT_HOST_EVENT)
+		return 0;
+
+	if (ec_dev->event_size != sizeof(host_event)) {
+		dev_warn(ec_dev->dev, "Invalid host event size\n");
+		return 0;
+	}
+
+	host_event = get_unaligned_le32(&ec_dev->event_data.data.host_event);
+
+	return host_event;
+}
+EXPORT_SYMBOL(cros_ec_get_host_event);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 28baee63eaf6..b61b2e013698 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -300,6 +300,16 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
  */
 int cros_ec_get_next_event(struct cros_ec_device *ec_dev);
 
+/**
+ * cros_ec_get_host_event - Return a mask of event set by the EC.
+ *
+ * When MKBP is supported, when the EC raises an interrupt,
+ * We collect the events raised and call the functions in the ec notifier.
+ *
+ * This function is a helper to know which events are raised.
+ */
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
+
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;
 extern struct attribute_group cros_ec_lightbar_attr_group;
-- 
cgit v1.2.3


From 0aa877c558477e5c4b0faaa618cfd41f8c0b3319 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: mfd: cros_ec: Add EC console read structures definitions

ec_params_console_read_v1 is used to capture EC logs from kernel,
and ec_params_get_cmd_versions_v1 is used to probe whether EC
supports that command.

Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 include/linux/mfd/cros_ec_commands.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index c93e7e0300ef..1b19e424e1cf 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -625,6 +625,10 @@ struct ec_params_get_cmd_versions {
 	uint8_t cmd;      /* Command to check */
 } __packed;
 
+struct ec_params_get_cmd_versions_v1 {
+	uint16_t cmd;     /* Command to check */
+} __packed;
+
 struct ec_response_get_cmd_versions {
 	/*
 	 * Mask of supported versions; use EC_VER_MASK() to compare with a
@@ -2285,13 +2289,28 @@ struct ec_params_charge_control {
 #define EC_CMD_CONSOLE_SNAPSHOT 0x97
 
 /*
- * Read next chunk of data from saved snapshot.
+ * Read data from the saved snapshot. If the subcmd parameter is
+ * CONSOLE_READ_NEXT, this will return data starting from the beginning of
+ * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the
+ * end of the previous snapshot.
+ *
+ * The params are only looked at in version >= 1 of this command. Prior
+ * versions will just default to CONSOLE_READ_NEXT behavior.
  *
  * Response is null-terminated string.  Empty string, if there is no more
  * remaining output.
  */
 #define EC_CMD_CONSOLE_READ 0x98
 
+enum ec_console_read_subcmd {
+	CONSOLE_READ_NEXT = 0,
+	CONSOLE_READ_RECENT
+};
+
+struct ec_params_console_read_v1 {
+	uint8_t subcmd; /* enum ec_console_read_subcmd */
+} __packed;
+
 /*****************************************************************************/
 
 /*
-- 
cgit v1.2.3


From e86264595225d2764a903965356ef59aeb7d1c47 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: mfd: cros_ec: add debugfs, console log file

If the EC supports the new CONSOLE_READ command type, then we
place a console_log file in debugfs for that EC device which allows
us to grab EC logs. The kernel will poll every 10 seconds for the
log and keep its own buffer, but userspace should grab this and
write it out to some logs which actually get rotated.

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
[bleung: restored original version of this commit, with pointer size
 issue to be fixed in next commit]
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Makefile          |   3 +-
 drivers/platform/chrome/cros_ec_debugfs.c | 347 ++++++++++++++++++++++++++++++
 drivers/platform/chrome/cros_ec_debugfs.h |  27 +++
 drivers/platform/chrome/cros_ec_dev.c     |   7 +
 include/linux/mfd/cros_ec.h               |   4 +
 5 files changed, 387 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/chrome/cros_ec_debugfs.c
 create mode 100644 drivers/platform/chrome/cros_ec_debugfs.h

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 4f3462783a3c..3870afeefcf4 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -2,7 +2,8 @@
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
-					   cros_ec_lightbar.o cros_ec_vbc.o
+					   cros_ec_lightbar.o cros_ec_vbc.o \
+					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
 obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpc.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
new file mode 100644
index 000000000000..225f93631051
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -0,0 +1,347 @@
+/*
+ * cros_ec_debugfs - debug logs for Chrome OS EC
+ *
+ * Copyright 2015 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include "cros_ec_dev.h"
+#include "cros_ec_debugfs.h"
+
+#define LOG_SHIFT		14
+#define LOG_SIZE		(1 << LOG_SHIFT)
+#define LOG_POLL_SEC		10
+
+#define CIRC_ADD(idx, size, value)	(((idx) + (value)) & ((size) - 1))
+
+/* struct cros_ec_debugfs - ChromeOS EC debugging information
+ *
+ * @ec: EC device this debugfs information belongs to
+ * @dir: dentry for debugfs files
+ * @log_buffer: circular buffer for console log information
+ * @read_msg: preallocated EC command and buffer to read console log
+ * @log_mutex: mutex to protect circular buffer
+ * @log_wq: waitqueue for log readers
+ * @log_poll_work: recurring task to poll EC for new console log data
+ */
+struct cros_ec_debugfs {
+	struct cros_ec_dev *ec;
+	struct dentry *dir;
+	struct circ_buf log_buffer;
+	struct cros_ec_command *read_msg;
+	struct mutex log_mutex;
+	wait_queue_head_t log_wq;
+	struct delayed_work log_poll_work;
+};
+
+/*
+ * We need to make sure that the EC log buffer on the UART is large enough,
+ * so that it is unlikely enough to overlow within LOG_POLL_SEC.
+ */
+static void cros_ec_console_log_work(struct work_struct *__work)
+{
+	struct cros_ec_debugfs *debug_info =
+		container_of(to_delayed_work(__work),
+			     struct cros_ec_debugfs,
+			     log_poll_work);
+	struct cros_ec_dev *ec = debug_info->ec;
+	struct circ_buf *cb = &debug_info->log_buffer;
+	struct cros_ec_command snapshot_msg = {
+		.command = EC_CMD_CONSOLE_SNAPSHOT + ec->cmd_offset,
+	};
+
+	struct ec_params_console_read_v1 *read_params =
+		(struct ec_params_console_read_v1 *)debug_info->read_msg->data;
+	uint8_t *ec_buffer = (uint8_t *)debug_info->read_msg->data;
+	int idx;
+	int buf_space;
+	int ret;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, &snapshot_msg);
+	if (ret < 0) {
+		dev_err(ec->dev, "EC communication failed\n");
+		goto resched;
+	}
+	if (snapshot_msg.result != EC_RES_SUCCESS) {
+		dev_err(ec->dev, "EC failed to snapshot the console log\n");
+		goto resched;
+	}
+
+	/* Loop until we have read everything, or there's an error. */
+	mutex_lock(&debug_info->log_mutex);
+	buf_space = CIRC_SPACE(cb->head, cb->tail, LOG_SIZE);
+
+	while (1) {
+		if (!buf_space) {
+			dev_info_once(ec->dev,
+				      "Some logs may have been dropped...\n");
+			break;
+		}
+
+		memset(read_params, '\0', sizeof(*read_params));
+		read_params->subcmd = CONSOLE_READ_RECENT;
+		ret = cros_ec_cmd_xfer(ec->ec_dev, debug_info->read_msg);
+		if (ret < 0) {
+			dev_err(ec->dev, "EC communication failed\n");
+			break;
+		}
+		if (debug_info->read_msg->result != EC_RES_SUCCESS) {
+			dev_err(ec->dev,
+				"EC failed to read the console log\n");
+			break;
+		}
+
+		/* If the buffer is empty, we're done here. */
+		if (ret == 0 || ec_buffer[0] == '\0')
+			break;
+
+		idx = 0;
+		while (idx < ret && ec_buffer[idx] != '\0' && buf_space > 0) {
+			cb->buf[cb->head] = ec_buffer[idx];
+			cb->head = CIRC_ADD(cb->head, LOG_SIZE, 1);
+			idx++;
+			buf_space--;
+		}
+
+		wake_up(&debug_info->log_wq);
+	}
+
+	mutex_unlock(&debug_info->log_mutex);
+
+resched:
+	schedule_delayed_work(&debug_info->log_poll_work,
+			      msecs_to_jiffies(LOG_POLL_SEC * 1000));
+}
+
+static int cros_ec_console_log_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	struct circ_buf *cb = &debug_info->log_buffer;
+	ssize_t ret;
+
+	mutex_lock(&debug_info->log_mutex);
+
+	while (!CIRC_CNT(cb->head, cb->tail, LOG_SIZE)) {
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			goto error;
+		}
+
+		mutex_unlock(&debug_info->log_mutex);
+
+		ret = wait_event_interruptible(debug_info->log_wq,
+					CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&debug_info->log_mutex);
+	}
+
+	/* Only copy until the end of the circular buffer, and let userspace
+	 * retry to get the rest of the data.
+	 */
+	ret = min_t(size_t, CIRC_CNT_TO_END(cb->head, cb->tail, LOG_SIZE),
+		    count);
+
+	if (copy_to_user(buf, cb->buf + cb->tail, ret)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	cb->tail = CIRC_ADD(cb->tail, LOG_SIZE, ret);
+
+error:
+	mutex_unlock(&debug_info->log_mutex);
+	return ret;
+}
+
+static unsigned int cros_ec_console_log_poll(struct file *file,
+					     poll_table *wait)
+{
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	unsigned int mask = 0;
+
+	poll_wait(file, &debug_info->log_wq, wait);
+
+	mutex_lock(&debug_info->log_mutex);
+	if (CIRC_CNT(debug_info->log_buffer.head,
+		     debug_info->log_buffer.tail,
+		     LOG_SIZE))
+		mask |= POLLIN | POLLRDNORM;
+	mutex_unlock(&debug_info->log_mutex);
+
+	return mask;
+}
+
+static int cros_ec_console_log_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+const struct file_operations cros_ec_console_log_fops = {
+	.owner = THIS_MODULE,
+	.open = cros_ec_console_log_open,
+	.read = cros_ec_console_log_read,
+	.llseek = no_llseek,
+	.poll = cros_ec_console_log_poll,
+	.release = cros_ec_console_log_release,
+};
+
+static int ec_read_version_supported(struct cros_ec_dev *ec)
+{
+	struct ec_params_get_cmd_versions_v1 *params;
+	struct ec_response_get_cmd_versions *response;
+	int ret;
+
+	struct cros_ec_command *msg;
+
+	msg = kzalloc(sizeof(*msg) + max(sizeof(params), sizeof(response)),
+		GFP_KERNEL);
+	if (!msg)
+		return 0;
+
+	msg->command = EC_CMD_GET_CMD_VERSIONS + ec->cmd_offset;
+	msg->outsize = sizeof(params);
+	msg->insize = sizeof(response);
+
+	params = (struct ec_params_get_cmd_versions_v1 *)msg->data;
+	params->cmd = EC_CMD_CONSOLE_READ;
+	response = (struct ec_response_get_cmd_versions *)msg->data;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg) >= 0 &&
+		msg->result == EC_RES_SUCCESS &&
+		(response->version_mask & EC_VER_MASK(1));
+
+	kfree(msg);
+
+	return ret;
+}
+
+static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
+{
+	struct cros_ec_dev *ec = debug_info->ec;
+	char *buf;
+	int read_params_size;
+	int read_response_size;
+
+	if (!ec_read_version_supported(ec)) {
+		dev_warn(ec->dev,
+			"device does not support reading the console log\n");
+		return 0;
+	}
+
+	buf = devm_kzalloc(ec->dev, LOG_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	read_params_size = sizeof(struct ec_params_console_read_v1);
+	read_response_size = ec->ec_dev->max_response;
+	debug_info->read_msg = devm_kzalloc(ec->dev,
+		sizeof(*debug_info->read_msg) +
+			max(read_params_size, read_response_size), GFP_KERNEL);
+	if (!debug_info->read_msg)
+		return -ENOMEM;
+
+	debug_info->read_msg->version = 1;
+	debug_info->read_msg->command = EC_CMD_CONSOLE_READ + ec->cmd_offset;
+	debug_info->read_msg->outsize = read_params_size;
+	debug_info->read_msg->insize = read_response_size;
+
+	debug_info->log_buffer.buf = buf;
+	debug_info->log_buffer.head = 0;
+	debug_info->log_buffer.tail = 0;
+
+	mutex_init(&debug_info->log_mutex);
+	init_waitqueue_head(&debug_info->log_wq);
+
+	if (!debugfs_create_file("console_log",
+				 S_IFREG | S_IRUGO,
+				 debug_info->dir,
+				 debug_info,
+				 &cros_ec_console_log_fops))
+		return -ENOMEM;
+
+	INIT_DELAYED_WORK(&debug_info->log_poll_work,
+			  cros_ec_console_log_work);
+	schedule_delayed_work(&debug_info->log_poll_work, 0);
+
+	return 0;
+}
+
+static void cros_ec_cleanup_console_log(struct cros_ec_debugfs *debug_info)
+{
+	if (debug_info->log_buffer.buf) {
+		cancel_delayed_work_sync(&debug_info->log_poll_work);
+		mutex_destroy(&debug_info->log_mutex);
+	}
+}
+
+int cros_ec_debugfs_init(struct cros_ec_dev *ec)
+{
+	struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev);
+	const char *name = ec_platform->ec_name;
+	struct cros_ec_debugfs *debug_info;
+	int ret;
+
+	debug_info = devm_kzalloc(ec->dev, sizeof(*debug_info), GFP_KERNEL);
+	if (!debug_info)
+		return -ENOMEM;
+
+	debug_info->ec = ec;
+	debug_info->dir = debugfs_create_dir(name, NULL);
+	if (!debug_info->dir)
+		return -ENOMEM;
+
+	ret = cros_ec_create_console_log(debug_info);
+	if (ret)
+		goto remove_debugfs;
+
+	ec->debug_info = debug_info;
+
+	return 0;
+
+remove_debugfs:
+	debugfs_remove_recursive(debug_info->dir);
+	return ret;
+}
+
+void cros_ec_debugfs_remove(struct cros_ec_dev *ec)
+{
+	if (!ec->debug_info)
+		return;
+
+	debugfs_remove_recursive(ec->debug_info->dir);
+	cros_ec_cleanup_console_log(ec->debug_info);
+}
diff --git a/drivers/platform/chrome/cros_ec_debugfs.h b/drivers/platform/chrome/cros_ec_debugfs.h
new file mode 100644
index 000000000000..1ff3a50aa1b8
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_debugfs.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2015 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DRV_CROS_EC_DEBUGFS_H_
+#define _DRV_CROS_EC_DEBUGFS_H_
+
+#include "cros_ec_dev.h"
+
+/* debugfs stuff */
+int cros_ec_debugfs_init(struct cros_ec_dev *ec);
+void cros_ec_debugfs_remove(struct cros_ec_dev *ec);
+
+#endif  /* _DRV_CROS_EC_DEBUGFS_H_ */
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 6aa120cd0574..20ce1c23fb5c 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+#include "cros_ec_debugfs.h"
 #include "cros_ec_dev.h"
 
 /* Device variables */
@@ -427,6 +428,9 @@ static int ec_device_probe(struct platform_device *pdev)
 		goto failed;
 	}
 
+	if (cros_ec_debugfs_init(ec))
+		dev_warn(dev, "failed to create debugfs directory\n");
+
 	/* check whether this EC is a sensor hub. */
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
 		cros_ec_sensors_register(ec);
@@ -441,6 +445,9 @@ failed:
 static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
+
+	cros_ec_debugfs_remove(ec);
+
 	cdev_del(&ec->cdev);
 	device_unregister(&ec->class_dev);
 	return 0;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index b61b2e013698..3b16c9009749 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -172,6 +172,8 @@ struct cros_ec_platform {
 	u16 cmd_offset;
 };
 
+struct cros_ec_debugfs;
+
 /*
  * struct cros_ec_dev - ChromeOS EC device entry point
  *
@@ -179,6 +181,7 @@ struct cros_ec_platform {
  * @cdev: Character device structure in /dev
  * @ec_dev: cros_ec_device structure to talk to the physical device
  * @dev: pointer to the platform device
+ * @debug_info: cros_ec_debugfs structure for debugging information
  * @cmd_offset: offset to apply for each command.
  */
 struct cros_ec_dev {
@@ -186,6 +189,7 @@ struct cros_ec_dev {
 	struct cdev cdev;
 	struct cros_ec_device *ec_dev;
 	struct device *dev;
+	struct cros_ec_debugfs *debug_info;
 	u16 cmd_offset;
 	u32 features[2];
 };
-- 
cgit v1.2.3


From 99b3c58f7ba7fae801e501b45c5fcf6e08d9247f Mon Sep 17 00:00:00 2001
From: Piotr Gregor <piotrgregor@rsyncme.org>
Date: Fri, 26 May 2017 22:02:25 +0100
Subject: PCI: Test INTx masking during enumeration, not at run-time

The test for INTx masking via PCI_COMMAND_INTX_DISABLE performed in
pci_intx_mask_supported() should be done before the device can be used.
This is to avoid writing PCI_COMMAND while the driver owns the device, in
case that has any effect on MSI/MSI-X interrupts.

Move the content of pci_intx_mask_supported() to pci_intx_mask_broken() and
call it from pci_setup_device().

The test result can be queried at any time later using the same
pci_intx_mask_supported() interface as before (though with changed
implementation), so callers (uio, vfio) should be unaffected.

Signed-off-by: Piotr Gregor <piotrgregor@rsyncme.org>
[bhelgaas: changelog, remove quirk check, remove locking, move
dev->broken_intx_masking assignment to caller]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/pci/pci.c   | 42 +-----------------------------------------
 drivers/pci/probe.c | 30 ++++++++++++++++++++++++++++++
 include/linux/pci.h | 12 ++++++++++--
 3 files changed, 41 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5bba8e6..7c4e1aa67c67 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3708,46 +3708,6 @@ void pci_intx(struct pci_dev *pdev, int enable)
 }
 EXPORT_SYMBOL_GPL(pci_intx);
 
-/**
- * pci_intx_mask_supported - probe for INTx masking support
- * @dev: the PCI device to operate on
- *
- * Check if the device dev support INTx masking via the config space
- * command word.
- */
-bool pci_intx_mask_supported(struct pci_dev *dev)
-{
-	bool mask_supported = false;
-	u16 orig, new;
-
-	if (dev->broken_intx_masking)
-		return false;
-
-	pci_cfg_access_lock(dev);
-
-	pci_read_config_word(dev, PCI_COMMAND, &orig);
-	pci_write_config_word(dev, PCI_COMMAND,
-			      orig ^ PCI_COMMAND_INTX_DISABLE);
-	pci_read_config_word(dev, PCI_COMMAND, &new);
-
-	/*
-	 * There's no way to protect against hardware bugs or detect them
-	 * reliably, but as long as we know what the value should be, let's
-	 * go ahead and check it.
-	 */
-	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
-		dev_err(&dev->dev, "Command register changed from 0x%x to 0x%x: driver or hardware bug?\n",
-			orig, new);
-	} else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
-		mask_supported = true;
-		pci_write_config_word(dev, PCI_COMMAND, orig);
-	}
-
-	pci_cfg_access_unlock(dev);
-	return mask_supported;
-}
-EXPORT_SYMBOL_GPL(pci_intx_mask_supported);
-
 static bool pci_check_and_set_intx_mask(struct pci_dev *dev, bool mask)
 {
 	struct pci_bus *bus = dev->bus;
@@ -3798,7 +3758,7 @@ done:
  * @dev: the PCI device to operate on
  *
  * Check if the device dev has its INTx line asserted, mask it and
- * return true in that case. False is returned if not interrupt was
+ * return true in that case. False is returned if no interrupt was
  * pending.
  */
 bool pci_check_and_mask_intx(struct pci_dev *dev)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 19c8950c6c38..8b8826b9a398 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1329,6 +1329,34 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev)
 		pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
 }
 
+/**
+ * pci_intx_mask_broken - test PCI_COMMAND_INTX_DISABLE writability
+ * @dev: PCI device
+ *
+ * Test whether PCI_COMMAND_INTX_DISABLE is writable for @dev.  Check this
+ * at enumeration-time to avoid modifying PCI_COMMAND at run-time.
+ */
+static int pci_intx_mask_broken(struct pci_dev *dev)
+{
+	u16 orig, toggle, new;
+
+	pci_read_config_word(dev, PCI_COMMAND, &orig);
+	toggle = orig ^ PCI_COMMAND_INTX_DISABLE;
+	pci_write_config_word(dev, PCI_COMMAND, toggle);
+	pci_read_config_word(dev, PCI_COMMAND, &new);
+
+	pci_write_config_word(dev, PCI_COMMAND, orig);
+
+	/*
+	 * PCI_COMMAND_INTX_DISABLE was reserved and read-only prior to PCI
+	 * r2.3, so strictly speaking, a device is not *broken* if it's not
+	 * writable.  But we'll live with the misnomer for now.
+	 */
+	if (new != toggle)
+		return 1;
+	return 0;
+}
+
 /**
  * pci_setup_device - fill in class and map information of a device
  * @dev: the device structure to fill
@@ -1399,6 +1427,8 @@ int pci_setup_device(struct pci_dev *dev)
 		}
 	}
 
+	dev->broken_intx_masking = pci_intx_mask_broken(dev);
+
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
 		if (class == PCI_CLASS_BRIDGE_PCI)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..4f0613d5d2d9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -366,7 +366,7 @@ struct pci_dev {
 	unsigned int	is_thunderbolt:1; /* Thunderbolt controller */
 	unsigned int    __aer_firmware_first_valid:1;
 	unsigned int	__aer_firmware_first:1;
-	unsigned int	broken_intx_masking:1;
+	unsigned int	broken_intx_masking:1; /* INTx masking can't be used */
 	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
 	unsigned int	irq_managed:1;
 	unsigned int	has_secondary_link:1;
@@ -1003,6 +1003,15 @@ int __must_check pci_reenable_device(struct pci_dev *);
 int __must_check pcim_enable_device(struct pci_dev *pdev);
 void pcim_pin_device(struct pci_dev *pdev);
 
+static inline bool pci_intx_mask_supported(struct pci_dev *pdev)
+{
+	/*
+	 * INTx masking is supported if PCI_COMMAND_INTX_DISABLE is
+	 * writable and no quirk has marked the feature broken.
+	 */
+	return !pdev->broken_intx_masking;
+}
+
 static inline int pci_is_enabled(struct pci_dev *pdev)
 {
 	return (atomic_read(&pdev->enable_cnt) > 0);
@@ -1026,7 +1035,6 @@ int __must_check pci_set_mwi(struct pci_dev *dev);
 int pci_try_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
-bool pci_intx_mask_supported(struct pci_dev *dev);
 bool pci_check_and_mask_intx(struct pci_dev *dev);
 bool pci_check_and_unmask_intx(struct pci_dev *dev);
 int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask);
-- 
cgit v1.2.3


From 7b9e93616399638521aafd1f01dfcf474c736393 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Jun 2017 18:15:21 +0200
Subject: blk-mq-sched: unify request finished methods

No need to have two different callouts of bfq vs kyber.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c      |  6 +++---
 block/blk-mq.c           | 11 ++++-------
 block/kyber-iosched.c    |  8 +++-----
 include/linux/elevator.h |  3 +--
 4 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index ed93da2462ab..4f69e39c2f89 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4290,7 +4290,7 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+static void bfq_finish_request(struct request *rq)
 {
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd = bfqq->bfqd;
@@ -4324,7 +4324,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
 		 */
 
 		if (!RB_EMPTY_NODE(&rq->rb_node))
-			bfq_remove_request(q, rq);
+			bfq_remove_request(rq->q, rq);
 		bfq_put_rq_priv_body(bfqq);
 	}
 
@@ -4951,7 +4951,7 @@ static struct elv_fs_entry bfq_attrs[] = {
 static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
 		.get_rq_priv		= bfq_get_rq_private,
-		.put_rq_priv		= bfq_put_rq_private,
+		.finish_request		= bfq_finish_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1a45c287db64..9df7e0394a48 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -437,19 +437,16 @@ void blk_mq_free_request(struct request *rq)
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 
-	if (rq->rq_flags & RQF_ELVPRIV) {
-		if (e && e->type->ops.mq.put_rq_priv)
-			e->type->ops.mq.put_rq_priv(q, rq);
+	if (rq->rq_flags & (RQF_ELVPRIV | RQF_QUEUED)) {
+		if (e && e->type->ops.mq.finish_request)
+			e->type->ops.mq.finish_request(rq);
 		if (rq->elv.icq) {
 			put_io_context(rq->elv.icq->ioc);
 			rq->elv.icq = NULL;
 		}
 	}
 
-	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
-		e->type->ops.mq.put_request(rq);
-	else
-		blk_mq_finish_request(rq);
+	blk_mq_finish_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..2557b399f0a8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -446,13 +446,11 @@ static struct request *kyber_get_request(struct request_queue *q,
 	return rq;
 }
 
-static void kyber_put_request(struct request *rq)
+static void kyber_finish_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct kyber_queue_data *kqd = q->elevator->elevator_data;
+	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
 
 	rq_clear_domain_token(kqd, rq);
-	blk_mq_finish_request(rq);
 }
 
 static void kyber_completed_request(struct request *rq)
@@ -816,7 +814,7 @@ static struct elevator_type kyber_sched = {
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
 		.get_request = kyber_get_request,
-		.put_request = kyber_put_request,
+		.finish_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
 		.dispatch_request = kyber_dispatch_request,
 		.has_work = kyber_has_work,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 0e306c5a86d6..4acea351d43f 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -105,7 +105,7 @@ struct elevator_mq_ops {
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
 	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
-	void (*put_request)(struct request *);
+	void (*finish_request)(struct request *);
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -115,7 +115,6 @@ struct elevator_mq_ops {
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
 	int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
-	void (*put_rq_priv)(struct request_queue *, struct request *);
 	void (*init_icq)(struct io_cq *);
 	void (*exit_icq)(struct io_cq *);
 };
-- 
cgit v1.2.3


From 5bbf4e5a8e3a780874b2ed77bd1bd57850f3f6da Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Jun 2017 18:15:26 +0200
Subject: blk-mq-sched: unify request prepare methods

This patch makes sure we always allocate requests in the core blk-mq
code and use a common prepare_request method to initialize them for
both mq I/O schedulers.  For Kyber and additional limit_depth method
is added that is called before allocating the request.

Also because none of the intializations can really fail the new method
does not return an error - instead the bfq finish method is hardened
to deal with the no-IOC case.

Last but not least this removes the abuse of RQF_QUEUE by the blk-mq
scheduling code as RQF_ELFPRIV is all that is needed now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c      | 19 ++++++++++++-------
 block/blk-mq.c           | 22 ++++++----------------
 block/kyber-iosched.c    | 23 +++++++++++------------
 include/linux/elevator.h |  4 ++--
 4 files changed, 31 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index f037b005faa1..60d32700f104 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4292,8 +4292,14 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
 
 static void bfq_finish_request(struct request *rq)
 {
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
-	struct bfq_data *bfqd = bfqq->bfqd;
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd;
+
+	if (!rq->elv.icq)
+		return;
+
+	bfqq = RQ_BFQQ(rq);
+	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
 		bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4394,9 +4400,9 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 /*
  * Allocate bfq data structures associated with this request.
  */
-static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
-			      struct bio *bio)
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct bfq_io_cq *bic;
 	const int is_sync = rq_is_sync(rq);
@@ -4405,7 +4411,7 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 	bool split = false;
 
 	if (!rq->elv.icq)
-		return 1;
+		return;
 	bic = icq_to_bic(rq->elv.icq);
 
 	spin_lock_irq(&bfqd->lock);
@@ -4466,7 +4472,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		bfq_handle_burst(bfqd, bfqq);
 
 	spin_unlock_irq(&bfqd->lock);
-	return 0;
 }
 
 static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4945,7 +4950,7 @@ static struct elv_fs_entry bfq_attrs[] = {
 
 static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
-		.get_rq_priv		= bfq_get_rq_private,
+		.prepare_request	= bfq_prepare_request,
 		.finish_request		= bfq_finish_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2f380ab7a603..81d05c19d4b3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -298,16 +298,11 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 		 * Flush requests are special and go directly to the
 		 * dispatch list.
 		 */
-		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
-			rq = e->type->ops.mq.get_request(q, op, data);
-			if (rq)
-				rq->rq_flags |= RQF_QUEUED;
-			goto allocated;
-		}
+		if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+			e->type->ops.mq.limit_depth(op, data);
 	}
 
 	rq = __blk_mq_alloc_request(data, op);
-allocated:
 	if (!rq) {
 		blk_queue_exit(q);
 		return NULL;
@@ -315,17 +310,12 @@ allocated:
 
 	if (!op_is_flush(op)) {
 		rq->elv.icq = NULL;
-		if (e && e->type->ops.mq.get_rq_priv) {
+		if (e && e->type->ops.mq.prepare_request) {
 			if (e->type->icq_cache && rq_ioc(bio))
 				blk_mq_sched_assign_ioc(rq, bio);
 
-			if (e->type->ops.mq.get_rq_priv(q, rq, bio)) {
-				if (rq->elv.icq)
-					put_io_context(rq->elv.icq->ioc);
-				rq->elv.icq = NULL;
-			} else {
-				rq->rq_flags |= RQF_ELVPRIV;
-			}
+			e->type->ops.mq.prepare_request(rq, bio);
+			rq->rq_flags |= RQF_ELVPRIV;
 		}
 	}
 	data->hctx->queued++;
@@ -413,7 +403,7 @@ void blk_mq_free_request(struct request *rq)
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 	const int sched_tag = rq->internal_tag;
 
-	if (rq->rq_flags & (RQF_ELVPRIV | RQF_QUEUED)) {
+	if (rq->rq_flags & RQF_ELVPRIV) {
 		if (e && e->type->ops.mq.finish_request)
 			e->type->ops.mq.finish_request(rq);
 		if (rq->elv.icq) {
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 2557b399f0a8..a9f6fd3fab8e 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -426,24 +426,22 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 	}
 }
 
-static struct request *kyber_get_request(struct request_queue *q,
-					 unsigned int op,
-					 struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 {
-	struct kyber_queue_data *kqd = q->elevator->elevator_data;
-	struct request *rq;
-
 	/*
 	 * We use the scheduler tags as per-hardware queue queueing tokens.
 	 * Async requests can be limited at this stage.
 	 */
-	if (!op_is_sync(op))
+	if (!op_is_sync(op)) {
+		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
+
 		data->shallow_depth = kqd->async_depth;
+	}
+}
 
-	rq = __blk_mq_alloc_request(data, op);
-	if (rq)
-		rq_set_domain_token(rq, -1);
-	return rq;
+static void kyber_prepare_request(struct request *rq, struct bio *bio)
+{
+	rq_set_domain_token(rq, -1);
 }
 
 static void kyber_finish_request(struct request *rq)
@@ -813,7 +811,8 @@ static struct elevator_type kyber_sched = {
 		.exit_sched = kyber_exit_sched,
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
-		.get_request = kyber_get_request,
+		.limit_depth = kyber_limit_depth,
+		.prepare_request = kyber_prepare_request,
 		.finish_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
 		.dispatch_request = kyber_dispatch_request,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4acea351d43f..5bc8f8682a3e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -104,7 +104,8 @@ struct elevator_mq_ops {
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
+	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+	void (*prepare_request)(struct request *, struct bio *bio);
 	void (*finish_request)(struct request *);
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
@@ -114,7 +115,6 @@ struct elevator_mq_ops {
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
-	int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
 	void (*init_icq)(struct io_cq *);
 	void (*exit_icq)(struct io_cq *);
 };
-- 
cgit v1.2.3


From af67c31fba3b879b241536a48df703a2eee18ebf Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Sun, 18 Jun 2017 14:38:57 +1000
Subject: blk: remove bio_set arg from blk_queue_split()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

blk_queue_split() is always called with the last arg being q->bio_split,
where 'q' is the first arg.

Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses
q->bio_split.

This is inconsistent and unnecessary.  Remove the last arg and always use
q->bio_split inside blk_queue_split()

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Credit-to: Javier González <jg@lightnvm.io> (Noticed that lightnvm was missed)
Reviewed-by: Javier González <javier@cnexlabs.com>
Tested-by: Javier González <javier@cnexlabs.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c              | 2 +-
 block/blk-merge.c             | 9 ++++-----
 block/blk-mq.c                | 2 +-
 drivers/block/drbd/drbd_req.c | 2 +-
 drivers/block/pktcdvd.c       | 2 +-
 drivers/block/ps3vram.c       | 2 +-
 drivers/block/rsxx/dev.c      | 2 +-
 drivers/block/umem.c          | 2 +-
 drivers/lightnvm/pblk-init.c  | 4 ++--
 drivers/lightnvm/rrpc.c       | 2 +-
 drivers/md/md.c               | 2 +-
 drivers/s390/block/dcssblk.c  | 2 +-
 drivers/s390/block/xpram.c    | 2 +-
 include/linux/blkdev.h        | 3 +--
 14 files changed, 18 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 8592409db272..31b5ece6b18e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1723,7 +1723,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio->bi_status = BLK_STS_IOERR;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae406341..d59074556703 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -202,8 +202,7 @@ split:
 	return do_split ? new : NULL;
 }
 
-void blk_queue_split(struct request_queue *q, struct bio **bio,
-		     struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
 {
 	struct bio *split, *res;
 	unsigned nsegs;
@@ -211,13 +210,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+		split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	default:
 		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index be40c1d6e3a4..cc85de9d6b2d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1499,7 +1499,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index fca6b9914948..f6e865b2d543 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
 	struct drbd_device *device = (struct drbd_device *) q->queuedata;
 	unsigned long start_jif;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	start_jif = jiffies;
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index e8a381161db6..1f363638b453 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2414,7 +2414,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	pd = q->queuedata;
 	if (!pd) {
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 6fa2b8197013..e0e81cacd781 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 
 	dev_dbg(&dev->core, "%s\n", __func__);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&priv->lock);
 	busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 0b0a0a902355..4e8bdfa0aa31 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -151,7 +151,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 	struct rsxx_bio_meta *bio_meta;
 	blk_status_t st = BLK_STS_IOERR;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	might_sleep();
 
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 4b3c947697b1..0677d2514665 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
 		 (unsigned long long)bio->bi_iter.bi_sector,
 		 bio->bi_iter.bi_size);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&card->lock);
 	*card->biotail = bio;
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..b3fec8ec55b8 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -33,7 +33,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 	 * constraint. Writes can be of arbitrary size.
 	 */
 	if (bio_data_dir(bio) == READ) {
-		blk_queue_split(q, &bio, q->bio_split);
+		blk_queue_split(q, &bio);
 		ret = pblk_submit_read(pblk, bio);
 		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
 			bio_put(bio);
@@ -46,7 +46,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 	 * available for user I/O.
 	 */
 	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
-		blk_queue_split(q, &bio, q->bio_split);
+		blk_queue_split(q, &bio);
 
 	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
 }
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 8d3b53bb3307..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
 	struct nvm_rq *rqd;
 	int err;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_op(bio) == REQ_OP_DISCARD) {
 		rrpc_discard(rrpc, bio);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6d493b54d56c..d43df1176c23 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	unsigned int sectors;
 	int cpu;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (mddev == NULL || mddev->pers == NULL) {
 		bio_io_error(bio);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280af3e4..06eb1de52d1c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long source_addr;
 	unsigned long bytes_done;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	bytes_done = 0;
 	dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e755c8a3..a48f0d40c1d2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long page_addr;
 	unsigned long bytes;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if ((bio->bi_iter.bi_sector & 7) != 0 ||
 	    (bio->bi_iter.bi_size & 4095) != 0)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 76b6df862a12..670df402bc51 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -944,8 +944,7 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
-			    struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
-- 
cgit v1.2.3


From 011067b05668b05aae88e5a24cff0ca0a67ca0b0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Sun, 18 Jun 2017 14:38:57 +1000
Subject: blk: replace bioset_create_nobvec() with a flags arg to
 bioset_create()

"flags" arguments are often seen as good API design as they allow
easy extensibility.
bioset_create_nobvec() is implemented internally as a variation in
flags passed to __bioset_create().

To support future extension, make the internal structure part of the
API.
i.e. add a 'flags' argument to bioset_create() and discard
bioset_create_nobvec().

Note that the bio_split allocations in drivers/md/raid* do not need
the bvec mempool - they should have used bioset_create_nobvec().

Suggested-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                         | 60 ++++++++++++++-----------------------
 block/blk-core.c                    |  2 +-
 drivers/block/drbd/drbd_main.c      |  2 +-
 drivers/md/bcache/super.c           |  4 +--
 drivers/md/dm-crypt.c               |  2 +-
 drivers/md/dm-io.c                  |  2 +-
 drivers/md/dm.c                     |  2 +-
 drivers/md/md.c                     |  2 +-
 drivers/md/raid1.c                  |  2 +-
 drivers/md/raid10.c                 |  2 +-
 drivers/md/raid5-cache.c            |  2 +-
 drivers/md/raid5-ppl.c              |  2 +-
 drivers/md/raid5.c                  |  2 +-
 drivers/target/target_core_iblock.c |  2 +-
 fs/block_dev.c                      |  2 +-
 fs/btrfs/extent_io.c                |  3 +-
 fs/xfs/xfs_super.c                  |  3 +-
 include/linux/bio.h                 |  6 ++--
 18 files changed, 45 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 0e36ca5407b5..84b313bd3ce8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1921,9 +1921,26 @@ void bioset_free(struct bio_set *bs)
 }
 EXPORT_SYMBOL(bioset_free);
 
-static struct bio_set *__bioset_create(unsigned int pool_size,
-				       unsigned int front_pad,
-				       bool create_bvec_pool)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ * @flags:	Flags to modify behavior, currently only %BIOSET_NEED_BVECS
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
+ *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
+ *
+ */
+struct bio_set *bioset_create(unsigned int pool_size,
+			      unsigned int front_pad,
+			      int flags)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
@@ -1948,7 +1965,7 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (create_bvec_pool) {
+	if (flags & BIOSET_NEED_BVECS) {
 		bs->bvec_pool = biovec_create_pool(pool_size);
 		if (!bs->bvec_pool)
 			goto bad;
@@ -1963,41 +1980,8 @@ bad:
 	bioset_free(bs);
 	return NULL;
 }
-
-/**
- * bioset_create  - Create a bio_set
- * @pool_size:	Number of bio and bio_vecs to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- *    to ask for a number of bytes to be allocated in front of the bio.
- *    Front pad allocation is useful for embedding the bio inside
- *    another structure, to avoid allocating extra data to go with the bio.
- *    Note that the bio must be embedded at the END of that structure always,
- *    or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
-	return __bioset_create(pool_size, front_pad, true);
-}
 EXPORT_SYMBOL(bioset_create);
 
-/**
- * bioset_create_nobvec  - Create a bio_set without bio_vec mempool
- * @pool_size:	Number of bio to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Same functionality as bioset_create() except that mempool is not
- *    created for bio_vecs. Saving some memory for bio_clone_fast() users.
- */
-struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
-{
-	return __bioset_create(pool_size, front_pad, false);
-}
-EXPORT_SYMBOL(bioset_create_nobvec);
-
 #ifdef CONFIG_BLK_CGROUP
 
 /**
@@ -2112,7 +2096,7 @@ static int __init init_bio(void)
 	bio_integrity_init();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 31b5ece6b18e..62cf92550512 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -790,7 +790,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!q->bio_split)
 		goto fail_id;
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c365f57..b395fe391171 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2165,7 +2165,7 @@ static int drbd_create_mempools(void)
 		goto Enomem;
 
 	/* mempools */
-	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, BIOSET_NEED_BVECS);
 	if (drbd_md_io_bio_set == NULL)
 		goto Enomem;
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index fbc4f5412dec..abd6e825b39b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -782,7 +782,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
 	minor *= BCACHE_MINORS;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_minor, minor);
 		return -ENOMEM;
@@ -1516,7 +1516,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 586cef085c6a..237ff8e9752a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2677,7 +2677,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, 0);
+	cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS);
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index c8f8f3004085..5c4121024d92 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,7 @@ struct dm_io_client *dm_io_client_create(void)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(min_ios, 0);
+	client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS);
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c4b74f7398ac..3394a311de3d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2660,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
 		BUG();
 	}
 
-	pools->bs = bioset_create_nobvec(pool_size, front_pad);
+	pools->bs = bioset_create(pool_size, front_pad, 0);
 	if (!pools->bs)
 		goto out;
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d43df1176c23..07fe780ccd29 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev)
 	}
 
 	if (mddev->bio_set == NULL) {
-		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (!mddev->bio_set)
 			return -ENOMEM;
 	}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a1a3cf0293df..98ca2c1d3226 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	if (!conf->r1bio_pool)
 		goto abort;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto abort;
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3178273a7253..57a250fdbbcc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	if (!conf->r10bio_pool)
 		goto out;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto out;
 
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 3746c9c27e54..bfa1e907c472 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	if (!log->io_pool)
 		goto io_pool;
 
-	log->bs = bioset_create(R5L_POOL_SIZE, 0);
+	log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!log->bs)
 		goto io_bs;
 
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index e709ada0bf09..77cce3573aa8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
 		goto err;
 	}
 
-	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+	ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
 	if (!ppl_conf->bs) {
 		ret = -ENOMEM;
 		goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7171bfd48223..62c965be97e1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 			goto abort;
 	}
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto abort;
 	conf->mddev = mddev;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 75373624604b..c05d38016556 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
 		return -EINVAL;
 	}
 
-	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!ib_dev->ibd_bio_set) {
 		pr_err("IBLOCK: Unable to create bioset\n");
 		goto out;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bcd8e16a34e1..dd91c99e9ba0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -439,7 +439,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static __init int blkdev_init(void)
 {
-	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
 	if (!blkdev_dio_pool)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8f66e55e7ba1..19eedf2e630b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
 		goto free_state_cache;
 
 	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
-				     offsetof(struct btrfs_io_bio, bio));
+				     offsetof(struct btrfs_io_bio, bio),
+				     BIOSET_NEED_BVECS);
 	if (!btrfs_bioset)
 		goto free_buffer_cache;
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575f101d..97df4db13b2e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1766,7 +1766,8 @@ STATIC int __init
 xfs_init_zones(void)
 {
 	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
-			offsetof(struct xfs_ioend, io_inline_bio));
+			offsetof(struct xfs_ioend, io_inline_bio),
+			BIOSET_NEED_BVECS);
 	if (!xfs_ioend_bioset)
 		goto out;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9455aada1399..985dc645637e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -373,8 +373,10 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 	return bio_split(bio, sectors, gfp, bs);
 }
 
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
+enum {
+	BIOSET_NEED_BVECS = BIT(0),
+};
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
 
-- 
cgit v1.2.3


From 47e0fb461fca1a68a566c82fcc006cc787312d8c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Sun, 18 Jun 2017 14:38:57 +1000
Subject: blk: make the bioset rescue_workqueue optional.

This patch converts bioset_create() to not create a workqueue by
default, so alloctions will never trigger punt_bios_to_rescuer().  It
also introduces a new flag BIOSET_NEED_RESCUER which tells
bioset_create() to preserve the old behavior.

All callers of bioset_create() that are inside block device drivers,
are given the BIOSET_NEED_RESCUER flag.

biosets used by filesystems or other top-level users do not
need rescuing as the bio can never be queued behind other
bios.  This includes fs_bio_set, blkdev_dio_pool,
btrfs_bioset, xfs_ioend_bioset, and one allocated by
target_core_iblock.c.

biosets used by md/raid do not need rescuing as
their usage was recently audited and revised to never
risk deadlock.

It is hoped that most, if not all, of the remaining biosets
can end up being the non-rescued version.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Credit-to: Ming Lei <ming.lei@redhat.com> (minor fixes)
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                    | 13 +++++++++++--
 block/blk-core.c               |  3 ++-
 drivers/block/drbd/drbd_main.c |  4 +++-
 drivers/md/bcache/super.c      |  8 ++++++--
 drivers/md/dm-crypt.c          |  3 ++-
 drivers/md/dm-io.c             |  3 ++-
 drivers/md/dm.c                |  5 +++--
 include/linux/bio.h            |  1 +
 8 files changed, 30 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 84b313bd3ce8..2bd064906e06 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -363,6 +363,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 	struct bio_list punt, nopunt;
 	struct bio *bio;
 
+	if (WARN_ON_ONCE(!bs->rescue_workqueue))
+		return;
 	/*
 	 * In order to guarantee forward progress we must punt only bios that
 	 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -474,7 +476,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 
 		if (current->bio_list &&
 		    (!bio_list_empty(&current->bio_list[0]) ||
-		     !bio_list_empty(&current->bio_list[1])))
+		     !bio_list_empty(&current->bio_list[1])) &&
+		    bs->rescue_workqueue)
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -1925,7 +1928,8 @@ EXPORT_SYMBOL(bioset_free);
  * bioset_create  - Create a bio_set
  * @pool_size:	Number of bio and bio_vecs to cache in the mempool
  * @front_pad:	Number of bytes to allocate in front of the returned bio
- * @flags:	Flags to modify behavior, currently only %BIOSET_NEED_BVECS
+ * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
+ *              and %BIOSET_NEED_RESCUER
  *
  * Description:
  *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
@@ -1936,6 +1940,8 @@ EXPORT_SYMBOL(bioset_free);
  *    or things will break badly.
  *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
  *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
+ *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
+ *    dispatch queued requests when the mempool runs out of space.
  *
  */
 struct bio_set *bioset_create(unsigned int pool_size,
@@ -1971,6 +1977,9 @@ struct bio_set *bioset_create(unsigned int pool_size,
 			goto bad;
 	}
 
+	if (!(flags & BIOSET_NEED_RESCUER))
+		return bs;
+
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
 		goto bad;
diff --git a/block/blk-core.c b/block/blk-core.c
index 62cf92550512..9bd10c46a538 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -790,7 +790,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, (BIOSET_NEED_BVECS |
+							BIOSET_NEED_RESCUER));
 	if (!q->bio_split)
 		goto fail_id;
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index b395fe391171..bdf51b6977cf 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2165,7 +2165,9 @@ static int drbd_create_mempools(void)
 		goto Enomem;
 
 	/* mempools */
-	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, BIOSET_NEED_BVECS);
+	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER);
 	if (drbd_md_io_bio_set == NULL)
 		goto Enomem;
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index abd6e825b39b..8352fad765f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
 	minor *= BCACHE_MINORS;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) ||
+	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER)) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_minor, minor);
 		return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) ||
+	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 237ff8e9752a..9e1b72e8f7ef 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS);
+	cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+					    BIOSET_NEED_RESCUER));
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 5c4121024d92..81248a8a8b57 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS);
+	client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+						  BIOSET_NEED_RESCUER));
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3394a311de3d..fbd06b9f9467 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 
 		while ((bio = bio_list_pop(&list))) {
 			struct bio_set *bs = bio->bi_pool;
-			if (unlikely(!bs) || bs == fs_bio_set) {
+			if (unlikely(!bs) || bs == fs_bio_set ||
+			    !bs->rescue_workqueue) {
 				bio_list_add(&current->bio_list[i], bio);
 				continue;
 			}
@@ -2660,7 +2661,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
 		BUG();
 	}
 
-	pools->bs = bioset_create(pool_size, front_pad, 0);
+	pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
 	if (!pools->bs)
 		goto out;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 985dc645637e..32c786baa10a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -376,6 +376,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
 enum {
 	BIOSET_NEED_BVECS = BIT(0),
+	BIOSET_NEED_RESCUER = BIT(1),
 };
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
-- 
cgit v1.2.3


From 9b10f6a9c2aaab49c56b8cff0facdc1b64ed7e1c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Sun, 18 Jun 2017 14:38:59 +1000
Subject: block: remove bio_clone() and all references.

bio_clone() is no longer used.
Only bio_clone_bioset() or bio_clone_fast().
This is for the best, as bio_clone() used fs_bio_set,
and filesystems are unlikely to want to use bio_clone().

So remove bio_clone() and all references.
This includes a fix to some incorrect documentation.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/biodoc.txt | 2 +-
 block/bio.c                    | 2 +-
 block/blk-merge.c              | 6 +++---
 drivers/md/md.c                | 2 +-
 include/linux/bio.h            | 5 -----
 5 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf64b0f..9490f2845f06 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
 i/o is issued (since the bio may otherwise get freed in case i/o completion
 happens in the meantime).
 
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
 shares the bio_vec_list with the original bio (i.e. both point to the
 same bio_vec_list). This would typically be used for splitting i/o requests
 in lvm or md.
diff --git a/block/bio.c b/block/bio.c
index 2bd064906e06..89a51bd49ab7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -547,7 +547,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *
  * Description:
  *   Put a reference to a &struct bio, either one you have gotten with
- *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
  **/
 void bio_put(struct bio *bio)
 {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 51c84540d3bb..e7862e9dcc39 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -115,13 +115,13 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		 * With arbitrary bio size, the incoming bio may be very
 		 * big. We have to split the bio into small bios so that
 		 * each holds at most BIO_MAX_PAGES bvecs because
-		 * bio_clone() can fail to allocate big bvecs.
+		 * bio_clone_bioset() can fail to allocate big bvecs.
 		 *
-		 * Those drivers which will need to use bio_clone()
+		 * Those drivers which will need to use bio_clone_bioset()
 		 * should tell us in some way.  For now, impose the
 		 * BIO_MAX_PAGES limit on all queues.
 		 *
-		 * TODO: handle users of bio_clone() differently.
+		 * TODO: handle users of bio_clone_bioset() differently.
 		 */
 		if (bvecs++ >= BIO_MAX_PAGES)
 			goto split;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 07fe780ccd29..31bcbfb09fef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
 static bool create_on_open = true;
 
 /* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
  */
 
 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 32c786baa10a..40d054185277 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -395,11 +395,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
 }
 
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
-	return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
 static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
-- 
cgit v1.2.3


From 97e0120990f4a7037f72c0e115e5c7f514025738 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 6 Jun 2017 23:22:01 +0800
Subject: blk-mq: move blk_mq_quiesce_queue() into include/linux/blk-mq.h

We usually put blk_mq_*() into include/linux/blk-mq.h, so
move this API into there.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 1 +
 include/linux/blkdev.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b144b7b0e104..99348adb3e16 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -244,6 +244,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 670df402bc51..8423f6baf818 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -965,7 +965,6 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
-- 
cgit v1.2.3


From 4f084b41a0c04a69067be98a210e6b50969f9945 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 6 Jun 2017 23:22:02 +0800
Subject: blk-mq: introduce blk_mq_quiesce_queue_nowait()

This patch introduces blk_mq_quiesce_queue_nowait() so
that we can workaround mpt3sas for quiescing its queue.

Once mpt3sas is fixed, we can remove this helper.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 99348adb3e16..78a8b64074ea 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -262,6 +262,14 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
+/*
+ * FIXME: this helper is just for working around mpt3sas.
+ */
+static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+	blk_mq_stop_hw_queues(q);
+}
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
-- 
cgit v1.2.3


From e4e739131ac93d373cd2d2fd92820a6a39115ba5 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 6 Jun 2017 23:22:03 +0800
Subject: blk-mq: introduce blk_mq_unquiesce_queue

blk_mq_start_stopped_hw_queues() is used implictly
as counterpart of blk_mq_quiesce_queue() for unquiescing queue,
so we introduce blk_mq_unquiesce_queue() and make it
as counterpart of blk_mq_quiesce_queue() explicitly.

This function is for improving the current quiescing mechanism
in the following patches.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 13 +++++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index cc85de9d6b2d..07785b5cf2bc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -181,6 +181,19 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
+/*
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function recovers queue into the state before quiescing
+ * which is done by blk_mq_quiesce_queue.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+	blk_mq_start_stopped_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 78a8b64074ea..787d8a2a2ac6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -245,6 +245,7 @@ void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
-- 
cgit v1.2.3


From f4560ffe8cec1361b1021d81aca6a4173f8e7c87 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sun, 18 Jun 2017 14:24:27 -0600
Subject: blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue

It is required that no dispatch can happen any more once
blk_mq_quiesce_queue() returns, and we don't have such requirement
on APIs of stopping queue.

But blk_mq_quiesce_queue() still may not block/drain dispatch in the
the case of BLK_MQ_S_START_ON_RUN, so use the new introduced flag of
QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical
sections for fixing this issue.

Also blk_mq_quiesce_queue() is implemented via stopping queue, which
limits its uses, and easy to cause race, because any queue restart in
other paths may break blk_mq_quiesce_queue(). With the introduced
flag of QUEUE_FLAG_QUIESCED, we don't need to depend on stopping queue
for quiescing any more.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c   |  3 ++-
 block/blk-mq.c         | 11 ++++++++++-
 include/linux/blk-mq.h |  4 ++++
 include/linux/blkdev.h |  2 ++
 4 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 254d1c164567..9f025289da63 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -58,7 +58,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	bool did_work = false;
 	LIST_HEAD(rq_list);
 
-	if (unlikely(blk_mq_hctx_stopped(hctx)))
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
 		return;
 
 	hctx->run++;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 07785b5cf2bc..40b22c7f684e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -170,6 +170,10 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 
 	__blk_mq_stop_hw_queues(q, true);
 
+	spin_lock_irq(q->queue_lock);
+	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irq(q->queue_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
 			synchronize_srcu(&hctx->queue_rq_srcu);
@@ -190,6 +194,10 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
  */
 void blk_mq_unquiesce_queue(struct request_queue *q)
 {
+	spin_lock_irq(q->queue_lock);
+	queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irq(q->queue_lock);
+
 	blk_mq_start_stopped_hw_queues(q, true);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
@@ -1444,7 +1452,8 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	blk_status_t ret;
 	bool run_queue = true;
 
-	if (blk_mq_hctx_stopped(hctx)) {
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
 		run_queue = false;
 		goto insert;
 	}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 787d8a2a2ac6..de6536c14ae7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -268,6 +268,10 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
  */
 static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 {
+	spin_lock_irq(q->queue_lock);
+	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irq(q->queue_lock);
+
 	blk_mq_stop_hw_queues(q);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8423f6baf818..22cfba64ce81 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -619,6 +619,7 @@ struct request_queue {
 #define QUEUE_FLAG_POLL_STATS  28	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  29	/* queue has been registered to a disk */
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 30	/* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED    31	/* queue has been quiesced */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -715,6 +716,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 
 static inline bool blk_account_rq(struct request *rq)
 {
-- 
cgit v1.2.3


From 1d9e9bc6b56e1bb7e33e7e2e1b99d7088356c006 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 6 Jun 2017 23:22:08 +0800
Subject: blk-mq: don't stop queue for quiescing

Queue can be started by other blk-mq APIs and can be used in
different cases, this limits uses of blk_mq_quiesce_queue()
if it is based on stopping queue, and make its usage very
difficult, especially users have to use the stop queue APIs
carefully for avoiding to break blk_mq_quiesce_queue().

We have applied the QUIESCED flag for draining and blocking
dispatch, so it isn't necessary to stop queue any more.

After stopping queue is removed, blk_mq_quiesce_queue() can
be used safely and easily, then users won't worry about queue
restarting during quiescing at all.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 9 +++------
 include/linux/blk-mq.h | 2 --
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f2a73190f60d..dbae586602f6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -169,11 +169,7 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	unsigned int i;
 	bool rcu = false;
 
-	__blk_mq_stop_hw_queues(q, true);
-
-	spin_lock_irq(q->queue_lock);
-	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
-	spin_unlock_irq(q->queue_lock);
+	blk_mq_quiesce_queue_nowait(q);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
@@ -199,7 +195,8 @@ void blk_mq_unquiesce_queue(struct request_queue *q)
 	queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
 	spin_unlock_irq(q->queue_lock);
 
-	blk_mq_start_stopped_hw_queues(q, true);
+	/* dispatch requests which are inserted during quiescing */
+	blk_mq_run_hw_queues(q, true);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index de6536c14ae7..f1bd13ae8f57 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -271,8 +271,6 @@ static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 	spin_lock_irq(q->queue_lock);
 	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
 	spin_unlock_irq(q->queue_lock);
-
-	blk_mq_stop_hw_queues(q);
 }
 
 /*
-- 
cgit v1.2.3


From e33a3f84f88f13eab6a45c5230c9b9ee9ac78e60 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 30 Mar 2017 12:15:42 +0200
Subject: NFC: nfcmrvl: allow gpio 0 for reset signalling

Allow gpio 0 to be used for reset signalling, and instead use negative
errnos to disable the reset functionality.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/nfcmrvl/main.c            | 9 ++++-----
 include/linux/platform_data/nfcmrvl.h | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index a5faa604a3b4..e65d027b91fa 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -123,12 +123,12 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
 
 	memcpy(&priv->config, pdata, sizeof(*pdata));
 
-	if (priv->config.reset_n_io) {
+	if (gpio_is_valid(priv->config.reset_n_io)) {
 		rc = gpio_request_one(priv->config.reset_n_io,
 				      GPIOF_OUT_INIT_LOW,
 				      "nfcmrvl_reset_n");
 		if (rc < 0) {
-			priv->config.reset_n_io = 0;
+			priv->config.reset_n_io = -EINVAL;
 			nfc_err(dev, "failed to request reset_n io\n");
 		}
 	}
@@ -183,7 +183,7 @@ error_fw_dnld_deinit:
 error_free_dev:
 	nci_free_device(priv->ndev);
 error_free_gpio:
-	if (priv->config.reset_n_io)
+	if (gpio_is_valid(priv->config.reset_n_io))
 		gpio_free(priv->config.reset_n_io);
 	kfree(priv);
 	return ERR_PTR(rc);
@@ -199,7 +199,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
 
 	nfcmrvl_fw_dnld_deinit(priv);
 
-	if (priv->config.reset_n_io)
+	if (gpio_is_valid(priv->config.reset_n_io))
 		gpio_free(priv->config.reset_n_io);
 
 	nci_unregister_device(ndev);
@@ -267,7 +267,6 @@ int nfcmrvl_parse_dt(struct device_node *node,
 	reset_n_io = of_get_named_gpio(node, "reset-n-io", 0);
 	if (reset_n_io < 0) {
 		pr_info("no reset-n-io config\n");
-		reset_n_io = 0;
 	} else if (!gpio_is_valid(reset_n_io)) {
 		pr_err("invalid reset-n-io GPIO\n");
 		return reset_n_io;
diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h
index a6f9d633f5be..9e75ac8d19be 100644
--- a/include/linux/platform_data/nfcmrvl.h
+++ b/include/linux/platform_data/nfcmrvl.h
@@ -23,7 +23,7 @@ struct nfcmrvl_platform_data {
 	 */
 
 	/* GPIO that is wired to RESET_N signal */
-	unsigned int reset_n_io;
+	int reset_n_io;
 	/* Tell if transport is muxed in HCI one */
 	unsigned int hci_muxed;
 
-- 
cgit v1.2.3


From 1be7107fbe18eed3e319a6c3e83c78254b693acb Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 19 Jun 2017 04:03:24 -0700
Subject: mm: larger stack guard gap, between vmas

Stack guard page is a useful feature to reduce a risk of stack smashing
into a different mapping. We have been using a single page gap which
is sufficient to prevent having stack adjacent to a different mapping.
But this seems to be insufficient in the light of the stack usage in
userspace. E.g. glibc uses as large as 64kB alloca() in many commonly
used functions. Others use constructs liks gid_t buffer[NGROUPS_MAX]
which is 256kB or stack strings with MAX_ARG_STRLEN.

This will become especially dangerous for suid binaries and the default
no limit for the stack size limit because those applications can be
tricked to consume a large portion of the stack and a single glibc call
could jump over the guard page. These attacks are not theoretical,
unfortunatelly.

Make those attacks less probable by increasing the stack guard gap
to 1MB (on systems with 4k pages; but make it depend on the page size
because systems with larger base pages might cap stack allocations in
the PAGE_SIZE units) which should cover larger alloca() and VLA stack
allocations. It is obviously not a full fix because the problem is
somehow inherent, but it should reduce attack space a lot.

One could argue that the gap size should be configurable from userspace,
but that can be done later when somebody finds that the new 1MB is wrong
for some special case applications.  For now, add a kernel command line
option (stack_guard_gap) to specify the stack gap size (in page units).

Implementation wise, first delete all the old code for stack guard page:
because although we could get away with accounting one extra page in a
stack vma, accounting a larger gap can break userspace - case in point,
a program run with "ulimit -S -v 20000" failed when the 1MB gap was
counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
and strict non-overcommit mode.

Instead of keeping gap inside the stack vma, maintain the stack guard
gap as a gap between vmas: using vm_start_gap() in place of vm_start
(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
places which need to respect the gap - mainly arch_get_unmapped_area(),
and and the vma tree's subtree_gap support for that.

Original-patch-by: Oleg Nesterov <oleg@redhat.com>
Original-patch-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |   7 ++
 arch/arc/mm/mmap.c                              |   2 +-
 arch/arm/mm/mmap.c                              |   4 +-
 arch/frv/mm/elf-fdpic.c                         |   2 +-
 arch/mips/mm/mmap.c                             |   2 +-
 arch/parisc/kernel/sys_parisc.c                 |  15 ++-
 arch/powerpc/mm/hugetlbpage-radix.c             |   2 +-
 arch/powerpc/mm/mmap.c                          |   4 +-
 arch/powerpc/mm/slice.c                         |   2 +-
 arch/s390/mm/mmap.c                             |   4 +-
 arch/sh/mm/mmap.c                               |   4 +-
 arch/sparc/kernel/sys_sparc_64.c                |   4 +-
 arch/sparc/mm/hugetlbpage.c                     |   2 +-
 arch/tile/mm/hugetlbpage.c                      |   2 +-
 arch/x86/kernel/sys_x86_64.c                    |   4 +-
 arch/x86/mm/hugetlbpage.c                       |   2 +-
 arch/xtensa/kernel/syscall.c                    |   2 +-
 fs/hugetlbfs/inode.c                            |   2 +-
 fs/proc/task_mmu.c                              |   4 -
 include/linux/mm.h                              |  53 ++++-----
 mm/gup.c                                        |   5 -
 mm/memory.c                                     |  38 ------
 mm/mmap.c                                       | 149 ++++++++++++++----------
 23 files changed, 152 insertions(+), 163 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0f5c3b4347c6..7737ab5d04b2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3811,6 +3811,13 @@
 			expediting.  Set to zero to disable automatic
 			expediting.
 
+	stack_guard_gap=	[MM]
+			override the default stack gap protection. The value
+			is in page units and it defines how many pages prior
+			to (for stacks growing down) resp. after (for stacks
+			growing up) the main stack are reserved for no other
+			mapping. Default value is 256 pages.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index 3e25e8d6486b..2e13683dfb24 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 2239fde10b80..f0701d8d24df 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
index da82c25301e7..46aa289c5102 100644
--- a/arch/frv/mm/elf-fdpic.c
+++ b/arch/frv/mm/elf-fdpic.c
@@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(current->mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto success;
 	}
 
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 64dd8bdd92c3..28adeabe851f 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index e5288638a1d9..378a754ca186 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	unsigned long task_size = TASK_SIZE;
 	int do_color_align, last_mmap;
 	struct vm_unmapped_area_info info;
@@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		else
 			addr = PAGE_ALIGN(addr);
 
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
@@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	int do_color_align, last_mmap;
@@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
 		else
 			addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 6575b9aabef4..a12e86395025 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	/*
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 9dbd2a733d6b..0ee6be4f1ba4 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 966b9fccfa66..45f6740dd407 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	return (!vma || (addr + len) <= vm_start_gap(vma));
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b017daed6887..b854b1da281a 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
@@ -151,7 +151,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 08e7af0be4a7..6a1a1297baae 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index ef4520efc813..043544d0cda3 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 7c29d38e6b99..88855e383b34 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index cb10153b5c9f..03e5cc4e76e4 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 207b8f2582c7..213ddf3e937d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (end - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 302f43fd9c28..adad702b39cd 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 06937928cb72..74afbf02d07e 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		/* At this point:  (!vmm || addr < vmm->vm_end). */
 		if (TASK_SIZE - len < addr)
 			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
+		if (!vmm || addr + len <= vm_start_gap(vmm))
 			return addr;
 		addr = vmm->vm_end;
 		if (flags & MAP_SHARED)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dde861387a40..d44f5456eb9b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..520802da059c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
-	if (stack_guard_page_start(vma, start))
-		start += PAGE_SIZE;
 	end = vma->vm_end;
-	if (stack_guard_page_end(vma, end))
-		end -= PAGE_SIZE;
 
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b892e95d4929..6f543a47fc92 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1393,12 +1393,6 @@ int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
 	return !vma->vm_ops;
@@ -1414,28 +1408,6 @@ bool vma_is_shmem(struct vm_area_struct *vma);
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
-					     unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
-					   unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSUP) &&
-		(vma->vm_end == addr) &&
-		!vma_growsup(vma->vm_next, addr);
-}
-
 int vma_is_stack_for_current(struct vm_area_struct *vma);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
@@ -2222,6 +2194,7 @@ void page_cache_async_readahead(struct address_space *mapping,
 				pgoff_t offset,
 				unsigned long size);
 
+extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 
@@ -2250,6 +2223,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
 	return vma;
 }
 
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_start = vma->vm_start;
+
+	if (vma->vm_flags & VM_GROWSDOWN) {
+		vm_start -= stack_guard_gap;
+		if (vm_start > vma->vm_start)
+			vm_start = 0;
+	}
+	return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_end = vma->vm_end;
+
+	if (vma->vm_flags & VM_GROWSUP) {
+		vm_end += stack_guard_gap;
+		if (vm_end < vma->vm_end)
+			vm_end = -PAGE_SIZE;
+	}
+	return vm_end;
+}
+
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
 {
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
diff --git a/mm/gup.c b/mm/gup.c
index b3c7214d710d..576c4df58882 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
-	/* For mm_populate(), just skip the stack guard page. */
-	if ((*flags & FOLL_POPULATE) &&
-			(stack_guard_page_start(vma, address) ||
-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
-		return -ENOENT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
diff --git a/mm/memory.c b/mm/memory.c
index 2e65df1831d9..bb11c474857e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2854,40 +2854,6 @@ out_release:
 	return ret;
 }
 
-/*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, vmf->address) < 0)
-		return VM_FAULT_SIGSEGV;
-
 	/*
 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
 	 * pte_offset_map() on pmds where a huge pmd might be created
diff --git a/mm/mmap.c b/mm/mmap.c
index f82741e199c0..8e07976d5e47 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long retval;
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *next;
 	unsigned long min_brk;
 	bool populate;
 	LIST_HEAD(uf);
@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	}
 
 	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+	next = find_vma(mm, oldbrk);
+	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
@@ -253,10 +255,22 @@ out:
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, subtree_gap;
-	max = vma->vm_start;
-	if (vma->vm_prev)
-		max -= vma->vm_prev->vm_end;
+	unsigned long max, prev_end, subtree_gap;
+
+	/*
+	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+	 * allow two stack_guard_gaps between them here, and when choosing
+	 * an unmapped area; whereas when expanding we only require one.
+	 * That's a little inconsistent, but keeps the code here simpler.
+	 */
+	max = vm_start_gap(vma);
+	if (vma->vm_prev) {
+		prev_end = vm_end_gap(vma->vm_prev);
+		if (max > prev_end)
+			max -= prev_end;
+		else
+			max = 0;
+	}
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm)
 			anon_vma_unlock_read(anon_vma);
 		}
 
-		highest_address = vma->vm_end;
+		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
 	else
-		mm->highest_vm_end = vma->vm_end;
+		mm->highest_vm_end = vm_end_gap(vma);
 
 	/*
 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -856,7 +870,7 @@ again:
 			vma_gap_update(vma);
 		if (end_changed) {
 			if (!next)
-				mm->highest_vm_end = end;
+				mm->highest_vm_end = vm_end_gap(vma);
 			else if (!adjust_next)
 				vma_gap_update(next);
 		}
@@ -941,7 +955,7 @@ again:
 			 * mm->highest_vm_end doesn't need any update
 			 * in remove_next == 1 case.
 			 */
-			VM_WARN_ON(mm->highest_vm_end != end);
+			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
 	}
 	if (insert && file)
@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit left subtree if it looks promising */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
 			struct vm_area_struct *left =
 				rb_entry(vma->vm_rb.rb_left,
@@ -1798,7 +1812,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 			}
 		}
 
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
 		/* Check if current node has a suitable gap */
 		if (gap_start > high_limit)
@@ -1825,8 +1839,8 @@ check_current:
 			vma = rb_entry(rb_parent(prev),
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_left) {
-				gap_start = vma->vm_prev->vm_end;
-				gap_end = vma->vm_start;
+				gap_start = vm_end_gap(vma->vm_prev);
+				gap_end = vm_start_gap(vma);
 				goto check_current;
 			}
 		}
@@ -1890,7 +1904,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit right subtree if it looks promising */
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
 			struct vm_area_struct *right =
 				rb_entry(vma->vm_rb.rb_right,
@@ -1903,7 +1917,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 check_current:
 		/* Check if current node has a suitable gap */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end < low_limit)
 			return -ENOMEM;
 		if (gap_start <= high_limit && gap_end - gap_start >= length)
@@ -1929,7 +1943,7 @@ check_current:
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_right) {
 				gap_start = vma->vm_prev ?
-					vma->vm_prev->vm_end : 0;
+					vm_end_gap(vma->vm_prev) : 0;
 				goto check_current;
 			}
 		}
@@ -1967,7 +1981,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -1978,9 +1992,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2003,7 +2018,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
@@ -2018,9 +2033,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)) &&
+				(!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2155,21 +2171,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start, actual_size;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, vma->vm_flags, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
-	actual_size = size;
-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-		actual_size -= PAGE_SIZE;
-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2207,17 +2221,30 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next;
+	unsigned long gap_addr;
 	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
 	/* Guard against wrapping around to address 0. */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else
+	address &= PAGE_MASK;
+	address += PAGE_SIZE;
+	if (!address)
 		return -ENOMEM;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address + stack_guard_gap;
+	if (gap_addr < address)
+		return -ENOMEM;
+	next = vma->vm_next;
+	if (next && next->vm_start < gap_addr) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2261,7 +2288,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				if (vma->vm_next)
 					vma_gap_update(vma->vm_next);
 				else
-					mm->highest_vm_end = address;
+					mm->highest_vm_end = vm_end_gap(vma);
 				spin_unlock(&mm->page_table_lock);
 
 				perf_event_mmap(vma);
@@ -2282,6 +2309,8 @@ int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *prev;
+	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2289,6 +2318,17 @@ int expand_downwards(struct vm_area_struct *vma,
 	if (error)
 		return error;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address - stack_guard_gap;
+	if (gap_addr > address)
+		return -ENOMEM;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end > gap_addr) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2343,28 +2383,25 @@ int expand_downwards(struct vm_area_struct *vma,
 	return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = simple_strtoul(p, &endptr, 10);
+	if (!*endptr)
+		stack_guard_gap = val << PAGE_SHIFT;
+
+	return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *next;
-
-	address &= PAGE_MASK;
-	next = vma->vm_next;
-	if (next && next->vm_start == address + PAGE_SIZE) {
-		if (!(next->vm_flags & VM_GROWSUP))
-			return -ENOMEM;
-	}
 	return expand_upwards(vma, address);
 }
 
@@ -2386,14 +2423,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *prev;
-
-	address &= PAGE_MASK;
-	prev = vma->vm_prev;
-	if (prev && prev->vm_end == address) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
-			return -ENOMEM;
-	}
 	return expand_downwards(vma, address);
 }
 
@@ -2491,7 +2520,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma->vm_prev = prev;
 		vma_gap_update(vma);
 	} else
-		mm->highest_vm_end = prev ? prev->vm_end : 0;
+		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
 	tail_vma->vm_next = NULL;
 
 	/* Kill the cache */
-- 
cgit v1.2.3


From 57129044f5044dcd73c22d91491906104bd331fd Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Mon, 5 Jun 2017 12:08:05 -0700
Subject: mfd: intel_soc_pmic_bxtwc: Use chained IRQs for second level IRQ
 chips

Whishkey cove PMIC has support to mask/unmask interrupts at two levels.
At first level we can mask/unmask interrupt domains like TMU, GPIO, ADC,
CHGR, BCU THERMAL and PWRBTN and at second level, it provides facility
to mask/unmask individual interrupts belong each of this domain. For
example, in case of TMU, at first level we have TMU interrupt domain,
and at second level we have two interrupts, wake alarm, system alarm that
belong to the TMU interrupt domain.

Currently, in this driver all first level IRQs are registered as part of
IRQ chip(bxtwc_regmap_irq_chip). By default, after you register the IRQ
chip from your driver, all IRQs in that chip will masked and can only be
enabled if that IRQ is requested using request_irq() call. This is the
default Linux IRQ behavior model. And whenever a dependent device that
belongs to PMIC requests only the second level IRQ and not explicitly
unmask the first level IRQ, then in essence the second level IRQ will
still be disabled. For example, if TMU device driver request wake_alarm
IRQ and not explicitly unmask TMU level 1 IRQ then according to the default
Linux IRQ model,  wake_alarm IRQ will still be disabled. So the proper
solution to fix this issue is to use the chained IRQ chip concept. We
should chain all the second level chip IRQs to the corresponding first
level IRQ. To do this, we need to create separate IRQ chips for every
group of second level IRQs.

In case of TMU, when adding second level IRQ chip, instead of using PMIC
IRQ we should use the corresponding first level IRQ. So the following
code will change from

ret = regmap_add_irq_chip(pmic->regmap, pmic->irq, ...)

to,

virq = regmap_irq_get_virq(&pmic->irq_chip_data, BXTWC_TMU_LVL1_IRQ);

ret = regmap_add_irq_chip(pmic->regmap, virq, ...)

In case of Whiskey Cove Type-C driver, Since USBC IRQ is moved under
charger level2 IRQ chip. We should use charger IRQ chip(irq_chip_data_chgr)
to get the USBC virtual IRQ number.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Revieved-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/intel_soc_pmic_bxtwc.c | 168 ++++++++++++++++++++++++++++++-------
 drivers/usb/typec/typec_wcove.c    |   2 +-
 include/linux/mfd/intel_soc_pmic.h |   5 +-
 3 files changed, 143 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c
index feeda6ed546c..15bc052704a6 100644
--- a/drivers/mfd/intel_soc_pmic_bxtwc.c
+++ b/drivers/mfd/intel_soc_pmic_bxtwc.c
@@ -82,20 +82,28 @@ enum bxtwc_irqs {
 	BXTWC_PWRBTN_IRQ,
 };
 
-enum bxtwc_irqs_level2 {
-	/* Level 2 */
+enum bxtwc_irqs_bcu {
 	BXTWC_BCU_IRQ = 0,
-	BXTWC_ADC_IRQ,
-	BXTWC_USBC_IRQ,
+};
+
+enum bxtwc_irqs_adc {
+	BXTWC_ADC_IRQ = 0,
+};
+
+enum bxtwc_irqs_chgr {
+	BXTWC_USBC_IRQ = 0,
 	BXTWC_CHGR0_IRQ,
 	BXTWC_CHGR1_IRQ,
-	BXTWC_CRIT_IRQ,
 };
 
 enum bxtwc_irqs_tmu {
 	BXTWC_TMU_IRQ = 0,
 };
 
+enum bxtwc_irqs_crit {
+	BXTWC_CRIT_IRQ = 0,
+};
+
 static const struct regmap_irq bxtwc_regmap_irqs[] = {
 	REGMAP_IRQ_REG(BXTWC_PWRBTN_LVL1_IRQ, 0, BIT(0)),
 	REGMAP_IRQ_REG(BXTWC_TMU_LVL1_IRQ, 0, BIT(1)),
@@ -108,19 +116,28 @@ static const struct regmap_irq bxtwc_regmap_irqs[] = {
 	REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 1, 0x03),
 };
 
-static const struct regmap_irq bxtwc_regmap_irqs_level2[] = {
+static const struct regmap_irq bxtwc_regmap_irqs_bcu[] = {
 	REGMAP_IRQ_REG(BXTWC_BCU_IRQ, 0, 0x1f),
-	REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 1, 0xff),
-	REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 2, BIT(5)),
-	REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 2, 0x1f),
-	REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 3, 0x1f),
-	REGMAP_IRQ_REG(BXTWC_CRIT_IRQ, 6, 0x03),
+};
+
+static const struct regmap_irq bxtwc_regmap_irqs_adc[] = {
+	REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 0, 0xff),
+};
+
+static const struct regmap_irq bxtwc_regmap_irqs_chgr[] = {
+	REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, BIT(5)),
+	REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 0, 0x1f),
+	REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 1, 0x1f),
 };
 
 static const struct regmap_irq bxtwc_regmap_irqs_tmu[] = {
 	REGMAP_IRQ_REG(BXTWC_TMU_IRQ, 0, 0x06),
 };
 
+static const struct regmap_irq bxtwc_regmap_irqs_crit[] = {
+	REGMAP_IRQ_REG(BXTWC_CRIT_IRQ, 0, 0x03),
+};
+
 static struct regmap_irq_chip bxtwc_regmap_irq_chip = {
 	.name = "bxtwc_irq_chip",
 	.status_base = BXTWC_IRQLVL1,
@@ -130,15 +147,6 @@ static struct regmap_irq_chip bxtwc_regmap_irq_chip = {
 	.num_regs = 2,
 };
 
-static struct regmap_irq_chip bxtwc_regmap_irq_chip_level2 = {
-	.name = "bxtwc_irq_chip_level2",
-	.status_base = BXTWC_BCUIRQ,
-	.mask_base = BXTWC_MBCUIRQ,
-	.irqs = bxtwc_regmap_irqs_level2,
-	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_level2),
-	.num_regs = 10,
-};
-
 static struct regmap_irq_chip bxtwc_regmap_irq_chip_tmu = {
 	.name = "bxtwc_irq_chip_tmu",
 	.status_base = BXTWC_TMUIRQ,
@@ -148,6 +156,42 @@ static struct regmap_irq_chip bxtwc_regmap_irq_chip_tmu = {
 	.num_regs = 1,
 };
 
+static struct regmap_irq_chip bxtwc_regmap_irq_chip_bcu = {
+	.name = "bxtwc_irq_chip_bcu",
+	.status_base = BXTWC_BCUIRQ,
+	.mask_base = BXTWC_MBCUIRQ,
+	.irqs = bxtwc_regmap_irqs_bcu,
+	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_bcu),
+	.num_regs = 1,
+};
+
+static struct regmap_irq_chip bxtwc_regmap_irq_chip_adc = {
+	.name = "bxtwc_irq_chip_adc",
+	.status_base = BXTWC_ADCIRQ,
+	.mask_base = BXTWC_MADCIRQ,
+	.irqs = bxtwc_regmap_irqs_adc,
+	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_adc),
+	.num_regs = 1,
+};
+
+static struct regmap_irq_chip bxtwc_regmap_irq_chip_chgr = {
+	.name = "bxtwc_irq_chip_chgr",
+	.status_base = BXTWC_CHGR0IRQ,
+	.mask_base = BXTWC_MCHGR0IRQ,
+	.irqs = bxtwc_regmap_irqs_chgr,
+	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_chgr),
+	.num_regs = 2,
+};
+
+static struct regmap_irq_chip bxtwc_regmap_irq_chip_crit = {
+	.name = "bxtwc_irq_chip_crit",
+	.status_base = BXTWC_CRITIRQ,
+	.mask_base = BXTWC_MCRITIRQ,
+	.irqs = bxtwc_regmap_irqs_crit,
+	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_crit),
+	.num_regs = 1,
+};
+
 static struct resource gpio_resources[] = {
 	DEFINE_RES_IRQ_NAMED(BXTWC_GPIO_LVL1_IRQ, "GPIO"),
 };
@@ -357,6 +401,26 @@ static const struct regmap_config bxtwc_regmap_config = {
 	.reg_read = regmap_ipc_byte_reg_read,
 };
 
+static int bxtwc_add_chained_irq_chip(struct intel_soc_pmic *pmic,
+				struct regmap_irq_chip_data *pdata,
+				int pirq, int irq_flags,
+				const struct regmap_irq_chip *chip,
+				struct regmap_irq_chip_data **data)
+{
+	int irq;
+
+	irq = regmap_irq_get_virq(pdata, pirq);
+	if (irq < 0) {
+		dev_err(pmic->dev,
+			"Failed to get parent vIRQ(%d) for chip %s, ret:%d\n",
+			pirq, chip->name, irq);
+		return irq;
+	}
+
+	return devm_regmap_add_irq_chip(pmic->dev, pmic->regmap, irq, irq_flags,
+					0, chip, data);
+}
+
 static int bxtwc_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -408,21 +472,65 @@ static int bxtwc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = devm_regmap_add_irq_chip(&pdev->dev, pmic->regmap, pmic->irq,
-				       IRQF_ONESHOT | IRQF_SHARED,
-				       0, &bxtwc_regmap_irq_chip_level2,
-				       &pmic->irq_chip_data_level2);
+	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
+					 BXTWC_TMU_LVL1_IRQ,
+					 IRQF_ONESHOT,
+					 &bxtwc_regmap_irq_chip_tmu,
+					 &pmic->irq_chip_data_tmu);
 	if (ret) {
-		dev_err(&pdev->dev, "Failed to add secondary IRQ chip\n");
+		dev_err(&pdev->dev, "Failed to add TMU IRQ chip\n");
 		return ret;
 	}
 
-	ret = devm_regmap_add_irq_chip(&pdev->dev, pmic->regmap, pmic->irq,
-				       IRQF_ONESHOT | IRQF_SHARED,
-				       0, &bxtwc_regmap_irq_chip_tmu,
-				       &pmic->irq_chip_data_tmu);
+	/* Add chained IRQ handler for BCU IRQs */
+	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
+					 BXTWC_BCU_LVL1_IRQ,
+					 IRQF_ONESHOT,
+					 &bxtwc_regmap_irq_chip_bcu,
+					 &pmic->irq_chip_data_bcu);
+
+
 	if (ret) {
-		dev_err(&pdev->dev, "Failed to add TMU IRQ chip\n");
+		dev_err(&pdev->dev, "Failed to add BUC IRQ chip\n");
+		return ret;
+	}
+
+	/* Add chained IRQ handler for ADC IRQs */
+	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
+					 BXTWC_ADC_LVL1_IRQ,
+					 IRQF_ONESHOT,
+					 &bxtwc_regmap_irq_chip_adc,
+					 &pmic->irq_chip_data_adc);
+
+
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to add ADC IRQ chip\n");
+		return ret;
+	}
+
+	/* Add chained IRQ handler for CHGR IRQs */
+	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
+					 BXTWC_CHGR_LVL1_IRQ,
+					 IRQF_ONESHOT,
+					 &bxtwc_regmap_irq_chip_chgr,
+					 &pmic->irq_chip_data_chgr);
+
+
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to add CHGR IRQ chip\n");
+		return ret;
+	}
+
+	/* Add chained IRQ handler for CRIT IRQs */
+	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
+					 BXTWC_CRIT_LVL1_IRQ,
+					 IRQF_ONESHOT,
+					 &bxtwc_regmap_irq_chip_crit,
+					 &pmic->irq_chip_data_crit);
+
+
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to add CRIT IRQ chip\n");
 		return ret;
 	}
 
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c
index d5a7b21fa3f1..00a4bd20fb60 100644
--- a/drivers/usb/typec/typec_wcove.c
+++ b/drivers/usb/typec/typec_wcove.c
@@ -303,7 +303,7 @@ static int wcove_typec_probe(struct platform_device *pdev)
 	wcove->dev = &pdev->dev;
 	wcove->regmap = pmic->regmap;
 
-	ret = regmap_irq_get_virq(pmic->irq_chip_data_level2,
+	ret = regmap_irq_get_virq(pmic->irq_chip_data_chgr,
 				  platform_get_irq(pdev, 0));
 	if (ret < 0)
 		return ret;
diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h
index 956caa0628f5..5aacdb017a9f 100644
--- a/include/linux/mfd/intel_soc_pmic.h
+++ b/include/linux/mfd/intel_soc_pmic.h
@@ -25,8 +25,11 @@ struct intel_soc_pmic {
 	int irq;
 	struct regmap *regmap;
 	struct regmap_irq_chip_data *irq_chip_data;
-	struct regmap_irq_chip_data *irq_chip_data_level2;
 	struct regmap_irq_chip_data *irq_chip_data_tmu;
+	struct regmap_irq_chip_data *irq_chip_data_bcu;
+	struct regmap_irq_chip_data *irq_chip_data_adc;
+	struct regmap_irq_chip_data *irq_chip_data_chgr;
+	struct regmap_irq_chip_data *irq_chip_data_crit;
 	struct device *dev;
 };
 
-- 
cgit v1.2.3


From d2c3c8dcb5987b8352e82089c79a41b6e17e28d2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 20 Apr 2017 10:46:07 -0700
Subject: dm: convert DM printk macros to pr_<level> macros

Using pr_<level> is the more common logging style.

Standardize style and use new macro DM_FMT.
Use no_printk in DMDEBUG macros when CONFIG_DM_DEBUG is not #defined.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/linux/device-mapper.h | 71 +++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 456da5017b32..19bc7fdfd6da 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -543,48 +543,41 @@ extern struct ratelimit_state dm_ratelimit_state;
 #define dm_ratelimit()	0
 #endif
 
-#define DMCRIT(f, arg...) \
-	printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-
-#define DMERR(f, arg...) \
-	printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-#define DMERR_LIMIT(f, arg...) \
-	do { \
-		if (dm_ratelimit())	\
-			printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \
-			       f "\n", ## arg); \
-	} while (0)
-
-#define DMWARN(f, arg...) \
-	printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-#define DMWARN_LIMIT(f, arg...) \
-	do { \
-		if (dm_ratelimit())	\
-			printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \
-			       f "\n", ## arg); \
-	} while (0)
-
-#define DMINFO(f, arg...) \
-	printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-#define DMINFO_LIMIT(f, arg...) \
-	do { \
-		if (dm_ratelimit())	\
-			printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \
-			       "\n", ## arg); \
-	} while (0)
+#define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n"
+
+#define DMCRIT(fmt, ...) pr_crit(DM_FMT(fmt), ##__VA_ARGS__)
+
+#define DMERR(fmt, ...) pr_err(DM_FMT(fmt), ##__VA_ARGS__)
+#define DMERR_LIMIT(fmt, ...)						\
+do {									\
+	if (dm_ratelimit())						\
+		DMERR(fmt, ##__VA_ARGS__);				\
+} while (0)
+
+#define DMWARN(fmt, ...) pr_warn(DM_FMT(fmt), ##__VA_ARGS__)
+#define DMWARN_LIMIT(fmt, ...)						\
+do {									\
+	if (dm_ratelimit())						\
+		DMWARN(fmt, ##__VA_ARGS__);				\
+} while (0)
+
+#define DMINFO(fmt, ...) pr_info(DM_FMT(fmt), ##__VA_ARGS__)
+#define DMINFO_LIMIT(fmt, ...)						\
+do {									\
+	if (dm_ratelimit())						\
+		DMINFO(fmt, ##__VA_ARGS__);				\
+} while (0)
 
 #ifdef CONFIG_DM_DEBUG
-#  define DMDEBUG(f, arg...) \
-	printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg)
-#  define DMDEBUG_LIMIT(f, arg...) \
-	do { \
-		if (dm_ratelimit())	\
-			printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \
-			       "\n", ## arg); \
-	} while (0)
+#define DMDEBUG(fmt, ...) printk(KERN_DEBUG DM_FMT(fmt), ##__VA_ARGS__)
+#define DMDEBUG_LIMIT(fmt, ...)						\
+do {									\
+	if (dm_ratelimit())						\
+		DMDEBUG(fmt, ##__VA_ARGS__);				\
+} while (0)
 #else
-#  define DMDEBUG(f, arg...) do {} while (0)
-#  define DMDEBUG_LIMIT(f, arg...) do {} while (0)
+#define DMDEBUG(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
+#define DMDEBUG_LIMIT(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #define DMEMIT(x...) sz += ((sz >= maxlen) ? \
-- 
cgit v1.2.3


From dd88d313bef0277e27597aa394607ed26c658724 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 8 May 2017 16:40:43 -0700
Subject: dm table: add zoned block devices validation

1) Introduce DM_TARGET_ZONED_HM feature flag:

The target drivers currently available will not operate correctly if a
table target maps onto a host-managed zoned block device.

To avoid problems, introduce the new feature flag DM_TARGET_ZONED_HM to
allow a target to explicitly state that it supports host-managed zoned
block devices.  This feature is checked for all targets in a table if
any of the table's block devices are host-managed.

Note that as host-aware zoned block devices are backward compatible with
regular block devices, they can be used by any of the current target
types.  This new feature is thus restricted to host-managed zoned block
devices.

2) Check device area zone alignment:

If a target maps to a zoned block device, check that the device area is
aligned on zone boundaries to avoid problems with REQ_OP_ZONE_RESET
operations (resetting a partially mapped sequential zone would not be
possible).  This also facilitates the processing of zone report with
REQ_OP_ZONE_REPORT bios.

3) Check block devices zone model compatibility

When setting the DM device's queue limits, several possibilities exists
for zoned block devices:
1) The DM target driver may want to expose a different zone model
(e.g. host-managed device emulation or regular block device on top of
host-managed zoned block devices)
2) Expose the underlying zone model of the devices as-is

To allow both cases, the underlying block device zone model must be set
in the target limits in dm_set_device_limits() and the compatibility of
all devices checked similarly to the logical block size alignment.  For
this last check, introduce validate_hardware_zoned_model() to check that
all targets of a table have the same zone model and that the zone size
of the target devices are equal.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
[Mike Snitzer refactored Damien's original work to simplify the code]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c         | 162 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/device-mapper.h |   6 ++
 2 files changed, 168 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f5eae41f804..a39bcd9b982a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -319,6 +319,39 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 		return 1;
 	}
 
+	/*
+	 * If the target is mapped to zoned block device(s), check
+	 * that the zones are not partially mapped.
+	 */
+	if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) {
+		unsigned int zone_sectors = bdev_zone_sectors(bdev);
+
+		if (start & (zone_sectors - 1)) {
+			DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s",
+			       dm_device_name(ti->table->md),
+			       (unsigned long long)start,
+			       zone_sectors, bdevname(bdev, b));
+			return 1;
+		}
+
+		/*
+		 * Note: The last zone of a zoned block device may be smaller
+		 * than other zones. So for a target mapping the end of a
+		 * zoned block device with such a zone, len would not be zone
+		 * aligned. We do not allow such last smaller zone to be part
+		 * of the mapping here to ensure that mappings with multiple
+		 * devices do not end up with a smaller zone in the middle of
+		 * the sector range.
+		 */
+		if (len & (zone_sectors - 1)) {
+			DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s",
+			       dm_device_name(ti->table->md),
+			       (unsigned long long)len,
+			       zone_sectors, bdevname(bdev, b));
+			return 1;
+		}
+	}
+
 	if (logical_block_size_sectors <= 1)
 		return 0;
 
@@ -456,6 +489,8 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 		       q->limits.alignment_offset,
 		       (unsigned long long) start << SECTOR_SHIFT);
 
+	limits->zoned = blk_queue_zoned_model(q);
+
 	return 0;
 }
 
@@ -1346,6 +1381,88 @@ bool dm_table_has_no_data_devices(struct dm_table *table)
 	return true;
 }
 
+static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
+				 sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+	enum blk_zoned_model *zoned_model = data;
+
+	return q && blk_queue_zoned_model(q) == *zoned_model;
+}
+
+static bool dm_table_supports_zoned_model(struct dm_table *t,
+					  enum blk_zoned_model zoned_model)
+{
+	struct dm_target *ti;
+	unsigned i;
+
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+
+		if (zoned_model == BLK_ZONED_HM &&
+		    !dm_target_supports_zoned_hm(ti->type))
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model))
+			return false;
+	}
+
+	return true;
+}
+
+static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
+				       sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+	unsigned int *zone_sectors = data;
+
+	return q && blk_queue_zone_sectors(q) == *zone_sectors;
+}
+
+static bool dm_table_matches_zone_sectors(struct dm_table *t,
+					  unsigned int zone_sectors)
+{
+	struct dm_target *ti;
+	unsigned i;
+
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+
+		if (!ti->type->iterate_devices ||
+		    !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors))
+			return false;
+	}
+
+	return true;
+}
+
+static int validate_hardware_zoned_model(struct dm_table *table,
+					 enum blk_zoned_model zoned_model,
+					 unsigned int zone_sectors)
+{
+	if (zoned_model == BLK_ZONED_NONE)
+		return 0;
+
+	if (!dm_table_supports_zoned_model(table, zoned_model)) {
+		DMERR("%s: zoned model is not consistent across all devices",
+		      dm_device_name(table->md));
+		return -EINVAL;
+	}
+
+	/* Check zone size validity and compatibility */
+	if (!zone_sectors || !is_power_of_2(zone_sectors))
+		return -EINVAL;
+
+	if (!dm_table_matches_zone_sectors(table, zone_sectors)) {
+		DMERR("%s: zone sectors is not consistent across all devices",
+		      dm_device_name(table->md));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /*
  * Establish the new table's queue_limits and validate them.
  */
@@ -1355,6 +1472,8 @@ int dm_calculate_queue_limits(struct dm_table *table,
 	struct dm_target *ti;
 	struct queue_limits ti_limits;
 	unsigned i;
+	enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
+	unsigned int zone_sectors = 0;
 
 	blk_set_stacking_limits(limits);
 
@@ -1372,6 +1491,15 @@ int dm_calculate_queue_limits(struct dm_table *table,
 		ti->type->iterate_devices(ti, dm_set_device_limits,
 					  &ti_limits);
 
+		if (zoned_model == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
+			/*
+			 * After stacking all limits, validate all devices
+			 * in table support this zoned model and zone sectors.
+			 */
+			zoned_model = ti_limits.zoned;
+			zone_sectors = ti_limits.chunk_sectors;
+		}
+
 		/* Set I/O hints portion of queue limits */
 		if (ti->type->io_hints)
 			ti->type->io_hints(ti, &ti_limits);
@@ -1396,8 +1524,42 @@ combine_limits:
 			       dm_device_name(table->md),
 			       (unsigned long long) ti->begin,
 			       (unsigned long long) ti->len);
+
+		/*
+		 * FIXME: this should likely be moved to blk_stack_limits(), would
+		 * also eliminate limits->zoned stacking hack in dm_set_device_limits()
+		 */
+		if (limits->zoned == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
+			/*
+			 * By default, the stacked limits zoned model is set to
+			 * BLK_ZONED_NONE in blk_set_stacking_limits(). Update
+			 * this model using the first target model reported
+			 * that is not BLK_ZONED_NONE. This will be either the
+			 * first target device zoned model or the model reported
+			 * by the target .io_hints.
+			 */
+			limits->zoned = ti_limits.zoned;
+		}
 	}
 
+	/*
+	 * Verify that the zoned model and zone sectors, as determined before
+	 * any .io_hints override, are the same across all devices in the table.
+	 * - this is especially relevant if .io_hints is emulating a disk-managed
+	 *   zoned model (aka BLK_ZONED_NONE) on host-managed zoned block devices.
+	 * BUT...
+	 */
+	if (limits->zoned != BLK_ZONED_NONE) {
+		/*
+		 * ...IF the above limits stacking determined a zoned model
+		 * validate that all of the table's devices conform to it.
+		 */
+		zoned_model = limits->zoned;
+		zone_sectors = limits->chunk_sectors;
+	}
+	if (validate_hardware_zoned_model(table, zoned_model, zone_sectors))
+		return -EINVAL;
+
 	return validate_hardware_logical_block_alignment(table, limits);
 }
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 19bc7fdfd6da..186ef74009cb 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -237,6 +237,12 @@ typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio)
 #define DM_TARGET_PASSES_INTEGRITY	0x00000020
 #define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY)
 
+/*
+ * Indicates that a target supports host-managed zoned block devices.
+ */
+#define DM_TARGET_ZONED_HM		0x00000040
+#define dm_target_supports_zoned_hm(type) ((type)->features & DM_TARGET_ZONED_HM)
+
 struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
-- 
cgit v1.2.3


From 10999307c14eac281fbec3ada73bee7a05bd41dc Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 8 May 2017 16:40:48 -0700
Subject: dm: introduce dm_remap_zone_report()

A target driver support zoned block devices and exposing it as such may
receive REQ_OP_ZONE_REPORT request for the user to determine the mapped
device zone configuration. To process properly such request, the target
driver may need to remap the zone descriptors provided in the report
reply. The helper function dm_remap_zone_report() does this generically
using only the target start offset and length and the start offset
within the target device.

dm_remap_zone_report() will remap the start sector of all zones
reported. If the report includes sequential zones, the write pointer
position of these zones will also be remapped.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c               | 79 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/device-mapper.h |  2 ++
 2 files changed, 81 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e38d1d7d17d6..96bd13e581cd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1012,6 +1012,85 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 }
 EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
 
+/*
+ * The zone descriptors obtained with a zone report indicate
+ * zone positions within the target device. The zone descriptors
+ * must be remapped to match their position within the dm device.
+ * A target may call dm_remap_zone_report after completion of a
+ * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained
+ * from the target device mapping to the dm device.
+ */
+void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	struct bio *report_bio = tio->io->bio;
+	struct blk_zone_report_hdr *hdr = NULL;
+	struct blk_zone *zone;
+	unsigned int nr_rep = 0;
+	unsigned int ofst;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+	void *addr;
+
+	if (bio->bi_status)
+		return;
+
+	/*
+	 * Remap the start sector of the reported zones. For sequential zones,
+	 * also remap the write pointer position.
+	 */
+	bio_for_each_segment(bvec, report_bio, iter) {
+		addr = kmap_atomic(bvec.bv_page);
+
+		/* Remember the report header in the first page */
+		if (!hdr) {
+			hdr = addr;
+			ofst = sizeof(struct blk_zone_report_hdr);
+		} else
+			ofst = 0;
+
+		/* Set zones start sector */
+		while (hdr->nr_zones && ofst < bvec.bv_len) {
+			zone = addr + ofst;
+			if (zone->start >= start + ti->len) {
+				hdr->nr_zones = 0;
+				break;
+			}
+			zone->start = zone->start + ti->begin - start;
+			if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+				if (zone->cond == BLK_ZONE_COND_FULL)
+					zone->wp = zone->start + zone->len;
+				else if (zone->cond == BLK_ZONE_COND_EMPTY)
+					zone->wp = zone->start;
+				else
+					zone->wp = zone->wp + ti->begin - start;
+			}
+			ofst += sizeof(struct blk_zone);
+			hdr->nr_zones--;
+			nr_rep++;
+		}
+
+		if (addr != hdr)
+			kunmap_atomic(addr);
+
+		if (!hdr->nr_zones)
+			break;
+	}
+
+	if (hdr) {
+		hdr->nr_zones = nr_rep;
+		kunmap_atomic(hdr);
+	}
+
+	bio_advance(report_bio, report_bio->bi_iter.bi_size);
+
+#else /* !CONFIG_BLK_DEV_ZONED */
+	bio->bi_status = BLK_STS_NOTSUPP;
+#endif
+}
+EXPORT_SYMBOL_GPL(dm_remap_zone_report);
+
 /*
  * Flush current->bio_list when the target map method blocks.
  * This fixes deadlocks in snapshot and possibly in other targets.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 186ef74009cb..0c1b50ad23b0 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -450,6 +450,8 @@ struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
 void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
+void dm_remap_zone_report(struct dm_target *ti, struct bio *bio,
+			  sector_t start);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
-- 
cgit v1.2.3


From b73c67c2cbb0004e6da9720a167fe42e31f7a6e8 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 8 May 2017 16:40:51 -0700
Subject: dm kcopyd: add sequential write feature

When copyying blocks to host-managed zoned block devices, writes must be
sequential.  However, dm_kcopyd_copy() does not guarantee this as writes
are issued in the completion order of reads, and reads may complete out
of order despite being issued sequentially.

Fix this by introducing the DM_KCOPYD_WRITE_SEQ feature flag.  This can
be specified when calling dm_kcopyd_copy() and should be set
automatically if one of the destinations is a host-managed zoned block
device.  For a split job, the master job maintains the write position at
which writes must be issued.  This is checked with the pop() function
which is modified to not return any write I/O sub job that is not at the
correct write position.

When DM_KCOPYD_WRITE_SEQ is specified for a job, errors cannot be
ignored and the flag DM_KCOPYD_IGNORE_ERROR is ignored, even if
specified by the user.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-kcopyd.c    | 65 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/dm-kcopyd.h |  1 +
 2 files changed, 64 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index f85846741d50..cf2c67e35eaf 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -356,6 +356,7 @@ struct kcopyd_job {
 	struct mutex lock;
 	atomic_t sub_jobs;
 	sector_t progress;
+	sector_t write_offset;
 
 	struct kcopyd_job *master_job;
 };
@@ -386,6 +387,31 @@ void dm_kcopyd_exit(void)
  * Functions to push and pop a job onto the head of a given job
  * list.
  */
+static struct kcopyd_job *pop_io_job(struct list_head *jobs,
+				     struct dm_kcopyd_client *kc)
+{
+	struct kcopyd_job *job;
+
+	/*
+	 * For I/O jobs, pop any read, any write without sequential write
+	 * constraint and sequential writes that are at the right position.
+	 */
+	list_for_each_entry(job, jobs, list) {
+		if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
+			list_del(&job->list);
+			return job;
+		}
+
+		if (job->write_offset == job->master_job->write_offset) {
+			job->master_job->write_offset += job->source.count;
+			list_del(&job->list);
+			return job;
+		}
+	}
+
+	return NULL;
+}
+
 static struct kcopyd_job *pop(struct list_head *jobs,
 			      struct dm_kcopyd_client *kc)
 {
@@ -395,8 +421,12 @@ static struct kcopyd_job *pop(struct list_head *jobs,
 	spin_lock_irqsave(&kc->job_lock, flags);
 
 	if (!list_empty(jobs)) {
-		job = list_entry(jobs->next, struct kcopyd_job, list);
-		list_del(&job->list);
+		if (jobs == &kc->io_jobs)
+			job = pop_io_job(jobs, kc);
+		else {
+			job = list_entry(jobs->next, struct kcopyd_job, list);
+			list_del(&job->list);
+		}
 	}
 	spin_unlock_irqrestore(&kc->job_lock, flags);
 
@@ -506,6 +536,14 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
+	/*
+	 * If we need to write sequentially and some reads or writes failed,
+	 * no point in continuing.
+	 */
+	if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
+	    job->master_job->write_err)
+		return -EIO;
+
 	io_job_start(job->kc->throttle);
 
 	if (job->rw == READ)
@@ -655,6 +693,7 @@ static void segment_complete(int read_err, unsigned long write_err,
 		int i;
 
 		*sub_job = *job;
+		sub_job->write_offset = progress;
 		sub_job->source.sector += progress;
 		sub_job->source.count = count;
 
@@ -723,6 +762,27 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 	job->num_dests = num_dests;
 	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
 
+	/*
+	 * If one of the destination is a host-managed zoned block device,
+	 * we need to write sequentially. If one of the destination is a
+	 * host-aware device, then leave it to the caller to choose what to do.
+	 */
+	if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
+		for (i = 0; i < job->num_dests; i++) {
+			if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
+				set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags);
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If we need to write sequentially, errors cannot be ignored.
+	 */
+	if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
+	    test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags))
+		clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags);
+
 	if (from) {
 		job->source = *from;
 		job->pages = NULL;
@@ -746,6 +806,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 	job->fn = fn;
 	job->context = context;
 	job->master_job = job;
+	job->write_offset = 0;
 
 	if (job->source.count <= SUB_JOB_SIZE)
 		dispatch_job(job);
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index f486d636b82e..cfac8588ed56 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -20,6 +20,7 @@
 #define DM_KCOPYD_MAX_REGIONS 8
 
 #define DM_KCOPYD_IGNORE_ERROR 1
+#define DM_KCOPYD_WRITE_SEQ    2
 
 struct dm_kcopyd_throttle {
 	unsigned throttle;
-- 
cgit v1.2.3


From e15b9c50c4555e30be3c4f26aab7aeb10aee7aa6 Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Wed, 31 May 2017 16:55:43 +0800
Subject: netfilter: ebt: Use new helper ebt_invalid_target to check target

Use the new helper function ebt_invalid_target instead of the old
macro INVALID_TARGET and other duplicated codes to enhance the readability.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 2 --
 net/bridge/netfilter/ebt_dnat.c           | 2 +-
 net/bridge/netfilter/ebt_mark.c           | 2 +-
 net/bridge/netfilter/ebt_redirect.c       | 2 +-
 net/bridge/netfilter/ebt_snat.c           | 2 +-
 5 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index e0cbf17af780..2c2a5514b0df 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -122,8 +122,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS))
 /* Clear the bit in the hook mask that tells if the rule is on a base chain */
 #define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS))
-/* True if the target is not a standard target */
-#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
 static inline bool ebt_invalid_target(int target)
 {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index e0bb624c3845..dfc86a0199da 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -61,7 +61,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 	    (strcmp(par->table, "broute") != 0 ||
 	    hook_mask & ~(1 << NF_BR_BROUTING)))
 		return -EINVAL;
-	if (INVALID_TARGET)
+	if (ebt_invalid_target(info->target))
 		return -EINVAL;
 	return 0;
 }
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 66697cbd0a8b..19f0f9592d32 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -44,7 +44,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
-	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+	if (ebt_invalid_target(tmp))
 		return -EINVAL;
 	tmp = info->target & ~EBT_VERDICT_BITS;
 	if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 8d2a85e0594e..a7223eaf490b 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -47,7 +47,7 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
 	    (strcmp(par->table, "broute") != 0 ||
 	    hook_mask & ~(1 << NF_BR_BROUTING)))
 		return -EINVAL;
-	if (INVALID_TARGET)
+	if (ebt_invalid_target(info->target))
 		return -EINVAL;
 	return 0;
 }
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index e56ccd060d26..11cf9e9e9222 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -51,7 +51,7 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
 
-	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+	if (ebt_invalid_target(tmp))
 		return -EINVAL;
 	tmp = info->target | EBT_VERDICT_BITS;
 	if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
-- 
cgit v1.2.3


From 04ba724b659c6808b0ca31528121bdb2f2807e00 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 19 Jun 2017 18:35:46 +0100
Subject: netfilter: nfnetlink: extended ACK reporting

Pass down struct netlink_ext_ack as parameter to all of our nfnetlink
subsystem callbacks, so we can work on follow up patches to provide
finer grain error reporting using the new infrastructure that
2d4bc93368f5 ("netlink: extended ACK reporting") provides.

No functional change, just pass down this new object to callbacks.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  | 10 +++---
 net/netfilter/ipset/ip_set_core.c    | 39 ++++++++++++++++--------
 net/netfilter/nf_conntrack_netlink.c | 39 ++++++++++++++++--------
 net/netfilter/nf_tables_api.c        | 59 ++++++++++++++++++++++++------------
 net/netfilter/nfnetlink.c            | 21 +++++++++----
 net/netfilter/nfnetlink_acct.c       |  9 ++++--
 net/netfilter/nfnetlink_cthelper.c   |  9 ++++--
 net/netfilter/nfnetlink_cttimeout.c  | 15 ++++++---
 net/netfilter/nfnetlink_log.c        |  6 ++--
 net/netfilter/nfnetlink_queue.c      | 12 +++++---
 net/netfilter/nft_compat.c           |  3 +-
 net/netfilter/xt_osf.c               |  6 ++--
 12 files changed, 152 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 996711d8a7b4..41d04e9d088a 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -1,7 +1,6 @@
 #ifndef _NFNETLINK_H
 #define _NFNETLINK_H
 
-
 #include <linux/netlink.h>
 #include <linux/capability.h>
 #include <net/netlink.h>
@@ -10,13 +9,16 @@
 struct nfnl_callback {
 	int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb,
 		    const struct nlmsghdr *nlh,
-		    const struct nlattr * const cda[]);
+		    const struct nlattr * const cda[],
+		    struct netlink_ext_ack *extack);
 	int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
-			const struct nlattr * const cda[]);
+			const struct nlattr * const cda[],
+			struct netlink_ext_ack *extack);
 	int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			  const struct nlmsghdr *nlh,
-			  const struct nlattr * const cda[]);
+			  const struct nlattr * const cda[],
+			  struct netlink_ext_ack *extack);
 	const struct nla_policy *policy;	/* netlink attribute policy */
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ba6a5516dc7c..e495b5e484b1 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -841,14 +841,16 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
 
 static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
 
 static int ip_set_create(struct net *net, struct sock *ctnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[])
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
@@ -989,7 +991,8 @@ ip_set_destroy_set(struct ip_set *set)
 
 static int ip_set_destroy(struct net *net, struct sock *ctnl,
 			  struct sk_buff *skb, const struct nlmsghdr *nlh,
-			  const struct nlattr * const attr[])
+			  const struct nlattr * const attr[],
+			  struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *s;
@@ -1067,7 +1070,8 @@ ip_set_flush_set(struct ip_set *set)
 
 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
-			const struct nlattr * const attr[])
+			const struct nlattr * const attr[],
+			struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *s;
@@ -1106,7 +1110,8 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
 
 static int ip_set_rename(struct net *net, struct sock *ctnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[])
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *s;
@@ -1155,7 +1160,8 @@ out:
 
 static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *from, *to;
@@ -1428,7 +1434,8 @@ out:
 
 static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
@@ -1513,7 +1520,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 
 static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1567,7 +1575,8 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1621,7 +1630,8 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
-			const struct nlattr * const attr[])
+			const struct nlattr * const attr[],
+			struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1656,7 +1666,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 static int ip_set_header(struct net *net, struct sock *ctnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[])
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	const struct ip_set *set;
@@ -1712,7 +1723,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
 
 static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1770,7 +1782,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
 
 static int ip_set_protocol(struct net *net, struct sock *ctnl,
 			   struct sk_buff *skb, const struct nlmsghdr *nlh,
-			   const struct nlattr * const attr[])
+			   const struct nlattr * const attr[],
+			   struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e1eca47105bd..573eb83d5d17 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1127,7 +1127,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
 static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 				   struct sk_buff *skb,
 				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[])
+				   const struct nlattr * const cda[],
+				   struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -1179,7 +1180,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 				   struct sk_buff *skb,
 				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[])
+				   const struct nlattr * const cda[],
+				   struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -1340,7 +1342,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const cda[])
+				  const struct nlattr * const cda[],
+				  struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -1362,7 +1365,8 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
 					struct sk_buff *skb,
 					const struct nlmsghdr *nlh,
-					const struct nlattr * const cda[])
+					const struct nlattr * const cda[],
+					struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -1901,7 +1905,8 @@ err1:
 static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 				   struct sk_buff *skb,
 				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[])
+				   const struct nlattr * const cda[],
+				   struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
@@ -2066,7 +2071,8 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -2111,7 +2117,8 @@ nlmsg_failure:
 
 static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const cda[])
+			     const struct nlattr * const cda[],
+			     struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	int err;
@@ -2773,7 +2780,8 @@ out:
 static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	int err;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2817,7 +2825,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
 
 static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[])
+				const struct nlattr * const cda[],
+				struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
@@ -2829,7 +2838,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		if (cda[CTA_EXPECT_MASTER])
-			return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda);
+			return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda,
+						     extack);
 		else {
 			struct netlink_dump_control c = {
 				.dump = ctnetlink_exp_dump_table,
@@ -2897,7 +2907,8 @@ out:
 
 static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[])
+				const struct nlattr * const cda[],
+				struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
@@ -3185,7 +3196,8 @@ err_ct:
 
 static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[])
+				const struct nlattr * const cda[],
+				struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
@@ -3291,7 +3303,8 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const cda[])
+				  const struct nlattr * const cda[],
+				  struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5f3339978f6b..7843efa33c59 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -535,7 +535,8 @@ done:
 
 static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -678,7 +679,8 @@ err:
 
 static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -831,7 +833,8 @@ out:
 
 static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -1127,7 +1130,8 @@ done:
 
 static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -1323,7 +1327,8 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
 
 static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nlattr * uninitialized_var(name);
@@ -1561,7 +1566,8 @@ err1:
 
 static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -2042,7 +2048,8 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 
 static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[])
+			     const struct nlattr * const nla[],
+			     struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -2135,7 +2142,8 @@ static struct nft_expr_info *info;
 
 static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[])
+			     const struct nlattr * const nla[],
+			     struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -2317,7 +2325,8 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 
 static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[])
+			     const struct nlattr * const nla[],
+			     struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -2833,7 +2842,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
 
 static int nf_tables_getset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
@@ -2909,7 +2919,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
 
 static int nf_tables_newset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -3127,7 +3138,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
 
 static int nf_tables_delset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -3487,7 +3499,8 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
 
 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[])
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
@@ -3888,7 +3901,8 @@ err1:
 
 static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[])
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
@@ -4085,7 +4099,8 @@ err1:
 
 static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[])
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
@@ -4295,7 +4310,8 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
 
 static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_object_type *type;
@@ -4489,7 +4505,8 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
 
 static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -4567,8 +4584,9 @@ static void nft_obj_destroy(struct nft_object *obj)
 }
 
 static int nf_tables_delobj(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			    struct sk_buff *skb, const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -4698,7 +4716,8 @@ err:
 
 static int nf_tables_getgen(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	int err;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 80f5ecf2c3d7..92b05e188fd1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -201,7 +201,8 @@ replay:
 
 		if (nc->call_rcu) {
 			err = nc->call_rcu(net, net->nfnl, skb, nlh,
-					   (const struct nlattr **)cda);
+					   (const struct nlattr **)cda,
+					   extack);
 			rcu_read_unlock();
 		} else {
 			rcu_read_unlock();
@@ -211,7 +212,8 @@ replay:
 				err = -EAGAIN;
 			else if (nc->call)
 				err = nc->call(net, net->nfnl, skb, nlh,
-					       (const struct nlattr **)cda);
+					       (const struct nlattr **)cda,
+					       extack);
 			else
 				err = -EINVAL;
 			nfnl_unlock(subsys_id);
@@ -226,9 +228,11 @@ struct nfnl_err {
 	struct list_head	head;
 	struct nlmsghdr		*nlh;
 	int			err;
+	struct netlink_ext_ack	extack;
 };
 
-static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err,
+			const struct netlink_ext_ack *extack)
 {
 	struct nfnl_err *nfnl_err;
 
@@ -238,6 +242,7 @@ static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
 
 	nfnl_err->nlh = nlh;
 	nfnl_err->err = err;
+	nfnl_err->extack = *extack;
 	list_add_tail(&nfnl_err->head, list);
 
 	return 0;
@@ -262,7 +267,8 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
 	struct nfnl_err *nfnl_err, *next;
 
 	list_for_each_entry_safe(nfnl_err, next, err_list, head) {
-		netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
+		netlink_ack(skb, nfnl_err->nlh, nfnl_err->err,
+			    &nfnl_err->extack);
 		nfnl_err_del(nfnl_err);
 	}
 }
@@ -280,6 +286,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	const struct nfnetlink_subsystem *ss;
 	const struct nfnl_callback *nc;
+	struct netlink_ext_ack extack;
 	LIST_HEAD(err_list);
 	u32 status;
 	int err;
@@ -325,6 +332,7 @@ replay:
 	while (skb->len >= nlmsg_total_size(0)) {
 		int msglen, type;
 
+		memset(&extack, 0, sizeof(extack));
 		nlh = nlmsg_hdr(skb);
 		err = 0;
 
@@ -384,7 +392,8 @@ replay:
 
 			if (nc->call_batch) {
 				err = nc->call_batch(net, net->nfnl, skb, nlh,
-						     (const struct nlattr **)cda);
+						     (const struct nlattr **)cda,
+						     &extack);
 			}
 
 			/* The lock was released to autoload some module, we
@@ -402,7 +411,7 @@ ack:
 			 * processed, this avoids that the same error is
 			 * reported several times when replaying the batch.
 			 */
-			if (nfnl_err_add(&err_list, nlh, err) < 0) {
+			if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
 				/* We failed to enqueue an error, reset the
 				 * list of errors and send OOM to userspace
 				 * pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 9898fb4d0512..c45e6d4358ab 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -49,7 +49,8 @@ struct nfacct_filter {
 
 static int nfnl_acct_new(struct net *net, struct sock *nfnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[])
+			 const struct nlattr * const tb[],
+			 struct netlink_ext_ack *extack)
 {
 	struct nf_acct *nfacct, *matching = NULL;
 	char *acct_name;
@@ -264,7 +265,8 @@ nfacct_filter_alloc(const struct nlattr * const attr)
 
 static int nfnl_acct_get(struct net *net, struct sock *nfnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[])
+			 const struct nlattr * const tb[],
+			 struct netlink_ext_ack *extack)
 {
 	int ret = -ENOENT;
 	struct nf_acct *cur;
@@ -343,7 +345,8 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
 
 static int nfnl_acct_del(struct net *net, struct sock *nfnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[])
+			 const struct nlattr * const tb[],
+			 struct netlink_ext_ack *extack)
 {
 	struct nf_acct *cur, *tmp;
 	int ret = -ENOENT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index be678a323598..41628b393673 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -398,7 +398,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
 
 static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[])
+			     const struct nlattr * const tb[],
+			     struct netlink_ext_ack *extack)
 {
 	const char *helper_name;
 	struct nf_conntrack_helper *cur, *helper = NULL;
@@ -599,7 +600,8 @@ out:
 
 static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[])
+			     const struct nlattr * const tb[],
+			     struct netlink_ext_ack *extack)
 {
 	int ret = -ENOENT;
 	struct nf_conntrack_helper *cur;
@@ -666,7 +668,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 
 static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[])
+			     const struct nlattr * const tb[],
+			     struct netlink_ext_ack *extack)
 {
 	char *helper_name = NULL;
 	struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 49638b03ccc9..400e9ae97153 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -69,7 +69,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
 static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	__u16 l3num;
 	__u8 l4num;
@@ -239,7 +240,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	int ret = -ENOENT;
 	char *name;
@@ -326,7 +328,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	struct ctnl_timeout *cur, *tmp;
 	int ret = -ENOENT;
@@ -357,7 +360,8 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	__u16 l3num;
 	__u8 l4num;
@@ -446,7 +450,8 @@ nla_put_failure:
 static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	__u16 l3num;
 	__u8 l4num;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index da9704971a83..9c14892ee65f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -795,7 +795,8 @@ static struct notifier_block nfulnl_rtnl_notifier = {
 
 static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfqa[])
+			      const struct nlattr * const nfqa[],
+			      struct netlink_ext_ack *extack)
 {
 	return -ENOTSUPP;
 }
@@ -818,7 +819,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
 
 static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfula[])
+			      const struct nlattr * const nfula[],
+			      struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8a0f218b7938..12b7dc11b6b5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1032,7 +1032,8 @@ static int nfq_id_after(unsigned int id, unsigned int max)
 static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
 				    struct sk_buff *skb,
 				    const struct nlmsghdr *nlh,
-			            const struct nlattr * const nfqa[])
+			            const struct nlattr * const nfqa[],
+				    struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nf_queue_entry *entry, *tmp;
@@ -1136,7 +1137,8 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
 static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 			      struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfqa[])
+			      const struct nlattr * const nfqa[],
+			      struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -1200,7 +1202,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 
 static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nfqa[])
+			     const struct nlattr * const nfqa[],
+			     struct netlink_ext_ack *extack)
 {
 	return -ENOTSUPP;
 }
@@ -1217,7 +1220,8 @@ static const struct nf_queue_handler nfqh = {
 
 static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nfqa[])
+			     const struct nlattr * const nfqa[],
+			     struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f753ec69f790..f5a7cb68694e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -530,7 +530,8 @@ nla_put_failure:
 
 static int nfnl_compat_get(struct net *net, struct sock *nfnl,
 			   struct sk_buff *skb, const struct nlmsghdr *nlh,
-			   const struct nlattr * const tb[])
+			   const struct nlattr * const tb[],
+			   struct netlink_ext_ack *extack)
 {
 	int ret = 0, target;
 	struct nfgenmsg *nfmsg;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index c05fefcec238..71cfa9551d08 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -63,7 +63,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
 
 static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
 			       struct sk_buff *skb, const struct nlmsghdr *nlh,
-			       const struct nlattr * const osf_attrs[])
+			       const struct nlattr * const osf_attrs[],
+			       struct netlink_ext_ack *extack)
 {
 	struct xt_osf_user_finger *f;
 	struct xt_osf_finger *kf = NULL, *sf;
@@ -107,7 +108,8 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
 static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const osf_attrs[])
+				  const struct nlattr * const osf_attrs[],
+				  struct netlink_ext_ack *extack)
 {
 	struct xt_osf_user_finger *f;
 	struct xt_osf_finger *sf;
-- 
cgit v1.2.3


From 204a2be30a7a8a8d12642f23f3fbdc8b9923b500 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@linux-m68k.org>
Date: Wed, 7 Jun 2017 00:11:44 +0200
Subject: m68k: Remove ptrace_signal_deliver

This fixes debugger syscall restart interactions.  A debugger that
modifies the tracee's program counter is expected to set the orig_d0
pseudo register to -1, to disable a possible syscall restart.

This removes the last user of the ptrace_signal_deliver hook in the ptrace
signal handling, so remove that as well.

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/signal.h |  5 -----
 arch/m68k/kernel/signal.c      | 16 ----------------
 include/linux/ptrace.h         |  4 ----
 kernel/signal.c                |  1 -
 4 files changed, 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 8c8ce5e1ee0e..3bc64d02ba5f 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -62,9 +62,4 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
 
 #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
 
-#ifndef __uClinux__
-extern void ptrace_signal_deliver(void);
-#define ptrace_signal_deliver ptrace_signal_deliver
-#endif /* __uClinux__ */
-
 #endif /* _M68K_SIGNAL_H */
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 6f945bb5ffbd..e79421f5b9cd 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -109,22 +109,6 @@ int fixup_exception(struct pt_regs *regs)
 	return 1;
 }
 
-void ptrace_signal_deliver(void)
-{
-	struct pt_regs *regs = signal_pt_regs();
-	if (regs->orig_d0 < 0)
-		return;
-	switch (regs->d0) {
-	case -ERESTARTNOHAND:
-	case -ERESTARTSYS:
-	case -ERESTARTNOINTR:
-		regs->d0 = regs->orig_d0;
-		regs->orig_d0 = -1;
-		regs->pc -= 2;
-		break;
-	}
-}
-
 static inline void push_cache (unsigned long vaddr)
 {
 	/*
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 422bc2e4cb6a..9a2e04be0657 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -388,10 +388,6 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define current_pt_regs() task_pt_regs(current)
 #endif
 
-#ifndef ptrace_signal_deliver
-#define ptrace_signal_deliver() ((void)0)
-#endif
-
 /*
  * unlike current_pt_regs(), this one is equal to task_pt_regs(current)
  * on *all* architectures; the only reason to have a per-arch definition
diff --git a/kernel/signal.c b/kernel/signal.c
index ca92bcfeb322..f0a48e5b1f0b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2092,7 +2092,6 @@ static void do_jobctl_trap(void)
 
 static int ptrace_signal(int signr, siginfo_t *info)
 {
-	ptrace_signal_deliver();
 	/*
 	 * We do not check sig_kernel_stop(signr) but set this marker
 	 * unconditionally because we do not know whether debugger will
-- 
cgit v1.2.3


From e297a783e41560b44e3c14f38e420cba518113b8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 19:58:56 -0400
Subject: random: add wait_for_random_bytes() API

This enables users of get_random_{bytes,u32,u64,int,long} to wait until
the pool is ready before using this function, in case they actually want
to have reliable randomness.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 41 +++++++++++++++++++++++++++++++----------
 include/linux/random.h |  1 +
 2 files changed, 32 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 01a260f67437..3853dd4f92e7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -851,11 +851,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	}
 }
 
-static inline void crng_wait_ready(void)
-{
-	wait_event_interruptible(crng_init_wait, crng_ready());
-}
-
 static void _extract_crng(struct crng_state *crng,
 			  __u8 out[CHACHA20_BLOCK_SIZE])
 {
@@ -1477,7 +1472,10 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
  * number of good random numbers, suitable for key generation, seeding
  * TCP sequence numbers, etc.  It does not rely on the hardware random
  * number generator.  For random bytes direct from the hardware RNG
- * (when available), use get_random_bytes_arch().
+ * (when available), use get_random_bytes_arch(). In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
  */
 void get_random_bytes(void *buf, int nbytes)
 {
@@ -1506,6 +1504,24 @@ void get_random_bytes(void *buf, int nbytes)
 }
 EXPORT_SYMBOL(get_random_bytes);
 
+/*
+ * Wait for the urandom pool to be seeded and thus guaranteed to supply
+ * cryptographically secure random numbers. This applies to: the /dev/urandom
+ * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+ * family of functions. Using any of these functions without first calling
+ * this function forfeits the guarantee of security.
+ *
+ * Returns: 0 if the urandom pool has been seeded.
+ *          -ERESTARTSYS if the function was interrupted by a signal.
+ */
+int wait_for_random_bytes(void)
+{
+	if (likely(crng_ready()))
+		return 0;
+	return wait_event_interruptible(crng_init_wait, crng_ready());
+}
+EXPORT_SYMBOL(wait_for_random_bytes);
+
 /*
  * Add a callback function that will be invoked when the nonblocking
  * pool is initialised.
@@ -1860,6 +1876,8 @@ const struct file_operations urandom_fops = {
 SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 		unsigned int, flags)
 {
+	int ret;
+
 	if (flags & ~(GRND_NONBLOCK|GRND_RANDOM))
 		return -EINVAL;
 
@@ -1872,9 +1890,9 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 	if (!crng_ready()) {
 		if (flags & GRND_NONBLOCK)
 			return -EAGAIN;
-		crng_wait_ready();
-		if (signal_pending(current))
-			return -ERESTARTSYS;
+		ret = wait_for_random_bytes();
+		if (unlikely(ret))
+			return ret;
 	}
 	return urandom_read(NULL, buf, count, NULL);
 }
@@ -2035,7 +2053,10 @@ static rwlock_t batched_entropy_reset_lock = __RW_LOCK_UNLOCKED(batched_entropy_
 /*
  * Get a random word for internal kernel use only. The quality of the random
  * number is either as good as RDRAND or as good as /dev/urandom, with the
- * goal of being quite fast and not depleting entropy.
+ * goal of being quite fast and not depleting entropy. In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
  */
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
diff --git a/include/linux/random.h b/include/linux/random.h
index ed5c3838780d..e29929347c95 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
+extern int wait_for_random_bytes(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
-- 
cgit v1.2.3


From da9ba564bd683374b8d319756f312821b8265b06 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 20:05:02 -0400
Subject: random: add get_random_{bytes,u32,u64,int,long,once}_wait family

These functions are simple convenience wrappers that call
wait_for_random_bytes before calling the respective get_random_*
function.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/net.h    |  2 ++
 include/linux/once.h   |  2 ++
 include/linux/random.h | 25 +++++++++++++++++++++++++
 3 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index abcfa46a2bd9..dda2cc939a53 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -274,6 +274,8 @@ do {									\
 
 #define net_get_random_once(buf, nbytes)			\
 	get_random_once((buf), (nbytes))
+#define net_get_random_once_wait(buf, nbytes)			\
+	get_random_once_wait((buf), (nbytes))
 
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
diff --git a/include/linux/once.h b/include/linux/once.h
index 285f12cb40e6..9c98aaa87cbc 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -53,5 +53,7 @@ void __do_once_done(bool *done, struct static_key *once_key,
 
 #define get_random_once(buf, nbytes)					     \
 	DO_ONCE(get_random_bytes, (buf), (nbytes))
+#define get_random_once_wait(buf, nbytes)                                    \
+	DO_ONCE(get_random_bytes_wait, (buf), (nbytes))                      \
 
 #endif /* _LINUX_ONCE_H */
diff --git a/include/linux/random.h b/include/linux/random.h
index e29929347c95..4aecc339558d 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -58,6 +58,31 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
+ * Returns the result of the call to wait_for_random_bytes. */
+static inline int get_random_bytes_wait(void *buf, int nbytes)
+{
+	int ret = wait_for_random_bytes();
+	if (unlikely(ret))
+		return ret;
+	get_random_bytes(buf, nbytes);
+	return 0;
+}
+
+#define declare_get_random_var_wait(var) \
+	static inline int get_random_ ## var ## _wait(var *out) { \
+		int ret = wait_for_random_bytes(); \
+		if (unlikely(ret)) \
+			return ret; \
+		*out = get_random_ ## var(); \
+		return 0; \
+	}
+declare_get_random_var_wait(u32)
+declare_get_random_var_wait(u64)
+declare_get_random_var_wait(int)
+declare_get_random_var_wait(long)
+#undef declare_get_random_var
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
-- 
cgit v1.2.3


From 63709fd4296221aa4ebd06230bce3eed70ddd927 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 16 Jun 2017 21:13:38 +0300
Subject: uuid: Take const on input of uuid_is_null() and guid_is_null()

The null check functions do not and must not modify contents of the UUID
or GUID supplied.

Mark argument explicitly to reflect that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/uuid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 75f7182d5360..d1defe4ab167 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -48,7 +48,7 @@ static inline void guid_copy(guid_t *dst, const guid_t *src)
 	memcpy(dst, src, sizeof(guid_t));
 }
 
-static inline bool guid_is_null(guid_t *guid)
+static inline bool guid_is_null(const guid_t *guid)
 {
 	return guid_equal(guid, &guid_null);
 }
@@ -63,7 +63,7 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
 	memcpy(dst, src, sizeof(uuid_t));
 }
 
-static inline bool uuid_is_null(uuid_t *uuid)
+static inline bool uuid_is_null(const uuid_t *uuid)
 {
 	return uuid_equal(uuid, &uuid_null);
 }
-- 
cgit v1.2.3


From 682696605c7093d2800c498c04166831e5aedf87 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 13 Apr 2017 16:48:11 +0200
Subject: mmc: sdio: Add API to manage SDIO IRQs from a workqueue

For hosts not supporting MMC_CAP2_SDIO_IRQ_NOTHREAD but MMC_CAP_SDIO_IRQ,
the SDIO IRQs are processed from a dedicated kernel thread. For these
cases, the host calls mmc_signal_sdio_irq() from its ISR to signal a new
SDIO IRQ.

Signaling an SDIO IRQ makes the host's ->enable_sdio_irq() callback to be
invoked to temporary disable the IRQs, before the kernel thread is woken up
to process it. When processing of the IRQs are completed, they are
re-enabled by the kernel thread, again via invoking the host's
->enable_sdio_irq().

The observation from this, is that the execution path is being unnecessary
complex, as the host driver already knows that it needs to temporary
disable the IRQs before signaling a new one. Moreover, replacing the kernel
thread with a work/workqueue would not only greatly simplify the code, but
also make it more robust.

To address the above problems, let's continue to build upon the support for
MMC_CAP2_SDIO_IRQ_NOTHREAD, as it already implements SDIO IRQs to be
processed without using the clumsy kernel thread and without the ping-pong
calls of the host's ->enable_sdio_irq() callback for each processed IRQ.

Therefore, let's add new API sdio_signal_irq(), which enables hosts to
signal/process SDIO IRQs by using a work/workqueue, rather than using the
kernel thread.

Add also a new host callback ->ack_sdio_irq(), which the work invokes when
the SDIO IRQs have been processed. This informs the host about when it
shall re-enable the SDIO IRQs. Potentially, we could re-use the existing
->enable_sdio_irq() callback instead of adding a new one, however it has
turned out that it's more convenient for hosts to get this information via
a separate callback.

Hosts that wants to use this new method to signal/process SDIO IRQs, must
enable MMC_CAP2_SDIO_IRQ_NOTHREAD and implement the ->ack_sdio_irq()
callback.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
---
 drivers/mmc/core/host.c     |  2 ++
 drivers/mmc/core/sdio_irq.c | 16 ++++++++++++++++
 drivers/mmc/core/sdio_ops.h |  2 ++
 include/linux/mmc/host.h    |  3 +++
 4 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 3f8c85d5aa09..8823c97a7b38 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -30,6 +30,7 @@
 #include "host.h"
 #include "slot-gpio.h"
 #include "pwrseq.h"
+#include "sdio_ops.h"
 
 #define cls_dev_to_mmc_host(d)	container_of(d, struct mmc_host, class_dev)
 
@@ -379,6 +380,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 	spin_lock_init(&host->lock);
 	init_waitqueue_head(&host->wq);
 	INIT_DELAYED_WORK(&host->detect, mmc_rescan);
+	INIT_DELAYED_WORK(&host->sdio_irq_work, sdio_irq_work);
 	setup_timer(&host->retune_timer, mmc_retune_timer, (unsigned long)host);
 
 	/*
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index 44d9c86bd2d4..c771843e4c15 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -98,11 +98,27 @@ void sdio_run_irqs(struct mmc_host *host)
 	if (host->sdio_irqs) {
 		host->sdio_irq_pending = true;
 		process_sdio_pending_irqs(host);
+		if (host->ops->ack_sdio_irq)
+			host->ops->ack_sdio_irq(host);
 	}
 	mmc_release_host(host);
 }
 EXPORT_SYMBOL_GPL(sdio_run_irqs);
 
+void sdio_irq_work(struct work_struct *work)
+{
+	struct mmc_host *host =
+		container_of(work, struct mmc_host, sdio_irq_work.work);
+
+	sdio_run_irqs(host);
+}
+
+void sdio_signal_irq(struct mmc_host *host)
+{
+	queue_delayed_work(system_wq, &host->sdio_irq_work, 0);
+}
+EXPORT_SYMBOL_GPL(sdio_signal_irq);
+
 static int sdio_irq_thread(void *_host)
 {
 	struct mmc_host *host = _host;
diff --git a/drivers/mmc/core/sdio_ops.h b/drivers/mmc/core/sdio_ops.h
index ee35cb4d170e..96945cafbf0b 100644
--- a/drivers/mmc/core/sdio_ops.h
+++ b/drivers/mmc/core/sdio_ops.h
@@ -17,6 +17,7 @@
 
 struct mmc_host;
 struct mmc_card;
+struct work_struct;
 
 int mmc_send_io_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr);
 int mmc_io_rw_direct(struct mmc_card *card, int write, unsigned fn,
@@ -25,6 +26,7 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn,
 	unsigned addr, int incr_addr, u8 *buf, unsigned blocks, unsigned blksz);
 int sdio_reset(struct mmc_host *host);
 unsigned int mmc_align_data_size(struct mmc_card *card, unsigned int sz);
+void sdio_irq_work(struct work_struct *work);
 
 static inline bool sdio_is_io_busy(u32 opcode, u32 arg)
 {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 21385ac0c9b1..f186b26c05a4 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -130,6 +130,7 @@ struct mmc_host_ops {
 	int	(*get_cd)(struct mmc_host *host);
 
 	void	(*enable_sdio_irq)(struct mmc_host *host, int enable);
+	void	(*ack_sdio_irq)(struct mmc_host *host);
 
 	/* optional callback for HC quirks */
 	void	(*init_card)(struct mmc_host *host, struct mmc_card *card);
@@ -358,6 +359,7 @@ struct mmc_host {
 
 	unsigned int		sdio_irqs;
 	struct task_struct	*sdio_irq_thread;
+	struct delayed_work	sdio_irq_work;
 	bool			sdio_irq_pending;
 	atomic_t		sdio_irq_thread_abort;
 
@@ -428,6 +430,7 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host)
 }
 
 void sdio_run_irqs(struct mmc_host *host);
+void sdio_signal_irq(struct mmc_host *host);
 
 #ifdef CONFIG_REGULATOR
 int mmc_regulator_get_ocrmask(struct regulator *supply);
-- 
cgit v1.2.3


From c3dccb74be28a345a2ebcc224e41b774529b8b8f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 18 May 2017 11:29:31 +0200
Subject: mmc: core: Delete bounce buffer Kconfig option

This option is activated by all multiplatform configs and what
not so we almost always have it turned on, and the memory it
saves is negligible, even more so moving forward. The actual
bounce buffer only gets allocated only when used, the only
thing the ifdefs are saving is a little bit of code.

It is highly improper to have this as a Kconfig option that
get turned on by Kconfig, make this a pure runtime-thing and
let the host decide whether we use bounce buffers. We add a
new property "disable_bounce" to the host struct.

Notice that mmc_queue_calc_bouncesz() already disables the
bounce buffers if host->max_segs != 1, so any arch that has a
maximum number of segments higher than 1 will have bounce
buffers disabled.

The option CONFIG_MMC_BLOCK_BOUNCE is default y so the
majority of platforms in the kernel already have it on, and
it then gets turned off at runtime since most of these have
a host->max_segs > 1. The few exceptions that have
host->max_segs == 1 and still turn off the bounce buffering
are those that disable it in their defconfig.

Those are the following:

arch/arm/configs/colibri_pxa300_defconfig
arch/arm/configs/zeus_defconfig
- Uses MMC_PXA, drivers/mmc/host/pxamci.c
- Sets host->max_segs = NR_SG, which is 1
- This needs its bounce buffer deactivated so we set
  host->disable_bounce to true in the host driver

arch/arm/configs/davinci_all_defconfig
- Uses MMC_DAVINCI, drivers/mmc/host/davinci_mmc.c
- This driver sets host->max_segs to MAX_NR_SG, which is 16
- That means this driver anyways disabled bounce buffers
- No special action needed for this platform

arch/arm/configs/lpc32xx_defconfig
arch/arm/configs/nhk8815_defconfig
arch/arm/configs/u300_defconfig
- Uses MMC_ARMMMCI, drivers/mmc/host/mmci.[c|h]
- This driver by default sets host->max_segs to NR_SG,
  which is 128, unless a DMA engine is used, and in that case
  the number of segments are also > 1
- That means this driver already disables bounce buffers
- No special action needed for these platforms

arch/arm/configs/sama5_defconfig
- Uses MMC_SDHCI, MMC_SDHCI_PLTFM, MMC_SDHCI_OF_AT91, MMC_ATMELMCI
- Uses drivers/mmc/host/sdhci.c
- Normally sets host->max_segs to SDHCI_MAX_SEGS which is 128 and
  thus disables bounce buffers
- Sets host->max_segs to 1 if SDHCI_USE_SDMA is set
- SDHCI_USE_SDMA is only set by SDHCI on PCI adapers
- That means that for this platform bounce buffers are already
  disabled at runtime
- No special action needed for this platform

arch/blackfin/configs/CM-BF533_defconfig
arch/blackfin/configs/CM-BF537E_defconfig
- Uses MMC_SPI (a simple MMC card connected on SPI pins)
- Uses drivers/mmc/host/mmc_spi.c
- Sets host->max_segs to MMC_SPI_BLOCKSATONCE which is 128
- That means this platform already disables bounce buffers at
  runtime
- No special action needed for these platforms

arch/mips/configs/cavium_octeon_defconfig
- Uses MMC_CAVIUM_OCTEON, drivers/mmc/host/cavium.c
- Sets host->max_segs to 16 or 1
- Setting host->disable_bounce to be sure for the 1 case

arch/mips/configs/qi_lb60_defconfig
- Uses MMC_JZ4740, drivers/mmc/host/jz4740_mmc.c
- This sets host->max_segs to 128 so bounce buffers are
  already runtime disabled
- No action needed for this platform

It would be interesting to come up with a list of the platforms
that actually end up using bounce buffers. I have not been
able to infer such a list, but it occurs when
host->max_segs == 1 and the bounce buffering is not explicitly
disabled.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/Kconfig  | 18 ------------------
 drivers/mmc/core/queue.c  | 15 +--------------
 drivers/mmc/host/cavium.c |  4 +++-
 drivers/mmc/host/pxamci.c |  6 +++++-
 include/linux/mmc/host.h  |  1 +
 5 files changed, 10 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index fc1ecdaaa9ca..42e89060cd41 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -61,24 +61,6 @@ config MMC_BLOCK_MINORS
 
 	  If unsure, say 8 here.
 
-config MMC_BLOCK_BOUNCE
-	bool "Use bounce buffer for simple hosts"
-	depends on MMC_BLOCK
-	default y
-	help
-	  SD/MMC is a high latency protocol where it is crucial to
-	  send large requests in order to get high performance. Many
-	  controllers, however, are restricted to continuous memory
-	  (i.e. they can't do scatter-gather), something the kernel
-	  rarely can provide.
-
-	  Say Y here to help these restricted hosts by bouncing
-	  requests back and forth from a large buffer. You will get
-	  a big performance gain at the cost of up to 64 KiB of
-	  physical memory.
-
-	  If unsure, say Y here.
-
 config SDIO_UART
 	tristate "SDIO UART/GPS class support"
 	depends on TTY
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6be3e7b..70ba7f94c706 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -219,7 +219,6 @@ static struct mmc_queue_req *mmc_queue_alloc_mqrqs(int qdepth)
 	return mqrq;
 }
 
-#ifdef CONFIG_MMC_BLOCK_BOUNCE
 static int mmc_queue_alloc_bounce_bufs(struct mmc_queue_req *mqrq, int qdepth,
 				       unsigned int bouncesz)
 {
@@ -258,7 +257,7 @@ static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
 {
 	unsigned int bouncesz = MMC_QUEUE_BOUNCESZ;
 
-	if (host->max_segs != 1)
+	if (host->max_segs != 1 || (host->caps & MMC_CAP_NO_BOUNCE_BUFF))
 		return 0;
 
 	if (bouncesz > host->max_req_size)
@@ -273,18 +272,6 @@ static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
 
 	return bouncesz;
 }
-#else
-static inline bool mmc_queue_alloc_bounce(struct mmc_queue_req *mqrq,
-					  int qdepth, unsigned int bouncesz)
-{
-	return false;
-}
-
-static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
-{
-	return 0;
-}
-#endif
 
 static int mmc_queue_alloc_sgs(struct mmc_queue_req *mqrq, int qdepth,
 			       int max_segs)
diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
index b8aaf0fdb77c..3686d77c717b 100644
--- a/drivers/mmc/host/cavium.c
+++ b/drivers/mmc/host/cavium.c
@@ -1035,10 +1035,12 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host)
 	 * We only have a 3.3v supply, we cannot support any
 	 * of the UHS modes. We do support the high speed DDR
 	 * modes up to 52MHz.
+	 *
+	 * Disable bounce buffers for max_segs = 1
 	 */
 	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
 		     MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD |
-		     MMC_CAP_3_3V_DDR;
+		     MMC_CAP_3_3V_DDR | MMC_CAP_NO_BOUNCE_BUFF;
 
 	if (host->use_sg)
 		mmc->max_segs = 16;
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index c763b404510f..59ab194cb009 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -702,7 +702,11 @@ static int pxamci_probe(struct platform_device *pdev)
 
 	pxamci_init_ocr(host);
 
-	mmc->caps = 0;
+	/*
+	 * This architecture used to disable bounce buffers through its
+	 * defconfig, now it is done at runtime as a host property.
+	 */
+	mmc->caps = MMC_CAP_NO_BOUNCE_BUFF;
 	host->cmdat = 0;
 	if (!cpu_is_pxa25x()) {
 		mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f186b26c05a4..9209f95a5106 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -271,6 +271,7 @@ struct mmc_host {
 #define MMC_CAP_UHS_SDR50	(1 << 18)	/* Host supports UHS SDR50 mode */
 #define MMC_CAP_UHS_SDR104	(1 << 19)	/* Host supports UHS SDR104 mode */
 #define MMC_CAP_UHS_DDR50	(1 << 20)	/* Host supports UHS DDR50 mode */
+#define MMC_CAP_NO_BOUNCE_BUFF	(1 << 21)	/* Disable bounce buffers on host */
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
 #define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
-- 
cgit v1.2.3


From 304419d8a7e9204c5d19b704467b814df8c8f5b1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 18 May 2017 11:29:32 +0200
Subject: mmc: core: Allocate per-request data using the block layer core

The mmc_queue_req is a per-request state container the MMC core uses
to carry bounce buffers, pointers to asynchronous requests and so on.
Currently allocated as a static array of objects, then as a request
comes in, a mmc_queue_req is assigned to it, and used during the
lifetime of the request.

This is backwards compared to how other block layer drivers work:
they usally let the block core provide a per-request struct that get
allocated right beind the struct request, and which can be obtained
using the blk_mq_rq_to_pdu() helper. (The _mq_ infix in this function
name is misleading: it is used by both the old and the MQ block
layer.)

The per-request struct gets allocated to the size stored in the queue
variable .cmd_size initialized using the .init_rq_fn() and
cleaned up using .exit_rq_fn().

The block layer code makes the MMC core rely on this mechanism to
allocate the per-request mmc_queue_req state container.

Doing this make a lot of complicated queue handling go away. We only
need to keep the .qnct that keeps count of how many request are
currently being processed by the MMC layer. The MQ block layer will
replace also this once we transition to it.

Doing this refactoring is necessary to move the ioctl() operations
into custom block layer requests tagged with REQ_OP_DRV_[IN|OUT]
instead of the custom code using the BigMMCHostLock that we have
today: those require that per-request data be obtainable easily from
a request after creating a custom request with e.g.:

struct request *rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
struct mmc_queue_req *mq_rq = req_to_mq_rq(rq);

And this is not possible with the current construction, as the request
is not immediately assigned the per-request state container, but
instead it gets assigned when the request finally enters the MMC
queue, which is way too late for custom requests.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
[Ulf: Folded in the fix to drop a call to blk_cleanup_queue()]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/mmc/core/block.c |  38 ++------
 drivers/mmc/core/queue.c | 220 ++++++++++++-----------------------------------
 drivers/mmc/core/queue.h |  22 ++---
 include/linux/mmc/card.h |   2 -
 4 files changed, 78 insertions(+), 204 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b078686d..5f29b5625216 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -129,13 +129,6 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md);
 static int get_card_status(struct mmc_card *card, u32 *status, int retries);
 
-static void mmc_blk_requeue(struct request_queue *q, struct request *req)
-{
-	spin_lock_irq(q->queue_lock);
-	blk_requeue_request(q, req);
-	spin_unlock_irq(q->queue_lock);
-}
-
 static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
 {
 	struct mmc_blk_data *md;
@@ -1642,7 +1635,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
 	if (mmc_card_removed(card))
 		req->rq_flags |= RQF_QUIET;
 	while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
-	mmc_queue_req_free(mq, mqrq);
+	mq->qcnt--;
 }
 
 /**
@@ -1662,7 +1655,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
 	if (mmc_card_removed(mq->card)) {
 		req->rq_flags |= RQF_QUIET;
 		blk_end_request_all(req, -EIO);
-		mmc_queue_req_free(mq, mqrq);
+		mq->qcnt--; /* FIXME: just set to 0? */
 		return;
 	}
 	/* Else proceed and try to restart the current async request */
@@ -1685,12 +1678,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 	bool req_pending = true;
 
 	if (new_req) {
-		mqrq_cur = mmc_queue_req_find(mq, new_req);
-		if (!mqrq_cur) {
-			WARN_ON(1);
-			mmc_blk_requeue(mq->queue, new_req);
-			new_req = NULL;
-		}
+		mqrq_cur = req_to_mmc_queue_req(new_req);
+		mq->qcnt++;
 	}
 
 	if (!mq->qcnt)
@@ -1764,12 +1753,12 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 				if (req_pending)
 					mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
 				else
-					mmc_queue_req_free(mq, mq_rq);
+					mq->qcnt--;
 				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
 			if (!req_pending) {
-				mmc_queue_req_free(mq, mq_rq);
+				mq->qcnt--;
 				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
@@ -1814,7 +1803,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			req_pending = blk_end_request(old_req, -EIO,
 						      brq->data.blksz);
 			if (!req_pending) {
-				mmc_queue_req_free(mq, mq_rq);
+				mq->qcnt--;
 				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
@@ -1844,7 +1833,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 		}
 	} while (req_pending);
 
-	mmc_queue_req_free(mq, mq_rq);
+	mq->qcnt--;
 }
 
 void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
@@ -2166,7 +2155,6 @@ static int mmc_blk_probe(struct mmc_card *card)
 {
 	struct mmc_blk_data *md, *part_md;
 	char cap_str[10];
-	int ret;
 
 	/*
 	 * Check that the card supports the command class(es) we need.
@@ -2176,15 +2164,9 @@ static int mmc_blk_probe(struct mmc_card *card)
 
 	mmc_fixup_device(card, mmc_blk_fixups);
 
-	ret = mmc_queue_alloc_shared_queue(card);
-	if (ret)
-		return ret;
-
 	md = mmc_blk_alloc(card);
-	if (IS_ERR(md)) {
-		mmc_queue_free_shared_queue(card);
+	if (IS_ERR(md))
 		return PTR_ERR(md);
-	}
 
 	string_get_size((u64)get_capacity(md->disk), 512, STRING_UNITS_2,
 			cap_str, sizeof(cap_str));
@@ -2222,7 +2204,6 @@ static int mmc_blk_probe(struct mmc_card *card)
  out:
 	mmc_blk_remove_parts(card, md);
 	mmc_blk_remove_req(md);
-	mmc_queue_free_shared_queue(card);
 	return 0;
 }
 
@@ -2240,7 +2221,6 @@ static void mmc_blk_remove(struct mmc_card *card)
 	pm_runtime_put_noidle(&card->dev);
 	mmc_blk_remove_req(md);
 	dev_set_drvdata(&card->dev, NULL);
-	mmc_queue_free_shared_queue(card);
 }
 
 static int _mmc_blk_suspend(struct mmc_card *card)
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 70ba7f94c706..d6c7b4cde4db 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -40,35 +40,6 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	return BLKPREP_OK;
 }
 
-struct mmc_queue_req *mmc_queue_req_find(struct mmc_queue *mq,
-					 struct request *req)
-{
-	struct mmc_queue_req *mqrq;
-	int i = ffz(mq->qslots);
-
-	if (i >= mq->qdepth)
-		return NULL;
-
-	mqrq = &mq->mqrq[i];
-	WARN_ON(mqrq->req || mq->qcnt >= mq->qdepth ||
-		test_bit(mqrq->task_id, &mq->qslots));
-	mqrq->req = req;
-	mq->qcnt += 1;
-	__set_bit(mqrq->task_id, &mq->qslots);
-
-	return mqrq;
-}
-
-void mmc_queue_req_free(struct mmc_queue *mq,
-			struct mmc_queue_req *mqrq)
-{
-	WARN_ON(!mqrq->req || mq->qcnt < 1 ||
-		!test_bit(mqrq->task_id, &mq->qslots));
-	mqrq->req = NULL;
-	mq->qcnt -= 1;
-	__clear_bit(mqrq->task_id, &mq->qslots);
-}
-
 static int mmc_queue_thread(void *d)
 {
 	struct mmc_queue *mq = d;
@@ -149,11 +120,11 @@ static void mmc_request_fn(struct request_queue *q)
 		wake_up_process(mq->thread);
 }
 
-static struct scatterlist *mmc_alloc_sg(int sg_len)
+static struct scatterlist *mmc_alloc_sg(int sg_len, gfp_t gfp)
 {
 	struct scatterlist *sg;
 
-	sg = kmalloc_array(sg_len, sizeof(*sg), GFP_KERNEL);
+	sg = kmalloc_array(sg_len, sizeof(*sg), gfp);
 	if (sg)
 		sg_init_table(sg, sg_len);
 
@@ -179,80 +150,6 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 		queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q);
 }
 
-static void mmc_queue_req_free_bufs(struct mmc_queue_req *mqrq)
-{
-	kfree(mqrq->bounce_sg);
-	mqrq->bounce_sg = NULL;
-
-	kfree(mqrq->sg);
-	mqrq->sg = NULL;
-
-	kfree(mqrq->bounce_buf);
-	mqrq->bounce_buf = NULL;
-}
-
-static void mmc_queue_reqs_free_bufs(struct mmc_queue_req *mqrq, int qdepth)
-{
-	int i;
-
-	for (i = 0; i < qdepth; i++)
-		mmc_queue_req_free_bufs(&mqrq[i]);
-}
-
-static void mmc_queue_free_mqrqs(struct mmc_queue_req *mqrq, int qdepth)
-{
-	mmc_queue_reqs_free_bufs(mqrq, qdepth);
-	kfree(mqrq);
-}
-
-static struct mmc_queue_req *mmc_queue_alloc_mqrqs(int qdepth)
-{
-	struct mmc_queue_req *mqrq;
-	int i;
-
-	mqrq = kcalloc(qdepth, sizeof(*mqrq), GFP_KERNEL);
-	if (mqrq) {
-		for (i = 0; i < qdepth; i++)
-			mqrq[i].task_id = i;
-	}
-
-	return mqrq;
-}
-
-static int mmc_queue_alloc_bounce_bufs(struct mmc_queue_req *mqrq, int qdepth,
-				       unsigned int bouncesz)
-{
-	int i;
-
-	for (i = 0; i < qdepth; i++) {
-		mqrq[i].bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-		if (!mqrq[i].bounce_buf)
-			return -ENOMEM;
-
-		mqrq[i].sg = mmc_alloc_sg(1);
-		if (!mqrq[i].sg)
-			return -ENOMEM;
-
-		mqrq[i].bounce_sg = mmc_alloc_sg(bouncesz / 512);
-		if (!mqrq[i].bounce_sg)
-			return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static bool mmc_queue_alloc_bounce(struct mmc_queue_req *mqrq, int qdepth,
-				   unsigned int bouncesz)
-{
-	int ret;
-
-	ret = mmc_queue_alloc_bounce_bufs(mqrq, qdepth, bouncesz);
-	if (ret)
-		mmc_queue_reqs_free_bufs(mqrq, qdepth);
-
-	return !ret;
-}
-
 static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
 {
 	unsigned int bouncesz = MMC_QUEUE_BOUNCESZ;
@@ -273,71 +170,61 @@ static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
 	return bouncesz;
 }
 
-static int mmc_queue_alloc_sgs(struct mmc_queue_req *mqrq, int qdepth,
-			       int max_segs)
+/**
+ * mmc_init_request() - initialize the MMC-specific per-request data
+ * @q: the request queue
+ * @req: the request
+ * @gfp: memory allocation policy
+ */
+static int mmc_init_request(struct request_queue *q, struct request *req,
+			    gfp_t gfp)
 {
-	int i;
+	struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req);
+	struct mmc_queue *mq = q->queuedata;
+	struct mmc_card *card = mq->card;
+	struct mmc_host *host = card->host;
 
-	for (i = 0; i < qdepth; i++) {
-		mqrq[i].sg = mmc_alloc_sg(max_segs);
-		if (!mqrq[i].sg)
+	mq_rq->req = req;
+
+	if (card->bouncesz) {
+		mq_rq->bounce_buf = kmalloc(card->bouncesz, gfp);
+		if (!mq_rq->bounce_buf)
+			return -ENOMEM;
+		if (card->bouncesz > 512) {
+			mq_rq->sg = mmc_alloc_sg(1, gfp);
+			if (!mq_rq->sg)
+				return -ENOMEM;
+			mq_rq->bounce_sg = mmc_alloc_sg(card->bouncesz / 512,
+							gfp);
+			if (!mq_rq->bounce_sg)
+				return -ENOMEM;
+		}
+	} else {
+		mq_rq->bounce_buf = NULL;
+		mq_rq->bounce_sg = NULL;
+		mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp);
+		if (!mq_rq->sg)
 			return -ENOMEM;
 	}
 
 	return 0;
 }
 
-void mmc_queue_free_shared_queue(struct mmc_card *card)
-{
-	if (card->mqrq) {
-		mmc_queue_free_mqrqs(card->mqrq, card->qdepth);
-		card->mqrq = NULL;
-	}
-}
-
-static int __mmc_queue_alloc_shared_queue(struct mmc_card *card, int qdepth)
+static void mmc_exit_request(struct request_queue *q, struct request *req)
 {
-	struct mmc_host *host = card->host;
-	struct mmc_queue_req *mqrq;
-	unsigned int bouncesz;
-	int ret = 0;
-
-	if (card->mqrq)
-		return -EINVAL;
+	struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req);
 
-	mqrq = mmc_queue_alloc_mqrqs(qdepth);
-	if (!mqrq)
-		return -ENOMEM;
-
-	card->mqrq = mqrq;
-	card->qdepth = qdepth;
+	/* It is OK to kfree(NULL) so this will be smooth */
+	kfree(mq_rq->bounce_sg);
+	mq_rq->bounce_sg = NULL;
 
-	bouncesz = mmc_queue_calc_bouncesz(host);
+	kfree(mq_rq->bounce_buf);
+	mq_rq->bounce_buf = NULL;
 
-	if (bouncesz && !mmc_queue_alloc_bounce(mqrq, qdepth, bouncesz)) {
-		bouncesz = 0;
-		pr_warn("%s: unable to allocate bounce buffers\n",
-			mmc_card_name(card));
-	}
+	kfree(mq_rq->sg);
+	mq_rq->sg = NULL;
 
-	card->bouncesz = bouncesz;
-
-	if (!bouncesz) {
-		ret = mmc_queue_alloc_sgs(mqrq, qdepth, host->max_segs);
-		if (ret)
-			goto out_err;
-	}
-
-	return ret;
-
-out_err:
-	mmc_queue_free_shared_queue(card);
-	return ret;
-}
-
-int mmc_queue_alloc_shared_queue(struct mmc_card *card)
-{
-	return __mmc_queue_alloc_shared_queue(card, 2);
+	mq_rq->req = NULL;
 }
 
 /**
@@ -360,13 +247,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 		limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
 
 	mq->card = card;
-	mq->queue = blk_init_queue(mmc_request_fn, lock);
+	mq->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!mq->queue)
 		return -ENOMEM;
-
-	mq->mqrq = card->mqrq;
-	mq->qdepth = card->qdepth;
+	mq->queue->queue_lock = lock;
+	mq->queue->request_fn = mmc_request_fn;
+	mq->queue->init_rq_fn = mmc_init_request;
+	mq->queue->exit_rq_fn = mmc_exit_request;
+	mq->queue->cmd_size = sizeof(struct mmc_queue_req);
 	mq->queue->queuedata = mq;
+	mq->qcnt = 0;
+	ret = blk_init_allocated_queue(mq->queue);
+	if (ret) {
+		blk_cleanup_queue(mq->queue);
+		return ret;
+	}
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
@@ -374,6 +269,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	if (mmc_can_erase(card))
 		mmc_queue_setup_discard(mq->queue, card);
 
+	card->bouncesz = mmc_queue_calc_bouncesz(host);
 	if (card->bouncesz) {
 		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 		blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
@@ -400,7 +296,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	return 0;
 
 cleanup_queue:
-	mq->mqrq = NULL;
 	blk_cleanup_queue(mq->queue);
 	return ret;
 }
@@ -422,7 +317,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 	blk_start_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	mq->mqrq = NULL;
 	mq->card = NULL;
 }
 EXPORT_SYMBOL(mmc_cleanup_queue);
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index 871796c3f406..dae31bc0c2d3 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -3,9 +3,15 @@
 
 #include <linux/types.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/mmc/core.h>
 #include <linux/mmc/host.h>
 
+static inline struct mmc_queue_req *req_to_mmc_queue_req(struct request *rq)
+{
+	return blk_mq_rq_to_pdu(rq);
+}
+
 static inline bool mmc_req_is_special(struct request *req)
 {
 	return req &&
@@ -34,7 +40,6 @@ struct mmc_queue_req {
 	struct scatterlist	*bounce_sg;
 	unsigned int		bounce_sg_len;
 	struct mmc_async_req	areq;
-	int			task_id;
 };
 
 struct mmc_queue {
@@ -45,14 +50,15 @@ struct mmc_queue {
 	bool			asleep;
 	struct mmc_blk_data	*blkdata;
 	struct request_queue	*queue;
-	struct mmc_queue_req	*mqrq;
-	int			qdepth;
+	/*
+	 * FIXME: this counter is not a very reliable way of keeping
+	 * track of how many requests that are ongoing. Switch to just
+	 * letting the block core keep track of requests and per-request
+	 * associated mmc_queue_req data.
+	 */
 	int			qcnt;
-	unsigned long		qslots;
 };
 
-extern int mmc_queue_alloc_shared_queue(struct mmc_card *card);
-extern void mmc_queue_free_shared_queue(struct mmc_card *card);
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
 			  const char *);
 extern void mmc_cleanup_queue(struct mmc_queue *);
@@ -66,8 +72,4 @@ extern void mmc_queue_bounce_post(struct mmc_queue_req *);
 
 extern int mmc_access_rpmb(struct mmc_queue *);
 
-extern struct mmc_queue_req *mmc_queue_req_find(struct mmc_queue *,
-						struct request *);
-extern void mmc_queue_req_free(struct mmc_queue *, struct mmc_queue_req *);
-
 #endif
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index aad015e0152b..46c73e97e61f 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -305,9 +305,7 @@ struct mmc_card {
 	struct mmc_part	part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
 	unsigned int    nr_parts;
 
-	struct mmc_queue_req	*mqrq;		/* Shared queue structure */
 	unsigned int		bouncesz;	/* Bounce buffer size */
-	int			qdepth;		/* Shared queue depth */
 };
 
 static inline bool mmc_large_sector(struct mmc_card *card)
-- 
cgit v1.2.3


From d63c2bf49c0de83e88153da3af9970f68c633257 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Sun, 28 May 2017 11:30:47 +0200
Subject: mmc: use proper name for the R-Car SoC

It is 'R-Car', not 'RCar'. No code or binding changes, only descriptive text.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_core.c | 2 +-
 include/linux/mfd/tmio.h             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index fa6c188e0327..82150a966391 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -130,7 +130,7 @@ static unsigned int renesas_sdhi_clk_update(struct tmio_mmc_host *host,
 	unsigned int freq, diff, best_freq = 0, diff_min = ~0;
 	int i, ret;
 
-	/* tested only on RCar Gen2+ currently; may work for others */
+	/* tested only on R-Car Gen2+ currently; may work for others */
 	if (!(host->pdata->flags & TMIO_MMC_MIN_RCAR2))
 		return clk_get_rate(priv->clk);
 
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index a1520d88ebf3..c83c16b931a8 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -66,7 +66,7 @@
  */
 #define TMIO_MMC_SDIO_IRQ		(1 << 2)
 
-/* Some features are only available or tested on RCar Gen2 or later */
+/* Some features are only available or tested on R-Car Gen2 or later */
 #define TMIO_MMC_MIN_RCAR2		(1 << 3)
 
 /*
-- 
cgit v1.2.3


From d2a47176a877b1eccd3086a4c8d790d644d594cb Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 8 Jun 2017 15:23:08 +0200
Subject: mmc: core: Remove MMC_CAP2_HC_ERASE_SZ

The MMC_CAP2_HC_ERASE_SZ is used only by a few mmc host drivers. Its intent
is to enable eMMC's high-capacity erase size, as to improve the behaviour
of the erase operations.

We should strive to avoid software configuration options that aren't
necessary, but instead deploy common behaviours. For these reasons, let's
remove the capability bit for MMC_CAP2_HC_ERASE_SZ and make it the default
behaviour.

Note that this change doesn't affect eMMCs supporting trim/discard, because
these commands operates on sectors and takes precedence over erase
commands.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Tested-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 drivers/mmc/core/mmc.c            | 8 ++------
 drivers/mmc/host/sdhci-acpi.c     | 1 -
 drivers/mmc/host/sdhci-brcmstb.c  | 3 ---
 drivers/mmc/host/sdhci-pci-core.c | 4 +---
 include/linux/mmc/host.h          | 1 -
 5 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index e504b66bd41c..4ffea14b7eb6 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1651,12 +1651,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		mmc_set_erase_size(card);
 	}
 
-	/*
-	 * If enhanced_area_en is TRUE, host needs to enable ERASE_GRP_DEF
-	 * bit.  This bit will be lost every time after a reset or power off.
-	 */
-	if (card->ext_csd.partition_setting_completed ||
-	    (card->ext_csd.rev >= 3 && (host->caps2 & MMC_CAP2_HC_ERASE_SZ))) {
+	/* Enable ERASE_GRP_DEF. This bit is lost after a reset or power off. */
+	if (card->ext_csd.rev >= 3) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 				 EXT_CSD_ERASE_GROUP_DEF, 1,
 				 card->ext_csd.generic_cmd6_time);
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 89d9a8c014f5..cf66a3db71b8 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -274,7 +274,6 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
 	.caps    = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
 		   MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
 		   MMC_CAP_CMD_DURING_TFR | MMC_CAP_WAIT_WHILE_BUSY,
-	.caps2   = MMC_CAP2_HC_ERASE_SZ,
 	.flags   = SDHCI_ACPI_RUNTIME_PM,
 	.quirks  = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index 242c5dc7a81e..e2f638338e8f 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -89,9 +89,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
 		goto err_clk;
 	}
 
-	/* Enable MMC_CAP2_HC_ERASE_SZ for better max discard calculations */
-	host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
-
 	sdhci_get_of_property(pdev);
 	mmc_of_parse(host->mmc);
 
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 8fa84a013be4..227a5cb4b3bf 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -347,8 +347,7 @@ static inline void sdhci_pci_remove_own_cd(struct sdhci_pci_slot *slot)
 static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot)
 {
 	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE;
-	slot->host->mmc->caps2 |= MMC_CAP2_BOOTPART_NOACC |
-				  MMC_CAP2_HC_ERASE_SZ;
+	slot->host->mmc->caps2 |= MMC_CAP2_BOOTPART_NOACC;
 	return 0;
 }
 
@@ -587,7 +586,6 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 				 MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
 				 MMC_CAP_CMD_DURING_TFR |
 				 MMC_CAP_WAIT_WHILE_BUSY;
-	slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
 	slot->hw_reset = sdhci_pci_int_hw_reset;
 	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BSW_EMMC)
 		slot->host->timeout_clk = 1000; /* 1000 kHz i.e. 1 MHz */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 9209f95a5106..c81380a2181f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -287,7 +287,6 @@ struct mmc_host {
 #define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
 #define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
 				 MMC_CAP2_HS200_1_2V_SDR)
-#define MMC_CAP2_HC_ERASE_SZ	(1 << 9)	/* High-capacity erase size */
 #define MMC_CAP2_CD_ACTIVE_HIGH	(1 << 10)	/* Card-detect signal active high */
 #define MMC_CAP2_RO_ACTIVE_HIGH	(1 << 11)	/* Write-protect signal active high */
 #define MMC_CAP2_PACKED_RD	(1 << 12)	/* Allow packed read */
-- 
cgit v1.2.3


From 03dbaa04a2e5bac0ae907a9ed31472bc4bb56fd3 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 13 Jun 2017 15:07:51 +0300
Subject: mmc: slot-gpio: Add support to enable irq wake on cd_irq

Add host capability MMC_CAP_CD_WAKE to enable irq wake on the card detect
irq.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c      | 5 ++++-
 drivers/mmc/core/slot-gpio.c | 2 ++
 include/linux/mmc/host.h     | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index d40697fae911..26431267a3e2 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2652,8 +2652,11 @@ void mmc_stop_host(struct mmc_host *host)
 	host->removed = 1;
 	spin_unlock_irqrestore(&host->lock, flags);
 #endif
-	if (host->slot.cd_irq >= 0)
+	if (host->slot.cd_irq >= 0) {
+		if (host->slot.cd_wake_enabled)
+			disable_irq_wake(host->slot.cd_irq);
 		disable_irq(host->slot.cd_irq);
+	}
 
 	host->rescan_disable = 1;
 	cancel_delayed_work_sync(&host->detect);
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index a8450a8701e4..863f1dbbfc1b 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -151,6 +151,8 @@ void mmc_gpiod_request_cd_irq(struct mmc_host *host)
 
 	if (irq < 0)
 		host->caps |= MMC_CAP_NEEDS_POLL;
+	else if ((host->caps & MMC_CAP_CD_WAKE) && !enable_irq_wake(irq))
+		host->slot.cd_wake_enabled = true;
 }
 EXPORT_SYMBOL(mmc_gpiod_request_cd_irq);
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c81380a2181f..ebd1cebbef0c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -185,6 +185,7 @@ struct mmc_async_req {
  */
 struct mmc_slot {
 	int cd_irq;
+	bool cd_wake_enabled;
 	void *handler_priv;
 };
 
@@ -275,6 +276,7 @@ struct mmc_host {
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
 #define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
+#define MMC_CAP_CD_WAKE		(1 << 28)	/* Enable card detect wake */
 #define MMC_CAP_CMD_DURING_TFR	(1 << 29)	/* Commands during data transfer */
 #define MMC_CAP_CMD23		(1 << 30)	/* CMD23 supported. */
 #define MMC_CAP_HW_RESET	(1 << 31)	/* Hardware reset */
-- 
cgit v1.2.3


From f2218db81548544bf7349911546a94bfaabbd697 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Fri, 16 Jun 2017 18:11:03 +0200
Subject: mmc: tmio: improve checkpatch cleanness

Trivial updates to improve checkpatch cleanness.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi.h  |  2 +-
 drivers/mmc/host/tmio_mmc.c      |  8 ++---
 drivers/mmc/host/tmio_mmc.h      | 17 ++++++-----
 drivers/mmc/host/tmio_mmc_core.c | 66 +++++++++++++++++++++-------------------
 include/linux/mfd/tmio.h         | 33 ++++++++++----------
 5 files changed, 66 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h
index eb3ea15ff92d..6b4a79508c6b 100644
--- a/drivers/mmc/host/renesas_sdhi.h
+++ b/drivers/mmc/host/renesas_sdhi.h
@@ -27,7 +27,7 @@ struct renesas_sdhi_of_data {
 	unsigned long capabilities2;
 	enum dma_slave_buswidth dma_buswidth;
 	dma_addr_t dma_rx_offset;
-	unsigned bus_shift;
+	unsigned int bus_shift;
 	int scc_offset;
 	struct renesas_sdhi_scc *taps;
 	int taps_num;
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 61cf36fb270b..64b7e9f18361 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -104,8 +104,8 @@ static int tmio_mmc_probe(struct platform_device *pdev)
 		goto host_free;
 
 	ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq,
-				IRQF_TRIGGER_FALLING,
-				dev_name(&pdev->dev), host);
+			       IRQF_TRIGGER_FALLING,
+			       dev_name(&pdev->dev), host);
 	if (ret)
 		goto host_remove;
 
@@ -132,6 +132,7 @@ static int tmio_mmc_remove(struct platform_device *pdev)
 
 	if (mmc) {
 		struct tmio_mmc_host *host = mmc_priv(mmc);
+
 		tmio_mmc_host_remove(host);
 		if (cell->disable)
 			cell->disable(pdev);
@@ -145,8 +146,7 @@ static int tmio_mmc_remove(struct platform_device *pdev)
 static const struct dev_pm_ops tmio_mmc_dev_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(tmio_mmc_suspend, tmio_mmc_resume)
 	SET_RUNTIME_PM_OPS(tmio_mmc_host_runtime_suspend,
-			tmio_mmc_host_runtime_resume,
-			NULL)
+			   tmio_mmc_host_runtime_resume, NULL)
 };
 
 static struct platform_driver tmio_mmc_driver = {
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 768c8abaedda..6ad6704175dc 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -239,24 +239,26 @@ static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
 }
 
 static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
+				      u16 *buf, int count)
 {
 	readsw(host->ctl + (addr << host->bus_shift), buf, count);
 }
 
-static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, int addr)
+static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host,
+					      int addr)
 {
 	return readw(host->ctl + (addr << host->bus_shift)) |
 	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
 }
 
 static inline void sd_ctrl_read32_rep(struct tmio_mmc_host *host, int addr,
-		u32 *buf, int count)
+				      u32 *buf, int count)
 {
 	readsl(host->ctl + (addr << host->bus_shift), buf, count);
 }
 
-static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
+static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr,
+				   u16 val)
 {
 	/* If there is a hook and it returns non-zero then there
 	 * is an error and the write should be skipped
@@ -267,19 +269,20 @@ static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val
 }
 
 static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
+				       u16 *buf, int count)
 {
 	writesw(host->ctl + (addr << host->bus_shift), buf, count);
 }
 
-static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, int addr, u32 val)
+static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host,
+						int addr, u32 val)
 {
 	writew(val & 0xffff, host->ctl + (addr << host->bus_shift));
 	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
 }
 
 static inline void sd_ctrl_write32_rep(struct tmio_mmc_host *host, int addr,
-		const u32 *buf, int count)
+				       const u32 *buf, int count)
 {
 	writesl(host->ctl + (addr << host->bus_shift), buf, count);
 }
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index fbe38464e7d7..82b80d42f7ae 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -127,16 +127,17 @@ static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
 
 #define STATUS_TO_TEXT(a, status, i) \
 	do { \
-		if (status & TMIO_STAT_##a) { \
-			if (i++) \
-				printk(" | "); \
-			printk(#a); \
+		if ((status) & TMIO_STAT_##a) { \
+			if ((i)++) \
+				printk(KERN_DEBUG " | "); \
+			printk(KERN_DEBUG #a); \
 		} \
 	} while (0)
 
 static void pr_debug_status(u32 status)
 {
 	int i = 0;
+
 	pr_debug("status: %08x = ", status);
 	STATUS_TO_TEXT(CARD_REMOVE, status, i);
 	STATUS_TO_TEXT(CARD_INSERT, status, i);
@@ -177,8 +178,7 @@ static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 		pm_runtime_get_sync(mmc_dev(mmc));
 
 		host->sdio_irq_enabled = true;
-		host->sdio_irq_mask = TMIO_SDIO_MASK_ALL &
-					~TMIO_SDIO_STAT_IOIRQ;
+		host->sdio_irq_mask = TMIO_SDIO_MASK_ALL & ~TMIO_SDIO_STAT_IOIRQ;
 
 		/* Clear obsolete interrupts before enabling */
 		sdio_status = sd_ctrl_read16(host, CTL_SDIO_STATUS) & ~TMIO_SDIO_MASK_ALL;
@@ -222,7 +222,7 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 }
 
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
-				unsigned int new_clock)
+			       unsigned int new_clock)
 {
 	u32 clk = 0, clock;
 
@@ -289,16 +289,16 @@ static void tmio_mmc_reset_work(struct work_struct *work)
 	 * cancel_delayed_work(), it can happen, that a .set_ios() call preempts
 	 * us, so, have to check for IS_ERR(host->mrq)
 	 */
-	if (IS_ERR_OR_NULL(mrq)
-	    || time_is_after_jiffies(host->last_req_ts +
-		msecs_to_jiffies(CMDREQ_TIMEOUT))) {
+	if (IS_ERR_OR_NULL(mrq) ||
+	    time_is_after_jiffies(host->last_req_ts +
+				  msecs_to_jiffies(CMDREQ_TIMEOUT))) {
 		spin_unlock_irqrestore(&host->lock, flags);
 		return;
 	}
 
 	dev_warn(&host->pdev->dev,
-		"timeout waiting for hardware interrupt (CMD%u)\n",
-		mrq->cmd->opcode);
+		 "timeout waiting for hardware interrupt (CMD%u)\n",
+		 mrq->cmd->opcode);
 
 	if (host->data)
 		host->data->error = -ETIMEDOUT;
@@ -336,7 +336,8 @@ static void tmio_mmc_reset_work(struct work_struct *work)
 #define SECURITY_CMD   0x4000
 #define NO_CMD12_ISSUE 0x4000 /* TMIO_MMC_HAVE_CMD12_CTRL */
 
-static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
+static int tmio_mmc_start_command(struct tmio_mmc_host *host,
+				  struct mmc_command *cmd)
 {
 	struct mmc_data *data = host->data;
 	int c = cmd->opcode;
@@ -375,8 +376,8 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command
 			c |= TRANSFER_MULTI;
 
 			/*
-			 * Disable auto CMD12 at IO_RW_EXTENDED and SET_BLOCK_COUNT
-			 * when doing multiple block transfer
+			 * Disable auto CMD12 at IO_RW_EXTENDED and
+			 * SET_BLOCK_COUNT when doing multiple block transfer
 			 */
 			if ((host->pdata->flags & TMIO_MMC_HAVE_CMD12_CTRL) &&
 			    (cmd->opcode == SD_IO_RW_EXTENDED || host->mrq->sbc))
@@ -501,8 +502,6 @@ static void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
 
 	if (host->sg_off == host->sg_ptr->length)
 		tmio_mmc_next_sg(host);
-
-	return;
 }
 
 static void tmio_mmc_check_bounce_buffer(struct tmio_mmc_host *host)
@@ -510,6 +509,7 @@ static void tmio_mmc_check_bounce_buffer(struct tmio_mmc_host *host)
 	if (host->sg_ptr == &host->bounce_sg) {
 		unsigned long flags;
 		void *sg_vaddr = tmio_mmc_kmap_atomic(host->sg_orig, &flags);
+
 		memcpy(sg_vaddr, host->bounce_buf, host->bounce_sg.length);
 		tmio_mmc_kunmap_atomic(host->sg_orig, &flags, sg_vaddr);
 	}
@@ -574,6 +574,7 @@ EXPORT_SYMBOL_GPL(tmio_mmc_do_data_irq);
 static void tmio_mmc_data_irq(struct tmio_mmc_host *host, unsigned int stat)
 {
 	struct mmc_data *data;
+
 	spin_lock(&host->lock);
 	data = host->data;
 
@@ -618,8 +619,7 @@ out:
 	spin_unlock(&host->lock);
 }
 
-static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
-	unsigned int stat)
+static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, unsigned int stat)
 {
 	struct mmc_command *cmd = host->cmd;
 	int i, addr;
@@ -680,7 +680,7 @@ out:
 }
 
 static bool __tmio_mmc_card_detect_irq(struct tmio_mmc_host *host,
-				      int ireg, int status)
+				       int ireg, int status)
 {
 	struct mmc_host *mmc = host->mmc;
 
@@ -698,14 +698,13 @@ static bool __tmio_mmc_card_detect_irq(struct tmio_mmc_host *host,
 	return false;
 }
 
-static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host,
-				 int ireg, int status)
+static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host, int ireg,
+				  int status)
 {
 	/* Command completion */
 	if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
-		tmio_mmc_ack_mmc_irqs(host,
-			     TMIO_STAT_CMDRESPEND |
-			     TMIO_STAT_CMDTIMEOUT);
+		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CMDRESPEND |
+				      TMIO_STAT_CMDTIMEOUT);
 		tmio_mmc_cmd_irq(host, status);
 		return true;
 	}
@@ -776,7 +775,7 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
 EXPORT_SYMBOL_GPL(tmio_mmc_irq);
 
 static int tmio_mmc_start_data(struct tmio_mmc_host *host,
-	struct mmc_data *data)
+			       struct mmc_data *data)
 {
 	struct tmio_mmc_data *pdata = host->pdata;
 
@@ -831,7 +830,7 @@ static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 
 	if (host->tap_num * 2 >= sizeof(host->taps) * BITS_PER_BYTE) {
 		dev_warn_once(&host->pdev->dev,
-		      "Too many taps, skipping tuning. Please consider updating size of taps field of tmio_mmc_host\n");
+			"Too many taps, skipping tuning. Please consider updating size of taps field of tmio_mmc_host\n");
 		goto out;
 	}
 
@@ -862,7 +861,8 @@ out:
 	return ret;
 }
 
-static void tmio_process_mrq(struct tmio_mmc_host *host, struct mmc_request *mrq)
+static void tmio_process_mrq(struct tmio_mmc_host *host,
+			     struct mmc_request *mrq)
 {
 	struct mmc_command *cmd;
 	int ret;
@@ -1030,7 +1030,7 @@ static void tmio_mmc_power_off(struct tmio_mmc_host *host)
 }
 
 static void tmio_mmc_set_bus_width(struct tmio_mmc_host *host,
-				unsigned char bus_width)
+				   unsigned char bus_width)
 {
 	u16 reg = sd_ctrl_read16(host, CTL_SD_MEM_CARD_OPT)
 				& ~(CARD_OPT_WIDTH | CARD_OPT_WIDTH8);
@@ -1070,7 +1070,8 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			dev_dbg(dev,
 				"%s.%d: CMD%u active since %lu, now %lu!\n",
 				current->comm, task_pid_nr(current),
-				host->mrq->cmd->opcode, host->last_req_ts, jiffies);
+				host->mrq->cmd->opcode, host->last_req_ts,
+				jiffies);
 		}
 		spin_unlock_irqrestore(&host->lock, flags);
 
@@ -1117,6 +1118,7 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc)
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 	struct tmio_mmc_data *pdata = host->pdata;
 	int ret = mmc_gpio_get_ro(mmc);
+
 	if (ret >= 0)
 		return ret;
 
@@ -1173,6 +1175,7 @@ static void tmio_mmc_of_parse(struct platform_device *pdev,
 			      struct tmio_mmc_data *pdata)
 {
 	const struct device_node *np = pdev->dev.of_node;
+
 	if (!np)
 		return;
 
@@ -1243,7 +1246,8 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host,
 		return -ENOMEM;
 
 	tmio_mmc_ops.card_busy = _host->card_busy;
-	tmio_mmc_ops.start_signal_voltage_switch = _host->start_signal_voltage_switch;
+	tmio_mmc_ops.start_signal_voltage_switch =
+		_host->start_signal_voltage_switch;
 	mmc->ops = &tmio_mmc_ops;
 
 	mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities;
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index c83c16b931a8..26e8f8c0a6db 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -13,15 +13,15 @@
 #define tmio_ioread16(addr) readw(addr)
 #define tmio_ioread16_rep(r, b, l) readsw(r, b, l)
 #define tmio_ioread32(addr) \
-	(((u32) readw((addr))) | (((u32) readw((addr) + 2)) << 16))
+	(((u32)readw((addr))) | (((u32)readw((addr) + 2)) << 16))
 
 #define tmio_iowrite8(val, addr) writeb((val), (addr))
 #define tmio_iowrite16(val, addr) writew((val), (addr))
 #define tmio_iowrite16_rep(r, b, l) writesw(r, b, l)
 #define tmio_iowrite32(val, addr) \
 	do { \
-	writew((val),       (addr)); \
-	writew((val) >> 16, (addr) + 2); \
+		writew((val),       (addr)); \
+		writew((val) >> 16, (addr) + 2); \
 	} while (0)
 
 #define CNF_CMD     0x04
@@ -55,57 +55,57 @@
 	} while (0)
 
 /* tmio MMC platform flags */
-#define TMIO_MMC_WRPROTECT_DISABLE	(1 << 0)
+#define TMIO_MMC_WRPROTECT_DISABLE	BIT(0)
 /*
  * Some controllers can support a 2-byte block size when the bus width
  * is configured in 4-bit mode.
  */
-#define TMIO_MMC_BLKSZ_2BYTES		(1 << 1)
+#define TMIO_MMC_BLKSZ_2BYTES		BIT(1)
 /*
  * Some controllers can support SDIO IRQ signalling.
  */
-#define TMIO_MMC_SDIO_IRQ		(1 << 2)
+#define TMIO_MMC_SDIO_IRQ		BIT(2)
 
 /* Some features are only available or tested on R-Car Gen2 or later */
-#define TMIO_MMC_MIN_RCAR2		(1 << 3)
+#define TMIO_MMC_MIN_RCAR2		BIT(3)
 
 /*
  * Some controllers require waiting for the SD bus to become
  * idle before writing to some registers.
  */
-#define TMIO_MMC_HAS_IDLE_WAIT		(1 << 4)
+#define TMIO_MMC_HAS_IDLE_WAIT		BIT(4)
 /*
  * A GPIO is used for card hotplug detection. We need an extra flag for this,
  * because 0 is a valid GPIO number too, and requiring users to specify
  * cd_gpio < 0 to disable GPIO hotplug would break backwards compatibility.
  */
-#define TMIO_MMC_USE_GPIO_CD		(1 << 5)
+#define TMIO_MMC_USE_GPIO_CD		BIT(5)
 
 /*
  * Some controllers doesn't have over 0x100 register.
  * it is used to checking accessibility of
  * CTL_SD_CARD_CLK_CTL / CTL_CLK_AND_WAIT_CTL
  */
-#define TMIO_MMC_HAVE_HIGH_REG		(1 << 6)
+#define TMIO_MMC_HAVE_HIGH_REG		BIT(6)
 
 /*
  * Some controllers have CMD12 automatically
  * issue/non-issue register
  */
-#define TMIO_MMC_HAVE_CMD12_CTRL	(1 << 7)
+#define TMIO_MMC_HAVE_CMD12_CTRL	BIT(7)
 
 /* Controller has some SDIO status bits which must be 1 */
-#define TMIO_MMC_SDIO_STATUS_SETBITS	(1 << 8)
+#define TMIO_MMC_SDIO_STATUS_SETBITS	BIT(8)
 
 /*
  * Some controllers have a 32-bit wide data port register
  */
-#define TMIO_MMC_32BIT_DATA_PORT	(1 << 9)
+#define TMIO_MMC_32BIT_DATA_PORT	BIT(9)
 
 /*
  * Some controllers allows to set SDx actual clock
  */
-#define TMIO_MMC_CLK_ACTUAL		(1 << 10)
+#define TMIO_MMC_CLK_ACTUAL		BIT(10)
 
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
@@ -146,9 +146,9 @@ struct tmio_nand_data {
 
 struct tmio_fb_data {
 	int			(*lcd_set_power)(struct platform_device *fb_dev,
-								bool on);
+						 bool on);
 	int			(*lcd_mode)(struct platform_device *fb_dev,
-					const struct fb_videomode *mode);
+					    const struct fb_videomode *mode);
 	int			num_modes;
 	struct fb_videomode	*modes;
 
@@ -157,5 +157,4 @@ struct tmio_fb_data {
 	int			width;
 };
 
-
 #endif
-- 
cgit v1.2.3


From ceea5e3771ed2378668455fa21861bead7504df5 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 8 Jun 2017 16:44:20 -0700
Subject: time: Fix clock->read(clock) race around clocksource changes

In tests, which excercise switching of clocksources, a NULL
pointer dereference can be observed on AMR64 platforms in the
clocksource read() function:

u64 clocksource_mmio_readl_down(struct clocksource *c)
{
	return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}

This is called from the core timekeeping code via:

	cycle_now = tkr->read(tkr->clock);

tkr->read is the cached tkr->clock->read() function pointer.
When the clocksource is changed then tkr->clock and tkr->read
are updated sequentially. The code above results in a sequential
load operation of tkr->read and tkr->clock as well.

If the store to tkr->clock hits between the loads of tkr->read
and tkr->clock, then the old read() function is called with the
new clock pointer. As a consequence the read() function
dereferences a different data structure and the resulting 'reg'
pointer can point anywhere including NULL.

This problem was introduced when the timekeeping code was
switched over to use struct tk_read_base. Before that, it was
theoretically possible as well when the compiler decided to
reload clock in the code sequence:

     now = tk->clock->read(tk->clock);

Add a helper function which avoids the issue by reading
tk_read_base->clock once into a local variable clk and then issue
the read function via clk->read(clk). This guarantees that the
read() function always gets the proper clocksource pointer handed
in.

Since there is now no use for the tkr.read pointer, this patch
also removes it, and to address stopping the fast timekeeper
during suspend/resume, it introduces a dummy clocksource to use
rather then just a dummy read function.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: stable <stable@vger.kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Daniel Mentz <danielmentz@google.com>
Link: http://lkml.kernel.org/r/1496965462-20003-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timekeeper_internal.h |  1 -
 kernel/time/timekeeping.c           | 52 +++++++++++++++++++++++++------------
 2 files changed, 36 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f4532188c..e9834ada4d0c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc57fd09..eff94cb8e89e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		now = tk_clock_read(tkr);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	u64 cycle_now, delta;
 
 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = tk_clock_read(tkr);
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
 	return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = tk_clock_read(tkr);
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr_mono.clock;
 	u64 cycle_now, delta;
 	u64 nsec;
 
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 		 * Check whether the system counter value provided by the
 		 * device driver is on the current timekeeping interval.
 		 */
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		interval_start = tk->tkr_mono.cycle_last;
 		if (!cycle_between(interval_start, cycles, now)) {
 			clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
@@ -2030,7 +2050,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
-- 
cgit v1.2.3


From 3d88d56c5873f6eebe23e05c3da701960146b801 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 8 Jun 2017 16:44:21 -0700
Subject: time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting

Due to how the MONOTONIC_RAW accumulation logic was handled,
there is the potential for a 1ns discontinuity when we do
accumulations. This small discontinuity has for the most part
gone un-noticed, but since ARM64 enabled CLOCK_MONOTONIC_RAW
in their vDSO clock_gettime implementation, we've seen failures
with the inconsistency-check test in kselftest.

This patch addresses the issue by using the same sub-ns
accumulation handling that CLOCK_MONOTONIC uses, which avoids
the issue for in-kernel users.

Since the ARM64 vDSO implementation has its own clock_gettime
calculation logic, this patch reduces the frequency of errors,
but failures are still seen. The ARM64 vDSO will need to be
updated to include the sub-nanosecond xtime_nsec values in its
calculation for this issue to be completely fixed.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Tested-by: Daniel Mentz <danielmentz@google.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: "stable #4 . 8+" <stable@vger.kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Link: http://lkml.kernel.org/r/1496965462-20003-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timekeeper_internal.h |  4 ++--
 kernel/time/timekeeping.c           | 19 ++++++++++---------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e9834ada4d0c..f7043ccca81c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -57,7 +57,7 @@ struct tk_read_base {
  *			interval.
  * @xtime_remainder:	Shifted nano seconds left over when rounding
  *			@cycle_interval
- * @raw_interval:	Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:		Difference between accumulated time and NTP time in ntp
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
@@ -99,7 +99,7 @@ struct timekeeper {
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
-	u32			raw_interval;
+	u64			raw_interval;
 	/* The ntp_tick_length() value currently being used.
 	 * This cached copy ensures we consistently apply the tick
 	 * length for an entire tick, as ntp_tick_length may change
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index eff94cb8e89e..b602c48cb841 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -280,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	/* Go back from cycles -> shifted ns */
 	tk->xtime_interval = interval * clock->mult;
 	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-	tk->raw_interval = (interval * clock->mult) >> clock->shift;
+	tk->raw_interval = interval * clock->mult;
 
 	 /* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
@@ -1996,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 				    u32 shift, unsigned int *clock_set)
 {
 	u64 interval = tk->cycle_interval << shift;
-	u64 raw_nsecs;
+	u64 snsec_per_sec;
 
 	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
@@ -2011,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
-	raw_nsecs = (u64)tk->raw_interval << shift;
-	raw_nsecs += tk->raw_time.tv_nsec;
-	if (raw_nsecs >= NSEC_PER_SEC) {
-		u64 raw_secs = raw_nsecs;
-		raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-		tk->raw_time.tv_sec += raw_secs;
+	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+		tk->raw_time.tv_sec++;
 	}
-	tk->raw_time.tv_nsec = raw_nsecs;
+	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
-- 
cgit v1.2.3


From ac6424b981bce1c4bc55675c6ce11bfe1bbfa64f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 20 Jun 2017 12:06:13 +0200
Subject: sched/wait: Rename wait_queue_t => wait_queue_entry_t

Rename:

	wait_queue_t		=>	wait_queue_entry_t

'wait_queue_t' was always a slight misnomer: its name implies that it's a "queue",
but in reality it's a queue *entry*. The 'real' queue is the wait queue head,
which had to carry the name.

Start sorting this out by renaming it to 'wait_queue_entry_t'.

This also allows the real structure name 'struct __wait_queue' to
lose its double underscore and become 'struct wait_queue_entry',
which is the more canonical nomenclature for such data types.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/DocBook/kernel-hacking.tmpl          |  2 +-
 Documentation/filesystems/autofs4.txt              | 12 ++--
 block/blk-mq.c                                     |  2 +-
 block/blk-wbt.c                                    |  2 +-
 block/kyber-iosched.c                              |  8 +--
 drivers/bluetooth/btmrvl_main.c                    |  2 +-
 drivers/char/ipmi/ipmi_watchdog.c                  |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h            |  2 +-
 drivers/gpu/drm/i915/i915_sw_fence.c               | 14 ++---
 drivers/gpu/drm/i915/i915_sw_fence.h               |  2 +-
 drivers/gpu/drm/radeon/radeon.h                    |  2 +-
 drivers/gpu/drm/radeon/radeon_fence.c              |  2 +-
 drivers/gpu/vga/vgaarb.c                           |  2 +-
 drivers/infiniband/hw/i40iw/i40iw_main.c           |  2 +-
 drivers/md/bcache/btree.h                          |  2 +-
 drivers/net/ethernet/cavium/liquidio/octeon_main.h |  4 +-
 drivers/net/wireless/cisco/airo.c                  |  2 +-
 .../net/wireless/intersil/hostap/hostap_ioctl.c    |  2 +-
 drivers/net/wireless/marvell/libertas/main.c       |  2 +-
 drivers/scsi/dpt/dpti_i2o.h                        |  2 +-
 drivers/scsi/ips.c                                 | 12 ++--
 drivers/scsi/ips.h                                 |  4 +-
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |  6 +-
 .../staging/lustre/lnet/klnds/socklnd/socklnd_cb.c |  4 +-
 drivers/staging/lustre/lnet/libcfs/debug.c         |  2 +-
 drivers/staging/lustre/lnet/libcfs/tracefile.c     |  2 +-
 drivers/staging/lustre/lnet/lnet/lib-eq.c          |  2 +-
 drivers/staging/lustre/lnet/lnet/lib-socket.c      |  2 +-
 drivers/staging/lustre/lustre/fid/fid_request.c    |  6 +-
 drivers/staging/lustre/lustre/include/lustre_lib.h |  4 +-
 drivers/staging/lustre/lustre/llite/lcommon_cl.c   |  2 +-
 .../staging/lustre/lustre/lov/lov_cl_internal.h    |  2 +-
 drivers/staging/lustre/lustre/lov/lov_object.c     |  2 +-
 drivers/staging/lustre/lustre/obdclass/lu_object.c |  6 +-
 drivers/tty/synclink_gt.c                          |  2 +-
 drivers/vfio/virqfd.c                              |  2 +-
 drivers/vhost/vhost.c                              |  2 +-
 drivers/vhost/vhost.h                              |  2 +-
 fs/autofs4/autofs_i.h                              |  2 +-
 fs/autofs4/waitq.c                                 | 18 +++---
 fs/cachefiles/internal.h                           |  2 +-
 fs/cachefiles/namei.c                              |  2 +-
 fs/cachefiles/rdwr.c                               |  2 +-
 fs/dax.c                                           |  4 +-
 fs/eventfd.c                                       |  2 +-
 fs/eventpoll.c                                     | 10 ++--
 fs/fs_pin.c                                        |  2 +-
 fs/nfs/nfs4proc.c                                  |  4 +-
 fs/nilfs2/segment.c                                |  2 +-
 fs/orangefs/orangefs-bufmap.c                      |  4 +-
 fs/reiserfs/journal.c                              |  2 +-
 fs/select.c                                        |  4 +-
 fs/signalfd.c                                      |  2 +-
 fs/userfaultfd.c                                   |  8 +--
 include/linux/blk-mq.h                             |  2 +-
 include/linux/eventfd.h                            |  4 +-
 include/linux/kvm_irqfd.h                          |  2 +-
 include/linux/pagemap.h                            |  2 +-
 include/linux/poll.h                               |  2 +-
 include/linux/vfio.h                               |  2 +-
 include/linux/wait.h                               | 67 +++++++++++-----------
 include/net/af_unix.h                              |  2 +-
 include/uapi/linux/auto_fs.h                       |  4 +-
 include/uapi/linux/auto_fs4.h                      |  4 +-
 kernel/exit.c                                      |  4 +-
 kernel/futex.c                                     |  2 +-
 kernel/sched/completion.c                          |  2 +-
 kernel/sched/core.c                                |  2 +-
 kernel/sched/wait.c                                | 42 +++++++-------
 kernel/workqueue.c                                 |  4 +-
 mm/filemap.c                                       | 10 ++--
 mm/memcontrol.c                                    |  8 +--
 mm/mempool.c                                       |  2 +-
 mm/shmem.c                                         |  2 +-
 net/9p/trans_fd.c                                  |  4 +-
 net/bluetooth/bnep/core.c                          |  2 +-
 net/bluetooth/cmtp/core.c                          |  2 +-
 net/bluetooth/hidp/core.c                          |  2 +-
 net/core/datagram.c                                |  2 +-
 net/unix/af_unix.c                                 |  4 +-
 sound/core/control.c                               |  2 +-
 sound/core/hwdep.c                                 |  2 +-
 sound/core/init.c                                  |  2 +-
 sound/core/oss/pcm_oss.c                           |  4 +-
 sound/core/pcm_lib.c                               |  2 +-
 sound/core/pcm_native.c                            |  4 +-
 sound/core/rawmidi.c                               |  8 +--
 sound/core/seq/seq_fifo.c                          |  2 +-
 sound/core/seq/seq_memory.c                        |  2 +-
 sound/core/timer.c                                 |  2 +-
 sound/isa/wavefront/wavefront_synth.c              |  2 +-
 sound/pci/mixart/mixart_core.c                     |  4 +-
 sound/pci/ymfpci/ymfpci_main.c                     |  2 +-
 virt/kvm/eventfd.c                                 |  2 +-
 94 files changed, 216 insertions(+), 213 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index da5c087462b1..c3c705591532 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -819,7 +819,7 @@ printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
    certain condition is true.  They must be used carefully to ensure
    there is no race condition.  You declare a
    <type>wait_queue_head_t</type>, and then processes which want to
-   wait for that condition declare a <type>wait_queue_t</type>
+   wait for that condition declare a <type>wait_queue_entry_t</type>
    referring to themselves, and place that in the queue.
   </para>
 
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
index f10dd590f69f..8444dc3d57e8 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs4.txt
@@ -316,7 +316,7 @@ For version 5, the format of the message is:
         struct autofs_v5_packet {
                 int proto_version;                /* Protocol version */
                 int type;                        /* Type of packet */
-                autofs_wqt_t wait_queue_token;
+                autofs_wqt_t wait_queue_entry_token;
                 __u32 dev;
                 __u64 ino;
                 __u32 uid;
@@ -341,12 +341,12 @@ The pipe will be set to "packet mode" (equivalent to passing
 `O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
 most one packet, and any unread portion of a packet will be discarded.
 
-The `wait_queue_token` is a unique number which can identify a
+The `wait_queue_entry_token` is a unique number which can identify a
 particular request to be acknowledged.  When a message is sent over
 the pipe the affected dentry is marked as either "active" or
 "expiring" and other accesses to it block until the message is
 acknowledged using one of the ioctls below and the relevant
-`wait_queue_token`.
+`wait_queue_entry_token`.
 
 Communicating with autofs: root directory ioctls
 ------------------------------------------------
@@ -358,7 +358,7 @@ capability, or must be the automount daemon.
 The available ioctl commands are:
 
 - **AUTOFS_IOC_READY**: a notification has been handled.  The argument
-    to the ioctl command is the "wait_queue_token" number
+    to the ioctl command is the "wait_queue_entry_token" number
     corresponding to the notification being acknowledged.
 - **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
     the error code `ENOENT`.
@@ -382,14 +382,14 @@ The available ioctl commands are:
         struct autofs_packet_expire_multi {
                 int proto_version;              /* Protocol version */
                 int type;                       /* Type of packet */
-                autofs_wqt_t wait_queue_token;
+                autofs_wqt_t wait_queue_entry_token;
                 int len;
                 char name[NAME_MAX+1];
         };
 
      is required.  This is filled in with the name of something
      that can be unmounted or removed.  If nothing can be expired,
-     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
+     `errno` is set to `EAGAIN`.  Even though a `wait_queue_entry_token`
      is present in the structure, no "wait queue" is established
      and no acknowledgment is needed.
 - **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96850b1..a083f95e04b1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -926,7 +926,7 @@ static bool reorder_tags_to_front(struct list_head *list)
 	return first != NULL;
 }
 
-static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 				void *key)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 17676f4d7fd1..5f3a37c2784c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -503,7 +503,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 }
 
 static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
-			     wait_queue_t *wait, unsigned long rw)
+			     wait_queue_entry_t *wait, unsigned long rw)
 {
 	/*
 	 * inc it here even if disabled, since we'll dec it at completion.
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..b95d6bd714c0 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -99,7 +99,7 @@ struct kyber_hctx_data {
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
-	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
+	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
@@ -507,7 +507,7 @@ static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
 	}
 }
 
-static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key)
 {
 	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
@@ -523,7 +523,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 {
 	unsigned int sched_domain = khd->cur_domain;
 	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
-	wait_queue_t *wait = &khd->domain_wait[sched_domain];
+	wait_queue_entry_t *wait = &khd->domain_wait[sched_domain];
 	struct sbq_wait_state *ws;
 	int nr;
 
@@ -734,7 +734,7 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
 {									\
 	struct blk_mq_hw_ctx *hctx = data;				\
 	struct kyber_hctx_data *khd = hctx->sched_data;			\
-	wait_queue_t *wait = &khd->domain_wait[domain];			\
+	wait_queue_entry_t *wait = &khd->domain_wait[domain];		\
 									\
 	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list));	\
 	return 0;							\
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index c38cb5b91291..fe850f0567cb 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -602,7 +602,7 @@ static int btmrvl_service_main_thread(void *data)
 	struct btmrvl_thread *thread = data;
 	struct btmrvl_private *priv = thread->priv;
 	struct btmrvl_adapter *adapter = priv->adapter;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct sk_buff *skb;
 	ulong flags;
 
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index d165af8abe36..a5c6cfe71a8e 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -821,7 +821,7 @@ static ssize_t ipmi_read(struct file *file,
 			 loff_t      *ppos)
 {
 	int          rv = 0;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (count <= 0)
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 129c58bb4805..a4a920c4c454 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -123,7 +123,7 @@ struct drm_i915_gem_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_t submitq;
+	wait_queue_entry_t submitq;
 	wait_queue_head_t execute;
 
 	/* A list of everyone we wait upon, and everyone who waits upon us.
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a277f8eb7beb..8669bfa33064 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -152,7 +152,7 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 					struct list_head *continuation)
 {
 	wait_queue_head_t *x = &fence->wait;
-	wait_queue_t *pos, *next;
+	wait_queue_entry_t *pos, *next;
 	unsigned long flags;
 
 	debug_fence_deactivate(fence);
@@ -254,7 +254,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence)
 	__i915_sw_fence_commit(fence);
 }
 
-static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key)
 {
 	list_del(&wq->task_list);
 	__i915_sw_fence_complete(wq->private, key);
@@ -267,7 +267,7 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
 static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 				    const struct i915_sw_fence * const signaler)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 
 	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return false;
@@ -288,7 +288,7 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 
 static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 
 	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return;
@@ -320,7 +320,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 
 static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 					  struct i915_sw_fence *signaler,
-					  wait_queue_t *wq, gfp_t gfp)
+					  wait_queue_entry_t *wq, gfp_t gfp)
 {
 	unsigned long flags;
 	int pending;
@@ -359,7 +359,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 	spin_lock_irqsave(&signaler->wait.lock, flags);
 	if (likely(!i915_sw_fence_done(signaler))) {
-		__add_wait_queue_tail(&signaler->wait, wq);
+		__add_wait_queue_entry_tail(&signaler->wait, wq);
 		pending = 1;
 	} else {
 		i915_sw_fence_wake(wq, 0, 0, NULL);
@@ -372,7 +372,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *signaler,
-				 wait_queue_t *wq)
+				 wait_queue_entry_t *wq)
 {
 	return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0);
 }
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index d31cefbbcc04..fd3c3bf6c8b7 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -66,7 +66,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence);
 
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *after,
-				 wait_queue_t *wq);
+				 wait_queue_entry_t *wq);
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1c8e2208a21..e562a78510ff 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -375,7 +375,7 @@ struct radeon_fence {
 	unsigned		ring;
 	bool			is_vm_update;
 
-	wait_queue_t		fence_wake;
+	wait_queue_entry_t		fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index ef09f0a63754..e86f2bd38410 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -158,7 +158,7 @@ int radeon_fence_emit(struct radeon_device *rdev,
  * for the fence locking itself, so unlocked variants are used for
  * fence_signal, and remove_wait_queue.
  */
-static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
 {
 	struct radeon_fence *fence;
 	u64 seq;
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 92f1452dad57..76875f6299b8 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -417,7 +417,7 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
 {
 	struct vga_device *vgadev, *conflict;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int rc = 0;
 
 	vga_check_first_use();
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index a3f18a22f5ed..e0f47cc2effc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1939,7 +1939,7 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
 bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
 {
 	struct i40iw_device *iwdev;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	iwdev = dev->back_dev;
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 9b80417cd547..73da1f5626cb 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -207,7 +207,7 @@ void bkey_put(struct cache_set *c, struct bkey *k);
 
 struct btree_op {
 	/* for waiting on btree reserve in btree_split() */
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 
 	/* Btree level at which we start taking write locks */
 	short			lock;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index bed9ef17bc26..7ccffbb0019e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -144,7 +144,7 @@ static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
 	int errno = 0;
-	wait_queue_t we;
+	wait_queue_entry_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
@@ -171,7 +171,7 @@ sleep_timeout_cond(wait_queue_head_t *wait_queue,
 		   int *condition,
 		   int timeout)
 {
-	wait_queue_t we;
+	wait_queue_entry_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 1b7e125a28e2..6a13303af2b7 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -3066,7 +3066,7 @@ static int airo_thread(void *data) {
 		if (ai->jobs) {
 			locked = down_interruptible(&ai->sem);
 		} else {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 
 			init_waitqueue_entry(&wait, current);
 			add_wait_queue(&ai->thr_wait, &wait);
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
index b2c6b065b542..ff153ce29539 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
@@ -2544,7 +2544,7 @@ static int prism2_ioctl_priv_prism2_param(struct net_device *dev,
 			ret = -EINVAL;
 		}
 		if (local->iw_mode == IW_MODE_MASTER) {
-			wait_queue_t __wait;
+			wait_queue_entry_t __wait;
 			init_waitqueue_entry(&__wait, current);
 			add_wait_queue(&local->hostscan_wq, &__wait);
 			set_current_state(TASK_INTERRUPTIBLE);
diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c
index e3500203715c..dde065d0d5c1 100644
--- a/drivers/net/wireless/marvell/libertas/main.c
+++ b/drivers/net/wireless/marvell/libertas/main.c
@@ -453,7 +453,7 @@ static int lbs_thread(void *data)
 {
 	struct net_device *dev = data;
 	struct lbs_private *priv = dev->ml_priv;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	lbs_deb_enter(LBS_DEB_THREAD);
 
diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h
index bd9e31e16249..16fc380b5512 100644
--- a/drivers/scsi/dpt/dpti_i2o.h
+++ b/drivers/scsi/dpt/dpti_i2o.h
@@ -48,7 +48,7 @@
 #include <linux/wait.h>
 typedef wait_queue_head_t adpt_wait_queue_head_t;
 #define ADPT_DECLARE_WAIT_QUEUE_HEAD(wait) DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait)
-typedef wait_queue_t adpt_wait_queue_t;
+typedef wait_queue_entry_t adpt_wait_queue_entry_t;
 
 /*
  * message structures
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 3419e1bcdff6..67621308eb9c 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -301,13 +301,13 @@ static uint32_t ips_statupd_copperhead_memio(ips_ha_t *);
 static uint32_t ips_statupd_morpheus(ips_ha_t *);
 static ips_scb_t *ips_getscb(ips_ha_t *);
 static void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
-static void ips_putq_wait_tail(ips_wait_queue_t *, struct scsi_cmnd *);
+static void ips_putq_wait_tail(ips_wait_queue_entry_t *, struct scsi_cmnd *);
 static void ips_putq_copp_tail(ips_copp_queue_t *,
 				      ips_copp_wait_item_t *);
 static ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
 static ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
-static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
-static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *,
+static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *);
+static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *,
 					  struct scsi_cmnd *);
 static ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
 						     ips_copp_wait_item_t *);
@@ -2871,7 +2871,7 @@ ips_removeq_scb(ips_scb_queue_t * queue, ips_scb_t * item)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item)
+static void ips_putq_wait_tail(ips_wait_queue_entry_t *queue, struct scsi_cmnd *item)
 {
 	METHOD_TRACE("ips_putq_wait_tail", 1);
 
@@ -2902,7 +2902,7 @@ static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue)
+static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *queue)
 {
 	struct scsi_cmnd *item;
 
@@ -2936,7 +2936,7 @@ static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *queue,
+static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *queue,
 					  struct scsi_cmnd *item)
 {
 	struct scsi_cmnd *p;
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index b782bb60baf0..366be3b2f9b4 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -989,7 +989,7 @@ typedef struct ips_wait_queue {
 	struct scsi_cmnd *head;
 	struct scsi_cmnd *tail;
 	int count;
-} ips_wait_queue_t;
+} ips_wait_queue_entry_t;
 
 typedef struct ips_copp_wait_item {
 	struct scsi_cmnd *scsi_cmd;
@@ -1035,7 +1035,7 @@ typedef struct ips_ha {
    ips_stat_t         sp;                 /* Status packer pointer      */
    struct ips_scb    *scbs;               /* Array of all CCBS          */
    struct ips_scb    *scb_freelist;       /* SCB free list              */
-   ips_wait_queue_t   scb_waitlist;       /* Pending SCB list           */
+   ips_wait_queue_entry_t   scb_waitlist;       /* Pending SCB list           */
    ips_copp_queue_t   copp_waitlist;      /* Pending PT list            */
    ips_scb_queue_t    scb_activelist;     /* Active SCB list            */
    IPS_IO_CMD        *dummy;              /* dummy command              */
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 0db662d6abdd..85b242ec5f9b 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -3267,7 +3267,7 @@ int
 kiblnd_connd(void *arg)
 {
 	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	struct kib_conn *conn;
 	int timeout;
@@ -3521,7 +3521,7 @@ kiblnd_scheduler(void *arg)
 	long id = (long)arg;
 	struct kib_sched_info *sched;
 	struct kib_conn *conn;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	struct ib_wc wc;
 	int did_something;
@@ -3656,7 +3656,7 @@ kiblnd_failover_thread(void *arg)
 {
 	rwlock_t *glock = &kiblnd_data.kib_global_lock;
 	struct kib_dev *dev;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	int rc;
 
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 3ed3b08c122c..6b38d5a8fe92 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -2166,7 +2166,7 @@ ksocknal_connd(void *arg)
 {
 	spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
 	struct ksock_connreq *cr;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int nloops = 0;
 	int cons_retry = 0;
 
@@ -2554,7 +2554,7 @@ ksocknal_check_peer_timeouts(int idx)
 int
 ksocknal_reaper(void *arg)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct ksock_conn *conn;
 	struct ksock_sched *sched;
 	struct list_head enomem_conns;
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index c56e9922cd5b..49deb448b044 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -361,7 +361,7 @@ static int libcfs_debug_dumplog_thread(void *arg)
 
 void libcfs_debug_dumplog(void)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct task_struct *dumper;
 
 	/* we're being careful to ensure that the kernel thread is
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
index 9599b7441feb..27082d2f7938 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c
@@ -990,7 +990,7 @@ static int tracefiled(void *arg)
 	complete(&tctl->tctl_start);
 
 	while (1) {
-		wait_queue_t __wait;
+		wait_queue_entry_t __wait;
 
 		pc.pc_want_daemon_pages = 0;
 		collect_pages(&pc);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
index ce4b83584e17..9ebba4ef5f90 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-eq.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c
@@ -312,7 +312,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock)
 {
 	int tms = *timeout_ms;
 	int wait;
-	wait_queue_t wl;
+	wait_queue_entry_t wl;
 	unsigned long now;
 
 	if (!tms)
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 9fca8d225ee0..f075706bba6d 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -516,7 +516,7 @@ lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
 int
 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct socket *newsock;
 	int rc;
 
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 999f250ceed0..bf31bc200d27 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -192,7 +192,7 @@ static int seq_client_alloc_seq(const struct lu_env *env,
 }
 
 static int seq_fid_alloc_prep(struct lu_client_seq *seq,
-			      wait_queue_t *link)
+			      wait_queue_entry_t *link)
 {
 	if (seq->lcs_update) {
 		add_wait_queue(&seq->lcs_waitq, link);
@@ -223,7 +223,7 @@ static void seq_fid_alloc_fini(struct lu_client_seq *seq)
 int seq_client_alloc_fid(const struct lu_env *env,
 			 struct lu_client_seq *seq, struct lu_fid *fid)
 {
-	wait_queue_t link;
+	wait_queue_entry_t link;
 	int rc;
 
 	LASSERT(seq);
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(seq_client_alloc_fid);
  */
 void seq_client_flush(struct lu_client_seq *seq)
 {
-	wait_queue_t link;
+	wait_queue_entry_t link;
 
 	LASSERT(seq);
 	init_waitqueue_entry(&link, current);
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index b04d613846ee..f24970da8323 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -201,7 +201,7 @@ struct l_wait_info {
 			   sigmask(SIGALRM))
 
 /**
- * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * wait_queue_entry_t of Linux (version < 2.6.34) is a FIFO list for exclusively
  * waiting threads, which is not always desirable because all threads will
  * be waken up again and again, even user only needs a few of them to be
  * active most time. This is not good for performance because cache can
@@ -228,7 +228,7 @@ struct l_wait_info {
  */
 #define __l_wait_event(wq, condition, info, ret, l_add_wait)		   \
 do {									   \
-	wait_queue_t __wait;						 \
+	wait_queue_entry_t __wait;						 \
 	long __timeout = info->lwi_timeout;			  \
 	sigset_t   __blocked;					      \
 	int   __allow_intr = info->lwi_allow_intr;			     \
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index 8af611033e12..96515b839436 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -207,7 +207,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
 static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
 {
 	struct lu_object_header *header = obj->co_lu.lo_header;
-	wait_queue_t	   waiter;
+	wait_queue_entry_t	   waiter;
 
 	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
 		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 391c632365ae..e889d3a7de9c 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -370,7 +370,7 @@ struct lov_thread_info {
 	struct ost_lvb	  lti_lvb;
 	struct cl_2queue	lti_cl2q;
 	struct cl_page_list     lti_plist;
-	wait_queue_t	  lti_waiter;
+	wait_queue_entry_t	  lti_waiter;
 	struct cl_attr          lti_attr;
 };
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index ab3ecfeeadc8..eddabbe31e5c 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -371,7 +371,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 	struct lov_layout_raid0 *r0;
 	struct lu_site	  *site;
 	struct lu_site_bkt_data *bkt;
-	wait_queue_t	  *waiter;
+	wait_queue_entry_t	  *waiter;
 
 	r0  = &lov->u.raid0;
 	LASSERT(r0->lo_sub[idx] == los);
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index abcf951208d2..76ae600ae2c8 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -556,7 +556,7 @@ EXPORT_SYMBOL(lu_object_print);
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_t *waiter,
+				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
 	struct lu_site_bkt_data *bkt;
@@ -670,7 +670,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 					    struct lu_device *dev,
 					    const struct lu_fid *f,
 					    const struct lu_object_conf *conf,
-					    wait_queue_t *waiter)
+					    wait_queue_entry_t *waiter)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -750,7 +750,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 {
 	struct lu_site_bkt_data *bkt;
 	struct lu_object	*obj;
-	wait_queue_t	   wait;
+	wait_queue_entry_t	   wait;
 
 	while (1) {
 		obj = lu_object_find_try(env, dev, f, conf, &wait);
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 31885f20fc15..cc047de72e2a 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -184,7 +184,7 @@ static void hdlcdev_exit(struct slgt_info *info);
 struct cond_wait {
 	struct cond_wait *next;
 	wait_queue_head_t q;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned int data;
 };
 static void init_cond_wait(struct cond_wait *w, unsigned int data);
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 27c89cd5d70b..4797217e5e72 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -43,7 +43,7 @@ static void virqfd_deactivate(struct virqfd *virqfd)
 	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
 }
 
-static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
 	unsigned long flags = (unsigned long)key;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 042030e5a035..e4613a3c362d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -165,7 +165,7 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &poll->wait);
 }
 
-static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
+static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
 			     void *key)
 {
 	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index f55671d53f28..f72095868b93 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -31,7 +31,7 @@ struct vhost_work {
 struct vhost_poll {
 	poll_table                table;
 	wait_queue_head_t        *wqh;
-	wait_queue_t              wait;
+	wait_queue_entry_t              wait;
 	struct vhost_work	  work;
 	unsigned long		  mask;
 	struct vhost_dev	 *dev;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index beef981aa54f..974f5346458a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -83,7 +83,7 @@ struct autofs_info {
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	/* We use the following to see what we are waiting for */
 	struct qstr name;
 	u32 dev;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 24a58bf9ca72..7071895b0678 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
-		 (unsigned long) wq->wait_queue_token,
+		 (unsigned long) wq->wait_queue_entry_token,
 		 wq->name.len, wq->name.name, type);
 
 	memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
@@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*mp);
 
-		mp->wait_queue_token = wq->wait_queue_token;
+		mp->wait_queue_entry_token = wq->wait_queue_entry_token;
 		mp->len = wq->name.len;
 		memcpy(mp->name, wq->name.name, wq->name.len);
 		mp->name[wq->name.len] = '\0';
@@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*ep);
 
-		ep->wait_queue_token = wq->wait_queue_token;
+		ep->wait_queue_entry_token = wq->wait_queue_entry_token;
 		ep->len = wq->name.len;
 		memcpy(ep->name, wq->name.name, wq->name.len);
 		ep->name[wq->name.len] = '\0';
@@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*packet);
 
-		packet->wait_queue_token = wq->wait_queue_token;
+		packet->wait_queue_entry_token = wq->wait_queue_entry_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
 		packet->name[wq->name.len] = '\0';
@@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 			return -ENOMEM;
 		}
 
-		wq->wait_queue_token = autofs4_next_wait_queue;
+		wq->wait_queue_entry_token = autofs4_next_wait_queue;
 		if (++autofs4_next_wait_queue == 0)
 			autofs4_next_wait_queue = 1;
 		wq->next = sbi->queues;
@@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 		}
 
 		pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
 			 wq->name.name, notify);
 
 		/*
@@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	} else {
 		wq->wait_ctr++;
 		pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
 			 wq->name.name, notify);
 		mutex_unlock(&sbi->wq_mutex);
 		kfree(qstr.name);
@@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 }
 
 
-int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
+int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status)
 {
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
 	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
-		if (wq->wait_queue_token == wait_queue_token)
+		if (wq->wait_queue_entry_token == wait_queue_entry_token)
 			break;
 	}
 
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 9bf90bcc56ac..54a4fcd679ed 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -97,7 +97,7 @@ struct cachefiles_cache {
  * backing file read tracking
  */
 struct cachefiles_one_read {
-	wait_queue_t			monitor;	/* link into monitored waitqueue */
+	wait_queue_entry_t			monitor;	/* link into monitored waitqueue */
 	struct page			*back_page;	/* backing file page we're waiting for */
 	struct page			*netfs_page;	/* netfs page we're going to fill */
 	struct fscache_retrieval	*op;		/* retrieval op covering this */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 41df8a27d7eb..3978b324cbca 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -204,7 +204,7 @@ wait_for_old_object:
 		wait_queue_head_t *wq;
 
 		signed long timeout = 60 * HZ;
-		wait_queue_t wait;
+		wait_queue_entry_t wait;
 		bool requeue;
 
 		/* if the object we're waiting for is queued for processing,
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index afbdc418966d..8be33b33b981 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -21,7 +21,7 @@
  * - we use this to detect read completion of backing pages
  * - the caller holds the waitqueue lock
  */
-static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
 				  int sync, void *_key)
 {
 	struct cachefiles_one_read *monitor =
diff --git a/fs/dax.c b/fs/dax.c
index 2a6889b3585f..323ea481d4a8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -84,7 +84,7 @@ struct exceptional_entry_key {
 };
 
 struct wait_exceptional_entry_queue {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct exceptional_entry_key key;
 };
 
@@ -108,7 +108,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
 	return wait_table + hash;
 }
 
-static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
+static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
 				       int sync, void *keyp)
 {
 	struct exceptional_entry_key *key = keyp;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 68b9fffcb2c8..9736df2ce89d 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
  * This is used to atomically remove a wait queue entry from the eventfd wait
  * queue head, and read/reset the counter value.
  */
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
 				  __u64 *cnt)
 {
 	unsigned long flags;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5420767c9b68..5ac1cba5ef72 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -244,7 +244,7 @@ struct eppoll_entry {
 	 * Wait queue item that will be linked to the target file wait
 	 * queue head.
 	 */
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	/* The wait queue head that linked the "wait" wait queue item */
 	wait_queue_head_t *whead;
@@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
-static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
 {
 	return container_of(p, struct eppoll_entry, wait);
 }
 
 /* Get the "struct epitem" from a wait queue pointer */
-static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
+static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
 {
 	return container_of(p, struct eppoll_entry, wait)->base;
 }
@@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
  * mechanism. It is called by the stored file descriptors when they
  * have events to report.
  */
-static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int pwake = 0;
 	unsigned long flags;
@@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	int res = 0, eavail, timed_out = 0;
 	unsigned long flags;
 	u64 slack = 0;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	ktime_t expires, *to = NULL;
 
 	if (timeout > 0) {
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 611b5408f6ec..7b447a245760 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 
 void pin_kill(struct fs_pin *p)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (!p) {
 		rcu_read_unlock();
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a3b8cd..be5a8f84e5bb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6372,7 +6372,7 @@ struct nfs4_lock_waiter {
 };
 
 static int
-nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key)
+nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
 {
 	int ret;
 	struct cb_notify_lock_args *cbnl = key;
@@ -6415,7 +6415,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 					   .inode = state->inode,
 					   .owner = &owner,
 					   .notified = false };
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	/* Don't bother with waitqueue if we don't expect a callback */
 	if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index febed1217b3f..775304e7f96f 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
 }
 
 struct nilfs_segctor_wait_request {
-	wait_queue_t	wq;
+	wait_queue_entry_t	wq;
 	__u32		seq;
 	int		err;
 	atomic_t	done;
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 83b506020718..9e37b7028ea4 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -47,7 +47,7 @@ static void run_down(struct slot_map *m)
 	if (m->c != -1) {
 		for (;;) {
 			if (likely(list_empty(&wait.task_list)))
-				__add_wait_queue_tail(&m->q, &wait);
+				__add_wait_queue_entry_tail(&m->q, &wait);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 			if (m->c == -1)
@@ -85,7 +85,7 @@ static int wait_for_free(struct slot_map *m)
 	do {
 		long n = left, t;
 		if (likely(list_empty(&wait.task_list)))
-			__add_wait_queue_tail_exclusive(&m->q, &wait);
+			__add_wait_queue_entry_tail_exclusive(&m->q, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (m->c > 0)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 39bb1e838d8d..a11d773e5ff3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s)
 
 static void queue_log_writer(struct super_block *s)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
 
diff --git a/fs/select.c b/fs/select.c
index d6c652a31e99..5b524a977d91 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 	return table->entry++;
 }
 
-static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_wqueues *pwq = wait->private;
 	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
-static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_table_entry *entry;
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 7e3d71109f51..593b022ac11b 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -43,7 +43,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
 	if (likely(!waitqueue_active(wqh)))
 		return;
 
-	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	/* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
 	wake_up_poll(wqh, POLLHUP | POLLFREE);
 }
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1d622f276e3a..bda64fcd8a0c 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx {
 
 struct userfaultfd_wait_queue {
 	struct uffd_msg msg;
-	wait_queue_t wq;
+	wait_queue_entry_t wq;
 	struct userfaultfd_ctx *ctx;
 	bool waken;
 };
@@ -91,7 +91,7 @@ struct userfaultfd_wake_range {
 	unsigned long len;
 };
 
-static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
+static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 				     int wake_flags, void *key)
 {
 	struct userfaultfd_wake_range *range = key;
@@ -860,7 +860,7 @@ wakeup:
 static inline struct userfaultfd_wait_queue *find_userfault_in(
 		wait_queue_head_t *wqh)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 	struct userfaultfd_wait_queue *uwq;
 
 	VM_BUG_ON(!spin_is_locked(&wqh->lock));
@@ -1747,7 +1747,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
 static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct userfaultfd_ctx *ctx = f->private_data;
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 	struct userfaultfd_wait_queue *uwq;
 	unsigned long pending = 0, total = 0;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fcd641032f8d..95ba83806c5d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -33,7 +33,7 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
-	wait_queue_t		dispatch_wait;
+	wait_queue_entry_t		dispatch_wait;
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index ff0b981f078e..9e4befd95bc7 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -37,7 +37,7 @@ struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
 				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
@@ -73,7 +73,7 @@ static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
 }
 
 static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
-						wait_queue_t *wait, __u64 *cnt)
+						wait_queue_entry_t *wait, __u64 *cnt)
 {
 	return -ENOSYS;
 }
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 0c1de05098c8..76c2fbc59f35 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -46,7 +46,7 @@ struct kvm_kernel_irqfd_resampler {
 struct kvm_kernel_irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry irq_entry;
 	seqcount_t irq_entry_sc;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 316a19f6b635..e7bbd9d4dc6c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -524,7 +524,7 @@ void page_endio(struct page *page, bool is_write, int err);
 /*
  * Add an arbitrary waiter to a page's wait queue
  */
-extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
+extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
 
 /*
  * Fault everything in given userspace address range in.
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 75ffc5729e4c..2889f09a1c60 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -75,7 +75,7 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 struct poll_table_entry {
 	struct file *filp;
 	unsigned long key;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	wait_queue_head_t *wait_address;
 };
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index edf9b2cad277..f57076b958b7 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -183,7 +183,7 @@ struct virqfd {
 	void			(*thread)(void *, void *);
 	void			*data;
 	struct work_struct	inject;
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	poll_table		pt;
 	struct work_struct	shutdown;
 	struct virqfd		**pvirqfd;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index db076ca7f11d..5889f0c86ff7 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -10,15 +10,18 @@
 #include <asm/current.h>
 #include <uapi/linux/wait.h>
 
-typedef struct __wait_queue wait_queue_t;
-typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
-int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+typedef struct wait_queue_entry wait_queue_entry_t;
+typedef int (*wait_queue_func_t)(wait_queue_entry_t *wait, unsigned mode, int flags, void *key);
+int default_wake_function(wait_queue_entry_t *wait, unsigned mode, int flags, void *key);
 
-/* __wait_queue::flags */
+/* wait_queue_entry::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
 
-struct __wait_queue {
+/*
+ * A single wait-queue entry structure:
+ */
+struct wait_queue_entry {
 	unsigned int		flags;
 	void			*private;
 	wait_queue_func_t	func;
@@ -34,7 +37,7 @@ struct wait_bit_key {
 
 struct wait_bit_queue {
 	struct wait_bit_key	key;
-	wait_queue_t		wait;
+	wait_queue_entry_t	wait;
 };
 
 struct __wait_queue_head {
@@ -55,7 +58,7 @@ struct task_struct;
 	.task_list	= { NULL, NULL } }
 
 #define DECLARE_WAITQUEUE(name, tsk)					\
-	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
+	wait_queue_entry_t name = __WAITQUEUE_INITIALIZER(name, tsk)
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
 	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
@@ -88,7 +91,7 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
 #endif
 
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+static inline void init_waitqueue_entry(wait_queue_entry_t *q, struct task_struct *p)
 {
 	q->flags	= 0;
 	q->private	= p;
@@ -96,7 +99,7 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 }
 
 static inline void
-init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+init_waitqueue_func_entry(wait_queue_entry_t *q, wait_queue_func_t func)
 {
 	q->flags	= 0;
 	q->private	= NULL;
@@ -159,11 +162,11 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq)
 	return waitqueue_active(wq);
 }
 
-extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
-extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
+extern void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait);
+extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait);
+extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait);
 
-static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *new)
 {
 	list_add(&new->task_list, &head->task_list);
 }
@@ -172,27 +175,27 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
  * Used for wake-one threads:
  */
 static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait)
 {
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue(q, wait);
 }
 
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
-					 wait_queue_t *new)
+static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head,
+					 wait_queue_entry_t *new)
 {
 	list_add_tail(&new->task_list, &head->task_list);
 }
 
 static inline void
-__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait)
 {
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue_tail(q, wait);
+	__add_wait_queue_entry_tail(q, wait);
 }
 
 static inline void
-__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *old)
 {
 	list_del(&old->task_list);
 }
@@ -249,7 +252,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 	(!__builtin_constant_p(state) ||				\
 		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
 
-extern void init_wait_entry(wait_queue_t *__wait, int flags);
+extern void init_wait_entry(wait_queue_entry_t *__wait, int flags);
 
 /*
  * The below macro ___wait_event() has an explicit shadow of the __ret
@@ -266,7 +269,7 @@ extern void init_wait_entry(wait_queue_t *__wait, int flags);
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
 ({									\
 	__label__ __out;						\
-	wait_queue_t __wait;						\
+	wait_queue_entry_t __wait;						\
 	long __ret = ret;	/* explicit shadow */			\
 									\
 	init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
@@ -620,8 +623,8 @@ do {									\
 	__ret;								\
 })
 
-extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *);
-extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
+extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *);
+extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 
 #define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
 ({									\
@@ -967,17 +970,17 @@ do {									\
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state);
+void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state);
+long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state);
+void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait);
+long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout);
+int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
+int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
+int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
 
 #define DEFINE_WAIT_FUNC(name, function)				\
-	wait_queue_t name = {						\
+	wait_queue_entry_t name = {					\
 		.private	= current,				\
 		.func		= function,				\
 		.task_list	= LIST_HEAD_INIT((name).task_list),	\
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60eccb59a6..75e612a45824 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,7 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
-	wait_queue_t		peer_wake;
+	wait_queue_entry_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index aa63451ef20a..1953f8d6063b 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -26,7 +26,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	AUTOFS_PROTO_VERSION
 
 /*
- * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
+ * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed
  * back to the kernel via ioctl from userspace. On architectures where 32- and
  * 64-bit userspace binaries can be executed it's important that the size of
  * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
@@ -49,7 +49,7 @@ struct autofs_packet_hdr {
 
 struct autofs_packet_missing {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	int len;
 	char name[NAME_MAX+1];
 };	
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 7c6da423d54e..65b72d0222e7 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -108,7 +108,7 @@ enum autofs_notify {
 /* v4 multi expire (via pipe) */
 struct autofs_packet_expire_multi {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	int len;
 	char name[NAME_MAX+1];
 };
@@ -123,7 +123,7 @@ union autofs_packet_union {
 /* autofs v5 common packet struct */
 struct autofs_v5_packet {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	__u32 dev;
 	__u64 ino;
 	__u32 uid;
diff --git a/kernel/exit.c b/kernel/exit.c
index 516acdb0e0ec..7d694437ab44 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1004,7 +1004,7 @@ struct wait_opts {
 	int __user		*wo_stat;
 	struct rusage __user	*wo_rusage;
 
-	wait_queue_t		child_wait;
+	wait_queue_entry_t		child_wait;
 	int			notask_error;
 };
 
@@ -1541,7 +1541,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 	return 0;
 }
 
-static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 				int sync, void *key)
 {
 	struct wait_opts *wo = container_of(wait, struct wait_opts,
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a6cf6b..d6cf71d08f21 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -225,7 +225,7 @@ struct futex_pi_state {
  * @requeue_pi_key:	the requeue_pi target futex key
  * @bitset:		bitset for the optional bitmasked wakeup
  *
- * We use this hashed waitqueue, instead of a normal wait_queue_t, so
+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 53f9558fa925..13fc5ae9bf2f 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -66,7 +66,7 @@ do_wait_for_common(struct completion *x,
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
 
-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
+		__add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
 		do {
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 326d4f88e2b1..5b36644536ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3687,7 +3687,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
 	return try_to_wake_up(curr->private, mode, wake_flags);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index b8c84c6dee64..301ea02dede0 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -21,7 +21,7 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c
 
 EXPORT_SYMBOL(__init_waitqueue_head);
 
-void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait)
 {
 	unsigned long flags;
 
@@ -32,18 +32,18 @@ void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 }
 EXPORT_SYMBOL(add_wait_queue);
 
-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait)
 {
 	unsigned long flags;
 
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_tail(q, wait);
+	__add_wait_queue_entry_tail(q, wait);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
-void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait)
 {
 	unsigned long flags;
 
@@ -66,7 +66,7 @@ EXPORT_SYMBOL(remove_wait_queue);
 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
-	wait_queue_t *curr, *next;
+	wait_queue_entry_t *curr, *next;
 
 	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
 		unsigned flags = curr->flags;
@@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * loads to move into the critical region).
  */
 void
-prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state)
 {
 	unsigned long flags;
 
@@ -184,20 +184,20 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
 EXPORT_SYMBOL(prepare_to_wait);
 
 void
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state)
 {
 	unsigned long flags;
 
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&q->lock, flags);
 	if (list_empty(&wait->task_list))
-		__add_wait_queue_tail(q, wait);
+		__add_wait_queue_entry_tail(q, wait);
 	set_current_state(state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
-void init_wait_entry(wait_queue_t *wait, int flags)
+void init_wait_entry(wait_queue_entry_t *wait, int flags)
 {
 	wait->flags = flags;
 	wait->private = current;
@@ -206,7 +206,7 @@ void init_wait_entry(wait_queue_t *wait, int flags)
 }
 EXPORT_SYMBOL(init_wait_entry);
 
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
+long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state)
 {
 	unsigned long flags;
 	long ret = 0;
@@ -230,7 +230,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	} else {
 		if (list_empty(&wait->task_list)) {
 			if (wait->flags & WQ_FLAG_EXCLUSIVE)
-				__add_wait_queue_tail(q, wait);
+				__add_wait_queue_entry_tail(q, wait);
 			else
 				__add_wait_queue(q, wait);
 		}
@@ -249,10 +249,10 @@ EXPORT_SYMBOL(prepare_to_wait_event);
  * condition in the caller before they add the wait
  * entry to the wake queue.
  */
-int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
 	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -265,10 +265,10 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
 }
 EXPORT_SYMBOL(do_wait_intr);
 
-int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
 	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(do_wait_intr_irq);
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait)
 {
 	unsigned long flags;
 
@@ -316,7 +316,7 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
 }
 EXPORT_SYMBOL(finish_wait);
 
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int ret = default_wake_function(wait, mode, sync, key);
 
@@ -351,7 +351,7 @@ static inline bool is_kthread_should_stop(void)
  * remove_wait_queue(&wq, &wait);
  *
  */
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
+long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout)
 {
 	set_current_state(mode); /* A */
 	/*
@@ -375,7 +375,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 }
 EXPORT_SYMBOL(wait_woken);
 
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	/*
 	 * Although this function is called under waitqueue lock, LOCK
@@ -391,7 +391,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 }
 EXPORT_SYMBOL(woken_wake_function);
 
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
 	struct wait_bit_key *key = arg;
 	struct wait_bit_queue *wait_bit
@@ -534,7 +534,7 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
 	return bit_waitqueue(p, 0);
 }
 
-static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
+static int wake_atomic_t_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 				  void *arg)
 {
 	struct wait_bit_key *key = arg;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c74bf39ef764..a86688fabc55 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2864,11 +2864,11 @@ bool flush_work(struct work_struct *work)
 EXPORT_SYMBOL_GPL(flush_work);
 
 struct cwt_wait {
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	struct work_struct	*work;
 };
 
-static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..80c19ee81e95 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -768,10 +768,10 @@ struct wait_page_key {
 struct wait_page_queue {
 	struct page *page;
 	int bit_nr;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 };
 
-static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
 	struct wait_page_key *key = arg;
 	struct wait_page_queue *wait_page
@@ -834,7 +834,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 		struct page *page, int bit_nr, int state, bool lock)
 {
 	struct wait_page_queue wait_page;
-	wait_queue_t *wait = &wait_page.wait;
+	wait_queue_entry_t *wait = &wait_page.wait;
 	int ret = 0;
 
 	init_wait(wait);
@@ -847,7 +847,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
 		if (likely(list_empty(&wait->task_list))) {
 			if (lock)
-				__add_wait_queue_tail_exclusive(q, wait);
+				__add_wait_queue_entry_tail_exclusive(q, wait);
 			else
 				__add_wait_queue(q, wait);
 			SetPageWaiters(page);
@@ -907,7 +907,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
-void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
+void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
 {
 	wait_queue_head_t *q = page_waitqueue(page);
 	unsigned long flags;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94172089f52f..9a90b096dc6b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -170,7 +170,7 @@ struct mem_cgroup_event {
 	 */
 	poll_table pt;
 	wait_queue_head_t *wqh;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct work_struct remove;
 };
 
@@ -1479,10 +1479,10 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
 	struct mem_cgroup *memcg;
-	wait_queue_t	wait;
+	wait_queue_entry_t	wait;
 };
 
-static int memcg_oom_wake_function(wait_queue_t *wait,
+static int memcg_oom_wake_function(wait_queue_entry_t *wait,
 	unsigned mode, int sync, void *arg)
 {
 	struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
@@ -3725,7 +3725,7 @@ static void memcg_event_remove(struct work_struct *work)
  *
  * Called with wqh->lock held and interrupts disabled.
  */
-static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
+static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode,
 			    int sync, void *key)
 {
 	struct mem_cgroup_event *event =
diff --git a/mm/mempool.c b/mm/mempool.c
index 47a659dedd44..1c0294858527 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -312,7 +312,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 {
 	void *element;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	gfp_t gfp_temp;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
diff --git a/mm/shmem.c b/mm/shmem.c
index e67d6ba4e98e..a6c7dece4660 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1902,7 +1902,7 @@ unlock:
  * entry unconditionally - even if something else had already woken the
  * target.
  */
-static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int ret = default_wake_function(wait, mode, sync, key);
 	list_del_init(&wait->task_list);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 7bc2208b6cc4..dca3cdd1a014 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -95,7 +95,7 @@ enum {
 
 struct p9_poll_wait {
 	struct p9_conn *conn;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	wait_queue_head_t *wait_addr;
 };
 
@@ -522,7 +522,7 @@ error:
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key)
+static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
 {
 	struct p9_poll_wait *pwait =
 		container_of(wait, struct p9_poll_wait, wait);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index fbf251fef70f..5c4808b3da2d 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -484,7 +484,7 @@ static int bnep_session(void *arg)
 	struct net_device *dev = s->dev;
 	struct sock *sk = s->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	BT_DBG("");
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 9e59b6654126..14f7c8135c31 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,7 +280,7 @@ static int cmtp_session(void *arg)
 	struct cmtp_session *session = arg;
 	struct sock *sk = session->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	BT_DBG("session %p", session);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0bec4588c3c8..fc31161e98f2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1244,7 +1244,7 @@ static void hidp_session_run(struct hidp_session *session)
 static int hidp_session_thread(void *arg)
 {
 	struct hidp_session *session = arg;
-	wait_queue_t ctrl_wait, intr_wait;
+	wait_queue_entry_t ctrl_wait, intr_wait;
 
 	BT_DBG("session %p", session);
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..34678828e2bb 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -68,7 +68,7 @@ static inline int connection_based(struct sock *sk)
 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
-static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
+static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
 				  void *key)
 {
 	unsigned long bits = (unsigned long)key;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1a0c961f4ffe..c77ced0109b7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -343,7 +343,7 @@ found:
  * are still connected to it and there's no way to inform "a polling
  * implementation" that it should let go of a certain wait queue
  *
- * In order to propagate a wake up, a wait_queue_t of the client
+ * In order to propagate a wake up, a wait_queue_entry_t of the client
  * socket is enqueued on the peer_wait queue of the server socket
  * whose wake function does a wake_up on the ordinary client socket
  * wait queue. This connection is established whenever a write (or
@@ -352,7 +352,7 @@ found:
  * was relayed.
  */
 
-static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 				      void *key)
 {
 	struct unix_sock *u;
diff --git a/sound/core/control.c b/sound/core/control.c
index c109b82eef4b..6362da17ac3f 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1577,7 +1577,7 @@ static ssize_t snd_ctl_read(struct file *file, char __user *buffer,
 		struct snd_ctl_event ev;
 		struct snd_kctl_event *kev;
 		while (list_empty(&ctl->events)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
 				goto __end_lock;
diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c
index 9602a7e38d8a..a73baa1242be 100644
--- a/sound/core/hwdep.c
+++ b/sound/core/hwdep.c
@@ -85,7 +85,7 @@ static int snd_hwdep_open(struct inode *inode, struct file * file)
 	int major = imajor(inode);
 	struct snd_hwdep *hw;
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (major == snd_major) {
 		hw = snd_lookup_minor_data(iminor(inode),
diff --git a/sound/core/init.c b/sound/core/init.c
index 6bda8436d765..d61d2b3cd521 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(snd_card_file_remove);
  */
 int snd_power_wait(struct snd_card *card, unsigned int power_state)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int result = 0;
 
 	/* fastpath */
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 36baf962f9b0..cd8b7bef8d06 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1554,7 +1554,7 @@ static int snd_pcm_oss_sync1(struct snd_pcm_substream *substream, size_t size)
 	ssize_t result = 0;
 	snd_pcm_state_t state;
 	long res;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	runtime = substream->runtime;
 	init_waitqueue_entry(&wait, current);
@@ -2387,7 +2387,7 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file)
 	struct snd_pcm_oss_file *pcm_oss_file;
 	struct snd_pcm_oss_setup setup[2];
 	int nonblock;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	err = nonseekable_open(inode, file);
 	if (err < 0)
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 5088d4b8db22..dd5254077ef7 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1904,7 +1904,7 @@ static int wait_for_avail(struct snd_pcm_substream *substream,
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int err = 0;
 	snd_pcm_uframes_t avail = 0;
 	long wait_time, tout;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 13dec5ec93f2..faa2e2be6f2e 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1652,7 +1652,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	struct snd_card *card;
 	struct snd_pcm_runtime *runtime;
 	struct snd_pcm_substream *s;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int result = 0;
 	int nonblock = 0;
 
@@ -2353,7 +2353,7 @@ static int snd_pcm_capture_open(struct inode *inode, struct file *file)
 static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream)
 {
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (pcm == NULL) {
 		err = -ENODEV;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index ab890336175f..32588ad05653 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -368,7 +368,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file)
 	int err;
 	struct snd_rawmidi *rmidi;
 	struct snd_rawmidi_file *rawmidi_file = NULL;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if ((file->f_flags & O_APPEND) && !(file->f_flags & O_NONBLOCK)) 
 		return -EINVAL;		/* invalid combination */
@@ -1002,7 +1002,7 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun
 	while (count > 0) {
 		spin_lock_irq(&runtime->lock);
 		while (!snd_rawmidi_ready(substream)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				spin_unlock_irq(&runtime->lock);
 				return result > 0 ? result : -EAGAIN;
@@ -1306,7 +1306,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
 	while (count > 0) {
 		spin_lock_irq(&runtime->lock);
 		while (!snd_rawmidi_ready_append(substream, count)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if (file->f_flags & O_NONBLOCK) {
 				spin_unlock_irq(&runtime->lock);
 				return result > 0 ? result : -EAGAIN;
@@ -1338,7 +1338,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
 	if (file->f_flags & O_DSYNC) {
 		spin_lock_irq(&runtime->lock);
 		while (runtime->avail != runtime->buffer_size) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			unsigned int last_avail = runtime->avail;
 			init_waitqueue_entry(&wait, current);
 			add_wait_queue(&runtime->sleep, &wait);
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 01c4cfe30c9f..a8c2822e0198 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -179,7 +179,7 @@ int snd_seq_fifo_cell_out(struct snd_seq_fifo *f,
 {
 	struct snd_seq_event_cell *cell;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (snd_BUG_ON(!f))
 		return -EINVAL;
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index d4c61ec9be13..d6e9aacdc36b 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -227,7 +227,7 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
 	struct snd_seq_event_cell *cell;
 	unsigned long flags;
 	int err = -EAGAIN;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (pool == NULL)
 		return -EINVAL;
diff --git a/sound/core/timer.c b/sound/core/timer.c
index cd67d1c12cf1..884c3066b028 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1964,7 +1964,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 	spin_lock_irq(&tu->qlock);
 	while ((long)count - result >= unit) {
 		while (!tu->qused) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
index 4dae9ff9ef5a..0b1e4b34b299 100644
--- a/sound/isa/wavefront/wavefront_synth.c
+++ b/sound/isa/wavefront/wavefront_synth.c
@@ -1782,7 +1782,7 @@ wavefront_should_cause_interrupt (snd_wavefront_t *dev,
 				  int val, int port, unsigned long timeout)
 
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	init_waitqueue_entry(&wait, current);
 	spin_lock_irq(&dev->irq_lock);
diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c
index dccf3db48fe0..8bf2ce32d4a8 100644
--- a/sound/pci/mixart/mixart_core.c
+++ b/sound/pci/mixart/mixart_core.c
@@ -239,7 +239,7 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int
 	struct mixart_msg resp;
 	u32 msg_frame = 0; /* set to 0, so it's no notification to wait for, but the answer */
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	long timeout;
 
 	init_waitqueue_entry(&wait, current);
@@ -284,7 +284,7 @@ int snd_mixart_send_msg_wait_notif(struct mixart_mgr *mgr,
 				   struct mixart_msg *request, u32 notif_event)
 {
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	long timeout;
 
 	if (snd_BUG_ON(!notif_event))
diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index fe4ba463b57c..1114166c685c 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -781,7 +781,7 @@ static snd_pcm_uframes_t snd_ymfpci_capture_pointer(struct snd_pcm_substream *su
 
 static void snd_ymfpci_irq_wait(struct snd_ymfpci *chip)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int loops = 4;
 
 	while (loops-- > 0) {
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a8d540398bbd..9120edf3c94b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -184,7 +184,7 @@ int __attribute__((weak)) kvm_arch_set_irq_inatomic(
  * Called with wqh->lock held and interrupts disabled
  */
 static int
-irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(wait, struct kvm_kernel_irqfd, wait);
-- 
cgit v1.2.3


From 50816c48997af857d4bab3dca1aba90339705e96 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 10:33:16 +0100
Subject: sched/wait: Standardize internal naming of wait-queue entries

So the various wait-queue entry variables in include/linux/wait.h
and kernel/sched/wait.c are named in a colorfully inconsistent
way:

	wait_queue_entry_t *wait
	wait_queue_entry_t *__wait	(even in plain C code!)
	wait_queue_entry_t *q		(!)
	wait_queue_entry_t *new		(making anyone who knows C++ cringe)
	wait_queue_entry_t *old

I think part of the reason for the inconsistency is the constant
apparent confusion about what a wait queue 'head' versus 'entry' is.

( Some of the documentation talks about a 'wait descriptor', which is
  the wait-queue entry itself - further adding to the confusion. )

The most common name is 'wait', but that in itself is somewhat
ambiguous as well, as it does not really make it clear whether
it's a wait-queue entry or head.

To improve all this name the wait-queue entry structure parameters
and variables consistently and push through this naming into all
the wait.h and wait.c code:

	struct wait_queue_entry *wq_entry

The 'wq_' prefix makes it easy to grep for, and we also use the
opportunity to move away from the typedef to a plain 'struct' naming:
in the kernel we typically reserve typedefs for cases where a
C structure is really small and somewhat opaque - such as pte_t.

wait-queue entries are neither small nor opaque, so use the more
standard 'struct xxx_entry' list management code nomenclature instead.

( We don't touch external users, and we preserve the typedef as well
  for actual wait-queue users, to reduce unnecessary churn. )

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 84 ++++++++++++++++++++++----------------------
 kernel/sched/wait.c  | 98 ++++++++++++++++++++++++++--------------------------
 2 files changed, 91 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5889f0c86ff7..77fdea851d8b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -11,8 +11,9 @@
 #include <uapi/linux/wait.h>
 
 typedef struct wait_queue_entry wait_queue_entry_t;
-typedef int (*wait_queue_func_t)(wait_queue_entry_t *wait, unsigned mode, int flags, void *key);
-int default_wake_function(wait_queue_entry_t *wait, unsigned mode, int flags, void *key);
+
+typedef int (*wait_queue_func_t)(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key);
+int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key);
 
 /* wait_queue_entry::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
@@ -37,7 +38,7 @@ struct wait_bit_key {
 
 struct wait_bit_queue {
 	struct wait_bit_key	key;
-	wait_queue_entry_t	wait;
+	struct wait_queue_entry	wait;
 };
 
 struct __wait_queue_head {
@@ -58,7 +59,7 @@ struct task_struct;
 	.task_list	= { NULL, NULL } }
 
 #define DECLARE_WAITQUEUE(name, tsk)					\
-	wait_queue_entry_t name = __WAITQUEUE_INITIALIZER(name, tsk)
+	struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
 	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
@@ -91,19 +92,19 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
 #endif
 
-static inline void init_waitqueue_entry(wait_queue_entry_t *q, struct task_struct *p)
+static inline void init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p)
 {
-	q->flags	= 0;
-	q->private	= p;
-	q->func		= default_wake_function;
+	wq_entry->flags		= 0;
+	wq_entry->private	= p;
+	wq_entry->func		= default_wake_function;
 }
 
 static inline void
-init_waitqueue_func_entry(wait_queue_entry_t *q, wait_queue_func_t func)
+init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func)
 {
-	q->flags	= 0;
-	q->private	= NULL;
-	q->func		= func;
+	wq_entry->flags		= 0;
+	wq_entry->private	= NULL;
+	wq_entry->func		= func;
 }
 
 /**
@@ -162,42 +163,41 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq)
 	return waitqueue_active(wq);
 }
 
-extern void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait);
-extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait);
+extern void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
+extern void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
 
-static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *new)
+static inline void __add_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry)
 {
-	list_add(&new->task_list, &head->task_list);
+	list_add(&wq_entry->task_list, &head->task_list);
 }
 
 /*
  * Used for wake-one threads:
  */
 static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait)
+__add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 {
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue(q, wq_entry);
 }
 
-static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head,
-					 wait_queue_entry_t *new)
+static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, struct wait_queue_entry *wq_entry)
 {
-	list_add_tail(&new->task_list, &head->task_list);
+	list_add_tail(&wq_entry->task_list, &head->task_list);
 }
 
 static inline void
-__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait)
+__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 {
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue_entry_tail(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue_entry_tail(q, wq_entry);
 }
 
 static inline void
-__remove_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *old)
+__remove_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry)
 {
-	list_del(&old->task_list);
+	list_del(&wq_entry->task_list);
 }
 
 typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
@@ -252,7 +252,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 	(!__builtin_constant_p(state) ||				\
 		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
 
-extern void init_wait_entry(wait_queue_entry_t *__wait, int flags);
+extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 
 /*
  * The below macro ___wait_event() has an explicit shadow of the __ret
@@ -269,12 +269,12 @@ extern void init_wait_entry(wait_queue_entry_t *__wait, int flags);
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
 ({									\
 	__label__ __out;						\
-	wait_queue_entry_t __wait;						\
+	struct wait_queue_entry __wq_entry;				\
 	long __ret = ret;	/* explicit shadow */			\
 									\
-	init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
+	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);\
 	for (;;) {							\
-		long __int = prepare_to_wait_event(&wq, &__wait, state);\
+		long __int = prepare_to_wait_event(&wq, &__wq_entry, state);\
 									\
 		if (condition)						\
 			break;						\
@@ -286,7 +286,7 @@ extern void init_wait_entry(wait_queue_entry_t *__wait, int flags);
 									\
 		cmd;							\
 	}								\
-	finish_wait(&wq, &__wait);					\
+	finish_wait(&wq, &__wq_entry);					\
 __out:	__ret;								\
 })
 
@@ -970,17 +970,17 @@ do {									\
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state);
-void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait);
-long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout);
-int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
-int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
-int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
+void prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state);
+long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state);
+void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 
 #define DEFINE_WAIT_FUNC(name, function)				\
-	wait_queue_entry_t name = {					\
+	struct wait_queue_entry name = {				\
 		.private	= current,				\
 		.func		= function,				\
 		.task_list	= LIST_HEAD_INIT((name).task_list),	\
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 301ea02dede0..c37b3140763e 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -21,34 +21,34 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c
 
 EXPORT_SYMBOL(__init_waitqueue_head);
 
-void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait)
+void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, wait);
+	__add_wait_queue_entry_tail(q, wq_entry);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
 
-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait)
+void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_entry_tail(q, wait);
+	__add_wait_queue_entry_tail(q, wq_entry);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
-void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait)
+void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wait);
+	__remove_wait_queue(q, wq_entry);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(remove_wait_queue);
@@ -170,43 +170,43 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * loads to move into the critical region).
  */
 void
-prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state)
+prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
+	if (list_empty(&wq_entry->task_list))
+		__add_wait_queue(q, wq_entry);
 	set_current_state(state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
 
 void
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state)
+prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue_entry_tail(q, wait);
+	if (list_empty(&wq_entry->task_list))
+		__add_wait_queue_entry_tail(q, wq_entry);
 	set_current_state(state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
-void init_wait_entry(wait_queue_entry_t *wait, int flags)
+void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
-	wait->flags = flags;
-	wait->private = current;
-	wait->func = autoremove_wake_function;
-	INIT_LIST_HEAD(&wait->task_list);
+	wq_entry->flags = flags;
+	wq_entry->private = current;
+	wq_entry->func = autoremove_wake_function;
+	INIT_LIST_HEAD(&wq_entry->task_list);
 }
 EXPORT_SYMBOL(init_wait_entry);
 
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state)
+long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 	long ret = 0;
@@ -225,14 +225,14 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int s
 		 * can't see us, it should wake up another exclusive waiter if
 		 * we fail.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->task_list);
 		ret = -ERESTARTSYS;
 	} else {
-		if (list_empty(&wait->task_list)) {
-			if (wait->flags & WQ_FLAG_EXCLUSIVE)
-				__add_wait_queue_entry_tail(q, wait);
+		if (list_empty(&wq_entry->task_list)) {
+			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
+				__add_wait_queue_entry_tail(q, wq_entry);
 			else
-				__add_wait_queue(q, wait);
+				__add_wait_queue(q, wq_entry);
 		}
 		set_current_state(state);
 	}
@@ -284,13 +284,13 @@ EXPORT_SYMBOL(do_wait_intr_irq);
 /**
  * finish_wait - clean up after waiting in a queue
  * @q: waitqueue waited on
- * @wait: wait descriptor
+ * @wq_entry: wait descriptor
  *
  * Sets current thread back to running state and removes
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait)
+void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
@@ -308,20 +308,20 @@ void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait)
 	 *    have _one_ other CPU that looks at or modifies
 	 *    the list).
 	 */
-	if (!list_empty_careful(&wait->task_list)) {
+	if (!list_empty_careful(&wq_entry->task_list)) {
 		spin_lock_irqsave(&q->lock, flags);
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->task_list);
 		spin_unlock_irqrestore(&q->lock, flags);
 	}
 }
 EXPORT_SYMBOL(finish_wait);
 
-int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
-	int ret = default_wake_function(wait, mode, sync, key);
+	int ret = default_wake_function(wq_entry, mode, sync, key);
 
 	if (ret)
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->task_list);
 	return ret;
 }
 EXPORT_SYMBOL(autoremove_wake_function);
@@ -341,17 +341,17 @@ static inline bool is_kthread_should_stop(void)
  *
  *     p->state = mode;				condition = true;
  *     smp_mb(); // A				smp_wmb(); // C
- *     if (!wait->flags & WQ_FLAG_WOKEN)	wait->flags |= WQ_FLAG_WOKEN;
+ *     if (!wq_entry->flags & WQ_FLAG_WOKEN)	wq_entry->flags |= WQ_FLAG_WOKEN;
  *         schedule()				try_to_wake_up();
  *     p->state = TASK_RUNNING;		    ~~~~~~~~~~~~~~~~~~
- *     wait->flags &= ~WQ_FLAG_WOKEN;		condition = true;
+ *     wq_entry->flags &= ~WQ_FLAG_WOKEN;		condition = true;
  *     smp_mb() // B				smp_wmb(); // C
- *						wait->flags |= WQ_FLAG_WOKEN;
+ *						wq_entry->flags |= WQ_FLAG_WOKEN;
  * }
  * remove_wait_queue(&wq, &wait);
  *
  */
-long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout)
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
 {
 	set_current_state(mode); /* A */
 	/*
@@ -359,7 +359,7 @@ long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout)
 	 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
 	 * also observe all state before the wakeup.
 	 */
-	if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
+	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
 		timeout = schedule_timeout(timeout);
 	__set_current_state(TASK_RUNNING);
 
@@ -369,13 +369,13 @@ long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout)
 	 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
 	 * an event.
 	 */
-	smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
 
 	return timeout;
 }
 EXPORT_SYMBOL(wait_woken);
 
-int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
 	/*
 	 * Although this function is called under waitqueue lock, LOCK
@@ -385,24 +385,24 @@ int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void
 	 * and is paired with smp_store_mb() in wait_woken().
 	 */
 	smp_wmb(); /* C */
-	wait->flags |= WQ_FLAG_WOKEN;
+	wq_entry->flags |= WQ_FLAG_WOKEN;
 
-	return default_wake_function(wait, mode, sync, key);
+	return default_wake_function(wq_entry, mode, sync, key);
 }
 EXPORT_SYMBOL(woken_wake_function);
 
-int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
 {
 	struct wait_bit_key *key = arg;
 	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
+		= container_of(wq_entry, struct wait_bit_queue, wait);
 
 	if (wait_bit->key.flags != key->flags ||
 			wait_bit->key.bit_nr != key->bit_nr ||
 			test_bit(key->bit_nr, key->flags))
 		return 0;
 	else
-		return autoremove_wake_function(wait, mode, sync, key);
+		return autoremove_wake_function(wq_entry, mode, sync, key);
 }
 EXPORT_SYMBOL(wake_bit_function);
 
@@ -534,19 +534,19 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
 	return bit_waitqueue(p, 0);
 }
 
-static int wake_atomic_t_function(wait_queue_entry_t *wait, unsigned mode, int sync,
+static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
 				  void *arg)
 {
 	struct wait_bit_key *key = arg;
 	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
+		= container_of(wq_entry, struct wait_bit_queue, wait);
 	atomic_t *val = key->flags;
 
 	if (wait_bit->key.flags != key->flags ||
 	    wait_bit->key.bit_nr != key->bit_nr ||
 	    atomic_read(val) != 0)
 		return 0;
-	return autoremove_wake_function(wait, mode, sync, key);
+	return autoremove_wake_function(wq_entry, mode, sync, key);
 }
 
 /*
-- 
cgit v1.2.3


From 9d9d676f595b5081326be7a17dc681fcb38fb3b2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 11:10:18 +0100
Subject: sched/wait: Standardize internal naming of wait-queue heads

The wait-queue head parameters and variables are named in a
couple of ways, we have the following variants currently:

	wait_queue_head_t *q
	wait_queue_head_t *wq
	wait_queue_head_t *head

In particular the 'wq' naming is ambiguous in the sense whether it's
a wait-queue head or entry name - as entries were often named 'wait'.

( Not to mention the confusion of any readers coming over from
  workqueue-land. )

Standardize all this around a single, unambiguous parameter and
variable name:

	struct wait_queue_head *wq_head

which is easy to grep for and also rhymes nicely with the wait-queue
entry naming:

	struct wait_queue_entry *wq_entry

Also rename:

	struct __wait_queue_head => struct wait_queue_head

... and use this struct type to migrate from typedefs usage to 'struct'
usage, which is more in line with existing kernel practices.

Don't touch any external users and preserve the main wait_queue_head_t
typedef.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h |  76 ++++++++++++-------------
 kernel/sched/wait.c  | 154 +++++++++++++++++++++++++--------------------------
 2 files changed, 115 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 77fdea851d8b..c3d1cefc7853 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -41,11 +41,11 @@ struct wait_bit_queue {
 	struct wait_queue_entry	wait;
 };
 
-struct __wait_queue_head {
+struct wait_queue_head {
 	spinlock_t		lock;
 	struct list_head	task_list;
 };
-typedef struct __wait_queue_head wait_queue_head_t;
+typedef struct wait_queue_head wait_queue_head_t;
 
 struct task_struct;
 
@@ -66,7 +66,7 @@ struct task_struct;
 	.task_list	= { &(name).task_list, &(name).task_list } }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
-	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
+	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
 	{ .flags = word, .bit_nr = bit, }
@@ -74,20 +74,20 @@ struct task_struct;
 #define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)				\
 	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
 
-extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
+extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *);
 
-#define init_waitqueue_head(q)				\
+#define init_waitqueue_head(wq_head)			\
 	do {						\
 		static struct lock_class_key __key;	\
 							\
-		__init_waitqueue_head((q), #q, &__key);	\
+		__init_waitqueue_head((wq_head), #wq_head, &__key);	\
 	} while (0)
 
 #ifdef CONFIG_LOCKDEP
 # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
 	({ init_waitqueue_head(&name); name; })
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
-	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
 #else
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
 #endif
@@ -109,14 +109,14 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f
 
 /**
  * waitqueue_active -- locklessly test for waiters on the queue
- * @q: the waitqueue to test for waiters
+ * @wq_head: the waitqueue to test for waiters
  *
  * returns true if the wait list is not empty
  *
  * NOTE: this function is lockless and requires care, incorrect usage _will_
  * lead to sporadic and non-obvious failure.
  *
- * Use either while holding wait_queue_head_t::lock or when used for wakeups
+ * Use either while holding wait_queue_head::lock or when used for wakeups
  * with an extra smp_mb() like:
  *
  *      CPU0 - waker                    CPU1 - waiter
@@ -137,9 +137,9 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f
  * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
  * which (when the lock is uncontended) are of roughly equal cost.
  */
-static inline int waitqueue_active(wait_queue_head_t *q)
+static inline int waitqueue_active(struct wait_queue_head *wq_head)
 {
-	return !list_empty(&q->task_list);
+	return !list_empty(&wq_head->task_list);
 }
 
 /**
@@ -150,7 +150,7 @@ static inline int waitqueue_active(wait_queue_head_t *q)
  *
  * Please refer to the comment for waitqueue_active.
  */
-static inline bool wq_has_sleeper(wait_queue_head_t *wq)
+static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 {
 	/*
 	 * We need to be sure we are in sync with the
@@ -160,62 +160,62 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq)
 	 * waiting side.
 	 */
 	smp_mb();
-	return waitqueue_active(wq);
+	return waitqueue_active(wq_head);
 }
 
-extern void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
-extern void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
-static inline void __add_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry)
+static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add(&wq_entry->task_list, &head->task_list);
+	list_add(&wq_entry->task_list, &wq_head->task_list);
 }
 
 /*
  * Used for wake-one threads:
  */
 static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
+__add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue(q, wq_entry);
+	__add_wait_queue(wq_head, wq_entry);
 }
 
-static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, struct wait_queue_entry *wq_entry)
+static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add_tail(&wq_entry->task_list, &head->task_list);
+	list_add_tail(&wq_entry->task_list, &wq_head->task_list);
 }
 
 static inline void
-__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
+__add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue_entry_tail(q, wq_entry);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
 }
 
 static inline void
-__remove_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry)
+__remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	list_del(&wq_entry->task_list);
 }
 
 typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_bit(wait_queue_head_t *, void *, int);
-int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+void __wake_up_bit(struct wait_queue_head *, void *, int);
+int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
+int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
 void wake_up_bit(void *, int);
 void wake_up_atomic_t(atomic_t *);
 int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
 int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);
 int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
 int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
-wait_queue_head_t *bit_waitqueue(void *, int);
+struct wait_queue_head *bit_waitqueue(void *, int);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -970,10 +970,10 @@ do {									\
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state);
-void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry);
+void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
 int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index c37b3140763e..203aeea96f16 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -12,44 +12,44 @@
 #include <linux/hash.h>
 #include <linux/kthread.h>
 
-void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
+void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
 {
-	spin_lock_init(&q->lock);
-	lockdep_set_class_and_name(&q->lock, key, name);
-	INIT_LIST_HEAD(&q->task_list);
+	spin_lock_init(&wq_head->lock);
+	lockdep_set_class_and_name(&wq_head->lock, key, name);
+	INIT_LIST_HEAD(&wq_head->task_list);
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
 
-void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
+void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
 	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_entry_tail(q, wq_entry);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
 
-void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
+void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
 	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_entry_tail(q, wq_entry);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
-void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
+void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wq_entry);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__remove_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(remove_wait_queue);
 
@@ -63,12 +63,12 @@ EXPORT_SYMBOL(remove_wait_queue);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
 	wait_queue_entry_t *curr, *next;
 
-	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+	list_for_each_entry_safe(curr, next, &wq_head->task_list, task_list) {
 		unsigned flags = curr->flags;
 
 		if (curr->func(curr, mode, wake_flags, key) &&
@@ -79,7 +79,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 
 /**
  * __wake_up - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
@@ -87,35 +87,35 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up(wait_queue_head_t *q, unsigned int mode,
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, 0, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, 0, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(__wake_up);
 
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
 {
-	__wake_up_common(q, mode, nr, 0, NULL);
+	__wake_up_common(wq_head, mode, nr, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, 1, 0, key);
+	__wake_up_common(wq_head, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: opaque value to be passed to wakeup targets
@@ -130,30 +130,30 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 	int wake_flags = 1; /* XXX WF_SYNC */
 
-	if (unlikely(!q))
+	if (unlikely(!wq_head))
 		return;
 
 	if (unlikely(nr_exclusive != 1))
 		wake_flags = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 
 /*
  * __wake_up_sync - see __wake_up_sync_key()
  */
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
 {
-	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
+	__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
@@ -170,30 +170,30 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * loads to move into the critical region).
  */
 void
-prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state)
+prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
 	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
 	if (list_empty(&wq_entry->task_list))
-		__add_wait_queue(q, wq_entry);
+		__add_wait_queue(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
 
 void
-prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state)
+prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
 	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
 	if (list_empty(&wq_entry->task_list))
-		__add_wait_queue_entry_tail(q, wq_entry);
+		__add_wait_queue_entry_tail(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
@@ -206,12 +206,12 @@ void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 }
 EXPORT_SYMBOL(init_wait_entry);
 
-long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state)
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 	long ret = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
 	if (unlikely(signal_pending_state(state, current))) {
 		/*
 		 * Exclusive waiter must not fail if it was selected by wakeup,
@@ -219,7 +219,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_ent
 		 *
 		 * The caller will recheck the condition and return success if
 		 * we were already woken up, we can not miss the event because
-		 * wakeup locks/unlocks the same q->lock.
+		 * wakeup locks/unlocks the same wq_head->lock.
 		 *
 		 * But we need to ensure that set-condition + wakeup after that
 		 * can't see us, it should wake up another exclusive waiter if
@@ -230,13 +230,13 @@ long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_ent
 	} else {
 		if (list_empty(&wq_entry->task_list)) {
 			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
-				__add_wait_queue_entry_tail(q, wq_entry);
+				__add_wait_queue_entry_tail(wq_head, wq_entry);
 			else
-				__add_wait_queue(q, wq_entry);
+				__add_wait_queue(wq_head, wq_entry);
 		}
 		set_current_state(state);
 	}
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 
 	return ret;
 }
@@ -283,14 +283,14 @@ EXPORT_SYMBOL(do_wait_intr_irq);
 
 /**
  * finish_wait - clean up after waiting in a queue
- * @q: waitqueue waited on
+ * @wq_head: waitqueue waited on
  * @wq_entry: wait descriptor
  *
  * Sets current thread back to running state and removes
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
@@ -309,9 +309,9 @@ void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry)
 	 *    the list).
 	 */
 	if (!list_empty_careful(&wq_entry->task_list)) {
-		spin_lock_irqsave(&q->lock, flags);
+		spin_lock_irqsave(&wq_head->lock, flags);
 		list_del_init(&wq_entry->task_list);
-		spin_unlock_irqrestore(&q->lock, flags);
+		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
 EXPORT_SYMBOL(finish_wait);
@@ -334,7 +334,7 @@ static inline bool is_kthread_should_stop(void)
 /*
  * DEFINE_WAIT_FUNC(wait, woken_wake_func);
  *
- * add_wait_queue(&wq, &wait);
+ * add_wait_queue(&wq_head, &wait);
  * for (;;) {
  *     if (condition)
  *         break;
@@ -348,7 +348,7 @@ static inline bool is_kthread_should_stop(void)
  *     smp_mb() // B				smp_wmb(); // C
  *						wq_entry->flags |= WQ_FLAG_WOKEN;
  * }
- * remove_wait_queue(&wq, &wait);
+ * remove_wait_queue(&wq_head, &wait);
  *
  */
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
@@ -412,17 +412,17 @@ EXPORT_SYMBOL(wake_bit_function);
  * permitted return codes. Nonzero return codes halt waiting and return.
  */
 int __sched
-__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
+__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 	      wait_bit_action_f *action, unsigned mode)
 {
 	int ret = 0;
 
 	do {
-		prepare_to_wait(wq, &q->wait, mode);
+		prepare_to_wait(wq_head, &q->wait, mode);
 		if (test_bit(q->key.bit_nr, q->key.flags))
 			ret = (*action)(&q->key, mode);
 	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
-	finish_wait(wq, &q->wait);
+	finish_wait(wq_head, &q->wait);
 	return ret;
 }
 EXPORT_SYMBOL(__wait_on_bit);
@@ -430,10 +430,10 @@ EXPORT_SYMBOL(__wait_on_bit);
 int __sched out_of_line_wait_on_bit(void *word, int bit,
 				    wait_bit_action_f *action, unsigned mode)
 {
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
 	DEFINE_WAIT_BIT(wait, word, bit);
 
-	return __wait_on_bit(wq, &wait, action, mode);
+	return __wait_on_bit(wq_head, &wait, action, mode);
 }
 EXPORT_SYMBOL(out_of_line_wait_on_bit);
 
@@ -441,36 +441,36 @@ int __sched out_of_line_wait_on_bit_timeout(
 	void *word, int bit, wait_bit_action_f *action,
 	unsigned mode, unsigned long timeout)
 {
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
 	DEFINE_WAIT_BIT(wait, word, bit);
 
 	wait.key.timeout = jiffies + timeout;
-	return __wait_on_bit(wq, &wait, action, mode);
+	return __wait_on_bit(wq_head, &wait, action, mode);
 }
 EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
 
 int __sched
-__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 			wait_bit_action_f *action, unsigned mode)
 {
 	int ret = 0;
 
 	for (;;) {
-		prepare_to_wait_exclusive(wq, &q->wait, mode);
+		prepare_to_wait_exclusive(wq_head, &q->wait, mode);
 		if (test_bit(q->key.bit_nr, q->key.flags)) {
 			ret = action(&q->key, mode);
 			/*
 			 * See the comment in prepare_to_wait_event().
-			 * finish_wait() does not necessarily takes wq->lock,
+			 * finish_wait() does not necessarily takes wwq_head->lock,
 			 * but test_and_set_bit() implies mb() which pairs with
 			 * smp_mb__after_atomic() before wake_up_page().
 			 */
 			if (ret)
-				finish_wait(wq, &q->wait);
+				finish_wait(wq_head, &q->wait);
 		}
 		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
 			if (!ret)
-				finish_wait(wq, &q->wait);
+				finish_wait(wq_head, &q->wait);
 			return 0;
 		} else if (ret) {
 			return ret;
@@ -482,18 +482,18 @@ EXPORT_SYMBOL(__wait_on_bit_lock);
 int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
 					 wait_bit_action_f *action, unsigned mode)
 {
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
 	DEFINE_WAIT_BIT(wait, word, bit);
 
-	return __wait_on_bit_lock(wq, &wait, action, mode);
+	return __wait_on_bit_lock(wq_head, &wait, action, mode);
 }
 EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
 
-void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
 {
 	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
-	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_NORMAL, 1, &key);
+	if (waitqueue_active(wq_head))
+		__wake_up(wq_head, TASK_NORMAL, 1, &key);
 }
 EXPORT_SYMBOL(__wake_up_bit);
 
@@ -555,20 +555,20 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
  * return codes halt waiting and return.
  */
 static __sched
-int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
+int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 		       int (*action)(atomic_t *), unsigned mode)
 {
 	atomic_t *val;
 	int ret = 0;
 
 	do {
-		prepare_to_wait(wq, &q->wait, mode);
+		prepare_to_wait(wq_head, &q->wait, mode);
 		val = q->key.flags;
 		if (atomic_read(val) == 0)
 			break;
 		ret = (*action)(val);
 	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq, &q->wait);
+	finish_wait(wq_head, &q->wait);
 	return ret;
 }
 
@@ -586,10 +586,10 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
 __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
 					 unsigned mode)
 {
-	wait_queue_head_t *wq = atomic_t_waitqueue(p);
+	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
 	DEFINE_WAIT_ATOMIC_T(wait, p);
 
-	return __wait_on_atomic_t(wq, &wait, action, mode);
+	return __wait_on_atomic_t(wq_head, &wait, action, mode);
 }
 EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
 
-- 
cgit v1.2.3


From 2141713616c652aeabf2dd5c1e89bc601c4fed6a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 11:25:39 +0100
Subject: sched/wait: Standardize 'struct wait_bit_queue' wait-queue entry
 field name

Rename 'struct wait_bit_queue::wait' to ::wq_entry, to more clearly
name it as a wait-queue entry.

Propagate it to a couple of usage sites where the wait-bit-queue internals
are exposed.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/inode.c           |  8 ++++----
 fs/jbd2/journal.c    |  4 ++--
 fs/xfs/xfs_icache.c  |  4 ++--
 fs/xfs/xfs_inode.c   |  8 ++++----
 include/linux/wait.h |  6 +++---
 kernel/sched/wait.c  | 41 ++++++++++++++++++++---------------------
 6 files changed, 35 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..70761d6cafcd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1891,11 +1891,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	wait_queue_head_t *wq;
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
-	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 	schedule();
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 	spin_lock(&inode_hash_lock);
 }
 
@@ -2038,11 +2038,11 @@ static void __inode_dio_wait(struct inode *inode)
 	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
 
 	do {
-		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (atomic_read(&inode->i_dio_count))
 			schedule();
 	} while (atomic_read(&inode->i_dio_count));
-	finish_wait(wq, &q.wait);
+	finish_wait(wq, &q.wq_entry);
 }
 
 /**
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ebad34266bcf..7d5ef3bf3f3e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2579,10 +2579,10 @@ restart:
 		wait_queue_head_t *wq;
 		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
 		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		spin_unlock(&journal->j_list_lock);
 		schedule();
-		finish_wait(wq, &wait.wait);
+		finish_wait(wq, &wait.wq_entry);
 		goto restart;
 	}
 
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 990210fcb9c3..b9c12e1cc23a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -269,12 +269,12 @@ xfs_inew_wait(
 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
 
 	do {
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (!xfs_iflags_test(ip, XFS_INEW))
 			break;
 		schedule();
 	} while (true);
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c56500..c0a1e840a588 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -622,12 +622,12 @@ __xfs_iflock(
 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
 
 	do {
-		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (xfs_isiflocked(ip))
 			io_schedule();
 	} while (!xfs_iflock_nowait(ip));
 
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 STATIC uint
@@ -2486,11 +2486,11 @@ __xfs_iunpin_wait(
 	xfs_iunpin(ip);
 
 	do {
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (xfs_ipincount(ip))
 			io_schedule();
 	} while (xfs_ipincount(ip));
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 void
diff --git a/include/linux/wait.h b/include/linux/wait.h
index c3d1cefc7853..1c8add685f22 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -38,7 +38,7 @@ struct wait_bit_key {
 
 struct wait_bit_queue {
 	struct wait_bit_key	key;
-	struct wait_queue_entry	wait;
+	struct wait_queue_entry	wq_entry;
 };
 
 struct wait_queue_head {
@@ -991,11 +991,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 #define DEFINE_WAIT_BIT(name, word, bit)				\
 	struct wait_bit_queue name = {					\
 		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
-		.wait	= {						\
+		.wq_entry = {						\
 			.private	= current,			\
 			.func		= wake_bit_function,		\
 			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
+				LIST_HEAD_INIT((name).wq_entry.task_list), \
 		},							\
 	}
 
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 203aeea96f16..f1ba0625b8be 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -395,7 +395,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 {
 	struct wait_bit_key *key = arg;
 	struct wait_bit_queue *wait_bit
-		= container_of(wq_entry, struct wait_bit_queue, wait);
+		= container_of(wq_entry, struct wait_bit_queue, wq_entry);
 
 	if (wait_bit->key.flags != key->flags ||
 			wait_bit->key.bit_nr != key->bit_nr ||
@@ -418,11 +418,11 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 	int ret = 0;
 
 	do {
-		prepare_to_wait(wq_head, &q->wait, mode);
+		prepare_to_wait(wq_head, &q->wq_entry, mode);
 		if (test_bit(q->key.bit_nr, q->key.flags))
 			ret = (*action)(&q->key, mode);
 	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
-	finish_wait(wq_head, &q->wait);
+	finish_wait(wq_head, &q->wq_entry);
 	return ret;
 }
 EXPORT_SYMBOL(__wait_on_bit);
@@ -431,9 +431,9 @@ int __sched out_of_line_wait_on_bit(void *word, int bit,
 				    wait_bit_action_f *action, unsigned mode)
 {
 	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
 
-	return __wait_on_bit(wq_head, &wait, action, mode);
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
 }
 EXPORT_SYMBOL(out_of_line_wait_on_bit);
 
@@ -442,10 +442,10 @@ int __sched out_of_line_wait_on_bit_timeout(
 	unsigned mode, unsigned long timeout)
 {
 	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
 
-	wait.key.timeout = jiffies + timeout;
-	return __wait_on_bit(wq_head, &wait, action, mode);
+	wq_entry.key.timeout = jiffies + timeout;
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
 }
 EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
 
@@ -456,7 +456,7 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 	int ret = 0;
 
 	for (;;) {
-		prepare_to_wait_exclusive(wq_head, &q->wait, mode);
+		prepare_to_wait_exclusive(wq_head, &q->wq_entry, mode);
 		if (test_bit(q->key.bit_nr, q->key.flags)) {
 			ret = action(&q->key, mode);
 			/*
@@ -466,11 +466,11 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 			 * smp_mb__after_atomic() before wake_up_page().
 			 */
 			if (ret)
-				finish_wait(wq_head, &q->wait);
+				finish_wait(wq_head, &q->wq_entry);
 		}
 		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
 			if (!ret)
-				finish_wait(wq_head, &q->wait);
+				finish_wait(wq_head, &q->wq_entry);
 			return 0;
 		} else if (ret) {
 			return ret;
@@ -483,9 +483,9 @@ int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
 					 wait_bit_action_f *action, unsigned mode)
 {
 	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
 
-	return __wait_on_bit_lock(wq_head, &wait, action, mode);
+	return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
 }
 EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
 
@@ -538,8 +538,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
 				  void *arg)
 {
 	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wq_entry, struct wait_bit_queue, wait);
+	struct wait_bit_queue *wait_bit = container_of(wq_entry, struct wait_bit_queue, wq_entry);
 	atomic_t *val = key->flags;
 
 	if (wait_bit->key.flags != key->flags ||
@@ -562,24 +561,24 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue *q
 	int ret = 0;
 
 	do {
-		prepare_to_wait(wq_head, &q->wait, mode);
+		prepare_to_wait(wq_head, &q->wq_entry, mode);
 		val = q->key.flags;
 		if (atomic_read(val) == 0)
 			break;
 		ret = (*action)(val);
 	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq_head, &q->wait);
+	finish_wait(wq_head, &q->wq_entry);
 	return ret;
 }
 
 #define DEFINE_WAIT_ATOMIC_T(name, p)					\
 	struct wait_bit_queue name = {					\
 		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
-		.wait	= {						\
+		.wq_entry = {						\
 			.private	= current,			\
 			.func		= wake_atomic_t_function,	\
 			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
+				LIST_HEAD_INIT((name).wq_entry.task_list), \
 		},							\
 	}
 
@@ -587,9 +586,9 @@ __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
 					 unsigned mode)
 {
 	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
-	DEFINE_WAIT_ATOMIC_T(wait, p);
+	DEFINE_WAIT_ATOMIC_T(wq_entry, p);
 
-	return __wait_on_atomic_t(wq_head, &wait, action, mode);
+	return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
 }
 EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
 
-- 
cgit v1.2.3


From 76c85ddc4695bb7b8209bfeff11f5156088f9197 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 11:35:27 +0100
Subject: sched/wait: Standardize wait_bit_queue naming

So wait-bit-queue head variables are often named:

	struct wait_bit_queue *q

... which is a bit ambiguous and super confusing, because
they clearly suggest wait-queue head semantics and behavior
(they rhyme with the old wait_queue_t *q naming), while they
are extended wait-queue _entries_, not heads!

They are misnomers in two ways:

 - the 'wait_bit_queue' leaves open the question of whether
   it's an entry or a head

 - the 'q' parameter and local variable naming falsely implies
   that it's a 'queue' - while it's an entry.

This resulted in sometimes confusing cases such as:

	finish_wait(wq, &q->wait);

where the 'q' is not a wait-queue head, but a wait-bit-queue entry.

So improve this all by standardizing wait-bit-queue nomenclature
similar to wait-queue head naming:

	struct wait_bit_queue   => struct wait_bit_queue_entry
	q			=> wbq_entry

Which makes it all a much clearer:

	struct wait_bit_queue_entry *wbq_entry

... and turns the former confusing piece of code into:

	finish_wait(wq_head, &wbq_entry->wq_entry;

which IMHO makes it apparently clear what we are doing,
without having to analyze the context of the code: we are
adding a wait-queue entry to a regular wait-queue head,
which entry is embedded in a wait-bit-queue entry.

I'm not a big fan of acronyms, but repeating wait_bit_queue_entry
in field and local variable names is too long, so Hopefully it's
clear enough that 'wq_' prefixes stand for wait-queues, while
'wbq_' prefixes stand for wait-bit-queues.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h |  8 ++++----
 kernel/sched/wait.c  | 41 ++++++++++++++++++++---------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1c8add685f22..fc7c32d82120 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -36,7 +36,7 @@ struct wait_bit_key {
 	unsigned long		timeout;
 };
 
-struct wait_bit_queue {
+struct wait_bit_queue_entry {
 	struct wait_bit_key	key;
 	struct wait_queue_entry	wq_entry;
 };
@@ -207,8 +207,8 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 void __wake_up_bit(struct wait_queue_head *, void *, int);
-int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
+int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned);
+int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned);
 void wake_up_bit(void *, int);
 void wake_up_atomic_t(atomic_t *);
 int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
@@ -989,7 +989,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 
 #define DEFINE_WAIT_BIT(name, word, bit)				\
-	struct wait_bit_queue name = {					\
+	struct wait_bit_queue_entry name = {				\
 		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
 		.wq_entry = {						\
 			.private	= current,			\
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f1ba0625b8be..95e6d3820cba 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -394,8 +394,7 @@ EXPORT_SYMBOL(woken_wake_function);
 int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
 {
 	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wq_entry, struct wait_bit_queue, wq_entry);
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
 
 	if (wait_bit->key.flags != key->flags ||
 			wait_bit->key.bit_nr != key->bit_nr ||
@@ -412,17 +411,17 @@ EXPORT_SYMBOL(wake_bit_function);
  * permitted return codes. Nonzero return codes halt waiting and return.
  */
 int __sched
-__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
+__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
 	      wait_bit_action_f *action, unsigned mode)
 {
 	int ret = 0;
 
 	do {
-		prepare_to_wait(wq_head, &q->wq_entry, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags))
-			ret = (*action)(&q->key, mode);
-	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
-	finish_wait(wq_head, &q->wq_entry);
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
+			ret = (*action)(&wbq_entry->key, mode);
+	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
 	return ret;
 }
 EXPORT_SYMBOL(__wait_on_bit);
@@ -450,15 +449,15 @@ int __sched out_of_line_wait_on_bit_timeout(
 EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
 
 int __sched
-__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
+__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
 			wait_bit_action_f *action, unsigned mode)
 {
 	int ret = 0;
 
 	for (;;) {
-		prepare_to_wait_exclusive(wq_head, &q->wq_entry, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags)) {
-			ret = action(&q->key, mode);
+		prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			ret = action(&wbq_entry->key, mode);
 			/*
 			 * See the comment in prepare_to_wait_event().
 			 * finish_wait() does not necessarily takes wwq_head->lock,
@@ -466,11 +465,11 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
 			 * smp_mb__after_atomic() before wake_up_page().
 			 */
 			if (ret)
-				finish_wait(wq_head, &q->wq_entry);
+				finish_wait(wq_head, &wbq_entry->wq_entry);
 		}
-		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
+		if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
 			if (!ret)
-				finish_wait(wq_head, &q->wq_entry);
+				finish_wait(wq_head, &wbq_entry->wq_entry);
 			return 0;
 		} else if (ret) {
 			return ret;
@@ -538,7 +537,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
 				  void *arg)
 {
 	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit = container_of(wq_entry, struct wait_bit_queue, wq_entry);
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
 	atomic_t *val = key->flags;
 
 	if (wait_bit->key.flags != key->flags ||
@@ -554,25 +553,25 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
  * return codes halt waiting and return.
  */
 static __sched
-int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue *q,
+int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
 		       int (*action)(atomic_t *), unsigned mode)
 {
 	atomic_t *val;
 	int ret = 0;
 
 	do {
-		prepare_to_wait(wq_head, &q->wq_entry, mode);
-		val = q->key.flags;
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		val = wbq_entry->key.flags;
 		if (atomic_read(val) == 0)
 			break;
 		ret = (*action)(val);
 	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq_head, &q->wq_entry);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
 	return ret;
 }
 
 #define DEFINE_WAIT_ATOMIC_T(name, p)					\
-	struct wait_bit_queue name = {					\
+	struct wait_bit_queue_entry name = {				\
 		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
 		.wq_entry = {						\
 			.private	= current,			\
-- 
cgit v1.2.3


From 939798a072300698870b96756c38bb34c20f6c71 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 11:54:46 +0100
Subject: sched/wait: Improve the bit-wait API parameter names in the API
 function prototypes

Contrary to kernel tradition, most of the bit-wait function prototypes
in <linux/wait.h> don't fully define the parameter names, they only
list the types:

	int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);

... which is pretty passive-aggressive in terms of informing the reader
about what these functions are doing.

Fill in the parameter names, such as:

	int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);

Also turn spurious (and inconsistently utilized) cases of 'unsigned' into 'unsigned int'.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index fc7c32d82120..1338505d8b9f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -200,22 +200,22 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 	list_del(&wq_entry->task_list);
 }
 
-typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
+typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
-void __wake_up_bit(struct wait_queue_head *, void *, int);
-int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned);
-void wake_up_bit(void *, int);
-void wake_up_atomic_t(atomic_t *);
-int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);
-int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
-struct wait_queue_head *bit_waitqueue(void *, int);
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+void wake_up_bit(void *word, int bit);
+void wake_up_atomic_t(atomic_t *p);
+int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode);
+struct wait_queue_head *bit_waitqueue(void *word, int bit);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -1008,10 +1008,10 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 	} while (0)
 
 
-extern int bit_wait(struct wait_bit_key *, int);
-extern int bit_wait_io(struct wait_bit_key *, int);
-extern int bit_wait_timeout(struct wait_bit_key *, int);
-extern int bit_wait_io_timeout(struct wait_bit_key *, int);
+extern int bit_wait(struct wait_bit_key *key, int bit);
+extern int bit_wait_io(struct wait_bit_key *key, int bit);
+extern int bit_wait_timeout(struct wait_bit_key *key, int bit);
+extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit);
 
 /**
  * wait_on_bit - wait for a bit to be cleared
-- 
cgit v1.2.3


From 4b1c480bfa3b246e292f4d50167756252a9717ed Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 12:07:33 +0100
Subject: sched/wait: Re-adjust macro line continuation backslashes in
 <linux/wait.h>

So there's over 300 CPP macro line-continuation backslashes in
include/linux/wait.h (!!), which are aligned vertically to make
the macro maze a bit more navigable.

The recent renames and reorganization broke some of them, and
instead of re-aligning them in every patch (which would add
a lot of stylistic noise to the patches and make them less
readable), I just ignored them - and fixed them up in a single
go in this patch.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 645 +++++++++++++++++++++++++--------------------------
 1 file changed, 322 insertions(+), 323 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1338505d8b9f..0805098f3589 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -53,34 +53,34 @@ struct task_struct;
  * Macros for declaration and initialisaton of the datatypes
  */
 
-#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
-	.private	= tsk,						\
-	.func		= default_wake_function,			\
+#define __WAITQUEUE_INITIALIZER(name, tsk) {					\
+	.private	= tsk,							\
+	.func		= default_wake_function,				\
 	.task_list	= { NULL, NULL } }
 
-#define DECLARE_WAITQUEUE(name, tsk)					\
+#define DECLARE_WAITQUEUE(name, tsk)						\
 	struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)
 
-#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
-	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {					\
+	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),			\
 	.task_list	= { &(name).task_list, &(name).task_list } }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
-#define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)					\
 	{ .flags = word, .bit_nr = bit, }
 
-#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)				\
+#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)					\
 	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
 
 extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *);
 
-#define init_waitqueue_head(wq_head)			\
-	do {						\
-		static struct lock_class_key __key;	\
-							\
-		__init_waitqueue_head((wq_head), #wq_head, &__key);	\
+#define init_waitqueue_head(wq_head)						\
+	do {									\
+		static struct lock_class_key __key;				\
+										\
+		__init_waitqueue_head((wq_head), #wq_head, &__key);		\
 	} while (0)
 
 #ifdef CONFIG_LOCKDEP
@@ -122,13 +122,13 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f
  *      CPU0 - waker                    CPU1 - waiter
  *
  *                                      for (;;) {
- *      @cond = true;                     prepare_to_wait(&wq, &wait, state);
+ *      @cond = true;                     prepare_to_wait(&wq_head, &wait, state);
  *      smp_mb();                         // smp_mb() from set_current_state()
- *      if (waitqueue_active(wq))         if (@cond)
- *        wake_up(wq);                      break;
+ *      if (waitqueue_active(wq_head))         if (@cond)
+ *        wake_up(wq_head);                      break;
  *                                        schedule();
  *                                      }
- *                                      finish_wait(&wq, &wait);
+ *                                      finish_wait(&wq_head, &wait);
  *
  * Because without the explicit smp_mb() it's possible for the
  * waitqueue_active() load to get hoisted over the @cond store such that we'll
@@ -144,9 +144,9 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head)
 
 /**
  * wq_has_sleeper - check if there are any waiting processes
- * @wq: wait queue head
+ * @wq_head: wait queue head
  *
- * Returns true if wq has waiting processes
+ * Returns true if wq_head has waiting processes
  *
  * Please refer to the comment for waitqueue_active.
  */
@@ -231,26 +231,26 @@ struct wait_queue_head *bit_waitqueue(void *word, int bit);
 /*
  * Wakeup macros to be used to report events to the targets.
  */
-#define wake_up_poll(x, m)						\
+#define wake_up_poll(x, m)							\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m)					\
+#define wake_up_locked_poll(x, m)						\
 	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m)				\
+#define wake_up_interruptible_poll(x, m)					\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
-#define wake_up_interruptible_sync_poll(x, m)				\
+#define wake_up_interruptible_sync_poll(x, m)					\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
-#define ___wait_cond_timeout(condition)					\
-({									\
-	bool __cond = (condition);					\
-	if (__cond && !__ret)						\
-		__ret = 1;						\
-	__cond || !__ret;						\
+#define ___wait_cond_timeout(condition)						\
+({										\
+	bool __cond = (condition);						\
+	if (__cond && !__ret)							\
+		__ret = 1;							\
+	__cond || !__ret;							\
 })
 
-#define ___wait_is_interruptible(state)					\
-	(!__builtin_constant_p(state) ||				\
-		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
+#define ___wait_is_interruptible(state)						\
+	(!__builtin_constant_p(state) ||					\
+		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)		\
 
 extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 
@@ -266,108 +266,108 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
  * otherwise.
  */
 
-#define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
-({									\
-	__label__ __out;						\
-	struct wait_queue_entry __wq_entry;				\
-	long __ret = ret;	/* explicit shadow */			\
-									\
-	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);\
-	for (;;) {							\
-		long __int = prepare_to_wait_event(&wq, &__wq_entry, state);\
-									\
-		if (condition)						\
-			break;						\
-									\
-		if (___wait_is_interruptible(state) && __int) {		\
-			__ret = __int;					\
-			goto __out;					\
-		}							\
-									\
-		cmd;							\
-	}								\
-	finish_wait(&wq, &__wq_entry);					\
-__out:	__ret;								\
+#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)		\
+({										\
+	__label__ __out;							\
+	struct wait_queue_entry __wq_entry;					\
+	long __ret = ret;	/* explicit shadow */				\
+										\
+	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
+	for (;;) {								\
+		long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
+										\
+		if (condition)							\
+			break;							\
+										\
+		if (___wait_is_interruptible(state) && __int) {			\
+			__ret = __int;						\
+			goto __out;						\
+		}								\
+										\
+		cmd;								\
+	}									\
+	finish_wait(&wq_head, &__wq_entry);					\
+__out:	__ret;									\
 })
 
-#define __wait_event(wq, condition)					\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __wait_event(wq_head, condition)					\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    schedule())
 
 /**
  * wait_event - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event(wq, condition)					\
-do {									\
-	might_sleep();							\
-	if (condition)							\
-		break;							\
-	__wait_event(wq, condition);					\
+#define wait_event(wq_head, condition)						\
+do {										\
+	might_sleep();								\
+	if (condition)								\
+		break;								\
+	__wait_event(wq_head, condition);					\
 } while (0)
 
-#define __io_wait_event(wq, condition)					\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __io_wait_event(wq_head, condition)					\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    io_schedule())
 
 /*
  * io_wait_event() -- like wait_event() but with io_schedule()
  */
-#define io_wait_event(wq, condition)					\
-do {									\
-	might_sleep();							\
-	if (condition)							\
-		break;							\
-	__io_wait_event(wq, condition);					\
+#define io_wait_event(wq_head, condition)					\
+do {										\
+	might_sleep();								\
+	if (condition)								\
+		break;								\
+	__io_wait_event(wq_head, condition);					\
 } while (0)
 
-#define __wait_event_freezable(wq, condition)				\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+#define __wait_event_freezable(wq_head, condition)				\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			    schedule(); try_to_freeze())
 
 /**
  * wait_event_freezable - sleep (or freeze) until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
  * to system load) until the @condition evaluates to true. The
- * @condition is checked each time the waitqueue @wq is woken up.
+ * @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event_freezable(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_freezable(wq, condition);		\
-	__ret;								\
+#define wait_event_freezable(wq_head, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_freezable(wq_head, condition);		\
+	__ret;									\
 })
 
-#define __wait_event_timeout(wq, condition, timeout)			\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_UNINTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_timeout(wq_head, condition, timeout)			\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_UNINTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, in jiffies
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -378,83 +378,83 @@ do {									\
  * or the remaining jiffies (at least 1) if the @condition evaluated
  * to %true before the @timeout elapsed.
  */
-#define wait_event_timeout(wq, condition, timeout)			\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_timeout(wq, condition, timeout);	\
-	__ret;								\
+#define wait_event_timeout(wq_head, condition, timeout)				\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_timeout(wq_head, condition, timeout);	\
+	__ret;									\
 })
 
-#define __wait_event_freezable_timeout(wq, condition, timeout)		\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_freezable_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret); try_to_freeze())
 
 /*
  * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
  * increasing load and is freezable.
  */
-#define wait_event_freezable_timeout(wq, condition, timeout)		\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_freezable_timeout(wq, condition, timeout);	\
-	__ret;								\
+#define wait_event_freezable_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_freezable_timeout(wq_head, condition, timeout); \
+	__ret;									\
 })
 
-#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
+#define __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2)		\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
 			    cmd1; schedule(); cmd2)
 /*
  * Just like wait_event_cmd(), except it sets exclusive flag
  */
-#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_exclusive_cmd(wq, condition, cmd1, cmd2);		\
+#define wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2)		\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2);		\
 } while (0)
 
-#define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __wait_event_cmd(wq_head, condition, cmd1, cmd2)			\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    cmd1; schedule(); cmd2)
 
 /**
  * wait_event_cmd - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @cmd1: the command will be executed before sleep
  * @cmd2: the command will be executed after sleep
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event_cmd(wq, condition, cmd1, cmd2)			\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_cmd(wq, condition, cmd1, cmd2);			\
+#define wait_event_cmd(wq_head, condition, cmd1, cmd2)				\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_cmd(wq_head, condition, cmd1, cmd2);			\
 } while (0)
 
-#define __wait_event_interruptible(wq, condition)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+#define __wait_event_interruptible(wq_head, condition)				\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
 
 /**
  * wait_event_interruptible - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -462,29 +462,29 @@ do {									\
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible(wq, condition);	\
-	__ret;								\
+#define wait_event_interruptible(wq_head, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible(wq_head, condition);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_timeout(wq, condition, timeout)	\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_interruptible_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, in jiffies
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -496,50 +496,49 @@ do {									\
  * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
  * interrupted by a signal.
  */
-#define wait_event_interruptible_timeout(wq, condition, timeout)	\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_interruptible_timeout(wq,		\
-						condition, timeout);	\
-	__ret;								\
+#define wait_event_interruptible_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_interruptible_timeout(wq_head,		\
+						condition, timeout);		\
+	__ret;									\
 })
 
-#define __wait_event_hrtimeout(wq, condition, timeout, state)		\
-({									\
-	int __ret = 0;							\
-	struct hrtimer_sleeper __t;					\
-									\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
-			      HRTIMER_MODE_REL);			\
-	hrtimer_init_sleeper(&__t, current);				\
-	if ((timeout) != KTIME_MAX)				\
-		hrtimer_start_range_ns(&__t.timer, timeout,		\
-				       current->timer_slack_ns,		\
-				       HRTIMER_MODE_REL);		\
-									\
-	__ret = ___wait_event(wq, condition, state, 0, 0,		\
-		if (!__t.task) {					\
-			__ret = -ETIME;					\
-			break;						\
-		}							\
-		schedule());						\
-									\
-	hrtimer_cancel(&__t.timer);					\
-	destroy_hrtimer_on_stack(&__t.timer);				\
-	__ret;								\
+#define __wait_event_hrtimeout(wq_head, condition, timeout, state)		\
+({										\
+	int __ret = 0;								\
+	struct hrtimer_sleeper __t;						\
+										\
+	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
+	hrtimer_init_sleeper(&__t, current);					\
+	if ((timeout) != KTIME_MAX)						\
+		hrtimer_start_range_ns(&__t.timer, timeout,			\
+				       current->timer_slack_ns,			\
+				       HRTIMER_MODE_REL);			\
+										\
+	__ret = ___wait_event(wq_head, condition, state, 0, 0,			\
+		if (!__t.task) {						\
+			__ret = -ETIME;						\
+			break;							\
+		}								\
+		schedule());							\
+										\
+	hrtimer_cancel(&__t.timer);						\
+	destroy_hrtimer_on_stack(&__t.timer);					\
+	__ret;									\
 })
 
 /**
  * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -547,25 +546,25 @@ do {									\
  * The function returns 0 if @condition became true, or -ETIME if the timeout
  * elapsed.
  */
-#define wait_event_hrtimeout(wq, condition, timeout)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
-					       TASK_UNINTERRUPTIBLE);	\
-	__ret;								\
+#define wait_event_hrtimeout(wq_head, condition, timeout)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_hrtimeout(wq_head, condition, timeout,	\
+					       TASK_UNINTERRUPTIBLE);		\
+	__ret;									\
 })
 
 /**
  * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -573,73 +572,73 @@ do {									\
  * The function returns 0 if @condition became true, -ERESTARTSYS if it was
  * interrupted by a signal, or -ETIME if the timeout elapsed.
  */
-#define wait_event_interruptible_hrtimeout(wq, condition, timeout)	\
-({									\
-	long __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
-					       TASK_INTERRUPTIBLE);	\
-	__ret;								\
+#define wait_event_interruptible_hrtimeout(wq, condition, timeout)		\
+({										\
+	long __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_hrtimeout(wq, condition, timeout,		\
+					       TASK_INTERRUPTIBLE);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_exclusive(wq, condition)		\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+#define __wait_event_interruptible_exclusive(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
 		      schedule())
 
-#define wait_event_interruptible_exclusive(wq, condition)		\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_exclusive(wq, condition);\
-	__ret;								\
+#define wait_event_interruptible_exclusive(wq, condition)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_exclusive(wq, condition);	\
+	__ret;									\
 })
 
-#define __wait_event_killable_exclusive(wq, condition)			\
-	___wait_event(wq, condition, TASK_KILLABLE, 1, 0,		\
+#define __wait_event_killable_exclusive(wq, condition)				\
+	___wait_event(wq, condition, TASK_KILLABLE, 1, 0,			\
 		      schedule())
 
-#define wait_event_killable_exclusive(wq, condition)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_killable_exclusive(wq, condition);	\
-	__ret;								\
+#define wait_event_killable_exclusive(wq, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_killable_exclusive(wq, condition);		\
+	__ret;									\
 })
 
 
-#define __wait_event_freezable_exclusive(wq, condition)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+#define __wait_event_freezable_exclusive(wq, condition)				\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
 			schedule(); try_to_freeze())
 
-#define wait_event_freezable_exclusive(wq, condition)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_freezable_exclusive(wq, condition);\
-	__ret;								\
+#define wait_event_freezable_exclusive(wq, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_freezable_exclusive(wq, condition);	\
+	__ret;									\
 })
 
 extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *);
 extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 
-#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
-({									\
-	int __ret;							\
-	DEFINE_WAIT(__wait);						\
-	if (exclusive)							\
-		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
-	do {								\
-		__ret = fn(&(wq), &__wait);				\
-		if (__ret)						\
-			break;						\
-	} while (!(condition));						\
-	__remove_wait_queue(&(wq), &__wait);				\
-	__set_current_state(TASK_RUNNING);				\
-	__ret;								\
+#define __wait_event_interruptible_locked(wq, condition, exclusive, fn)		\
+({										\
+	int __ret;								\
+	DEFINE_WAIT(__wait);							\
+	if (exclusive)								\
+		__wait.flags |= WQ_FLAG_EXCLUSIVE;				\
+	do {									\
+		__ret = fn(&(wq), &__wait);					\
+		if (__ret)							\
+			break;							\
+	} while (!(condition));							\
+	__remove_wait_queue(&(wq), &__wait);					\
+	__set_current_state(TASK_RUNNING);					\
+	__ret;									\
 })
 
 
@@ -666,8 +665,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_locked(wq, condition)			\
-	((condition)							\
+#define wait_event_interruptible_locked(wq, condition)				\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr))
 
 /**
@@ -693,8 +692,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_locked_irq(wq, condition)		\
-	((condition)							\
+#define wait_event_interruptible_locked_irq(wq, condition)			\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq))
 
 /**
@@ -724,8 +723,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_exclusive_locked(wq, condition)	\
-	((condition)							\
+#define wait_event_interruptible_exclusive_locked(wq, condition)		\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr))
 
 /**
@@ -755,12 +754,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
-	((condition)							\
+#define wait_event_interruptible_exclusive_locked_irq(wq, condition)		\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq))
 
 
-#define __wait_event_killable(wq, condition)				\
+#define __wait_event_killable(wq, condition)					\
 	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
 /**
@@ -778,21 +777,21 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_killable(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_killable(wq, condition);		\
-	__ret;								\
+#define wait_event_killable(wq_head, condition)					\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_killable(wq_head, condition);		\
+	__ret;									\
 })
 
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
-			    spin_unlock_irq(&lock);			\
-			    cmd;					\
-			    schedule();					\
+#define __wait_event_lock_irq(wq_head, condition, lock, cmd)			\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    spin_unlock_irq(&lock);				\
+			    cmd;						\
+			    schedule();						\
 			    spin_lock_irq(&lock))
 
 /**
@@ -800,7 +799,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  *			     condition is checked under the lock. This
  *			     is expected to be called with the lock
  *			     taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before cmd
  *	  and schedule() and reacquired afterwards.
@@ -809,7 +808,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -818,11 +817,11 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
  * dropped before invoking the cmd and going to sleep and is reacquired
  * afterwards.
  */
-#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+#define wait_event_lock_irq_cmd(wq_head, condition, lock, cmd)			\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_lock_irq(wq_head, condition, lock, cmd);			\
 } while (0)
 
 /**
@@ -830,14 +829,14 @@ do {									\
  *			 condition is checked under the lock. This
  *			 is expected to be called with the lock
  *			 taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -845,26 +844,26 @@ do {									\
  * This is supposed to be called while holding the lock. The lock is
  * dropped before going to sleep and is reacquired afterwards.
  */
-#define wait_event_lock_irq(wq, condition, lock)			\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, );			\
+#define wait_event_lock_irq(wq_head, condition, lock)				\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_lock_irq(wq_head, condition, lock, );			\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
-		      spin_unlock_irq(&lock);				\
-		      cmd;						\
-		      schedule();					\
+#define __wait_event_interruptible_lock_irq(wq_head, condition, lock, cmd)	\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+		      spin_unlock_irq(&lock);					\
+		      cmd;							\
+		      schedule();						\
 		      spin_lock_irq(&lock))
 
 /**
  * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
  *		The condition is checked under the lock. This is expected to
  *		be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before cmd and
  *	  schedule() and reacquired afterwards.
@@ -873,7 +872,7 @@ do {									\
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -885,27 +884,27 @@ do {									\
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
  * and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
-({									\
-	int __ret = 0;							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock, cmd);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq_cmd(wq_head, condition, lock, cmd)	\
+({										\
+	int __ret = 0;								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_lock_irq(wq_head,		\
+						condition, lock, cmd);		\
+	__ret;									\
 })
 
 /**
  * wait_event_interruptible_lock_irq - sleep until a condition gets true.
  *		The condition is checked under the lock. This is expected
  *		to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -916,28 +915,28 @@ do {									\
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
  * and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
-({									\
-	int __ret = 0;							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock,);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq(wq_head, condition, lock)		\
+({										\
+	int __ret = 0;								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_lock_irq(wq_head,		\
+						condition, lock,);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
-						    lock, timeout)	\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
-		      spin_unlock_irq(&lock);				\
-		      __ret = schedule_timeout(__ret);			\
+#define __wait_event_interruptible_lock_irq_timeout(wq_head, condition,		\
+						    lock, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
+		      spin_unlock_irq(&lock);					\
+		      __ret = schedule_timeout(__ret);				\
 		      spin_lock_irq(&lock));
 
 /**
  * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
  *		true or a timeout elapses. The condition is checked under
  *		the lock. This is expected to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
@@ -945,7 +944,7 @@ do {									\
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -957,14 +956,14 @@ do {									\
  * was interrupted by a signal, and the remaining jiffies otherwise
  * if the condition evaluated to true before the timeout elapsed.
  */
-#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock,	\
-						  timeout)		\
-({									\
-	long __ret = timeout;						\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_interruptible_lock_irq_timeout(	\
-					wq, condition, lock, timeout);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq_timeout(wq_head, condition, lock,	\
+						  timeout)			\
+({										\
+	long __ret = timeout;							\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_interruptible_lock_irq_timeout(		\
+					wq_head, condition, lock, timeout);	\
+	__ret;									\
 })
 
 /*
@@ -979,32 +978,32 @@ int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sy
 int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 
-#define DEFINE_WAIT_FUNC(name, function)				\
-	struct wait_queue_entry name = {				\
-		.private	= current,				\
-		.func		= function,				\
-		.task_list	= LIST_HEAD_INIT((name).task_list),	\
+#define DEFINE_WAIT_FUNC(name, function)					\
+	struct wait_queue_entry name = {					\
+		.private	= current,					\
+		.func		= function,					\
+		.task_list	= LIST_HEAD_INIT((name).task_list),		\
 	}
 
 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 
-#define DEFINE_WAIT_BIT(name, word, bit)				\
-	struct wait_bit_queue_entry name = {				\
-		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
-		.wq_entry = {						\
-			.private	= current,			\
-			.func		= wake_bit_function,		\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wq_entry.task_list), \
-		},							\
+#define DEFINE_WAIT_BIT(name, word, bit)					\
+	struct wait_bit_queue_entry name = {					\
+		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),			\
+		.wq_entry = {							\
+			.private	= current,				\
+			.func		= wake_bit_function,			\
+			.task_list	=					\
+				LIST_HEAD_INIT((name).wq_entry.task_list),	\
+		},								\
 	}
 
-#define init_wait(wait)							\
-	do {								\
-		(wait)->private = current;				\
-		(wait)->func = autoremove_wake_function;		\
-		INIT_LIST_HEAD(&(wait)->task_list);			\
-		(wait)->flags = 0;					\
+#define init_wait(wait)								\
+	do {									\
+		(wait)->private = current;					\
+		(wait)->func = autoremove_wake_function;			\
+		INIT_LIST_HEAD(&(wait)->task_list);				\
+		(wait)->flags = 0;						\
 	} while (0)
 
 
-- 
cgit v1.2.3


From 5dd43ce2f69d42a71dcacdb13d17d8c0ac1fe8f7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 20 Jun 2017 12:19:09 +0200
Subject: sched/wait: Split out the wait_bit*() APIs from <linux/wait.h> into
 <linux/wait_bit.h>

The wait_bit*() types and APIs are mixed into wait.h, but they
are a pretty orthogonal extension of wait-queues.

Furthermore, only about 50 kernel files use these APIs, while
over 1000 use the regular wait-queue functionality.

So clean up the main wait.h by moving the wait-bit functionality
out of it, into a separate .h and .c file:

  include/linux/wait_bit.h  for types and APIs
  kernel/sched/wait_bit.c   for the implementation

Update all header dependencies.

This reduces the size of wait.h rather significantly, by about 30%.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/cachefiles/internal.h     |   2 +-
 fs/cifs/inode.c              |   1 +
 fs/nfs/internal.h            |   1 +
 include/linux/fs.h           |   2 +-
 include/linux/sunrpc/sched.h |   2 +-
 include/linux/wait.h         | 250 ----------------------------------------
 include/linux/wait_bit.h     | 260 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/Makefile        |   2 +-
 kernel/sched/wait.c          | 257 ------------------------------------------
 kernel/sched/wait_bit.c      | 263 +++++++++++++++++++++++++++++++++++++++++++
 security/keys/internal.h     |   1 +
 11 files changed, 530 insertions(+), 511 deletions(-)
 create mode 100644 include/linux/wait_bit.h
 create mode 100644 kernel/sched/wait_bit.c

(limited to 'include/linux')

diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 54a4fcd679ed..bb3a02ca9da4 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -18,7 +18,7 @@
 
 #include <linux/fscache-cache.h>
 #include <linux/timer.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/cred.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4d1fcd76d022..a8693632235f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/pagemap.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/wait_bit.h>
 
 #include <asm/div64.h>
 #include "cifsfs.h"
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3e24392f2caa..8701d7617964 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -7,6 +7,7 @@
 #include <linux/security.h>
 #include <linux/crc32.h>
 #include <linux/nfs_page.h>
+#include <linux/wait_bit.h>
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..53f7e49d8fe5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,7 +2,7 @@
 #define _LINUX_FS_H
 
 #include <linux/linkage.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/kdev_t.h>
 #include <linux/dcache.h>
 #include <linux/path.h>
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec..9d7529ffc4ce 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -13,7 +13,7 @@
 #include <linux/ktime.h>
 #include <linux/sunrpc/types.h>
 #include <linux/spinlock.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/xdr.h>
 
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 0805098f3589..629489746f8a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -29,18 +29,6 @@ struct wait_queue_entry {
 	struct list_head	task_list;
 };
 
-struct wait_bit_key {
-	void			*flags;
-	int			bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR	-1
-	unsigned long		timeout;
-};
-
-struct wait_bit_queue_entry {
-	struct wait_bit_key	key;
-	struct wait_queue_entry	wq_entry;
-};
-
 struct wait_queue_head {
 	spinlock_t		lock;
 	struct list_head	task_list;
@@ -68,12 +56,6 @@ struct task_struct;
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
-#define __WAIT_BIT_KEY_INITIALIZER(word, bit)					\
-	{ .flags = word, .bit_nr = bit, }
-
-#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)					\
-	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
-
 extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *);
 
 #define init_waitqueue_head(wq_head)						\
@@ -200,22 +182,11 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 	list_del(&wq_entry->task_list);
 }
 
-typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
-void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
-int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
-int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
-void wake_up_bit(void *word, int bit);
-void wake_up_atomic_t(atomic_t *p);
-int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
-int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
-int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
-int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode);
-struct wait_queue_head *bit_waitqueue(void *word, int bit);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -976,7 +947,6 @@ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_en
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
 int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
-int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 
 #define DEFINE_WAIT_FUNC(name, function)					\
 	struct wait_queue_entry name = {					\
@@ -987,17 +957,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 
 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 
-#define DEFINE_WAIT_BIT(name, word, bit)					\
-	struct wait_bit_queue_entry name = {					\
-		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),			\
-		.wq_entry = {							\
-			.private	= current,				\
-			.func		= wake_bit_function,			\
-			.task_list	=					\
-				LIST_HEAD_INIT((name).wq_entry.task_list),	\
-		},								\
-	}
-
 #define init_wait(wait)								\
 	do {									\
 		(wait)->private = current;					\
@@ -1006,213 +965,4 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 		(wait)->flags = 0;						\
 	} while (0)
 
-
-extern int bit_wait(struct wait_bit_key *key, int bit);
-extern int bit_wait_io(struct wait_bit_key *key, int bit);
-extern int bit_wait_timeout(struct wait_bit_key *key, int bit);
-extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit);
-
-/**
- * wait_on_bit - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit.
- * For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
- * One uses wait_on_bit() where one is waiting for the bit to clear,
- * but has no intention of setting it.
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit,
-				       bit_wait,
-				       mode);
-}
-
-/**
- * wait_on_bit_io - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared.  This is similar to wait_on_bit(), but calls
- * io_schedule() instead of schedule() for the actual waiting.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit,
-				       bit_wait_io,
-				       mode);
-}
-
-/**
- * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- * @timeout: timeout, in jiffies
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), except also takes a
- * timeout parameter.
- *
- * Returned value will be zero if the bit was cleared before the
- * @timeout elapsed, or non-zero if the @timeout elapsed or process
- * received a signal and the mode permitted wakeup on that signal.
- */
-static inline int
-wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
-		    unsigned long timeout)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_timeout(word, bit,
-					       bit_wait_timeout,
-					       mode, timeout);
-}
-
-/**
- * wait_on_bit_action - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
-		   unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit, action, mode);
-}
-
-/**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit
- * when one intends to set it, for instance, trying to lock bitflags.
- * For instance, if one were to have waiters trying to set bitflag
- * and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
- * One uses wait_on_bit_lock() where one is waiting for the bit to
- * clear with the intention of setting it, and when done, clearing it.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
-}
-
-/**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to atomically set it.  This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
- * for the actual waiting.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
-}
-
-/**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to set it, and allow the waiting action
- * to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
-			unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
-}
-
-/**
- * wait_on_atomic_t - Wait for an atomic_t to become 0
- * @val: The atomic value being waited on, a kernel virtual address
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Wait for an atomic_t to become 0.  We abuse the bit-wait waitqueue table for
- * the purpose of getting a waitqueue, but we set the key to a bit number
- * outside of the target 'word'.
- */
-static inline
-int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
-{
-	might_sleep();
-	if (atomic_read(val) == 0)
-		return 0;
-	return out_of_line_wait_on_atomic_t(val, action, mode);
-}
-
 #endif /* _LINUX_WAIT_H */
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
new file mode 100644
index 000000000000..8c85c52d94b6
--- /dev/null
+++ b/include/linux/wait_bit.h
@@ -0,0 +1,260 @@
+#ifndef _LINUX_WAIT_BIT_H
+#define _LINUX_WAIT_BIT_H
+
+/*
+ * Linux wait-bit related types and methods:
+ */
+#include <linux/wait.h>
+
+struct wait_bit_key {
+	void			*flags;
+	int			bit_nr;
+#define WAIT_ATOMIC_T_BIT_NR	-1
+	unsigned long		timeout;
+};
+
+struct wait_bit_queue_entry {
+	struct wait_bit_key	key;
+	struct wait_queue_entry	wq_entry;
+};
+
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)					\
+	{ .flags = word, .bit_nr = bit, }
+
+#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)					\
+	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
+
+typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+void wake_up_bit(void *word, int bit);
+void wake_up_atomic_t(atomic_t *p);
+int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode);
+struct wait_queue_head *bit_waitqueue(void *word, int bit);
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+
+#define DEFINE_WAIT_BIT(name, word, bit)					\
+	struct wait_bit_queue_entry name = {					\
+		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),			\
+		.wq_entry = {							\
+			.private	= current,				\
+			.func		= wake_bit_function,			\
+			.task_list	=					\
+				LIST_HEAD_INIT((name).wq_entry.task_list),	\
+		},								\
+	}
+
+extern int bit_wait(struct wait_bit_key *key, int bit);
+extern int bit_wait_io(struct wait_bit_key *key, int bit);
+extern int bit_wait_timeout(struct wait_bit_key *key, int bit);
+extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit);
+
+/**
+ * wait_on_bit - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that waits on a bit.
+ * For instance, if one were to have waiters on a bitflag, one would
+ * call wait_on_bit() in threads waiting for the bit to clear.
+ * One uses wait_on_bit() where one is waiting for the bit to clear,
+ * but has no intention of setting it.
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit,
+				       bit_wait,
+				       mode);
+}
+
+/**
+ * wait_on_bit_io - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared.  This is similar to wait_on_bit(), but calls
+ * io_schedule() instead of schedule() for the actual waiting.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit,
+				       bit_wait_io,
+				       mode);
+}
+
+/**
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ * @timeout: timeout, in jiffies
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared. This is similar to wait_on_bit(), except also takes a
+ * timeout parameter.
+ *
+ * Returned value will be zero if the bit was cleared before the
+ * @timeout elapsed, or non-zero if the @timeout elapsed or process
+ * received a signal and the mode permitted wakeup on that signal.
+ */
+static inline int
+wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
+		    unsigned long timeout)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_timeout(word, bit,
+					       bit_wait_timeout,
+					       mode, timeout);
+}
+
+/**
+ * wait_on_bit_action - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared, and allow the waiting action to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
+		   unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit, action, mode);
+}
+
+/**
+ * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that waits on a bit
+ * when one intends to set it, for instance, trying to lock bitflags.
+ * For instance, if one were to have waiters trying to set bitflag
+ * and waiting for it to clear before setting it, one would call
+ * wait_on_bit() in threads waiting to be able to set the bit.
+ * One uses wait_on_bit_lock() where one is waiting for the bit to
+ * clear with the intention of setting it, and when done, clearing it.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
+}
+
+/**
+ * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to atomically set it.  This is similar
+ * to wait_on_bit(), but calls io_schedule() instead of schedule()
+ * for the actual waiting.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
+}
+
+/**
+ * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to set it, and allow the waiting action
+ * to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
+			unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
+}
+
+/**
+ * wait_on_atomic_t - Wait for an atomic_t to become 0
+ * @val: The atomic value being waited on, a kernel virtual address
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Wait for an atomic_t to become 0.  We abuse the bit-wait waitqueue table for
+ * the purpose of getting a waitqueue, but we set the key to a bit number
+ * outside of the target 'word'.
+ */
+static inline
+int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
+{
+	might_sleep();
+	if (atomic_read(val) == 0)
+		return 0;
+	return out_of_line_wait_on_atomic_t(val, action, mode);
+}
+
+#endif /* _LINUX_WAIT_BIT_H */
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 89ab6758667b..16277e2ed8ee 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -17,7 +17,7 @@ endif
 
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
-obj-y += wait.o swait.o completion.o idle.o
+obj-y += wait.o wait_bit.o swait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 95e6d3820cba..6bcd7c3c4501 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -390,260 +390,3 @@ int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sy
 	return default_wake_function(wq_entry, mode, sync, key);
 }
 EXPORT_SYMBOL(woken_wake_function);
-
-int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
-
-	if (wait_bit->key.flags != key->flags ||
-			wait_bit->key.bit_nr != key->bit_nr ||
-			test_bit(key->bit_nr, key->flags))
-		return 0;
-	else
-		return autoremove_wake_function(wq_entry, mode, sync, key);
-}
-EXPORT_SYMBOL(wake_bit_function);
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking)
- * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
- * permitted return codes. Nonzero return codes halt waiting and return.
- */
-int __sched
-__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
-	      wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
-		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
-			ret = (*action)(&wbq_entry->key, mode);
-	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
-	finish_wait(wq_head, &wbq_entry->wq_entry);
-	return ret;
-}
-EXPORT_SYMBOL(__wait_on_bit);
-
-int __sched out_of_line_wait_on_bit(void *word, int bit,
-				    wait_bit_action_f *action, unsigned mode)
-{
-	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wq_entry, word, bit);
-
-	return __wait_on_bit(wq_head, &wq_entry, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit);
-
-int __sched out_of_line_wait_on_bit_timeout(
-	void *word, int bit, wait_bit_action_f *action,
-	unsigned mode, unsigned long timeout)
-{
-	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wq_entry, word, bit);
-
-	wq_entry.key.timeout = jiffies + timeout;
-	return __wait_on_bit(wq_head, &wq_entry, action, mode);
-}
-EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
-
-int __sched
-__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
-			wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	for (;;) {
-		prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
-		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
-			ret = action(&wbq_entry->key, mode);
-			/*
-			 * See the comment in prepare_to_wait_event().
-			 * finish_wait() does not necessarily takes wwq_head->lock,
-			 * but test_and_set_bit() implies mb() which pairs with
-			 * smp_mb__after_atomic() before wake_up_page().
-			 */
-			if (ret)
-				finish_wait(wq_head, &wbq_entry->wq_entry);
-		}
-		if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
-			if (!ret)
-				finish_wait(wq_head, &wbq_entry->wq_entry);
-			return 0;
-		} else if (ret) {
-			return ret;
-		}
-	}
-}
-EXPORT_SYMBOL(__wait_on_bit_lock);
-
-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
-					 wait_bit_action_f *action, unsigned mode)
-{
-	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wq_entry, word, bit);
-
-	return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
-
-void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
-{
-	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
-	if (waitqueue_active(wq_head))
-		__wake_up(wq_head, TASK_NORMAL, 1, &key);
-}
-EXPORT_SYMBOL(__wake_up_bit);
-
-/**
- * wake_up_bit - wake up a waiter on a bit
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that wakes up waiters
- * on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
- *
- * In order for this to function properly, as it uses waitqueue_active()
- * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_atomic(), but in some
- * cases where bitflags are manipulated non-atomically under a lock, one
- * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
- * because spin_unlock() does not guarantee a memory barrier.
- */
-void wake_up_bit(void *word, int bit)
-{
-	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
-}
-EXPORT_SYMBOL(wake_up_bit);
-
-/*
- * Manipulate the atomic_t address to produce a better bit waitqueue table hash
- * index (we're keying off bit -1, but that would produce a horrible hash
- * value).
- */
-static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
-{
-	if (BITS_PER_LONG == 64) {
-		unsigned long q = (unsigned long)p;
-		return bit_waitqueue((void *)(q & ~1), q & 1);
-	}
-	return bit_waitqueue(p, 0);
-}
-
-static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
-				  void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
-	atomic_t *val = key->flags;
-
-	if (wait_bit->key.flags != key->flags ||
-	    wait_bit->key.bit_nr != key->bit_nr ||
-	    atomic_read(val) != 0)
-		return 0;
-	return autoremove_wake_function(wq_entry, mode, sync, key);
-}
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
- * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
- * return codes halt waiting and return.
- */
-static __sched
-int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
-		       int (*action)(atomic_t *), unsigned mode)
-{
-	atomic_t *val;
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
-		val = wbq_entry->key.flags;
-		if (atomic_read(val) == 0)
-			break;
-		ret = (*action)(val);
-	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq_head, &wbq_entry->wq_entry);
-	return ret;
-}
-
-#define DEFINE_WAIT_ATOMIC_T(name, p)					\
-	struct wait_bit_queue_entry name = {				\
-		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
-		.wq_entry = {						\
-			.private	= current,			\
-			.func		= wake_atomic_t_function,	\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wq_entry.task_list), \
-		},							\
-	}
-
-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
-					 unsigned mode)
-{
-	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
-	DEFINE_WAIT_ATOMIC_T(wq_entry, p);
-
-	return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
-
-/**
- * wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @p: The atomic_t being waited on, a kernel virtual address
- *
- * Wake up anyone waiting for the atomic_t to go to zero.
- *
- * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
- * check is done by the waiter's wake function, not the by the waker itself).
- */
-void wake_up_atomic_t(atomic_t *p)
-{
-	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
-}
-EXPORT_SYMBOL(wake_up_atomic_t);
-
-__sched int bit_wait(struct wait_bit_key *word, int mode)
-{
-	schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait);
-
-__sched int bit_wait_io(struct wait_bit_key *word, int mode)
-{
-	io_schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait_io);
-
-__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_timeout);
-
-__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	io_schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
new file mode 100644
index 000000000000..463bac84dfd1
--- /dev/null
+++ b/kernel/sched/wait_bit.c
@@ -0,0 +1,263 @@
+/*
+ * The implementation of the wait_bit*() and related waiting APIs:
+ */
+#include <linux/wait_bit.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
+ * permitted return codes. Nonzero return codes halt waiting and return.
+ */
+int __sched
+__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+	      wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
+			ret = (*action)(&wbq_entry->key, mode);
+	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched out_of_line_wait_on_bit(void *word, int bit,
+				    wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched out_of_line_wait_on_bit_timeout(
+	void *word, int bit, wait_bit_action_f *action,
+	unsigned mode, unsigned long timeout)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	wq_entry.key.timeout = jiffies + timeout;
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
+
+int __sched
+__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+			wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	for (;;) {
+		prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			ret = action(&wbq_entry->key, mode);
+			/*
+			 * See the comment in prepare_to_wait_event().
+			 * finish_wait() does not necessarily takes wwq_head->lock,
+			 * but test_and_set_bit() implies mb() which pairs with
+			 * smp_mb__after_atomic() before wake_up_page().
+			 */
+			if (ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+		}
+		if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			if (!ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+			return 0;
+		} else if (ret) {
+			return ret;
+		}
+	}
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
+					 wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq_head))
+		__wake_up(wq_head, TASK_NORMAL, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_atomic(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void wake_up_bit(void *word, int bit)
+{
+	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+/*
+ * Manipulate the atomic_t address to produce a better bit waitqueue table hash
+ * index (we're keying off bit -1, but that would produce a horrible hash
+ * value).
+ */
+static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
+{
+	if (BITS_PER_LONG == 64) {
+		unsigned long q = (unsigned long)p;
+		return bit_waitqueue((void *)(q & ~1), q & 1);
+	}
+	return bit_waitqueue(p, 0);
+}
+
+static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
+				  void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+	atomic_t *val = key->flags;
+
+	if (wait_bit->key.flags != key->flags ||
+	    wait_bit->key.bit_nr != key->bit_nr ||
+	    atomic_read(val) != 0)
+		return 0;
+	return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
+ * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
+ * return codes halt waiting and return.
+ */
+static __sched
+int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+		       int (*action)(atomic_t *), unsigned mode)
+{
+	atomic_t *val;
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		val = wbq_entry->key.flags;
+		if (atomic_read(val) == 0)
+			break;
+		ret = (*action)(val);
+	} while (!ret && atomic_read(val) != 0);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+
+#define DEFINE_WAIT_ATOMIC_T(name, p)					\
+	struct wait_bit_queue_entry name = {				\
+		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
+		.wq_entry = {						\
+			.private	= current,			\
+			.func		= wake_atomic_t_function,	\
+			.task_list	=				\
+				LIST_HEAD_INIT((name).wq_entry.task_list), \
+		},							\
+	}
+
+__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
+					 unsigned mode)
+{
+	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
+	DEFINE_WAIT_ATOMIC_T(wq_entry, p);
+
+	return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
+
+/**
+ * wake_up_atomic_t - Wake up a waiter on a atomic_t
+ * @p: The atomic_t being waited on, a kernel virtual address
+ *
+ * Wake up anyone waiting for the atomic_t to go to zero.
+ *
+ * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
+ * check is done by the waiter's wake function, not the by the waker itself).
+ */
+void wake_up_atomic_t(atomic_t *p)
+{
+	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
+}
+EXPORT_SYMBOL(wake_up_atomic_t);
+
+__sched int bit_wait(struct wait_bit_key *word, int mode)
+{
+	schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait);
+
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
+{
+	io_schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait_io);
+
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_timeout);
+
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	io_schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index c0f8682eba69..91bc6214ae57 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -13,6 +13,7 @@
 #define _INTERNAL_H
 
 #include <linux/sched.h>
+#include <linux/wait_bit.h>
 #include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/task_work.h>
-- 
cgit v1.2.3


From 5822a454d6d22297c5fcd66264120587b2ec21cd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Mar 2017 13:09:07 +0100
Subject: sched/wait: Move bit_wait_table[] and related functionality from
 sched/core.c to sched/wait_bit.c

The key hashed waitqueue data structures and their initialization
was done in the main scheduler file for no good reason, move them
to sched/wait_bit.c instead.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait_bit.h |  1 +
 kernel/sched/core.c      | 18 ++----------------
 kernel/sched/wait_bit.c  | 23 +++++++++++++++++++++++
 3 files changed, 26 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 8c85c52d94b6..9cc82114dbcb 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -35,6 +35,7 @@ int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action,
 int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
 int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode);
 struct wait_queue_head *bit_waitqueue(void *word, int bit);
+extern void __init wait_bit_init(void);
 
 int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5b36644536ab..e7b9ef8df126 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10,6 +10,7 @@
 #include <uapi/linux/sched/types.h>
 #include <linux/sched/loadavg.h>
 #include <linux/sched/hotplug.h>
+#include <linux/wait_bit.h>
 #include <linux/cpuset.h>
 #include <linux/delayacct.h>
 #include <linux/init_task.h>
@@ -6026,28 +6027,13 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
-#define WAIT_TABLE_BITS 8
-#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
-static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
-
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-	unsigned long val = (unsigned long)word << shift | bit;
-
-	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
 void __init sched_init(void)
 {
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
 	sched_clock_init();
-
-	for (i = 0; i < WAIT_TABLE_SIZE; i++)
-		init_waitqueue_head(bit_wait_table + i);
+	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index 463bac84dfd1..c891b34e1896 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -4,6 +4,21 @@
 #include <linux/wait_bit.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
+#include <linux/hash.h>
+
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
 
 int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
 {
@@ -261,3 +276,11 @@ __sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
+
+void __init wait_bit_init(void)
+{
+	int i;
+
+	for (i = 0; i < WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(bit_wait_table + i);
+}
-- 
cgit v1.2.3


From 2055da97389a605c8a00d163d40903afbe413921 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 20 Jun 2017 12:06:46 +0200
Subject: sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list
 naming

So I've noticed a number of instances where it was not obvious from the
code whether ->task_list was for a wait-queue head or a wait-queue entry.

Furthermore, there's a number of wait-queue users where the lists are
not for 'tasks' but other entities (poll tables, etc.), in which case
the 'task_list' name is actively confusing.

To clear this all up, name the wait-queue head and entry list structure
fields unambiguously:

	struct wait_queue_head::task_list	=> ::head
	struct wait_queue_entry::task_list	=> ::entry

For example, this code:

	rqw->wait.task_list.next != &wait->task_list

... is was pretty unclear (to me) what it's doing, while now it's written this way:

	rqw->wait.head.next != &wait->entry

... which makes it pretty clear that we are iterating a list until we see the head.

Other examples are:

	list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
	list_for_each_entry(wq, &fence->wait.task_list, task_list) {

... where it's unclear (to me) what we are iterating, and during review it's
hard to tell whether it's trying to walk a wait-queue entry (which would be
a bug), while now it's written as:

	list_for_each_entry_safe(pos, next, &x->head, entry) {
	list_for_each_entry(wq, &fence->wait.head, entry) {

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 block/blk-mq.c                       |  2 +-
 block/blk-wbt.c                      |  2 +-
 block/kyber-iosched.c                |  8 ++++----
 drivers/gpu/drm/i915/i915_sw_fence.c | 21 ++++++++++-----------
 drivers/rtc/rtc-imxdi.c              |  2 +-
 fs/cachefiles/rdwr.c                 |  2 +-
 fs/eventpoll.c                       |  2 +-
 fs/fs_pin.c                          |  2 +-
 fs/nilfs2/segment.c                  |  3 +--
 fs/orangefs/orangefs-bufmap.c        |  8 ++++----
 fs/userfaultfd.c                     | 22 +++++++++++-----------
 include/linux/wait.h                 | 20 ++++++++++----------
 include/linux/wait_bit.h             |  4 ++--
 kernel/sched/wait.c                  | 24 ++++++++++++------------
 kernel/sched/wait_bit.c              |  4 ++--
 mm/filemap.c                         |  2 +-
 mm/memcontrol.c                      |  2 +-
 mm/shmem.c                           |  4 ++--
 18 files changed, 66 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a083f95e04b1..121aa1dbb192 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -933,7 +933,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int fla
 
 	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
-	list_del(&wait->task_list);
+	list_del(&wait->entry);
 	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 5f3a37c2784c..6a9a0f03a67b 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -520,7 +520,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
 	 * in line to be woken up, wait for our turn.
 	 */
 	if (waitqueue_active(&rqw->wait) &&
-	    rqw->wait.task_list.next != &wait->task_list)
+	    rqw->wait.head.next != &wait->entry)
 		return false;
 
 	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b95d6bd714c0..9bf1484365b2 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -385,7 +385,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
 		INIT_LIST_HEAD(&khd->rqs[i]);
-		INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
+		INIT_LIST_HEAD(&khd->domain_wait[i].entry);
 		atomic_set(&khd->wait_index[i], 0);
 	}
 
@@ -512,7 +512,7 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 {
 	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
 
-	list_del_init(&wait->task_list);
+	list_del_init(&wait->entry);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
 }
@@ -536,7 +536,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 	 * run when one becomes available. Note that this is serialized on
 	 * khd->lock, but we still need to be careful about the waker.
 	 */
-	if (list_empty_careful(&wait->task_list)) {
+	if (list_empty_careful(&wait->entry)) {
 		init_waitqueue_func_entry(wait, kyber_domain_wake);
 		wait->private = hctx;
 		ws = sbq_wait_ptr(domain_tokens,
@@ -736,7 +736,7 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
 	struct kyber_hctx_data *khd = hctx->sched_data;			\
 	wait_queue_entry_t *wait = &khd->domain_wait[domain];		\
 									\
-	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list));	\
+	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
 	return 0;							\
 }
 KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 8669bfa33064..380de4360b8a 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -160,31 +160,30 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 
 	/*
 	 * To prevent unbounded recursion as we traverse the graph of
-	 * i915_sw_fences, we move the task_list from this, the next ready
-	 * fence, to the tail of the original fence's task_list
+	 * i915_sw_fences, we move the entry list from this, the next ready
+	 * fence, to the tail of the original fence's entry list
 	 * (and so added to the list to be woken).
 	 */
 
 	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
 	if (continuation) {
-		list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
+		list_for_each_entry_safe(pos, next, &x->head, entry) {
 			if (pos->func == autoremove_wake_function)
 				pos->func(pos, TASK_NORMAL, 0, continuation);
 			else
-				list_move_tail(&pos->task_list, continuation);
+				list_move_tail(&pos->entry, continuation);
 		}
 	} else {
 		LIST_HEAD(extra);
 
 		do {
-			list_for_each_entry_safe(pos, next,
-						 &x->task_list, task_list)
+			list_for_each_entry_safe(pos, next, &x->head, entry)
 				pos->func(pos, TASK_NORMAL, 0, &extra);
 
 			if (list_empty(&extra))
 				break;
 
-			list_splice_tail_init(&extra, &x->task_list);
+			list_splice_tail_init(&extra, &x->head);
 		} while (1);
 	}
 	spin_unlock_irqrestore(&x->lock, flags);
@@ -256,7 +255,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence)
 
 static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key)
 {
-	list_del(&wq->task_list);
+	list_del(&wq->entry);
 	__i915_sw_fence_complete(wq->private, key);
 	i915_sw_fence_put(wq->private);
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
@@ -275,7 +274,7 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	if (fence == signaler)
 		return true;
 
-	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+	list_for_each_entry(wq, &fence->wait.head, entry) {
 		if (wq->func != i915_sw_fence_wake)
 			continue;
 
@@ -293,7 +292,7 @@ static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
 	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return;
 
-	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+	list_for_each_entry(wq, &fence->wait.head, entry) {
 		if (wq->func != i915_sw_fence_wake)
 			continue;
 
@@ -350,7 +349,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 		pending |= I915_SW_FENCE_FLAG_ALLOC;
 	}
 
-	INIT_LIST_HEAD(&wq->task_list);
+	INIT_LIST_HEAD(&wq->entry);
 	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
 	wq->private = i915_sw_fence_get(fence);
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 6b54f6c24c5f..80931114c899 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -709,7 +709,7 @@ static irqreturn_t dryice_irq(int irq, void *dev_id)
 		/*If the write wait queue is empty then there is no pending
 		  operations. It means the interrupt is for DryIce -Security.
 		  IRQ must be returned as none.*/
-		if (list_empty_careful(&imxdi->write_wait.task_list))
+		if (list_empty_careful(&imxdi->write_wait.head))
 			return rc;
 
 		/* DSR_WCF clears itself on DSR read */
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 8be33b33b981..18d7aa61ef0f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
 	}
 
 	/* remove from the waitqueue */
-	list_del(&wait->task_list);
+	list_del(&wait->entry);
 
 	/* move onto the action list and queue for FS-Cache thread pool */
 	ASSERT(monitor->op);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5ac1cba5ef72..b1c8e23ddf65 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
 		 * can't use __remove_wait_queue(). whead->lock is held by
 		 * the caller.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wait->entry);
 	}
 
 	spin_lock_irqsave(&ep->lock, flags);
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 7b447a245760..e747b3d720ee 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p)
 		rcu_read_unlock();
 		schedule();
 		rcu_read_lock();
-		if (likely(list_empty(&wait.task_list)))
+		if (likely(list_empty(&wait.entry)))
 			break;
 		/* OK, we know p couldn't have been freed yet */
 		spin_lock_irq(&p->wait.lock);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 775304e7f96f..70ded52dc1dd 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
-	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
-				 wq.task_list) {
+	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
 		if (!atomic_read(&wrq->done) &&
 		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
 			wrq->err = err;
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 9e37b7028ea4..038d67545d9f 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -46,7 +46,7 @@ static void run_down(struct slot_map *m)
 	spin_lock(&m->q.lock);
 	if (m->c != -1) {
 		for (;;) {
-			if (likely(list_empty(&wait.task_list)))
+			if (likely(list_empty(&wait.entry)))
 				__add_wait_queue_entry_tail(&m->q, &wait);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
@@ -84,7 +84,7 @@ static int wait_for_free(struct slot_map *m)
 
 	do {
 		long n = left, t;
-		if (likely(list_empty(&wait.task_list)))
+		if (likely(list_empty(&wait.entry)))
 			__add_wait_queue_entry_tail_exclusive(&m->q, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m)
 			left = -EINTR;
 	} while (left > 0);
 
-	if (!list_empty(&wait.task_list))
-		list_del(&wait.task_list);
+	if (!list_empty(&wait.entry))
+		list_del(&wait.entry);
 	else if (left <= 0 && waitqueue_active(&m->q))
 		__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
 	__set_current_state(TASK_RUNNING);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index bda64fcd8a0c..6148ccd6cccf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 		 * wouldn't be enough, the smp_mb__before_spinlock is
 		 * enough to avoid an explicit smp_mb() here.
 		 */
-		list_del_init(&wq->task_list);
+		list_del_init(&wq->entry);
 out:
 	return ret;
 }
@@ -522,13 +522,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * and it's fine not to block on the spinlock. The uwq on this
 	 * kernel stack can be released after the list_del_init.
 	 */
-	if (!list_empty_careful(&uwq.wq.task_list)) {
+	if (!list_empty_careful(&uwq.wq.entry)) {
 		spin_lock(&ctx->fault_pending_wqh.lock);
 		/*
 		 * No need of list_del_init(), the uwq on the stack
 		 * will be freed shortly anyway.
 		 */
-		list_del(&uwq.wq.task_list);
+		list_del(&uwq.wq.entry);
 		spin_unlock(&ctx->fault_pending_wqh.lock);
 	}
 
@@ -869,7 +869,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in(
 	if (!waitqueue_active(wqh))
 		goto out;
 	/* walk in reverse to provide FIFO behavior to read userfaults */
-	wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list);
+	wq = list_last_entry(&wqh->head, typeof(*wq), entry);
 	uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 out:
 	return uwq;
@@ -1003,14 +1003,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			 * changes __remove_wait_queue() to use
 			 * list_del_init() in turn breaking the
 			 * !list_empty_careful() check in
-			 * handle_userfault(). The uwq->wq.task_list
+			 * handle_userfault(). The uwq->wq.head list
 			 * must never be empty at any time during the
 			 * refile, or the waitqueue could disappear
 			 * from under us. The "wait_queue_head_t"
 			 * parameter of __remove_wait_queue() is unused
 			 * anyway.
 			 */
-			list_del(&uwq->wq.task_list);
+			list_del(&uwq->wq.entry);
 			__add_wait_queue(&ctx->fault_wqh, &uwq->wq);
 
 			write_seqcount_end(&ctx->refile_seq);
@@ -1032,7 +1032,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 				fork_nctx = (struct userfaultfd_ctx *)
 					(unsigned long)
 					uwq->msg.arg.reserved.reserved1;
-				list_move(&uwq->wq.task_list, &fork_event);
+				list_move(&uwq->wq.entry, &fork_event);
 				spin_unlock(&ctx->event_wqh.lock);
 				ret = 0;
 				break;
@@ -1069,8 +1069,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			if (!list_empty(&fork_event)) {
 				uwq = list_first_entry(&fork_event,
 						       typeof(*uwq),
-						       wq.task_list);
-				list_del(&uwq->wq.task_list);
+						       wq.entry);
+				list_del(&uwq->wq.entry);
 				__add_wait_queue(&ctx->event_wqh, &uwq->wq);
 				userfaultfd_event_complete(ctx, uwq);
 			}
@@ -1752,12 +1752,12 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	unsigned long pending = 0, total = 0;
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
-	list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) {
+	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
 		uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 		pending++;
 		total++;
 	}
-	list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) {
+	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
 		uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 		total++;
 	}
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 629489746f8a..b289c96151ee 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -26,12 +26,12 @@ struct wait_queue_entry {
 	unsigned int		flags;
 	void			*private;
 	wait_queue_func_t	func;
-	struct list_head	task_list;
+	struct list_head	entry;
 };
 
 struct wait_queue_head {
 	spinlock_t		lock;
-	struct list_head	task_list;
+	struct list_head	head;
 };
 typedef struct wait_queue_head wait_queue_head_t;
 
@@ -44,14 +44,14 @@ struct task_struct;
 #define __WAITQUEUE_INITIALIZER(name, tsk) {					\
 	.private	= tsk,							\
 	.func		= default_wake_function,				\
-	.task_list	= { NULL, NULL } }
+	.entry		= { NULL, NULL } }
 
 #define DECLARE_WAITQUEUE(name, tsk)						\
 	struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {					\
 	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),			\
-	.task_list	= { &(name).task_list, &(name).task_list } }
+	.head		= { &(name).head, &(name).head } }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
@@ -121,7 +121,7 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f
  */
 static inline int waitqueue_active(struct wait_queue_head *wq_head)
 {
-	return !list_empty(&wq_head->task_list);
+	return !list_empty(&wq_head->head);
 }
 
 /**
@@ -151,7 +151,7 @@ extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue
 
 static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add(&wq_entry->task_list, &wq_head->task_list);
+	list_add(&wq_entry->entry, &wq_head->head);
 }
 
 /*
@@ -166,7 +166,7 @@ __add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_en
 
 static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add_tail(&wq_entry->task_list, &wq_head->task_list);
+	list_add_tail(&wq_entry->entry, &wq_head->head);
 }
 
 static inline void
@@ -179,7 +179,7 @@ __add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wa
 static inline void
 __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_del(&wq_entry->task_list);
+	list_del(&wq_entry->entry);
 }
 
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
@@ -952,7 +952,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
 	struct wait_queue_entry name = {					\
 		.private	= current,					\
 		.func		= function,					\
-		.task_list	= LIST_HEAD_INIT((name).task_list),		\
+		.entry		= LIST_HEAD_INIT((name).entry),			\
 	}
 
 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
@@ -961,7 +961,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
 	do {									\
 		(wait)->private = current;					\
 		(wait)->func = autoremove_wake_function;			\
-		INIT_LIST_HEAD(&(wait)->task_list);				\
+		INIT_LIST_HEAD(&(wait)->entry);					\
 		(wait)->flags = 0;						\
 	} while (0)
 
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 9cc82114dbcb..12b26660d7e9 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -45,8 +45,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 		.wq_entry = {							\
 			.private	= current,				\
 			.func		= wake_bit_function,			\
-			.task_list	=					\
-				LIST_HEAD_INIT((name).wq_entry.task_list),	\
+			.entry		=					\
+				LIST_HEAD_INIT((name).wq_entry.entry),		\
 		},								\
 	}
 
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 6bcd7c3c4501..17f11c6b0a9f 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -16,7 +16,7 @@ void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, st
 {
 	spin_lock_init(&wq_head->lock);
 	lockdep_set_class_and_name(&wq_head->lock, key, name);
-	INIT_LIST_HEAD(&wq_head->task_list);
+	INIT_LIST_HEAD(&wq_head->head);
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
@@ -68,7 +68,7 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 {
 	wait_queue_entry_t *curr, *next;
 
-	list_for_each_entry_safe(curr, next, &wq_head->task_list, task_list) {
+	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
 		unsigned flags = curr->flags;
 
 		if (curr->func(curr, mode, wake_flags, key) &&
@@ -176,7 +176,7 @@ prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_ent
 
 	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&wq_head->lock, flags);
-	if (list_empty(&wq_entry->task_list))
+	if (list_empty(&wq_entry->entry))
 		__add_wait_queue(wq_head, wq_entry);
 	set_current_state(state);
 	spin_unlock_irqrestore(&wq_head->lock, flags);
@@ -190,7 +190,7 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
 
 	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&wq_head->lock, flags);
-	if (list_empty(&wq_entry->task_list))
+	if (list_empty(&wq_entry->entry))
 		__add_wait_queue_entry_tail(wq_head, wq_entry);
 	set_current_state(state);
 	spin_unlock_irqrestore(&wq_head->lock, flags);
@@ -202,7 +202,7 @@ void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 	wq_entry->flags = flags;
 	wq_entry->private = current;
 	wq_entry->func = autoremove_wake_function;
-	INIT_LIST_HEAD(&wq_entry->task_list);
+	INIT_LIST_HEAD(&wq_entry->entry);
 }
 EXPORT_SYMBOL(init_wait_entry);
 
@@ -225,10 +225,10 @@ long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_en
 		 * can't see us, it should wake up another exclusive waiter if
 		 * we fail.
 		 */
-		list_del_init(&wq_entry->task_list);
+		list_del_init(&wq_entry->entry);
 		ret = -ERESTARTSYS;
 	} else {
-		if (list_empty(&wq_entry->task_list)) {
+		if (list_empty(&wq_entry->entry)) {
 			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
 				__add_wait_queue_entry_tail(wq_head, wq_entry);
 			else
@@ -251,7 +251,7 @@ EXPORT_SYMBOL(prepare_to_wait_event);
  */
 int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
+	if (likely(list_empty(&wait->entry)))
 		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
@@ -267,7 +267,7 @@ EXPORT_SYMBOL(do_wait_intr);
 
 int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
+	if (likely(list_empty(&wait->entry)))
 		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
@@ -308,9 +308,9 @@ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_en
 	 *    have _one_ other CPU that looks at or modifies
 	 *    the list).
 	 */
-	if (!list_empty_careful(&wq_entry->task_list)) {
+	if (!list_empty_careful(&wq_entry->entry)) {
 		spin_lock_irqsave(&wq_head->lock, flags);
-		list_del_init(&wq_entry->task_list);
+		list_del_init(&wq_entry->entry);
 		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
@@ -321,7 +321,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
 	int ret = default_wake_function(wq_entry, mode, sync, key);
 
 	if (ret)
-		list_del_init(&wq_entry->task_list);
+		list_del_init(&wq_entry->entry);
 	return ret;
 }
 EXPORT_SYMBOL(autoremove_wake_function);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index c891b34e1896..f8159698aa4d 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -205,8 +205,8 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
 		.wq_entry = {						\
 			.private	= current,			\
 			.func		= wake_atomic_t_function,	\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wq_entry.task_list), \
+			.entry		=				\
+				LIST_HEAD_INIT((name).wq_entry.entry),	\
 		},							\
 	}
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 80c19ee81e95..926484561624 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -845,7 +845,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	for (;;) {
 		spin_lock_irq(&q->lock);
 
-		if (likely(list_empty(&wait->task_list))) {
+		if (likely(list_empty(&wait->entry))) {
 			if (lock)
 				__add_wait_queue_entry_tail_exclusive(q, wait);
 			else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a90b096dc6b..d75b38b66ef6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1570,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	owait.wait.flags = 0;
 	owait.wait.func = memcg_oom_wake_function;
 	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
+	INIT_LIST_HEAD(&owait.wait.entry);
 
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
 	mem_cgroup_mark_under_oom(memcg);
diff --git a/mm/shmem.c b/mm/shmem.c
index a6c7dece4660..fdc413f82a99 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1905,7 +1905,7 @@ unlock:
 static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int ret = default_wake_function(wait, mode, sync, key);
-	list_del_init(&wait->task_list);
+	list_del_init(&wait->entry);
 	return ret;
 }
 
@@ -2840,7 +2840,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		spin_lock(&inode->i_lock);
 		inode->i_private = NULL;
 		wake_up_all(&shmem_falloc_waitq);
-		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
+		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
 		spin_unlock(&inode->i_lock);
 		error = 0;
 		goto out;
-- 
cgit v1.2.3


From a2bce3794a2122425abce5b0359d075b790930bc Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Wed, 7 Jun 2017 15:33:57 -0300
Subject: [media] media: Add userspace header file for i.MX

This adds a header file for use by userspace programs wanting to interact
with the i.MX media driver. It defines custom events and v4l2 controls for
the i.MX v4l2 subdevices.

Signed-off-by: Steve Longerbeam <steve_longerbeam@mentor.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/linux/imx-media.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 include/linux/imx-media.h

(limited to 'include/linux')

diff --git a/include/linux/imx-media.h b/include/linux/imx-media.h
new file mode 100644
index 000000000000..77221ecad6fc
--- /dev/null
+++ b/include/linux/imx-media.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014-2017 Mentor Graphics Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version
+ */
+
+#ifndef __LINUX_IMX_MEDIA_H__
+#define __LINUX_IMX_MEDIA_H__
+
+/*
+ * events from the subdevs
+ */
+#define V4L2_EVENT_IMX_CLASS                V4L2_EVENT_PRIVATE_START
+#define V4L2_EVENT_IMX_FRAME_INTERVAL_ERROR (V4L2_EVENT_IMX_CLASS + 1)
+
+enum imx_ctrl_id {
+	V4L2_CID_IMX_FIM_ENABLE = (V4L2_CID_USER_IMX_BASE + 0),
+	V4L2_CID_IMX_FIM_NUM,
+	V4L2_CID_IMX_FIM_TOLERANCE_MIN,
+	V4L2_CID_IMX_FIM_TOLERANCE_MAX,
+	V4L2_CID_IMX_FIM_NUM_SKIP,
+	V4L2_CID_IMX_FIM_ICAP_EDGE,
+	V4L2_CID_IMX_FIM_ICAP_CHANNEL,
+};
+
+#endif
-- 
cgit v1.2.3


From f11cc0760b8397e0d230122606421b6a96e9f869 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 14 Jun 2017 19:37:30 -0700
Subject: sched/core: Drop the unused try_get_task_struct() helper function

This function was introduced by:

  150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")

... to allow easier usage of task_rcu_dereference(), however no users
were ever added. Drop the helper.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Link: http://lkml.kernel.org/r/20170615023730.22827-1-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/task.h |  2 --
 kernel/exit.c              | 13 -------------
 2 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index a978d7189cfd..f0f065c5afcf 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -95,8 +95,6 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
-struct task_struct *try_get_task_struct(struct task_struct **ptask);
-
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
diff --git a/kernel/exit.c b/kernel/exit.c
index 7d694437ab44..c63226283aef 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -318,19 +318,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	rcu_read_unlock();
 }
 
-struct task_struct *try_get_task_struct(struct task_struct **ptask)
-{
-	struct task_struct *task;
-
-	rcu_read_lock();
-	task = task_rcu_dereference(ptask);
-	if (task)
-		get_task_struct(task);
-	rcu_read_unlock();
-
-	return task;
-}
-
 /*
  * Determine if a process group is "orphaned", according to the POSIX
  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
-- 
cgit v1.2.3


From fdd2f5b7de2afaa931e5f7bad7bcda35d1f1b479 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Tue, 20 Jun 2017 07:05:40 -0500
Subject: fs: Separate out kiocb flags setup based on RWF_* flags

Also added RWF_SUPPORTED to encompass all flags.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/read_write.c         | 12 +++---------
 include/linux/fs.h      | 14 ++++++++++++++
 include/uapi/linux/fs.h |  2 ++
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/read_write.c b/fs/read_write.c
index 47c1d4484df9..53c816c61122 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 	struct kiocb kiocb;
 	ssize_t ret;
 
-	if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
-		return -EOPNOTSUPP;
-
 	init_sync_kiocb(&kiocb, filp);
-	if (flags & RWF_HIPRI)
-		kiocb.ki_flags |= IOCB_HIPRI;
-	if (flags & RWF_DSYNC)
-		kiocb.ki_flags |= IOCB_DSYNC;
-	if (flags & RWF_SYNC)
-		kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+	ret = kiocb_set_rw_flags(&kiocb, flags);
+	if (ret)
+		return ret;
 	kiocb.ki_pos = *ppos;
 
 	if (type == READ)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 023f0324762b..96a1a1fa54a9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3057,6 +3057,20 @@ static inline int iocb_flags(struct file *file)
 	return res;
 }
 
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+{
+	if (unlikely(flags & ~RWF_SUPPORTED))
+		return -EOPNOTSUPP;
+
+	if (flags & RWF_HIPRI)
+		ki->ki_flags |= IOCB_HIPRI;
+	if (flags & RWF_DSYNC)
+		ki->ki_flags |= IOCB_DSYNC;
+	if (flags & RWF_SYNC)
+		ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+	return 0;
+}
+
 static inline ino_t parent_ino(struct dentry *dentry)
 {
 	ino_t res;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a54feaa..937c3e39650a 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -361,4 +361,6 @@ struct fscrypt_key {
 #define RWF_DSYNC			0x00000002 /* per-IO O_DSYNC */
 #define RWF_SYNC			0x00000004 /* per-IO O_SYNC */
 
+#define RWF_SUPPORTED			(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)
+
 #endif /* _UAPI_LINUX_FS_H */
-- 
cgit v1.2.3


From 7fc9e4722435cd8459182c4975f48934f2bb1274 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Tue, 20 Jun 2017 07:05:41 -0500
Subject: fs: Introduce filemap_range_has_page()

filemap_range_has_page() return true if the file's mapping has
a page within the range mentioned. This function will be used
to check if a write() call will cause a writeback of previous
writes.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h |  2 ++
 mm/filemap.c       | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 96a1a1fa54a9..0d34f5b5a6b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2518,6 +2518,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
 				   loff_t lend);
+extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
+				  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..9b39a2390b9e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+			   loff_t start_byte, loff_t end_byte)
+{
+	pgoff_t index = start_byte >> PAGE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_SHIFT;
+	struct pagevec pvec;
+	bool ret;
+
+	if (end_byte < start_byte)
+		return false;
+
+	if (mapping->nrpages == 0)
+		return false;
+
+	pagevec_init(&pvec, 0);
+	if (!pagevec_lookup(&pvec, mapping, index, 1))
+		return false;
+	ret = (pvec.pages[0]->index <= end);
+	pagevec_release(&pvec);
+	return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 				     loff_t start_byte, loff_t end_byte)
 {
-- 
cgit v1.2.3


From b745fafaf70c0a98a2e1e7ac8cb14542889ceb0e Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Tue, 20 Jun 2017 07:05:43 -0500
Subject: fs: Introduce RWF_NOWAIT and FMODE_AIO_NOWAIT

RWF_NOWAIT informs kernel to bail out if an AIO request will block
for reasons such as file allocations, or a writeback triggered,
or would block while allocating requests while performing
direct I/O.

RWF_NOWAIT is translated to IOCB_NOWAIT for iocb->ki_flags.

FMODE_AIO_NOWAIT is a flag which identifies the file opened is capable
of returning -EAGAIN if the AIO call will block. This must be set by
supporting filesystems in the ->open() call.

Filesystems xfs, btrfs and ext4 would be supported in the following patches.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/aio.c                | 6 ++++++
 include/linux/fs.h      | 9 +++++++++
 include/uapi/linux/fs.h | 4 +++-
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index 020fa0045e3c..34027b67e2f4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1592,6 +1592,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	}
 
+	if ((req->common.ki_flags & IOCB_NOWAIT) &&
+			!(req->common.ki_flags & IOCB_DIRECT)) {
+		ret = -EOPNOTSUPP;
+		goto out_put_req;
+	}
+
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
 	if (unlikely(ret)) {
 		pr_debug("EFAULT: aio_key\n");
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0d34f5b5a6b0..4574121f4746 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -143,6 +143,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
+/* File is capable of returning -EAGAIN if AIO will block */
+#define FMODE_AIO_NOWAIT	((__force fmode_t)0x8000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -269,6 +272,7 @@ struct writeback_control;
 #define IOCB_DSYNC		(1 << 4)
 #define IOCB_SYNC		(1 << 5)
 #define IOCB_WRITE		(1 << 6)
+#define IOCB_NOWAIT		(1 << 7)
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -3064,6 +3068,11 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
 	if (unlikely(flags & ~RWF_SUPPORTED))
 		return -EOPNOTSUPP;
 
+	if (flags & RWF_NOWAIT) {
+		if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
+			return -EOPNOTSUPP;
+		ki->ki_flags |= IOCB_NOWAIT;
+	}
 	if (flags & RWF_HIPRI)
 		ki->ki_flags |= IOCB_HIPRI;
 	if (flags & RWF_DSYNC)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 937c3e39650a..27d8c36c04af 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -360,7 +360,9 @@ struct fscrypt_key {
 #define RWF_HIPRI			0x00000001 /* high priority request, poll if possible */
 #define RWF_DSYNC			0x00000002 /* per-IO O_DSYNC */
 #define RWF_SYNC			0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT			0x00000008 /* per-IO, return -EAGAIN if operation would block */
 
-#define RWF_SUPPORTED			(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)
+#define RWF_SUPPORTED			(RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
+					 RWF_NOWAIT)
 
 #endif /* _UAPI_LINUX_FS_H */
-- 
cgit v1.2.3


From a38d1243704f501a4c42de1db1062ff6eba83453 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Tue, 20 Jun 2017 07:05:45 -0500
Subject: fs: Introduce IOMAP_NOWAIT

IOCB_NOWAIT translates to IOMAP_NOWAIT for iomaps.
This is used by XFS in the XFS patch.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap.c            | 8 ++++++++
 include/linux/iomap.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 18f2f2b8ba2c..c71a64b97fba 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -881,6 +881,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		flags |= IOMAP_WRITE;
 	}
 
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (filemap_range_has_page(mapping, start, end)) {
+			ret = -EAGAIN;
+			goto out_free_dio;
+		}
+		flags |= IOMAP_NOWAIT;
+	}
+
 	ret = filemap_write_and_wait_range(mapping, start, end);
 	if (ret)
 		goto out_free_dio;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e788da31..69f4e9470084 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -52,6 +52,7 @@ struct iomap {
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT		(1 << 5) /* Don't wait for writeback */
 
 struct iomap_ops {
 	/*
-- 
cgit v1.2.3


From 03a07c92a9ed9938d828ca7f1d11b8bc63a7bb89 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Tue, 20 Jun 2017 07:05:46 -0500
Subject: block: return on congested block device

A new bio operation flag REQ_NOWAIT is introduced to identify bio's
orignating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

Stacked devices such as md (the ones with make_request_fn hooks)
currently are not supported because it may block for housekeeping.
For example, an md can have a part of the device suspended.
For this reason, only request based devices are supported.
In the future, this feature will be expanded to stacked devices
by teaching them how to handle the REQ_NOWAIT flags.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          | 22 ++++++++++++++++++++--
 block/blk-mq.c            |  4 ++++
 fs/direct-io.c            | 10 ++++++++--
 include/linux/bio.h       |  6 ++++++
 include/linux/blk_types.h |  4 ++++
 5 files changed, 42 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 62cf92550512..279e3c432d7b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,7 @@ static const struct {
 	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
 	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
 	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
+	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },
 
 	/* device mapper special case, should not leak out: */
 	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
@@ -1314,6 +1315,11 @@ retry:
 	if (!IS_ERR(rq))
 		return rq;
 
+	if (op & REQ_NOWAIT) {
+		blk_put_rl(rl);
+		return ERR_PTR(-EAGAIN);
+	}
+
 	if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return rq;
@@ -1961,6 +1967,14 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	/*
+	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+	 * if queue is not a request based queue.
+	 */
+
+	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+		goto not_supported;
+
 	part = bio->bi_bdev->bd_part;
 	if (should_fail_request(part, bio->bi_iter.bi_size) ||
 	    should_fail_request(&part_to_disk(part)->part0,
@@ -2118,7 +2132,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-		if (likely(blk_queue_enter(q, false) == 0)) {
+		if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
@@ -2143,7 +2157,11 @@ blk_qc_t generic_make_request(struct bio *bio)
 			bio_list_merge(&bio_list_on_stack[0], &same);
 			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
 		} else {
-			bio_io_error(bio);
+			if (unlikely(!blk_queue_dying(q) &&
+					(bio->bi_opf & REQ_NOWAIT)))
+				bio_wouldblock_error(bio);
+			else
+				bio_io_error(bio);
 		}
 		bio = bio_list_pop(&bio_list_on_stack[0]);
 	} while (bio);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index dd276a9e138e..ca03cd4b263f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -293,6 +293,8 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 		data->ctx = blk_mq_get_ctx(q);
 	if (likely(!data->hctx))
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+	if (op & REQ_NOWAIT)
+		data->flags |= BLK_MQ_REQ_NOWAIT;
 
 	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
@@ -1544,6 +1546,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
 		__wbt_done(q->rq_wb, wb_acct);
+		if (bio->bi_opf & REQ_NOWAIT)
+			bio_wouldblock_error(bio);
 		return BLK_QC_T_NONE;
 	}
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e8baaabebf13..c87077d1dc33 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -479,8 +479,12 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 	unsigned i;
 	blk_status_t err = bio->bi_status;
 
-	if (err)
-		dio->io_error = -EIO;
+	if (err) {
+		if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+			dio->io_error = -EAGAIN;
+		else
+			dio->io_error = -EIO;
+	}
 
 	if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
@@ -1194,6 +1198,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	if (iov_iter_rw(iter) == WRITE) {
 		dio->op = REQ_OP_WRITE;
 		dio->op_flags = REQ_SYNC | REQ_IDLE;
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			dio->op_flags |= REQ_NOWAIT;
 	} else {
 		dio->op = REQ_OP_READ;
 	}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 40d054185277..36aa641cde28 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -416,6 +416,12 @@ static inline void bio_io_error(struct bio *bio)
 	bio_endio(bio);
 }
 
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+	bio->bi_status = BLK_STS_AGAIN;
+	bio_endio(bio);
+}
+
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dcd45b15a3a5..e210da6d14b8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -36,6 +36,8 @@ typedef u8 __bitwise blk_status_t;
 /* hack for device mapper, don't use elsewhere: */
 #define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
 
+#define BLK_STS_AGAIN		((__force blk_status_t)12)
+
 struct blk_issue_stat {
 	u64 stat;
 };
@@ -224,6 +226,7 @@ enum req_flag_bits {
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
 
+	__REQ_NOWAIT,           /* Don't wait if request will block */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -242,6 +245,7 @@ enum req_flag_bits {
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
-- 
cgit v1.2.3


From 1a4a69751f4d24ffd3530f5a9694636db1566a3b Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 20 Jun 2017 16:00:00 +0300
Subject: qed: Chain support for external PBL

iWARP would require the chains to allocate/free their PBL memory
independently, so add the infrastructure to provide it externally.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/main.c             |  2 +-
 drivers/infiniband/hw/qedr/verbs.c            |  6 ++---
 drivers/net/ethernet/qlogic/qed/qed_dev.c     | 35 ++++++++++++++++++++-------
 drivers/net/ethernet/qlogic/qed/qed_dev_api.h |  5 +++-
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c   |  6 ++---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c     |  6 ++---
 drivers/net/ethernet/qlogic/qed/qed_spq.c     |  6 ++---
 drivers/net/ethernet/qlogic/qede/qede_main.c  |  8 +++---
 include/linux/qed/qed_chain.h                 |  7 ++++++
 include/linux/qed/qed_if.h                    |  3 ++-
 10 files changed, 56 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 485c1fef238b..5a32b802e4da 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -276,7 +276,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
 						   QED_CHAIN_CNT_TYPE_U16,
 						   n_entries,
 						   sizeof(struct regpair *),
-						   &cnq->pbl);
+						   &cnq->pbl, NULL);
 		if (rc)
 			goto err4;
 
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 17685cfea6a2..80df89b5a9ce 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -925,7 +925,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
 						   QED_CHAIN_CNT_TYPE_U32,
 						   chain_entries,
 						   sizeof(union rdma_cqe),
-						   &cq->pbl);
+						   &cq->pbl, NULL);
 		if (rc)
 			goto err1;
 
@@ -1413,7 +1413,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
 					   QED_CHAIN_CNT_TYPE_U32,
 					   n_sq_elems,
 					   QEDR_SQE_ELEMENT_SIZE,
-					   &qp->sq.pbl);
+					   &qp->sq.pbl, NULL);
 
 	if (rc)
 		return rc;
@@ -1427,7 +1427,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
 					   QED_CHAIN_CNT_TYPE_U32,
 					   n_rq_elems,
 					   QEDR_RQE_ELEMENT_SIZE,
-					   &qp->rq.pbl);
+					   &qp->rq.pbl, NULL);
 	if (rc)
 		return rc;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 65fe4940f20d..8b140541736a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -3075,12 +3075,15 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 	}
 
 	pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
-	dma_free_coherent(&cdev->pdev->dev,
-			  pbl_size,
-			  p_chain->pbl_sp.p_virt_table,
-			  p_chain->pbl_sp.p_phys_table);
+
+	if (!p_chain->b_external_pbl)
+		dma_free_coherent(&cdev->pdev->dev,
+				  pbl_size,
+				  p_chain->pbl_sp.p_virt_table,
+				  p_chain->pbl_sp.p_phys_table);
 out:
 	vfree(p_chain->pbl.pp_virt_addr_tbl);
+	p_chain->pbl.pp_virt_addr_tbl = NULL;
 }
 
 void qed_chain_free(struct qed_dev *cdev, struct qed_chain *p_chain)
@@ -3174,7 +3177,10 @@ qed_chain_alloc_single(struct qed_dev *cdev, struct qed_chain *p_chain)
 	return 0;
 }
 
-static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
+static int
+qed_chain_alloc_pbl(struct qed_dev *cdev,
+		    struct qed_chain *p_chain,
+		    struct qed_chain_ext_pbl *ext_pbl)
 {
 	u32 page_cnt = p_chain->page_cnt, size, i;
 	dma_addr_t p_phys = 0, p_pbl_phys = 0;
@@ -3194,8 +3200,16 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 	 * should be saved to allow its freeing during the error flow.
 	 */
 	size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
-	p_pbl_virt = dma_alloc_coherent(&cdev->pdev->dev,
-					size, &p_pbl_phys, GFP_KERNEL);
+
+	if (!ext_pbl) {
+		p_pbl_virt = dma_alloc_coherent(&cdev->pdev->dev,
+						size, &p_pbl_phys, GFP_KERNEL);
+	} else {
+		p_pbl_virt = ext_pbl->p_pbl_virt;
+		p_pbl_phys = ext_pbl->p_pbl_phys;
+		p_chain->b_external_pbl = true;
+	}
+
 	qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys,
 			       pp_virt_addr_tbl);
 	if (!p_pbl_virt)
@@ -3228,7 +3242,10 @@ int qed_chain_alloc(struct qed_dev *cdev,
 		    enum qed_chain_use_mode intended_use,
 		    enum qed_chain_mode mode,
 		    enum qed_chain_cnt_type cnt_type,
-		    u32 num_elems, size_t elem_size, struct qed_chain *p_chain)
+		    u32 num_elems,
+		    size_t elem_size,
+		    struct qed_chain *p_chain,
+		    struct qed_chain_ext_pbl *ext_pbl)
 {
 	u32 page_cnt;
 	int rc = 0;
@@ -3259,7 +3276,7 @@ int qed_chain_alloc(struct qed_dev *cdev,
 		rc = qed_chain_alloc_single(cdev, p_chain);
 		break;
 	case QED_CHAIN_MODE_PBL:
-		rc = qed_chain_alloc_pbl(cdev, p_chain);
+		rc = qed_chain_alloc_pbl(cdev, p_chain, ext_pbl);
 		break;
 	}
 	if (rc)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index 12d16c096e36..1f1df1bf127c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -307,6 +307,7 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
  * @param num_elems
  * @param elem_size
  * @param p_chain
+ * @param ext_pbl - a possible external PBL
  *
  * @return int
  */
@@ -315,7 +316,9 @@ qed_chain_alloc(struct qed_dev *cdev,
 		enum qed_chain_use_mode intended_use,
 		enum qed_chain_mode mode,
 		enum qed_chain_cnt_type cnt_type,
-		u32 num_elems, size_t elem_size, struct qed_chain *p_chain);
+		u32 num_elems,
+		size_t elem_size,
+		struct qed_chain *p_chain, struct qed_chain_ext_pbl *ext_pbl);
 
 /**
  * @brief qed_chain_free - Free chain DMA memory
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 6103723d7118..5a1ed05d0c5f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -752,7 +752,7 @@ static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
 			     QED_CHAIN_MODE_PBL,
 			     QED_CHAIN_CNT_TYPE_U16,
-			     r2tq_num_elements, 0x80, &p_conn->r2tq);
+			     r2tq_num_elements, 0x80, &p_conn->r2tq, NULL);
 	if (rc)
 		goto nomem_r2tq;
 
@@ -763,7 +763,7 @@ static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_MODE_PBL,
 			     QED_CHAIN_CNT_TYPE_U16,
 			     uhq_num_elements,
-			     sizeof(struct iscsi_uhqe), &p_conn->uhq);
+			     sizeof(struct iscsi_uhqe), &p_conn->uhq, NULL);
 	if (rc)
 		goto nomem_uhq;
 
@@ -773,7 +773,7 @@ static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_MODE_PBL,
 			     QED_CHAIN_CNT_TYPE_U16,
 			     xhq_num_elements,
-			     sizeof(struct iscsi_xhqe), &p_conn->xhq);
+			     sizeof(struct iscsi_xhqe), &p_conn->xhq, NULL);
 	if (rc)
 		goto nomem;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 0e26193156e4..f9c51cb8585e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1056,7 +1056,7 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_CNT_TYPE_U16,
 			     p_ll2_info->input.rx_num_desc,
 			     sizeof(struct core_rx_bd),
-			     &p_ll2_info->rx_queue.rxq_chain);
+			     &p_ll2_info->rx_queue.rxq_chain, NULL);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to allocate ll2 rxq chain\n");
 		goto out;
@@ -1078,7 +1078,7 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_CNT_TYPE_U16,
 			     p_ll2_info->input.rx_num_desc,
 			     sizeof(struct core_rx_fast_path_cqe),
-			     &p_ll2_info->rx_queue.rcq_chain);
+			     &p_ll2_info->rx_queue.rcq_chain, NULL);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to allocate ll2 rcq chain\n");
 		goto out;
@@ -1108,7 +1108,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
 			     QED_CHAIN_CNT_TYPE_U16,
 			     p_ll2_info->input.tx_num_desc,
 			     sizeof(struct core_tx_bd),
-			     &p_ll2_info->tx_queue.txq_chain);
+			     &p_ll2_info->tx_queue.txq_chain, NULL);
 	if (rc)
 		goto out;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index dede73f41e61..a971916af7cc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -419,7 +419,7 @@ int qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
 			    QED_CHAIN_CNT_TYPE_U16,
 			    num_elem,
 			    sizeof(union event_ring_element),
-			    &p_eq->chain))
+			    &p_eq->chain, NULL))
 		goto eq_allocate_fail;
 
 	/* register EQ completion on the SP SB */
@@ -547,7 +547,7 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn)
 			    QED_CHAIN_CNT_TYPE_U16,
 			    0,   /* N/A when the mode is SINGLE */
 			    sizeof(struct slow_path_element),
-			    &p_spq->chain))
+			    &p_spq->chain, NULL))
 		goto spq_allocate_fail;
 
 	/* allocate and fill the SPQ elements (incl. ramrod data list) */
@@ -953,7 +953,7 @@ int qed_consq_alloc(struct qed_hwfn *p_hwfn)
 			    QED_CHAIN_MODE_PBL,
 			    QED_CHAIN_CNT_TYPE_U16,
 			    QED_CHAIN_PAGE_SIZE / 0x80,
-			    0x80, &p_consq->chain))
+			    0x80, &p_consq->chain, NULL))
 		goto consq_allocate_fail;
 
 	p_hwfn->p_consq = p_consq;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index fdf04bc5406e..37ad79917770 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1317,8 +1317,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 					    QED_CHAIN_CNT_TYPE_U16,
 					    RX_RING_SIZE,
 					    sizeof(struct eth_rx_bd),
-					    &rxq->rx_bd_ring);
-
+					    &rxq->rx_bd_ring, NULL);
 	if (rc)
 		goto err;
 
@@ -1329,7 +1328,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 					    QED_CHAIN_CNT_TYPE_U16,
 					    RX_RING_SIZE,
 					    sizeof(union eth_rx_cqe),
-					    &rxq->rx_comp_ring);
+					    &rxq->rx_comp_ring, NULL);
 	if (rc)
 		goto err;
 
@@ -1387,7 +1386,8 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 					    QED_CHAIN_MODE_PBL,
 					    QED_CHAIN_CNT_TYPE_U16,
 					    txq->num_tx_buffers,
-					    sizeof(*p_virt), &txq->tx_pbl);
+					    sizeof(*p_virt),
+					    &txq->tx_pbl, NULL);
 	if (rc)
 		goto err;
 
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index 5cd7a4608c9b..59ddf9af909e 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -80,6 +80,11 @@ struct qed_chain_pbl_u32 {
 	u32 cons_page_idx;
 };
 
+struct qed_chain_ext_pbl {
+	dma_addr_t p_pbl_phys;
+	void *p_pbl_virt;
+};
+
 struct qed_chain_u16 {
 	/* Cyclic index of next element to produce/consme */
 	u16 prod_idx;
@@ -155,6 +160,8 @@ struct qed_chain {
 	u32 size;
 
 	u8 intended_use;
+
+	bool b_external_pbl;
 };
 
 #define QED_CHAIN_PBL_ENTRY_SIZE        (8)
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 74f6b99754aa..ef39c7f40ae6 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -634,7 +634,8 @@ struct qed_common_ops {
 				       enum qed_chain_cnt_type cnt_type,
 				       u32 num_elems,
 				       size_t elem_size,
-				       struct qed_chain *p_chain);
+				       struct qed_chain *p_chain,
+				       struct qed_chain_ext_pbl *ext_pbl);
 
 	void		(*chain_free)(struct qed_dev *cdev,
 				      struct qed_chain *p_chain);
-- 
cgit v1.2.3


From b262a06e642cfb1eeb6c2c772f76dad674ada57e Mon Sep 17 00:00:00 2001
From: Michal Kalderon <Michal.Kalderon@cavium.com>
Date: Tue, 20 Jun 2017 16:00:03 +0300
Subject: qed*: qede_roce.[ch] -> qede_rdma.[ch]

Once we have iWARP support, the qede portion of the qedr<->qede would
serve all the RDMA protocols - so rename the file to be appropriate
to its function.

While we're at it, we're also moving a couple of inclusions to it into
.h files and adding includes to make sure it contains all type
definitions it requires.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/main.c            |   2 +-
 drivers/infiniband/hw/qedr/qedr.h            |   2 +-
 drivers/net/ethernet/qlogic/qede/Makefile    |   2 +-
 drivers/net/ethernet/qlogic/qede/qede.h      |   1 +
 drivers/net/ethernet/qlogic/qede/qede_main.c |   1 -
 drivers/net/ethernet/qlogic/qede/qede_rdma.c | 314 +++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qede/qede_roce.c | 314 ---------------------------
 include/linux/qed/qede_rdma.h                |  93 ++++++++
 include/linux/qed/qede_roce.h                |  88 --------
 9 files changed, 411 insertions(+), 406 deletions(-)
 create mode 100644 drivers/net/ethernet/qlogic/qede/qede_rdma.c
 delete mode 100644 drivers/net/ethernet/qlogic/qede/qede_roce.c
 create mode 100644 include/linux/qed/qede_rdma.h
 delete mode 100644 include/linux/qed/qede_roce.h

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 5a32b802e4da..714eb0c92312 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -37,7 +37,7 @@
 #include <linux/iommu.h>
 #include <linux/pci.h>
 #include <net/addrconf.h>
-#include <linux/qed/qede_roce.h>
+
 #include <linux/qed/qed_chain.h>
 #include <linux/qed/qed_if.h>
 #include "qedr.h"
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 80333ec2c8b6..2376019a48ae 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -37,7 +37,7 @@
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_chain.h>
 #include <linux/qed/qed_roce_if.h>
-#include <linux/qed/qede_roce.h>
+#include <linux/qed/qede_rdma.h>
 #include <linux/qed/roce_common.h>
 #include "qedr_hsi_rdma.h"
 
diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile
index bc5f7c3b277d..75408fbb7680 100644
--- a/drivers/net/ethernet/qlogic/qede/Makefile
+++ b/drivers/net/ethernet/qlogic/qede/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o
 
 qede-y := qede_main.o qede_fp.o qede_filter.o qede_ethtool.o qede_ptp.o
 qede-$(CONFIG_DCB) += qede_dcbnl.o
-qede-$(CONFIG_QED_RDMA) += qede_roce.o
+qede-$(CONFIG_QED_RDMA) += qede_rdma.o
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 694c09b8997e..2d6b30c47b3a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -40,6 +40,7 @@
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/bpf.h>
+#include <linux/qed/qede_rdma.h>
 #include <linux/io.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 37ad79917770..e9eaa38895e6 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -60,7 +60,6 @@
 #include <net/ip6_checksum.h>
 #include <linux/bitops.h>
 #include <linux/vmalloc.h>
-#include <linux/qed/qede_roce.h>
 #include "qede.h"
 #include "qede_ptp.h"
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
new file mode 100644
index 000000000000..9837ee20cbae
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -0,0 +1,314 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/qed/qede_rdma.h>
+#include "qede.h"
+
+static struct qedr_driver *qedr_drv;
+static LIST_HEAD(qedr_dev_list);
+static DEFINE_MUTEX(qedr_dev_list_lock);
+
+bool qede_roce_supported(struct qede_dev *dev)
+{
+	return dev->dev_info.common.rdma_supported;
+}
+
+static void _qede_roce_dev_add(struct qede_dev *edev)
+{
+	if (!qedr_drv)
+		return;
+
+	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
+						 edev->ndev);
+}
+
+static int qede_roce_create_wq(struct qede_dev *edev)
+{
+	INIT_LIST_HEAD(&edev->rdma_info.roce_event_list);
+	edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq");
+	if (!edev->rdma_info.roce_wq) {
+		DP_NOTICE(edev, "qedr: Could not create workqueue\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void qede_roce_cleanup_event(struct qede_dev *edev)
+{
+	struct list_head *head = &edev->rdma_info.roce_event_list;
+	struct qede_roce_event_work *event_node;
+
+	flush_workqueue(edev->rdma_info.roce_wq);
+	while (!list_empty(head)) {
+		event_node = list_entry(head->next, struct qede_roce_event_work,
+					list);
+		cancel_work_sync(&event_node->work);
+		list_del(&event_node->list);
+		kfree(event_node);
+	}
+}
+
+static void qede_roce_destroy_wq(struct qede_dev *edev)
+{
+	qede_roce_cleanup_event(edev);
+	destroy_workqueue(edev->rdma_info.roce_wq);
+}
+
+int qede_roce_dev_add(struct qede_dev *edev)
+{
+	int rc = 0;
+
+	if (qede_roce_supported(edev)) {
+		rc = qede_roce_create_wq(edev);
+		if (rc)
+			return rc;
+
+		INIT_LIST_HEAD(&edev->rdma_info.entry);
+		mutex_lock(&qedr_dev_list_lock);
+		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+		_qede_roce_dev_add(edev);
+		mutex_unlock(&qedr_dev_list_lock);
+	}
+
+	return rc;
+}
+
+static void _qede_roce_dev_remove(struct qede_dev *edev)
+{
+	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
+		qedr_drv->remove(edev->rdma_info.qedr_dev);
+	edev->rdma_info.qedr_dev = NULL;
+}
+
+void qede_roce_dev_remove(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	qede_roce_destroy_wq(edev);
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_roce_dev_remove(edev);
+	list_del(&edev->rdma_info.entry);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void _qede_roce_dev_open(struct qede_dev *edev)
+{
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP);
+}
+
+static void qede_roce_dev_open(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_roce_dev_open(edev);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void _qede_roce_dev_close(struct qede_dev *edev)
+{
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN);
+}
+
+static void qede_roce_dev_close(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_roce_dev_close(edev);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void qede_roce_dev_shutdown(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	mutex_lock(&qedr_dev_list_lock);
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+int qede_roce_register_driver(struct qedr_driver *drv)
+{
+	struct qede_dev *edev;
+	u8 qedr_counter = 0;
+
+	mutex_lock(&qedr_dev_list_lock);
+	if (qedr_drv) {
+		mutex_unlock(&qedr_dev_list_lock);
+		return -EINVAL;
+	}
+	qedr_drv = drv;
+
+	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
+		struct net_device *ndev;
+
+		qedr_counter++;
+		_qede_roce_dev_add(edev);
+		ndev = edev->ndev;
+		if (netif_running(ndev) && netif_oper_up(ndev))
+			_qede_roce_dev_open(edev);
+	}
+	mutex_unlock(&qedr_dev_list_lock);
+
+	pr_notice("qedr: discovered and registered %d RoCE funcs\n",
+		  qedr_counter);
+
+	return 0;
+}
+EXPORT_SYMBOL(qede_roce_register_driver);
+
+void qede_roce_unregister_driver(struct qedr_driver *drv)
+{
+	struct qede_dev *edev;
+
+	mutex_lock(&qedr_dev_list_lock);
+	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
+		if (edev->rdma_info.qedr_dev)
+			_qede_roce_dev_remove(edev);
+	}
+	qedr_drv = NULL;
+	mutex_unlock(&qedr_dev_list_lock);
+}
+EXPORT_SYMBOL(qede_roce_unregister_driver);
+
+static void qede_roce_changeaddr(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
+}
+
+static struct qede_roce_event_work *
+qede_roce_get_free_event_node(struct qede_dev *edev)
+{
+	struct qede_roce_event_work *event_node = NULL;
+	struct list_head *list_node = NULL;
+	bool found = false;
+
+	list_for_each(list_node, &edev->rdma_info.roce_event_list) {
+		event_node = list_entry(list_node, struct qede_roce_event_work,
+					list);
+		if (!work_pending(&event_node->work)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
+		if (!event_node) {
+			DP_NOTICE(edev,
+				  "qedr: Could not allocate memory for roce work\n");
+			return NULL;
+		}
+		list_add_tail(&event_node->list,
+			      &edev->rdma_info.roce_event_list);
+	}
+
+	return event_node;
+}
+
+static void qede_roce_handle_event(struct work_struct *work)
+{
+	struct qede_roce_event_work *event_node;
+	enum qede_roce_event event;
+	struct qede_dev *edev;
+
+	event_node = container_of(work, struct qede_roce_event_work, work);
+	event = event_node->event;
+	edev = event_node->ptr;
+
+	switch (event) {
+	case QEDE_UP:
+		qede_roce_dev_open(edev);
+		break;
+	case QEDE_DOWN:
+		qede_roce_dev_close(edev);
+		break;
+	case QEDE_CLOSE:
+		qede_roce_dev_shutdown(edev);
+		break;
+	case QEDE_CHANGE_ADDR:
+		qede_roce_changeaddr(edev);
+		break;
+	default:
+		DP_NOTICE(edev, "Invalid roce event %d", event);
+	}
+}
+
+static void qede_roce_add_event(struct qede_dev *edev,
+				enum qede_roce_event event)
+{
+	struct qede_roce_event_work *event_node;
+
+	if (!edev->rdma_info.qedr_dev)
+		return;
+
+	event_node = qede_roce_get_free_event_node(edev);
+	if (!event_node)
+		return;
+
+	event_node->event = event;
+	event_node->ptr = edev;
+
+	INIT_WORK(&event_node->work, qede_roce_handle_event);
+	queue_work(edev->rdma_info.roce_wq, &event_node->work);
+}
+
+void qede_roce_dev_event_open(struct qede_dev *edev)
+{
+	qede_roce_add_event(edev, QEDE_UP);
+}
+
+void qede_roce_dev_event_close(struct qede_dev *edev)
+{
+	qede_roce_add_event(edev, QEDE_DOWN);
+}
+
+void qede_roce_event_changeaddr(struct qede_dev *edev)
+{
+	qede_roce_add_event(edev, QEDE_CHANGE_ADDR);
+}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c
deleted file mode 100644
index c0030fb8d842..000000000000
--- a/drivers/net/ethernet/qlogic/qede/qede_roce.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* QLogic qedr NIC Driver
- * Copyright (c) 2015-2017  QLogic Corporation
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and /or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/qed/qede_roce.h>
-#include "qede.h"
-
-static struct qedr_driver *qedr_drv;
-static LIST_HEAD(qedr_dev_list);
-static DEFINE_MUTEX(qedr_dev_list_lock);
-
-bool qede_roce_supported(struct qede_dev *dev)
-{
-	return dev->dev_info.common.rdma_supported;
-}
-
-static void _qede_roce_dev_add(struct qede_dev *edev)
-{
-	if (!qedr_drv)
-		return;
-
-	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
-						 edev->ndev);
-}
-
-static int qede_roce_create_wq(struct qede_dev *edev)
-{
-	INIT_LIST_HEAD(&edev->rdma_info.roce_event_list);
-	edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq");
-	if (!edev->rdma_info.roce_wq) {
-		DP_NOTICE(edev, "qedr: Could not create workqueue\n");
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static void qede_roce_cleanup_event(struct qede_dev *edev)
-{
-	struct list_head *head = &edev->rdma_info.roce_event_list;
-	struct qede_roce_event_work *event_node;
-
-	flush_workqueue(edev->rdma_info.roce_wq);
-	while (!list_empty(head)) {
-		event_node = list_entry(head->next, struct qede_roce_event_work,
-					list);
-		cancel_work_sync(&event_node->work);
-		list_del(&event_node->list);
-		kfree(event_node);
-	}
-}
-
-static void qede_roce_destroy_wq(struct qede_dev *edev)
-{
-	qede_roce_cleanup_event(edev);
-	destroy_workqueue(edev->rdma_info.roce_wq);
-}
-
-int qede_roce_dev_add(struct qede_dev *edev)
-{
-	int rc = 0;
-
-	if (qede_roce_supported(edev)) {
-		rc = qede_roce_create_wq(edev);
-		if (rc)
-			return rc;
-
-		INIT_LIST_HEAD(&edev->rdma_info.entry);
-		mutex_lock(&qedr_dev_list_lock);
-		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-		_qede_roce_dev_add(edev);
-		mutex_unlock(&qedr_dev_list_lock);
-	}
-
-	return rc;
-}
-
-static void _qede_roce_dev_remove(struct qede_dev *edev)
-{
-	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
-		qedr_drv->remove(edev->rdma_info.qedr_dev);
-	edev->rdma_info.qedr_dev = NULL;
-}
-
-void qede_roce_dev_remove(struct qede_dev *edev)
-{
-	if (!qede_roce_supported(edev))
-		return;
-
-	qede_roce_destroy_wq(edev);
-	mutex_lock(&qedr_dev_list_lock);
-	_qede_roce_dev_remove(edev);
-	list_del(&edev->rdma_info.entry);
-	mutex_unlock(&qedr_dev_list_lock);
-}
-
-static void _qede_roce_dev_open(struct qede_dev *edev)
-{
-	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
-		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP);
-}
-
-static void qede_roce_dev_open(struct qede_dev *edev)
-{
-	if (!qede_roce_supported(edev))
-		return;
-
-	mutex_lock(&qedr_dev_list_lock);
-	_qede_roce_dev_open(edev);
-	mutex_unlock(&qedr_dev_list_lock);
-}
-
-static void _qede_roce_dev_close(struct qede_dev *edev)
-{
-	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
-		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN);
-}
-
-static void qede_roce_dev_close(struct qede_dev *edev)
-{
-	if (!qede_roce_supported(edev))
-		return;
-
-	mutex_lock(&qedr_dev_list_lock);
-	_qede_roce_dev_close(edev);
-	mutex_unlock(&qedr_dev_list_lock);
-}
-
-static void qede_roce_dev_shutdown(struct qede_dev *edev)
-{
-	if (!qede_roce_supported(edev))
-		return;
-
-	mutex_lock(&qedr_dev_list_lock);
-	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
-		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE);
-	mutex_unlock(&qedr_dev_list_lock);
-}
-
-int qede_roce_register_driver(struct qedr_driver *drv)
-{
-	struct qede_dev *edev;
-	u8 qedr_counter = 0;
-
-	mutex_lock(&qedr_dev_list_lock);
-	if (qedr_drv) {
-		mutex_unlock(&qedr_dev_list_lock);
-		return -EINVAL;
-	}
-	qedr_drv = drv;
-
-	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-		struct net_device *ndev;
-
-		qedr_counter++;
-		_qede_roce_dev_add(edev);
-		ndev = edev->ndev;
-		if (netif_running(ndev) && netif_oper_up(ndev))
-			_qede_roce_dev_open(edev);
-	}
-	mutex_unlock(&qedr_dev_list_lock);
-
-	pr_notice("qedr: discovered and registered %d RoCE funcs\n",
-		  qedr_counter);
-
-	return 0;
-}
-EXPORT_SYMBOL(qede_roce_register_driver);
-
-void qede_roce_unregister_driver(struct qedr_driver *drv)
-{
-	struct qede_dev *edev;
-
-	mutex_lock(&qedr_dev_list_lock);
-	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-		if (edev->rdma_info.qedr_dev)
-			_qede_roce_dev_remove(edev);
-	}
-	qedr_drv = NULL;
-	mutex_unlock(&qedr_dev_list_lock);
-}
-EXPORT_SYMBOL(qede_roce_unregister_driver);
-
-static void qede_roce_changeaddr(struct qede_dev *edev)
-{
-	if (!qede_roce_supported(edev))
-		return;
-
-	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
-		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
-}
-
-static struct qede_roce_event_work *
-qede_roce_get_free_event_node(struct qede_dev *edev)
-{
-	struct qede_roce_event_work *event_node = NULL;
-	struct list_head *list_node = NULL;
-	bool found = false;
-
-	list_for_each(list_node, &edev->rdma_info.roce_event_list) {
-		event_node = list_entry(list_node, struct qede_roce_event_work,
-					list);
-		if (!work_pending(&event_node->work)) {
-			found = true;
-			break;
-		}
-	}
-
-	if (!found) {
-		event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
-		if (!event_node) {
-			DP_NOTICE(edev,
-				  "qedr: Could not allocate memory for roce work\n");
-			return NULL;
-		}
-		list_add_tail(&event_node->list,
-			      &edev->rdma_info.roce_event_list);
-	}
-
-	return event_node;
-}
-
-static void qede_roce_handle_event(struct work_struct *work)
-{
-	struct qede_roce_event_work *event_node;
-	enum qede_roce_event event;
-	struct qede_dev *edev;
-
-	event_node = container_of(work, struct qede_roce_event_work, work);
-	event = event_node->event;
-	edev = event_node->ptr;
-
-	switch (event) {
-	case QEDE_UP:
-		qede_roce_dev_open(edev);
-		break;
-	case QEDE_DOWN:
-		qede_roce_dev_close(edev);
-		break;
-	case QEDE_CLOSE:
-		qede_roce_dev_shutdown(edev);
-		break;
-	case QEDE_CHANGE_ADDR:
-		qede_roce_changeaddr(edev);
-		break;
-	default:
-		DP_NOTICE(edev, "Invalid roce event %d", event);
-	}
-}
-
-static void qede_roce_add_event(struct qede_dev *edev,
-				enum qede_roce_event event)
-{
-	struct qede_roce_event_work *event_node;
-
-	if (!edev->rdma_info.qedr_dev)
-		return;
-
-	event_node = qede_roce_get_free_event_node(edev);
-	if (!event_node)
-		return;
-
-	event_node->event = event;
-	event_node->ptr = edev;
-
-	INIT_WORK(&event_node->work, qede_roce_handle_event);
-	queue_work(edev->rdma_info.roce_wq, &event_node->work);
-}
-
-void qede_roce_dev_event_open(struct qede_dev *edev)
-{
-	qede_roce_add_event(edev, QEDE_UP);
-}
-
-void qede_roce_dev_event_close(struct qede_dev *edev)
-{
-	qede_roce_add_event(edev, QEDE_DOWN);
-}
-
-void qede_roce_event_changeaddr(struct qede_dev *edev)
-{
-	qede_roce_add_event(edev, QEDE_CHANGE_ADDR);
-}
diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
new file mode 100644
index 000000000000..a1a9b81f7612
--- /dev/null
+++ b/include/linux/qed/qede_rdma.h
@@ -0,0 +1,93 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef QEDE_ROCE_H
+#define QEDE_ROCE_H
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct qedr_dev;
+struct qed_dev;
+struct qede_dev;
+
+enum qede_roce_event {
+	QEDE_UP,
+	QEDE_DOWN,
+	QEDE_CHANGE_ADDR,
+	QEDE_CLOSE
+};
+
+struct qede_roce_event_work {
+	struct list_head list;
+	struct work_struct work;
+	void *ptr;
+	enum qede_roce_event event;
+};
+
+struct qedr_driver {
+	unsigned char name[32];
+
+	struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *,
+				struct net_device *);
+
+	void (*remove)(struct qedr_dev *);
+	void (*notify)(struct qedr_dev *, enum qede_roce_event);
+};
+
+/* APIs for RoCE driver to register callback handlers,
+ * which will be invoked when device is added, removed, ifup, ifdown
+ */
+int qede_roce_register_driver(struct qedr_driver *drv);
+void qede_roce_unregister_driver(struct qedr_driver *drv);
+
+bool qede_roce_supported(struct qede_dev *dev);
+
+#if IS_ENABLED(CONFIG_QED_RDMA)
+int qede_roce_dev_add(struct qede_dev *dev);
+void qede_roce_dev_event_open(struct qede_dev *dev);
+void qede_roce_dev_event_close(struct qede_dev *dev);
+void qede_roce_dev_remove(struct qede_dev *dev);
+void qede_roce_event_changeaddr(struct qede_dev *qedr);
+#else
+static inline int qede_roce_dev_add(struct qede_dev *dev)
+{
+	return 0;
+}
+
+static inline void qede_roce_dev_event_open(struct qede_dev *dev) {}
+static inline void qede_roce_dev_event_close(struct qede_dev *dev) {}
+static inline void qede_roce_dev_remove(struct qede_dev *dev) {}
+static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {}
+#endif
+#endif
diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h
deleted file mode 100644
index 3b8dd551a98c..000000000000
--- a/include/linux/qed/qede_roce.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* QLogic qedr NIC Driver
- * Copyright (c) 2015-2017  QLogic Corporation
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and /or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef QEDE_ROCE_H
-#define QEDE_ROCE_H
-
-struct qedr_dev;
-struct qed_dev;
-struct qede_dev;
-
-enum qede_roce_event {
-	QEDE_UP,
-	QEDE_DOWN,
-	QEDE_CHANGE_ADDR,
-	QEDE_CLOSE
-};
-
-struct qede_roce_event_work {
-	struct list_head list;
-	struct work_struct work;
-	void *ptr;
-	enum qede_roce_event event;
-};
-
-struct qedr_driver {
-	unsigned char name[32];
-
-	struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *,
-				struct net_device *);
-
-	void (*remove)(struct qedr_dev *);
-	void (*notify)(struct qedr_dev *, enum qede_roce_event);
-};
-
-/* APIs for RoCE driver to register callback handlers,
- * which will be invoked when device is added, removed, ifup, ifdown
- */
-int qede_roce_register_driver(struct qedr_driver *drv);
-void qede_roce_unregister_driver(struct qedr_driver *drv);
-
-bool qede_roce_supported(struct qede_dev *dev);
-
-#if IS_ENABLED(CONFIG_QED_RDMA)
-int qede_roce_dev_add(struct qede_dev *dev);
-void qede_roce_dev_event_open(struct qede_dev *dev);
-void qede_roce_dev_event_close(struct qede_dev *dev);
-void qede_roce_dev_remove(struct qede_dev *dev);
-void qede_roce_event_changeaddr(struct qede_dev *qedr);
-#else
-static inline int qede_roce_dev_add(struct qede_dev *dev)
-{
-	return 0;
-}
-
-static inline void qede_roce_dev_event_open(struct qede_dev *dev) {}
-static inline void qede_roce_dev_event_close(struct qede_dev *dev) {}
-static inline void qede_roce_dev_remove(struct qede_dev *dev) {}
-static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {}
-#endif
-#endif
-- 
cgit v1.2.3


From bbfcd1e8e1677b1e692144c5709945e1dfe1ed30 Mon Sep 17 00:00:00 2001
From: Michal Kalderon <Michal.Kalderon@cavium.com>
Date: Tue, 20 Jun 2017 16:00:04 +0300
Subject: qed*: Set rdma generic functions prefix

Rename the functions common to both iWARP and RoCE to have a prefix of
_rdma_ instead of _roce_.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/main.c            |   6 +-
 drivers/net/ethernet/qlogic/qede/qede.h      |   4 +-
 drivers/net/ethernet/qlogic/qede/qede_main.c |  12 +--
 drivers/net/ethernet/qlogic/qede/qede_rdma.c | 142 +++++++++++++--------------
 include/linux/qed/qede_rdma.h                |  37 +++----
 5 files changed, 101 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 714eb0c92312..b5851fd67d4f 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -902,7 +902,7 @@ static void qedr_mac_address_change(struct qedr_dev *dev)
  * initialization done before RoCE driver notifies
  * event to stack.
  */
-static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
+static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event)
 {
 	switch (event) {
 	case QEDE_UP:
@@ -931,12 +931,12 @@ static struct qedr_driver qedr_drv = {
 
 static int __init qedr_init_module(void)
 {
-	return qede_roce_register_driver(&qedr_drv);
+	return qede_rdma_register_driver(&qedr_drv);
 }
 
 static void __exit qedr_exit_module(void)
 {
-	qede_roce_unregister_driver(&qedr_drv);
+	qede_rdma_unregister_driver(&qedr_drv);
 }
 
 module_init(qedr_init_module);
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 2d6b30c47b3a..4dfb238221f9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -154,8 +154,8 @@ struct qede_vlan {
 struct qede_rdma_dev {
 	struct qedr_dev *qedr_dev;
 	struct list_head entry;
-	struct list_head roce_event_list;
-	struct workqueue_struct *roce_wq;
+	struct list_head rdma_event_list;
+	struct workqueue_struct *rdma_wq;
 };
 
 struct qede_ptp;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index e9eaa38895e6..06ca13dd9ddb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -262,7 +262,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event,
 		break;
 	case NETDEV_CHANGEADDR:
 		edev = netdev_priv(ndev);
-		qede_roce_event_changeaddr(edev);
+		qede_rdma_event_changeaddr(edev);
 		break;
 	}
 
@@ -977,7 +977,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	qede_init_ndev(edev);
 
-	rc = qede_roce_dev_add(edev);
+	rc = qede_rdma_dev_add(edev);
 	if (rc)
 		goto err3;
 
@@ -1013,7 +1013,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	return 0;
 
 err4:
-	qede_roce_dev_remove(edev);
+	qede_rdma_dev_remove(edev);
 err3:
 	free_netdev(edev->ndev);
 err2:
@@ -1064,7 +1064,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
 	qede_ptp_disable(edev);
 
-	qede_roce_dev_remove(edev);
+	qede_rdma_dev_remove(edev);
 
 	edev->ops->common->set_power_state(cdev, PCI_D0);
 
@@ -1964,7 +1964,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	edev->state = QEDE_STATE_CLOSED;
 
-	qede_roce_dev_event_close(edev);
+	qede_rdma_dev_event_close(edev);
 
 	/* Close OS Tx */
 	netif_tx_disable(edev->ndev);
@@ -2069,7 +2069,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 	link_params.link_up = true;
 	edev->ops->common->set_link(edev->cdev, &link_params);
 
-	qede_roce_dev_event_open(edev);
+	qede_rdma_dev_event_open(edev);
 
 	edev->state = QEDE_STATE_OPEN;
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 9837ee20cbae..50b142fad6b8 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -40,12 +40,12 @@ static struct qedr_driver *qedr_drv;
 static LIST_HEAD(qedr_dev_list);
 static DEFINE_MUTEX(qedr_dev_list_lock);
 
-bool qede_roce_supported(struct qede_dev *dev)
+bool qede_rdma_supported(struct qede_dev *dev)
 {
 	return dev->dev_info.common.rdma_supported;
 }
 
-static void _qede_roce_dev_add(struct qede_dev *edev)
+static void _qede_rdma_dev_add(struct qede_dev *edev)
 {
 	if (!qedr_drv)
 		return;
@@ -54,11 +54,11 @@ static void _qede_roce_dev_add(struct qede_dev *edev)
 						 edev->ndev);
 }
 
-static int qede_roce_create_wq(struct qede_dev *edev)
+static int qede_rdma_create_wq(struct qede_dev *edev)
 {
-	INIT_LIST_HEAD(&edev->rdma_info.roce_event_list);
-	edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq");
-	if (!edev->rdma_info.roce_wq) {
+	INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list);
+	edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq");
+	if (!edev->rdma_info.rdma_wq) {
 		DP_NOTICE(edev, "qedr: Could not create workqueue\n");
 		return -ENOMEM;
 	}
@@ -66,14 +66,14 @@ static int qede_roce_create_wq(struct qede_dev *edev)
 	return 0;
 }
 
-static void qede_roce_cleanup_event(struct qede_dev *edev)
+static void qede_rdma_cleanup_event(struct qede_dev *edev)
 {
-	struct list_head *head = &edev->rdma_info.roce_event_list;
-	struct qede_roce_event_work *event_node;
+	struct list_head *head = &edev->rdma_info.rdma_event_list;
+	struct qede_rdma_event_work *event_node;
 
-	flush_workqueue(edev->rdma_info.roce_wq);
+	flush_workqueue(edev->rdma_info.rdma_wq);
 	while (!list_empty(head)) {
-		event_node = list_entry(head->next, struct qede_roce_event_work,
+		event_node = list_entry(head->next, struct qede_rdma_event_work,
 					list);
 		cancel_work_sync(&event_node->work);
 		list_del(&event_node->list);
@@ -81,85 +81,85 @@ static void qede_roce_cleanup_event(struct qede_dev *edev)
 	}
 }
 
-static void qede_roce_destroy_wq(struct qede_dev *edev)
+static void qede_rdma_destroy_wq(struct qede_dev *edev)
 {
-	qede_roce_cleanup_event(edev);
-	destroy_workqueue(edev->rdma_info.roce_wq);
+	qede_rdma_cleanup_event(edev);
+	destroy_workqueue(edev->rdma_info.rdma_wq);
 }
 
-int qede_roce_dev_add(struct qede_dev *edev)
+int qede_rdma_dev_add(struct qede_dev *edev)
 {
 	int rc = 0;
 
-	if (qede_roce_supported(edev)) {
-		rc = qede_roce_create_wq(edev);
+	if (qede_rdma_supported(edev)) {
+		rc = qede_rdma_create_wq(edev);
 		if (rc)
 			return rc;
 
 		INIT_LIST_HEAD(&edev->rdma_info.entry);
 		mutex_lock(&qedr_dev_list_lock);
 		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-		_qede_roce_dev_add(edev);
+		_qede_rdma_dev_add(edev);
 		mutex_unlock(&qedr_dev_list_lock);
 	}
 
 	return rc;
 }
 
-static void _qede_roce_dev_remove(struct qede_dev *edev)
+static void _qede_rdma_dev_remove(struct qede_dev *edev)
 {
 	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
 		qedr_drv->remove(edev->rdma_info.qedr_dev);
 	edev->rdma_info.qedr_dev = NULL;
 }
 
-void qede_roce_dev_remove(struct qede_dev *edev)
+void qede_rdma_dev_remove(struct qede_dev *edev)
 {
-	if (!qede_roce_supported(edev))
+	if (!qede_rdma_supported(edev))
 		return;
 
-	qede_roce_destroy_wq(edev);
+	qede_rdma_destroy_wq(edev);
 	mutex_lock(&qedr_dev_list_lock);
-	_qede_roce_dev_remove(edev);
+	_qede_rdma_dev_remove(edev);
 	list_del(&edev->rdma_info.entry);
 	mutex_unlock(&qedr_dev_list_lock);
 }
 
-static void _qede_roce_dev_open(struct qede_dev *edev)
+static void _qede_rdma_dev_open(struct qede_dev *edev)
 {
 	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
 		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP);
 }
 
-static void qede_roce_dev_open(struct qede_dev *edev)
+static void qede_rdma_dev_open(struct qede_dev *edev)
 {
-	if (!qede_roce_supported(edev))
+	if (!qede_rdma_supported(edev))
 		return;
 
 	mutex_lock(&qedr_dev_list_lock);
-	_qede_roce_dev_open(edev);
+	_qede_rdma_dev_open(edev);
 	mutex_unlock(&qedr_dev_list_lock);
 }
 
-static void _qede_roce_dev_close(struct qede_dev *edev)
+static void _qede_rdma_dev_close(struct qede_dev *edev)
 {
 	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
 		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN);
 }
 
-static void qede_roce_dev_close(struct qede_dev *edev)
+static void qede_rdma_dev_close(struct qede_dev *edev)
 {
-	if (!qede_roce_supported(edev))
+	if (!qede_rdma_supported(edev))
 		return;
 
 	mutex_lock(&qedr_dev_list_lock);
-	_qede_roce_dev_close(edev);
+	_qede_rdma_dev_close(edev);
 	mutex_unlock(&qedr_dev_list_lock);
 }
 
-static void qede_roce_dev_shutdown(struct qede_dev *edev)
+static void qede_rdma_dev_shutdown(struct qede_dev *edev)
 {
-	if (!qede_roce_supported(edev))
+	if (!qede_rdma_supported(edev))
 		return;
 
 	mutex_lock(&qedr_dev_list_lock);
@@ -168,7 +168,7 @@ static void qede_roce_dev_shutdown(struct qede_dev *edev)
 	mutex_unlock(&qedr_dev_list_lock);
 }
 
-int qede_roce_register_driver(struct qedr_driver *drv)
+int qede_rdma_register_driver(struct qedr_driver *drv)
 {
 	struct qede_dev *edev;
 	u8 qedr_counter = 0;
@@ -184,52 +184,52 @@ int qede_roce_register_driver(struct qedr_driver *drv)
 		struct net_device *ndev;
 
 		qedr_counter++;
-		_qede_roce_dev_add(edev);
+		_qede_rdma_dev_add(edev);
 		ndev = edev->ndev;
 		if (netif_running(ndev) && netif_oper_up(ndev))
-			_qede_roce_dev_open(edev);
+			_qede_rdma_dev_open(edev);
 	}
 	mutex_unlock(&qedr_dev_list_lock);
 
-	pr_notice("qedr: discovered and registered %d RoCE funcs\n",
+	pr_notice("qedr: discovered and registered %d RDMA funcs\n",
 		  qedr_counter);
 
 	return 0;
 }
-EXPORT_SYMBOL(qede_roce_register_driver);
+EXPORT_SYMBOL(qede_rdma_register_driver);
 
-void qede_roce_unregister_driver(struct qedr_driver *drv)
+void qede_rdma_unregister_driver(struct qedr_driver *drv)
 {
 	struct qede_dev *edev;
 
 	mutex_lock(&qedr_dev_list_lock);
 	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
 		if (edev->rdma_info.qedr_dev)
-			_qede_roce_dev_remove(edev);
+			_qede_rdma_dev_remove(edev);
 	}
 	qedr_drv = NULL;
 	mutex_unlock(&qedr_dev_list_lock);
 }
-EXPORT_SYMBOL(qede_roce_unregister_driver);
+EXPORT_SYMBOL(qede_rdma_unregister_driver);
 
-static void qede_roce_changeaddr(struct qede_dev *edev)
+static void qede_rdma_changeaddr(struct qede_dev *edev)
 {
-	if (!qede_roce_supported(edev))
+	if (!qede_rdma_supported(edev))
 		return;
 
 	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
 		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
 }
 
-static struct qede_roce_event_work *
-qede_roce_get_free_event_node(struct qede_dev *edev)
+static struct qede_rdma_event_work *
+qede_rdma_get_free_event_node(struct qede_dev *edev)
 {
-	struct qede_roce_event_work *event_node = NULL;
+	struct qede_rdma_event_work *event_node = NULL;
 	struct list_head *list_node = NULL;
 	bool found = false;
 
-	list_for_each(list_node, &edev->rdma_info.roce_event_list) {
-		event_node = list_entry(list_node, struct qede_roce_event_work,
+	list_for_each(list_node, &edev->rdma_info.rdma_event_list) {
+		event_node = list_entry(list_node, struct qede_rdma_event_work,
 					list);
 		if (!work_pending(&event_node->work)) {
 			found = true;
@@ -241,74 +241,74 @@ qede_roce_get_free_event_node(struct qede_dev *edev)
 		event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
 		if (!event_node) {
 			DP_NOTICE(edev,
-				  "qedr: Could not allocate memory for roce work\n");
+				  "qedr: Could not allocate memory for rdma work\n");
 			return NULL;
 		}
 		list_add_tail(&event_node->list,
-			      &edev->rdma_info.roce_event_list);
+			      &edev->rdma_info.rdma_event_list);
 	}
 
 	return event_node;
 }
 
-static void qede_roce_handle_event(struct work_struct *work)
+static void qede_rdma_handle_event(struct work_struct *work)
 {
-	struct qede_roce_event_work *event_node;
-	enum qede_roce_event event;
+	struct qede_rdma_event_work *event_node;
+	enum qede_rdma_event event;
 	struct qede_dev *edev;
 
-	event_node = container_of(work, struct qede_roce_event_work, work);
+	event_node = container_of(work, struct qede_rdma_event_work, work);
 	event = event_node->event;
 	edev = event_node->ptr;
 
 	switch (event) {
 	case QEDE_UP:
-		qede_roce_dev_open(edev);
+		qede_rdma_dev_open(edev);
 		break;
 	case QEDE_DOWN:
-		qede_roce_dev_close(edev);
+		qede_rdma_dev_close(edev);
 		break;
 	case QEDE_CLOSE:
-		qede_roce_dev_shutdown(edev);
+		qede_rdma_dev_shutdown(edev);
 		break;
 	case QEDE_CHANGE_ADDR:
-		qede_roce_changeaddr(edev);
+		qede_rdma_changeaddr(edev);
 		break;
 	default:
-		DP_NOTICE(edev, "Invalid roce event %d", event);
+		DP_NOTICE(edev, "Invalid rdma event %d", event);
 	}
 }
 
-static void qede_roce_add_event(struct qede_dev *edev,
-				enum qede_roce_event event)
+static void qede_rdma_add_event(struct qede_dev *edev,
+				enum qede_rdma_event event)
 {
-	struct qede_roce_event_work *event_node;
+	struct qede_rdma_event_work *event_node;
 
 	if (!edev->rdma_info.qedr_dev)
 		return;
 
-	event_node = qede_roce_get_free_event_node(edev);
+	event_node = qede_rdma_get_free_event_node(edev);
 	if (!event_node)
 		return;
 
 	event_node->event = event;
 	event_node->ptr = edev;
 
-	INIT_WORK(&event_node->work, qede_roce_handle_event);
-	queue_work(edev->rdma_info.roce_wq, &event_node->work);
+	INIT_WORK(&event_node->work, qede_rdma_handle_event);
+	queue_work(edev->rdma_info.rdma_wq, &event_node->work);
 }
 
-void qede_roce_dev_event_open(struct qede_dev *edev)
+void qede_rdma_dev_event_open(struct qede_dev *edev)
 {
-	qede_roce_add_event(edev, QEDE_UP);
+	qede_rdma_add_event(edev, QEDE_UP);
 }
 
-void qede_roce_dev_event_close(struct qede_dev *edev)
+void qede_rdma_dev_event_close(struct qede_dev *edev)
 {
-	qede_roce_add_event(edev, QEDE_DOWN);
+	qede_rdma_add_event(edev, QEDE_DOWN);
 }
 
-void qede_roce_event_changeaddr(struct qede_dev *edev)
+void qede_rdma_event_changeaddr(struct qede_dev *edev)
 {
-	qede_roce_add_event(edev, QEDE_CHANGE_ADDR);
+	qede_rdma_add_event(edev, QEDE_CHANGE_ADDR);
 }
diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
index a1a9b81f7612..1348a16e5e4b 100644
--- a/include/linux/qed/qede_rdma.h
+++ b/include/linux/qed/qede_rdma.h
@@ -41,18 +41,18 @@ struct qedr_dev;
 struct qed_dev;
 struct qede_dev;
 
-enum qede_roce_event {
+enum qede_rdma_event {
 	QEDE_UP,
 	QEDE_DOWN,
 	QEDE_CHANGE_ADDR,
 	QEDE_CLOSE
 };
 
-struct qede_roce_event_work {
+struct qede_rdma_event_work {
 	struct list_head list;
 	struct work_struct work;
 	void *ptr;
-	enum qede_roce_event event;
+	enum qede_rdma_event event;
 };
 
 struct qedr_driver {
@@ -62,32 +62,33 @@ struct qedr_driver {
 				struct net_device *);
 
 	void (*remove)(struct qedr_dev *);
-	void (*notify)(struct qedr_dev *, enum qede_roce_event);
+	void (*notify)(struct qedr_dev *, enum qede_rdma_event);
 };
 
-/* APIs for RoCE driver to register callback handlers,
+/* APIs for RDMA driver to register callback handlers,
  * which will be invoked when device is added, removed, ifup, ifdown
  */
-int qede_roce_register_driver(struct qedr_driver *drv);
-void qede_roce_unregister_driver(struct qedr_driver *drv);
+int qede_rdma_register_driver(struct qedr_driver *drv);
+void qede_rdma_unregister_driver(struct qedr_driver *drv);
 
-bool qede_roce_supported(struct qede_dev *dev);
+bool qede_rdma_supported(struct qede_dev *dev);
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
-int qede_roce_dev_add(struct qede_dev *dev);
-void qede_roce_dev_event_open(struct qede_dev *dev);
-void qede_roce_dev_event_close(struct qede_dev *dev);
-void qede_roce_dev_remove(struct qede_dev *dev);
-void qede_roce_event_changeaddr(struct qede_dev *qedr);
+int qede_rdma_dev_add(struct qede_dev *dev);
+void qede_rdma_dev_event_open(struct qede_dev *dev);
+void qede_rdma_dev_event_close(struct qede_dev *dev);
+void qede_rdma_dev_remove(struct qede_dev *dev);
+void qede_rdma_event_changeaddr(struct qede_dev *edr);
+
 #else
-static inline int qede_roce_dev_add(struct qede_dev *dev)
+static inline int qede_rdma_dev_add(struct qede_dev *dev);
 {
 	return 0;
 }
 
-static inline void qede_roce_dev_event_open(struct qede_dev *dev) {}
-static inline void qede_roce_dev_event_close(struct qede_dev *dev) {}
-static inline void qede_roce_dev_remove(struct qede_dev *dev) {}
-static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {}
+static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
+static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
+static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
+static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
 #endif
 #endif
-- 
cgit v1.2.3


From de77b966ce8adcb4c58d50e2f087320d5479812a Mon Sep 17 00:00:00 2001
From: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Date: Sun, 18 Jun 2017 22:48:17 +0800
Subject: net: introduce __skb_put_[zero, data, u8]

follow Johannes Berg, semantic patch file as below,
@@
identifier p, p2;
expression len;
expression skb;
type t, t2;
@@
(
-p = __skb_put(skb, len);
+p = __skb_put_zero(skb, len);
|
-p = (t)__skb_put(skb, len);
+p = __skb_put_zero(skb, len);
)
... when != p
(
p2 = (t2)p;
-memset(p2, 0, len);
|
-memset(p, 0, len);
)

@@
identifier p;
expression len;
expression skb;
type t;
@@
(
-t p = __skb_put(skb, len);
+t p = __skb_put_zero(skb, len);
)
... when != p
(
-memset(p, 0, len);
)

@@
type t, t2;
identifier p, p2;
expression skb;
@@
t *p;
...
(
-p = __skb_put(skb, sizeof(t));
+p = __skb_put_zero(skb, sizeof(t));
|
-p = (t *)__skb_put(skb, sizeof(t));
+p = __skb_put_zero(skb, sizeof(t));
)
... when != p
(
p2 = (t2)p;
-memset(p2, 0, sizeof(*p));
|
-memset(p, 0, sizeof(*p));
)

@@
expression skb, len;
@@
-memset(__skb_put(skb, len), 0, len);
+__skb_put_zero(skb, len);

@@
expression skb, len, data;
@@
-memcpy(__skb_put(skb, len), data, len);
+__skb_put_data(skb, data, len);

@@
expression SKB, C, S;
typedef u8;
identifier fn = {__skb_put};
fresh identifier fn2 = fn ## "_u8";
@@
- *(u8 *)fn(SKB, S) = C;
+ fn2(SKB, C);

Signed-off-by: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c                | 15 +++++----------
 drivers/infiniband/hw/cxgb4/cm.c                  |  6 ++----
 drivers/infiniband/hw/cxgb4/cq.c                  |  6 ++----
 drivers/infiniband/hw/cxgb4/mem.c                 |  6 ++----
 drivers/infiniband/hw/cxgb4/qp.c                  |  3 +--
 drivers/isdn/gigaset/asyncdata.c                  |  4 ++--
 drivers/isdn/gigaset/isocdata.c                   |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c   | 12 ++++--------
 drivers/net/ethernet/chelsio/cxgb3/sge.c          |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c |  3 +--
 drivers/net/usb/int51x1.c                         |  2 +-
 drivers/staging/octeon/ethernet-tx.c              |  3 +--
 drivers/target/iscsi/cxgbit/cxgbit_cm.c           | 12 ++++--------
 include/linux/skbuff.h                            | 22 ++++++++++++++++++++++
 lib/test_bpf.c                                    |  2 +-
 net/802/garp.c                                    |  2 +-
 net/bluetooth/bnep/core.c                         | 15 ++++++---------
 net/bluetooth/bnep/netdev.c                       | 12 ++++++------
 net/bridge/br_stp_bpdu.c                          |  2 +-
 19 files changed, 64 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 92185ab6797d..b75b8beed68f 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -604,8 +604,7 @@ static struct sk_buff
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
-	chcr_req = __skb_put(skb, transhdr_len);
-	memset(chcr_req, 0, transhdr_len);
+	chcr_req = __skb_put_zero(skb, transhdr_len);
 	chcr_req->sec_cpl.op_ivinsrtofst =
 		FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1);
 
@@ -881,8 +880,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 		return skb;
 
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
-	chcr_req = __skb_put(skb, transhdr_len);
-	memset(chcr_req, 0, transhdr_len);
+	chcr_req = __skb_put_zero(skb, transhdr_len);
 
 	chcr_req->sec_cpl.op_ivinsrtofst =
 		FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 0);
@@ -1447,8 +1445,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
 	/* Write WR */
-	chcr_req = __skb_put(skb, transhdr_len);
-	memset(chcr_req, 0, transhdr_len);
+	chcr_req = __skb_put_zero(skb, transhdr_len);
 
 	stop_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
 
@@ -1779,8 +1776,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
-	chcr_req = __skb_put(skb, transhdr_len);
-	memset(chcr_req, 0, transhdr_len);
+	chcr_req = __skb_put_zero(skb, transhdr_len);
 
 	fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, op_type, ctx);
 
@@ -1892,8 +1888,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	/* NIC driver is going to write the sge hdr. */
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
-	chcr_req = __skb_put(skb, transhdr_len);
-	memset(chcr_req, 0, transhdr_len);
+	chcr_req = __skb_put_zero(skb, transhdr_len);
 
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
 		req->assoclen -= 8;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 76fb39415e18..e49b34c3b136 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1900,8 +1900,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
 	int win;
 
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-	req = __skb_put(skb, sizeof(*req));
-	memset(req, 0, sizeof(*req));
+	req = __skb_put_zero(skb, sizeof(*req));
 	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
 	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
 	req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
@@ -3803,8 +3802,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
 	req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
 	if (!req_skb)
 		return;
-	req = __skb_put(req_skb, sizeof(*req));
-	memset(req, 0, sizeof(*req));
+	req = __skb_put_zero(req_skb, sizeof(*req));
 	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
 	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
 	req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 394cfe2625fe..e16fcaf6b5a3 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -44,8 +44,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	wr_len = sizeof *res_wr + sizeof *res;
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	res_wr = __skb_put(skb, wr_len);
-	memset(res_wr, 0, wr_len);
+	res_wr = __skb_put_zero(skb, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
 			FW_RI_RES_WR_NRES_V(1) |
@@ -114,8 +113,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	res_wr = __skb_put(skb, wr_len);
-	memset(res_wr, 0, wr_len);
+	res_wr = __skb_put_zero(skb, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
 			FW_RI_RES_WR_NRES_V(1) |
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index ca992e4b66e4..5332f06b99ba 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -81,8 +81,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	req = __skb_put(skb, wr_len);
-	memset(req, 0, wr_len);
+	req = __skb_put_zero(skb, wr_len);
 	INIT_ULPTX_WR(req, wr_len, 0, 0);
 	req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
 			(wait ? FW_WR_COMPL_F : 0));
@@ -142,8 +141,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
 		}
 		set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-		req = __skb_put(skb, wr_len);
-		memset(req, 0, wr_len);
+		req = __skb_put_zero(skb, wr_len);
 		INIT_ULPTX_WR(req, wr_len, 0, 0);
 
 		if (i == (num_wqe-1)) {
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index b23a0b057347..bfc77596acbe 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -293,8 +293,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
-	res_wr = __skb_put(skb, wr_len);
-	memset(res_wr, 0, wr_len);
+	res_wr = __skb_put_zero(skb, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
 			FW_RI_RES_WR_NRES_V(2) |
diff --git a/drivers/isdn/gigaset/asyncdata.c b/drivers/isdn/gigaset/asyncdata.c
index 4caecdcc6f29..bc208557f783 100644
--- a/drivers/isdn/gigaset/asyncdata.c
+++ b/drivers/isdn/gigaset/asyncdata.c
@@ -264,7 +264,7 @@ byte_stuff:
 				/* skip remainder of packet */
 				bcs->rx_skb = skb = NULL;
 			} else {
-				*(u8 *)__skb_put(skb, 1) = c;
+				__skb_put_u8(skb, c);
 				fcs = crc_ccitt_byte(fcs, c);
 			}
 		}
@@ -315,7 +315,7 @@ static unsigned iraw_loop(unsigned numbytes, struct inbuf_t *inbuf)
 
 		/* regular data byte: append to current skb */
 		inputstate |= INS_have_data;
-		*(u8 *)__skb_put(skb, 1) = bitrev8(c);
+		__skb_put_u8(skb, bitrev8(c));
 	}
 
 	/* pass data up */
diff --git a/drivers/isdn/gigaset/isocdata.c b/drivers/isdn/gigaset/isocdata.c
index 74e250664ce9..97e00118ccfe 100644
--- a/drivers/isdn/gigaset/isocdata.c
+++ b/drivers/isdn/gigaset/isocdata.c
@@ -511,7 +511,7 @@ static inline void hdlc_putbyte(unsigned char c, struct bc_state *bcs)
 		bcs->rx_skb = NULL;
 		return;
 	}
-	*(u8 *)__skb_put(bcs->rx_skb, 1) = c;
+	__skb_put_u8(bcs->rx_skb, c);
 }
 
 /* hdlc_flush
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index e1a50c87c9a9..0bc6a4ffce30 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -471,8 +471,7 @@ static int init_tp_parity(struct adapter *adap)
 		if (!skb)
 			goto alloc_skb_fail;
 
-		req = __skb_put(skb, sizeof(*req));
-		memset(req, 0, sizeof(*req));
+		req = __skb_put_zero(skb, sizeof(*req));
 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
 		req->mtu_idx = NMTUS - 1;
@@ -495,8 +494,7 @@ static int init_tp_parity(struct adapter *adap)
 		if (!skb)
 			goto alloc_skb_fail;
 
-		req = __skb_put(skb, sizeof(*req));
-		memset(req, 0, sizeof(*req));
+		req = __skb_put_zero(skb, sizeof(*req));
 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
 		req->params = htonl(V_L2T_W_IDX(i));
@@ -518,8 +516,7 @@ static int init_tp_parity(struct adapter *adap)
 		if (!skb)
 			goto alloc_skb_fail;
 
-		req = __skb_put(skb, sizeof(*req));
-		memset(req, 0, sizeof(*req));
+		req = __skb_put_zero(skb, sizeof(*req));
 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
@@ -538,8 +535,7 @@ static int init_tp_parity(struct adapter *adap)
 	if (!skb)
 		goto alloc_skb_fail;
 
-	greq = __skb_put(skb, sizeof(*greq));
-	memset(greq, 0, sizeof(*greq));
+	greq = __skb_put_zero(skb, sizeof(*greq));
 	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
 	greq->mask = cpu_to_be64(1);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 1b9d154f1149..e2d342647b19 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -2282,7 +2282,7 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
 			if (!skb)
 				goto no_mem;
 
-			memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
+			__skb_put_data(skb, r, AN_PKT_SIZE);
 			skb->data[0] = CPL_ASYNC_NOTIF;
 			rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
 			q->async_notif++;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index a0fab65e80e8..45b5853ca2f1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -231,8 +231,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
 		}
 	}
 
-	fwr = __skb_put(skb, sizeof(*fwr));
-	memset(fwr, 0, sizeof(*fwr));
+	fwr = __skb_put_zero(skb, sizeof(*fwr));
 
 	/* It would be nice to put most of the following in t4_hw.c but most
 	 * of the work is translating the cxgbtool ch_filter_specification
diff --git a/drivers/net/usb/int51x1.c b/drivers/net/usb/int51x1.c
index be63a829b8fe..ae2b2563460b 100644
--- a/drivers/net/usb/int51x1.c
+++ b/drivers/net/usb/int51x1.c
@@ -110,7 +110,7 @@ static struct sk_buff *int51x1_tx_fixup(struct usbnet *dev,
 	*len = cpu_to_le16(pack_len);
 
 	if(need_tail)
-		memset(__skb_put(skb, need_tail), 0, need_tail);
+		__skb_put_zero(skb, need_tail);
 
 	return skb;
 }
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index ff4119e8de42..31f35025d19e 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -251,8 +251,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 
 				if ((skb_tail_pointer(skb) + add_bytes) <=
 				    skb_end_pointer(skb))
-					memset(__skb_put(skb, add_bytes), 0,
-					       add_bytes);
+					__skb_put_zero(skb, add_bytes);
 			}
 		}
 	}
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
index 15cd1e33b16b..e583dd8a418b 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
@@ -1085,8 +1085,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req)
 		return;
 	}
 
-	rpl5 = __skb_put(skb, len);
-	memset(rpl5, 0, len);
+	rpl5 = __skb_put_zero(skb, len);
 
 	INIT_TP_WR(rpl5, csk->tid);
 	OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
@@ -1367,8 +1366,7 @@ u32 cxgbit_send_tx_flowc_wr(struct cxgbit_sock *csk)
 	flowclen16 = cxgbit_tx_flowc_wr_credits(csk, &nparams, &flowclen);
 
 	skb = __skb_dequeue(&csk->skbq);
-	flowc = __skb_put(skb, flowclen);
-	memset(flowc, 0, flowclen);
+	flowc = __skb_put_zero(skb, flowclen);
 
 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 					   FW_FLOWC_WR_NPARAMS_V(nparams));
@@ -1439,8 +1437,7 @@ int cxgbit_setup_conn_digest(struct cxgbit_sock *csk)
 		return -ENOMEM;
 
 	/*  set up ulp submode */
-	req = __skb_put(skb, len);
-	memset(req, 0, len);
+	req = __skb_put_zero(skb, len);
 
 	INIT_TP_WR(req, csk->tid);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid));
@@ -1476,8 +1473,7 @@ int cxgbit_setup_conn_pgidx(struct cxgbit_sock *csk, u32 pg_idx)
 	if (!skb)
 		return -ENOMEM;
 
-	req = __skb_put(skb, len);
-	memset(req, 0, len);
+	req = __skb_put_zero(skb, len);
 
 	INIT_TP_WR(req, csk->tid);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid));
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 852feacf4bbf..a17e235639ae 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1904,6 +1904,28 @@ static inline void *__skb_put(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
+static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len)
+{
+	void *tmp = __skb_put(skb, len);
+
+	memset(tmp, 0, len);
+	return tmp;
+}
+
+static inline void *__skb_put_data(struct sk_buff *skb, const void *data,
+				   unsigned int len)
+{
+	void *tmp = __skb_put(skb, len);
+
+	memcpy(tmp, data, len);
+	return tmp;
+}
+
+static inline void __skb_put_u8(struct sk_buff *skb, u8 val)
+{
+	*(u8 *)__skb_put(skb, 1) = val;
+}
+
 static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len)
 {
 	void *tmp = skb_put(skb, len);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index c871e0e76c2a..d9d5a410955c 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5717,7 +5717,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
 	if (!skb)
 		return NULL;
 
-	memcpy(__skb_put(skb, size), buf, size);
+	__skb_put_data(skb, buf, size);
 
 	/* Initialize a fake skb with test pattern. */
 	skb_reset_mac_header(skb);
diff --git a/net/802/garp.c b/net/802/garp.c
index a9a266569293..2dac647ff420 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -232,7 +232,7 @@ static int garp_pdu_append_end_mark(struct garp_applicant *app)
 {
 	if (skb_tailroom(app->pdu) < sizeof(u8))
 		return -1;
-	*(u8 *)__skb_put(app->pdu, sizeof(u8)) = GARP_END_MARK;
+	__skb_put_u8(app->pdu, GARP_END_MARK);
 	return 0;
 }
 
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index fbf251fef70f..9a40013da915 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -374,25 +374,22 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 	/* Decompress header and construct ether frame */
 	switch (type & BNEP_TYPE_MASK) {
 	case BNEP_COMPRESSED:
-		memcpy(__skb_put(nskb, ETH_HLEN), &s->eh, ETH_HLEN);
+		__skb_put_data(nskb, &s->eh, ETH_HLEN);
 		break;
 
 	case BNEP_COMPRESSED_SRC_ONLY:
-		memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
+		__skb_put_data(nskb, s->eh.h_dest, ETH_ALEN);
+		__skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 
 	case BNEP_COMPRESSED_DST_ONLY:
-		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
-								ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
-								ETH_ALEN + 2);
+		__skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN);
+		__skb_put_data(nskb, s->eh.h_source, ETH_ALEN + 2);
 		break;
 
 	case BNEP_GENERAL:
-		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
-								ETH_ALEN * 2);
+		__skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN * 2);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 	}
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 2b875edf77e1..1d4d7d415730 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -75,16 +75,16 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		u8 start[ETH_ALEN] = { 0x01 };
 
 		/* Request all addresses */
-		memcpy(__skb_put(skb, ETH_ALEN), start, ETH_ALEN);
-		memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+		__skb_put_data(skb, start, ETH_ALEN);
+		__skb_put_data(skb, dev->broadcast, ETH_ALEN);
 		r->len = htons(ETH_ALEN * 2);
 	} else {
 		struct netdev_hw_addr *ha;
 		int i, len = skb->len;
 
 		if (dev->flags & IFF_BROADCAST) {
-			memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
-			memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+			__skb_put_data(skb, dev->broadcast, ETH_ALEN);
+			__skb_put_data(skb, dev->broadcast, ETH_ALEN);
 		}
 
 		/* FIXME: We should group addresses here. */
@@ -93,8 +93,8 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		netdev_for_each_mc_addr(ha, dev) {
 			if (i == BNEP_MAX_MULTICAST_FILTERS)
 				break;
-			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
-			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+			__skb_put_data(skb, ha->addr, ETH_ALEN);
+			__skb_put_data(skb, ha->addr, ETH_ALEN);
 
 			i++;
 		}
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 5881fbc114a9..1b75d6bf12bd 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -50,7 +50,7 @@ static void br_send_bpdu(struct net_bridge_port *p,
 	skb->priority = TC_PRIO_CONTROL;
 
 	skb_reserve(skb, LLC_RESERVE);
-	memcpy(__skb_put(skb, length), data, length);
+	__skb_put_data(skb, data, length);
 
 	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN,
 			    LLC_SAP_BSPAN, LLC_PDU_CMD);
-- 
cgit v1.2.3


From 3f1d472055bbe914c9e54715fdbf2272851e23ff Mon Sep 17 00:00:00 2001
From: Mariusz Skamra <mariuszx.skamra@intel.com>
Date: Fri, 26 May 2017 15:00:47 +0200
Subject: ktime: Simplify ktime_compare implementation

ktime_sub can be used here instread of two conditional checks.

Signed-off-by: Mariusz Skamra <mariuszx.skamra@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@intel.com>
Link: http://lkml.kernel.org/r/1495803647-9504-1-git-send-email-mariuszx.skamra@intel.com
---
 include/linux/ktime.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 0c8bd45c8206..04817b1ca019 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -108,11 +108,7 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
  */
 static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
 {
-	if (cmp1 < cmp2)
-		return -1;
-	if (cmp1 > cmp2)
-		return 1;
-	return 0;
+	return ktime_sub(cmp1, cmp2);
 }
 
 /**
-- 
cgit v1.2.3


From 104b4e5139fe384431ac11c3b8a6cf4a529edf4a Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 20 Jun 2017 21:01:20 +0300
Subject: percpu_counter: Rename __percpu_counter_add to
 percpu_counter_add_batch

Currently, percpu_counter_add is a wrapper around __percpu_counter_add
which is preempt safe due to explicit calls to preempt_disable.  Given
how __ prefix is used in percpu related interfaces, the naming
unfortunately creates the false sense that __percpu_counter_add is
less safe than percpu_counter_add.  In terms of context-safety,
they're equivalent.  The only difference is that the __ version takes
a batch parameter.

Make this a bit more explicit by just renaming __percpu_counter_add to
percpu_counter_add_batch.

This patch doesn't cause any functional changes.

tj: Minor updates to patch description for clarity.  Cosmetic
    indentation updates.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fb.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: linux-mm@kvack.org
Cc: "David S. Miller" <davem@davemloft.net>
---
 fs/btrfs/disk-io.c             | 12 ++++++------
 fs/btrfs/extent_io.c           |  6 +++---
 fs/btrfs/inode.c               |  8 ++++----
 fs/xfs/xfs_mount.c             |  4 ++--
 include/linux/backing-dev.h    |  2 +-
 include/linux/blk-cgroup.h     |  6 +++---
 include/linux/mman.h           |  2 +-
 include/linux/percpu_counter.h |  7 ++++---
 include/net/inet_frag.h        |  4 ++--
 lib/flex_proportions.c         |  6 +++---
 lib/percpu_counter.c           |  4 ++--
 11 files changed, 31 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 061c1d1f774f..eb5c95569300 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1255,9 +1255,9 @@ void clean_tree_block(struct btrfs_fs_info *fs_info,
 		btrfs_assert_tree_locked(buf);
 
 		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
-			__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-					     -buf->len,
-					     fs_info->dirty_metadata_batch);
+			percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+						 -buf->len,
+						 fs_info->dirty_metadata_batch);
 			/* ugh, clear_extent_buffer_dirty needs to lock the page */
 			btrfs_set_lock_blocking(buf);
 			clear_extent_buffer_dirty(buf);
@@ -4046,9 +4046,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 			buf->start, transid, fs_info->generation);
 	was_dirty = set_extent_buffer_dirty(buf);
 	if (!was_dirty)
-		__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-				     buf->len,
-				     fs_info->dirty_metadata_batch);
+		percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+					 buf->len,
+					 fs_info->dirty_metadata_batch);
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
 		btrfs_print_leaf(fs_info, buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 27fdb250b446..a3455d216a1d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3584,9 +3584,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
 		set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 		spin_unlock(&eb->refs_lock);
 		btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-		__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-				     -eb->len,
-				     fs_info->dirty_metadata_batch);
+		percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+					 -eb->len,
+					 fs_info->dirty_metadata_batch);
 		ret = 1;
 	} else {
 		spin_unlock(&eb->refs_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5e71f1ea3391..aabdcb9f234f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1682,8 +1682,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
 		if (btrfs_is_testing(fs_info))
 			return;
 
-		__percpu_counter_add(&fs_info->delalloc_bytes, len,
-				     fs_info->delalloc_batch);
+		percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
+					 fs_info->delalloc_batch);
 		spin_lock(&BTRFS_I(inode)->lock);
 		BTRFS_I(inode)->delalloc_bytes += len;
 		if (*bits & EXTENT_DEFRAG)
@@ -1749,8 +1749,8 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 					&inode->vfs_inode,
 					state->start, len);
 
-		__percpu_counter_add(&fs_info->delalloc_bytes, -len,
-				     fs_info->delalloc_batch);
+		percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
+					 fs_info->delalloc_batch);
 		spin_lock(&inode->lock);
 		inode->delalloc_bytes -= len;
 		if (do_list && inode->delalloc_bytes == 0 &&
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2eaf81859166..7147d4a8d207 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1209,7 +1209,7 @@ xfs_mod_icount(
 	struct xfs_mount	*mp,
 	int64_t			delta)
 {
-	__percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+	percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
 	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
 		ASSERT(0);
 		percpu_counter_add(&mp->m_icount, -delta);
@@ -1288,7 +1288,7 @@ xfs_mod_fdblocks(
 	else
 		batch = XFS_FDBLOCKS_BATCH;
 
-	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
+	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
 	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
 				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 557d84063934..ace73f96eb1e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -66,7 +66,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
 static inline void __add_wb_stat(struct bdi_writeback *wb,
 				 enum wb_stat_item item, s64 amount)
 {
-	__percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
+	percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
 static inline void __inc_wb_stat(struct bdi_writeback *wb,
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 01b62e7bac74..7104bea8dab1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -518,7 +518,7 @@ static inline void blkg_stat_exit(struct blkg_stat *stat)
  */
 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
 {
-	__percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
+	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
@@ -597,14 +597,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
 
-	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 
 	if (op_is_sync(op))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
 
-	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 634c4c51fe3a..c8367041fafd 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -22,7 +22,7 @@ unsigned long vm_memory_committed(void);
 
 static inline void vm_acct_memory(long pages)
 {
-	__percpu_counter_add(&vm_committed_as, pages, vm_committed_as_batch);
+	percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch);
 }
 
 static inline void vm_unacct_memory(long pages)
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 84a109449610..ec065387f443 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -39,7 +39,8 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
+void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
+			      s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
 int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
 
@@ -50,7 +51,7 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
-	__percpu_counter_add(fbc, amount, percpu_counter_batch);
+	percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
 }
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
@@ -136,7 +137,7 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 }
 
 static inline void
-__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	percpu_counter_add(fbc, amount);
 }
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 5894730ec82a..5932e6de8fc0 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -154,12 +154,12 @@ static inline int frag_mem_limit(struct netns_frags *nf)
 
 static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
 {
-	__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
+	percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
 }
 
 static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
 {
-	__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
+	percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
 }
 
 static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index a71cf1bdd4c9..2cc1f94e03a1 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -207,7 +207,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
 		if (val < (nr_cpu_ids * PROP_BATCH))
 			val = percpu_counter_sum(&pl->events);
 
-		__percpu_counter_add(&pl->events,
+		percpu_counter_add_batch(&pl->events,
 			-val + (val >> (period-pl->period)), PROP_BATCH);
 	} else
 		percpu_counter_set(&pl->events, 0);
@@ -219,7 +219,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
 void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
 {
 	fprop_reflect_period_percpu(p, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
 	percpu_counter_add(&p->events, 1);
 }
 
@@ -267,6 +267,6 @@ void __fprop_inc_percpu_max(struct fprop_global *p,
 			return;
 	} else
 		fprop_reflect_period_percpu(p, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
 	percpu_counter_add(&p->events, 1);
 }
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 9c21000df0b5..8ee7e5ec21be 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,7 +72,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
 
@@ -89,7 +89,7 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 	}
 	preempt_enable();
 }
-EXPORT_SYMBOL(__percpu_counter_add);
+EXPORT_SYMBOL(percpu_counter_add_batch);
 
 /*
  * Add up all the per-cpu counts, return the result.  This is a more accurate
-- 
cgit v1.2.3


From 80ab6af432523b33352771b1eca1cee793cc7c81 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 19 Jun 2017 09:24:40 +0200
Subject: block: remove the unused bio_to_phys macro

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 36aa641cde28..4907bea03908 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
 /*
  * will die
  */
-#define bio_to_phys(bio)	(page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
 #define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
 
 /*
-- 
cgit v1.2.3


From efbeccdb59d666b9c77d505af01097cc0a9d102b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 19 Jun 2017 09:24:41 +0200
Subject: block: stop using bio_data() in blk_write_same_mergeable

While the Write Same page currently always is in low-level it is just
as easy and safer to just compare the page and offset directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 22cfba64ce81..0deed7274a7f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -815,7 +815,8 @@ static inline bool rq_mergeable(struct request *rq)
 
 static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 {
-	if (bio_data(a) == bio_data(b))
+	if (bio_page(a) == bio_page(b) &&
+	    bio_offset(a) == bio_offset(b))
 		return true;
 
 	return false;
-- 
cgit v1.2.3


From 073196787727e454e17a96d222ea55eba2000978 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 20 Jun 2017 11:15:38 -0700
Subject: blk-mq: Reduce blk_mq_hw_ctx size

Since the srcu structure is rather large (184 bytes on an x86-64
system with kernel debugging disabled), only allocate it if needed.

Reported-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 30 ++++++++++++++++++++++--------
 include/linux/blk-mq.h |  5 +++--
 2 files changed, 25 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ca03cd4b263f..3e0cc11b1a90 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -172,7 +172,7 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
-			synchronize_srcu(&hctx->queue_rq_srcu);
+			synchronize_srcu(hctx->queue_rq_srcu);
 		else
 			rcu = true;
 	}
@@ -1094,9 +1094,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	} else {
 		might_sleep();
 
-		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
 		blk_mq_sched_dispatch_requests(hctx);
-		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
 	}
 }
 
@@ -1505,9 +1505,9 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 		might_sleep();
 
-		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
 		__blk_mq_try_issue_directly(hctx, rq, cookie, true);
-		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
 	}
 }
 
@@ -1853,7 +1853,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		cleanup_srcu_struct(&hctx->queue_rq_srcu);
+		cleanup_srcu_struct(hctx->queue_rq_srcu);
 
 	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
@@ -1926,7 +1926,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		goto free_fq;
 
 	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		init_srcu_struct(&hctx->queue_rq_srcu);
+		init_srcu_struct(hctx->queue_rq_srcu);
 
 	blk_mq_debugfs_register_hctx(q, hctx);
 
@@ -2201,6 +2201,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
+			   __alignof__(struct blk_mq_hw_ctx)) !=
+		     sizeof(struct blk_mq_hw_ctx));
+
+	if (tag_set->flags & BLK_MQ_F_BLOCKING)
+		hw_ctx_size += sizeof(struct srcu_struct);
+
+	return hw_ctx_size;
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 						struct request_queue *q)
 {
@@ -2215,7 +2229,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			continue;
 
 		node = blk_mq_hw_queue_to_node(q->mq_map, i);
-		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
 					GFP_KERNEL, node);
 		if (!hctxs[i])
 			break;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f1bd13ae8f57..3f2c22a42df6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_tags	*tags;
 	struct blk_mq_tags	*sched_tags;
 
-	struct srcu_struct	queue_rq_srcu;
-
 	unsigned long		queued;
 	unsigned long		run;
 #define BLK_MQ_MAX_DISPATCH_ORDER	7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
 	struct dentry		*debugfs_dir;
 	struct dentry		*sched_debugfs_dir;
 #endif
+
+	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
+	struct srcu_struct	queue_rq_srcu[0];
 };
 
 struct blk_mq_tag_set {
-- 
cgit v1.2.3


From cd6ce1482fd9e691bb68c660fa918c90f6b1bc25 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 20 Jun 2017 11:15:39 -0700
Subject: block: Make request operation type argument declarations consistent

Instead of declaring the second argument of blk_*_get_request()
as int and passing it to functions that expect an unsigned int,
declare that second argument as unsigned int. Also because of
consistency, rename that second argument from 'rw' into 'op'.
This patch does not change any functionality.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 13 +++++++------
 block/blk-mq.c         | 10 +++++-----
 include/linux/blk-mq.h |  6 +++---
 include/linux/blkdev.h |  3 ++-
 4 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 279e3c432d7b..21f6f1020303 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1347,8 +1347,8 @@ retry:
 	goto retry;
 }
 
-static struct request *blk_old_get_request(struct request_queue *q, int rw,
-		gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q,
+					   unsigned int op, gfp_t gfp_mask)
 {
 	struct request *rq;
 
@@ -1356,7 +1356,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 	create_io_context(gfp_mask, q->node);
 
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, rw, NULL, gfp_mask);
+	rq = get_request(q, op, NULL, gfp_mask);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
 		return rq;
@@ -1369,14 +1369,15 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 	return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+				gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw,
+		return blk_mq_alloc_request(q, op,
 			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
 				0 : BLK_MQ_REQ_NOWAIT);
 	else
-		return blk_old_get_request(q, rw, gfp_mask);
+		return blk_old_get_request(q, op, gfp_mask);
 }
 EXPORT_SYMBOL(blk_get_request);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3e0cc11b1a90..2d21fbccc3a5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -328,7 +328,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 	return rq;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		unsigned int flags)
 {
 	struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -339,7 +339,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 	if (ret)
 		return ERR_PTR(ret);
 
-	rq = blk_mq_get_request(q, NULL, rw, &alloc_data);
+	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
 	blk_mq_put_ctx(alloc_data.ctx);
 	blk_queue_exit(q);
@@ -354,8 +354,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
-		unsigned int flags, unsigned int hctx_idx)
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+		unsigned int op, unsigned int flags, unsigned int hctx_idx)
 {
 	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
@@ -390,7 +390,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 	cpu = cpumask_first(alloc_data.hctx->cpumask);
 	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
-	rq = blk_mq_get_request(q, NULL, rw, &alloc_data);
+	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
 	blk_queue_exit(q);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3f2c22a42df6..3077714250ce 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -202,10 +202,10 @@ enum {
 	BLK_MQ_REQ_INTERNAL	= (1 << 2), /* allocate internal/sched tag */
 };
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		unsigned int flags);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
-		unsigned int flags, unsigned int hctx_idx);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+		unsigned int op, unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 enum {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0deed7274a7f..e21dd893ee86 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -935,7 +935,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_get_request(struct request_queue *, unsigned int op,
+				       gfp_t gfp_mask);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-- 
cgit v1.2.3


From d280bab305431c1836423f3cd6a5ff0e35a601ef Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 20 Jun 2017 11:15:40 -0700
Subject: block: Introduce request_queue.initialize_rq_fn()

Several block drivers need to initialize the driver-private request
data after having called blk_get_request() and before .prep_rq_fn()
is called, e.g. when submitting a REQ_OP_SCSI_* request. Avoid that
that initialization code has to be repeated after every
blk_get_request() call by adding new callback functions to struct
request_queue and to struct blk_mq_ops.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 17 +++++++++++++----
 include/linux/blk-mq.h |  2 ++
 include/linux/blkdev.h |  4 ++++
 3 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 21f6f1020303..09989028616f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1372,12 +1372,21 @@ static struct request *blk_old_get_request(struct request_queue *q,
 struct request *blk_get_request(struct request_queue *q, unsigned int op,
 				gfp_t gfp_mask)
 {
-	if (q->mq_ops)
-		return blk_mq_alloc_request(q, op,
+	struct request *req;
+
+	if (q->mq_ops) {
+		req = blk_mq_alloc_request(q, op,
 			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
 				0 : BLK_MQ_REQ_NOWAIT);
-	else
-		return blk_old_get_request(q, op, gfp_mask);
+		if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+			q->mq_ops->initialize_rq_fn(req);
+	} else {
+		req = blk_old_get_request(q, op, gfp_mask);
+		if (!IS_ERR(req) && q->initialize_rq_fn)
+			q->initialize_rq_fn(req);
+	}
+
+	return req;
 }
 EXPORT_SYMBOL(blk_get_request);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3077714250ce..366b83cee955 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -144,6 +144,8 @@ struct blk_mq_ops {
 	init_request_fn		*init_request;
 	exit_request_fn		*exit_request;
 	reinit_request_fn	*reinit_request;
+	/* Called from inside blk_get_request() */
+	void (*initialize_rq_fn)(struct request *rq);
 
 	map_queues_fn		*map_queues;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e21dd893ee86..9a36164487d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -410,8 +410,12 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	/* Called just after a request is allocated */
 	init_rq_fn		*init_rq_fn;
+	/* Called just before a request is freed */
 	exit_rq_fn		*exit_rq_fn;
+	/* Called from inside blk_get_request() */
+	void (*initialize_rq_fn)(struct request *rq);
 
 	const struct blk_mq_ops	*mq_ops;
 
-- 
cgit v1.2.3


From 9e0c829906b9aa1e7ad84689f2bcd56457bdb417 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 20 Jun 2017 11:15:44 -0700
Subject: block: Add a comment above queue_lockdep_assert_held()

Add a comment above the queue_lockdep_assert_held() macro that
explains the purpose of the q->queue_lock test.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9a36164487d0..3e60e7a654bd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -635,6 +635,13 @@ struct request_queue {
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_POLL))
 
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
 	if (q->queue_lock)
-- 
cgit v1.2.3


From fc6eead7c1e2e5376c25d2795d4539fdacbc0648 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 22 May 2017 17:20:20 -0700
Subject: time: Clean up CLOCK_MONOTONIC_RAW time handling

Now that we fixed the sub-ns handling for CLOCK_MONOTONIC_RAW,
remove the duplicitive tk->raw_time.tv_nsec, which can be
stored in tk->tkr_raw.xtime_nsec (similarly to how its handled
for monotonic time).

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Daniel Mentz <danielmentz@google.com>
Tested-by: Daniel Mentz <danielmentz@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/arm64/kernel/vdso.c            |  6 ++---
 include/linux/timekeeper_internal.h |  4 ++--
 kernel/time/timekeeping.c           | 45 ++++++++++++++++++++-----------------
 3 files changed, 29 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index d0cb007fa482..7492d9009610 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -220,10 +220,8 @@ void update_vsyscall(struct timekeeper *tk)
 	if (!use_syscall) {
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
-		vdso_data->raw_time_sec		= tk->raw_time.tv_sec;
-		vdso_data->raw_time_nsec	= (tk->raw_time.tv_nsec <<
-						   tk->tkr_raw.shift) +
-						  tk->tkr_raw.xtime_nsec;
+		vdso_data->raw_time_sec         = tk->raw_sec;
+		vdso_data->raw_time_nsec        = tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
 		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
 		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index f7043ccca81c..0a0a53daf2a2 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -51,7 +51,7 @@ struct tk_read_base {
  * @clock_was_set_seq:	The sequence number of clock was set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_time:		Monotonic raw base time in timespec64 format
+ * @raw_sec:		CLOCK_MONOTONIC_RAW  time in seconds
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
  *			interval.
@@ -93,7 +93,7 @@ struct timekeeper {
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
 	ktime_t			next_leap_ktime;
-	struct timespec64	raw_time;
+	u64			raw_sec;
 
 	/* The following members are for timekeeping internal use */
 	u64			cycle_interval;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b602c48cb841..0454bfa24353 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -72,6 +72,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
 		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
 		tk->xtime_sec++;
 	}
+	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
+		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+		tk->raw_sec++;
+	}
 }
 
 static inline struct timespec64 tk_xtime(struct timekeeper *tk)
@@ -285,12 +289,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	 /* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
 		int shift_change = clock->shift - old_clock->shift;
-		if (shift_change < 0)
+		if (shift_change < 0) {
 			tk->tkr_mono.xtime_nsec >>= -shift_change;
-		else
+			tk->tkr_raw.xtime_nsec >>= -shift_change;
+		} else {
 			tk->tkr_mono.xtime_nsec <<= shift_change;
+			tk->tkr_raw.xtime_nsec <<= shift_change;
+		}
 	}
-	tk->tkr_raw.xtime_nsec = 0;
 
 	tk->tkr_mono.shift = clock->shift;
 	tk->tkr_raw.shift = clock->shift;
@@ -619,9 +625,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
 	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
 	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 
-	/* Update the monotonic raw base */
-	tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
-
 	/*
 	 * The sum of the nanoseconds portions of xtime and
 	 * wall_to_monotonic can be greater/equal one second. Take
@@ -631,6 +634,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
 	if (nsec >= NSEC_PER_SEC)
 		seconds++;
 	tk->ktime_sec = seconds;
+
+	/* Update the monotonic raw base */
+	seconds = tk->raw_sec;
+	nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
+	tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 }
 
 /* must hold timekeeper_lock */
@@ -672,7 +680,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
 	u64 cycle_now, delta;
-	u64 nsec;
 
 	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
@@ -684,10 +691,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	/* If arch requires, add in get_arch_timeoffset() */
 	tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
 
-	tk_normalize_xtime(tk);
 
-	nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
-	timespec64_add_ns(&tk->raw_time, nsec);
+	tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+
+	/* If arch requires, add in get_arch_timeoffset() */
+	tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
+
+	tk_normalize_xtime(tk);
 }
 
 /**
@@ -1373,19 +1383,18 @@ int timekeeping_notify(struct clocksource *clock)
 void getrawmonotonic64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	struct timespec64 ts64;
 	unsigned long seq;
 	u64 nsecs;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
+		ts->tv_sec = tk->raw_sec;
 		nsecs = timekeeping_get_ns(&tk->tkr_raw);
-		ts64 = tk->raw_time;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	timespec64_add_ns(&ts64, nsecs);
-	*ts = ts64;
+	ts->tv_nsec = 0;
+	timespec64_add_ns(ts, nsecs);
 }
 EXPORT_SYMBOL(getrawmonotonic64);
 
@@ -1509,8 +1518,7 @@ void __init timekeeping_init(void)
 	tk_setup_internals(tk, clock);
 
 	tk_set_xtime(tk, &now);
-	tk->raw_time.tv_sec = 0;
-	tk->raw_time.tv_nsec = 0;
+	tk->raw_sec = 0;
 	if (boot.tv_sec == 0 && boot.tv_nsec == 0)
 		boot = tk_xtime(tk);
 
@@ -2011,15 +2019,12 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
-	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
 	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
 	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
 		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
-		tk->raw_time.tv_sec++;
+		tk->raw_sec++;
 	}
-	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
-	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
-- 
cgit v1.2.3


From 707188f5f2421a304324e6ef3aaf4413cfab0f3d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Wed, 31 May 2017 18:06:56 +0200
Subject: irq/generic-chip: Provide irq_free_generic_chip()

Currently there's no way for users of irq_alloc_generic_chip() to free
the allocated memory other than calling kfree() manually on the
returned pointer. This may lead to errors if the internals of
irq_alloc_generic_chip() ever change. Provide a routine to free the
generic chip.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Link: http://lkml.kernel.org/r/1496246820-13250-2-git-send-email-brgl@bgdev.pl
---
 include/linux/irq.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 94d1ad6ffdd4..2c957fe5d9d7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -22,6 +22,7 @@
 #include <linux/topology.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 
 #include <asm/irq.h>
 #include <asm/ptrace.h>
@@ -973,6 +974,11 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
 					 handler, clr, set, flags);	\
 })
 
+static inline void irq_free_generic_chip(struct irq_chip_generic *gc)
+{
+	kfree(gc);
+}
+
 static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 {
 	return container_of(d->chip, struct irq_chip_type, chip);
-- 
cgit v1.2.3


From 32bb6cbb3b4ea5ca24e3fa13e11772c192616e04 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Wed, 31 May 2017 18:06:57 +0200
Subject: irq/generic-chip: Provide irq_destroy_generic_chip()

Most users of irq_alloc_generic_chip() call irq_setup_generic_chip()
too. To simplify the cleanup provide a function that both removes a
generic chip and frees its memory.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Link: http://lkml.kernel.org/r/1496246820-13250-3-git-send-email-brgl@bgdev.pl
---
 include/linux/irq.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2c957fe5d9d7..dc63aa10ce70 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -979,6 +979,14 @@ static inline void irq_free_generic_chip(struct irq_chip_generic *gc)
 	kfree(gc);
 }
 
+static inline void irq_destroy_generic_chip(struct irq_chip_generic *gc,
+					    u32 msk, unsigned int clr,
+					    unsigned int set)
+{
+	irq_remove_generic_chip(gc, msk, clr, set);
+	irq_free_generic_chip(gc);
+}
+
 static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 {
 	return container_of(d->chip, struct irq_chip_type, chip);
-- 
cgit v1.2.3


From 1c3e36309fe2e94b8a889fa32cb5c871434f8ed6 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Wed, 31 May 2017 18:06:59 +0200
Subject: irq/generic-chip: Provide devm_irq_alloc_generic_chip()

Provide a resource managed variant of irq_alloc_generic_chip().

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Link: http://lkml.kernel.org/r/1496246820-13250-5-git-send-email-brgl@bgdev.pl
---
 Documentation/driver-model/devres.txt |  1 +
 include/linux/irq.h                   |  5 +++++
 kernel/irq/devres.c                   | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index e72587fe477d..d473be8c8781 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -311,6 +311,7 @@ IRQ
   devm_irq_alloc_desc_at()
   devm_irq_alloc_desc_from()
   devm_irq_alloc_descs_from()
+  devm_irq_alloc_generic_chip()
 
 LED
   devm_led_classdev_register()
diff --git a/include/linux/irq.h b/include/linux/irq.h
index dc63aa10ce70..64ae54673e08 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -958,6 +958,11 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type);
 void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
 			     unsigned int clr, unsigned int set);
 
+struct irq_chip_generic *
+devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
+			    unsigned int irq_base, void __iomem *reg_base,
+			    irq_flow_handler_t handler);
+
 struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
 
 int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 1613bfd48365..21ee0aebccfb 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -4,6 +4,8 @@
 #include <linux/gfp.h>
 #include <linux/irq.h>
 
+#include "internals.h"
+
 /*
  * Device resource management aware IRQ request/free implementation.
  */
@@ -198,3 +200,35 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from,
 	return base;
 }
 EXPORT_SYMBOL_GPL(__devm_irq_alloc_descs);
+
+#ifdef CONFIG_GENERIC_IRQ_CHIP
+/**
+ * devm_irq_alloc_generic_chip - Allocate and initialize a generic chip
+ *                               for a managed device
+ * @dev:	Device to allocate the generic chip for
+ * @name:	Name of the irq chip
+ * @num_ct:	Number of irq_chip_type instances associated with this
+ * @irq_base:	Interrupt base nr for this chip
+ * @reg_base:	Register base address (virtual)
+ * @handler:	Default flow handler associated with this chip
+ *
+ * Returns an initialized irq_chip_generic structure. The chip defaults
+ * to the primary (index 0) irq_chip_type and @handler
+ */
+struct irq_chip_generic *
+devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
+			    unsigned int irq_base, void __iomem *reg_base,
+			    irq_flow_handler_t handler)
+{
+	struct irq_chip_generic *gc;
+	unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+
+	gc = devm_kzalloc(dev, sz, GFP_KERNEL);
+	if (gc)
+		irq_init_generic_chip(gc, name, num_ct,
+				      irq_base, reg_base, handler);
+
+	return gc;
+}
+EXPORT_SYMBOL_GPL(devm_irq_alloc_generic_chip);
+#endif /* CONFIG_GENERIC_IRQ_CHIP */
-- 
cgit v1.2.3


From 30fd8fc5c91973485705f83c7efe9588b8e6f371 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Wed, 31 May 2017 18:07:00 +0200
Subject: irq/generic-chip: Provide devm_irq_setup_generic_chip()

Provide a resource managed variant of irq_setup_generic_chip().

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Link: http://lkml.kernel.org/r/1496246820-13250-6-git-send-email-brgl@bgdev.pl
---
 Documentation/driver-model/devres.txt |  1 +
 include/linux/irq.h                   |  3 ++
 kernel/irq/devres.c                   | 52 +++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index d473be8c8781..6a6618f34440 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -312,6 +312,7 @@ IRQ
   devm_irq_alloc_desc_from()
   devm_irq_alloc_descs_from()
   devm_irq_alloc_generic_chip()
+  devm_irq_setup_generic_chip()
 
 LED
   devm_led_classdev_register()
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 64ae54673e08..d996314b6522 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -962,6 +962,9 @@ struct irq_chip_generic *
 devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
 			    unsigned int irq_base, void __iomem *reg_base,
 			    irq_flow_handler_t handler);
+int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
+				u32 msk, enum irq_gc_flags flags,
+				unsigned int clr, unsigned int set);
 
 struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
 
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 21ee0aebccfb..194c506d9d20 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -231,4 +231,56 @@ devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
 	return gc;
 }
 EXPORT_SYMBOL_GPL(devm_irq_alloc_generic_chip);
+
+struct irq_generic_chip_devres {
+	struct irq_chip_generic *gc;
+	u32 msk;
+	unsigned int clr;
+	unsigned int set;
+};
+
+static void devm_irq_remove_generic_chip(struct device *dev, void *res)
+{
+	struct irq_generic_chip_devres *this = res;
+
+	irq_remove_generic_chip(this->gc, this->msk, this->clr, this->set);
+}
+
+/**
+ * devm_irq_setup_generic_chip - Setup a range of interrupts with a generic
+ *                               chip for a managed device
+ *
+ * @dev:	Device to setup the generic chip for
+ * @gc:		Generic irq chip holding all data
+ * @msk:	Bitmask holding the irqs to initialize relative to gc->irq_base
+ * @flags:	Flags for initialization
+ * @clr:	IRQ_* bits to clear
+ * @set:	IRQ_* bits to set
+ *
+ * Set up max. 32 interrupts starting from gc->irq_base. Note, this
+ * initializes all interrupts to the primary irq_chip_type and its
+ * associated handler.
+ */
+int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
+				u32 msk, enum irq_gc_flags flags,
+				unsigned int clr, unsigned int set)
+{
+	struct irq_generic_chip_devres *dr;
+
+	dr = devres_alloc(devm_irq_remove_generic_chip,
+			  sizeof(*dr), GFP_KERNEL);
+	if (!dr)
+		return -ENOMEM;
+
+	irq_setup_generic_chip(gc, msk, flags, clr, set);
+
+	dr->gc = gc;
+	dr->msk = msk;
+	dr->clr = clr;
+	dr->set = set;
+	devres_add(dev, dr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip);
 #endif /* CONFIG_GENERIC_IRQ_CHIP */
-- 
cgit v1.2.3


From da2e9cf03b8fccbb69dd1e215bb1e554ce8e8cbe Mon Sep 17 00:00:00 2001
From: Chad Dupuis <chad.dupuis@cavium.com>
Date: Wed, 21 Jun 2017 08:26:34 +0300
Subject: qede: Fix compilation without QED_RDMA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_QED_RDMA isn't defined, we'd hit the following:

 /include/linux/qed/qede_rdma.h:84:19:
 warning: ‘qede_rdma_dev_add’ used but never defined [enabled by default]
 static inline int qede_rdma_dev_add(struct qede_dev *dev);

Fixes: bbfcd1e8e167 ("qed*: Set rdma generic functions prefix")
Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qede_rdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
index 1348a16e5e4b..9904617a9730 100644
--- a/include/linux/qed/qede_rdma.h
+++ b/include/linux/qed/qede_rdma.h
@@ -81,7 +81,7 @@ void qede_rdma_dev_remove(struct qede_dev *dev);
 void qede_rdma_event_changeaddr(struct qede_dev *edr);
 
 #else
-static inline int qede_rdma_dev_add(struct qede_dev *dev);
+static inline int qede_rdma_dev_add(struct qede_dev *dev)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 8e8320c9315c47a6a090188720ccff32a6a6ba18 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 20 Jun 2017 17:56:13 -0600
Subject: blk-mq: fix performance regression with shared tags

If we have shared tags enabled, then every IO completion will trigger
a full loop of every queue belonging to a tag set, and every hardware
queue for each of those queues, even if nothing needs to be done.
This causes a massive performance regression if you have a lot of
shared devices.

Instead of doing this huge full scan on every IO, add an atomic
counter to the main queue that tracks how many hardware queues have
been marked as needing a restart. With that, we can avoid looking for
restartable queues, if we don't have to.

Max reports that this restores performance. Before this patch, 4K
IOPS was limited to 22-23K IOPS. With the patch, we are running at
950-970K IOPS.

Fixes: 6d8c6c0f97ad ("blk-mq: Restart a single queue if tag sets are shared")
Reported-by: Max Gurtovoy <maxg@mellanox.com>
Tested-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Tested-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c   | 58 +++++++++++++++++++++++++++++++++++++++-----------
 block/blk-mq-sched.h   |  9 --------
 block/blk-mq.c         | 16 +++++++++++---
 include/linux/blkdev.h |  2 ++
 4 files changed, 61 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..0ded5e846335 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
 		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
 }
 
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_inc(&q->shared_hctx_restart);
+	} else
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return false;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_dec(&q->shared_hctx_restart);
+	} else
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+
+	if (blk_mq_hctx_has_pending(hctx)) {
+		blk_mq_run_hw_queue(hctx, true);
+		return true;
+	}
+
+	return false;
+}
+
 struct request *blk_mq_sched_get_request(struct request_queue *q,
 					 struct bio *bio,
 					 unsigned int op,
@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx)) {
-			blk_mq_run_hw_queue(hctx, true);
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos:    loop cursor.
@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
 	unsigned int i, j;
 
 	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		/*
+		 * If this is 0, then we know that no hardware queues
+		 * have RESTART marked. We're done.
+		 */
+		if (!atomic_read(&queue->shared_hctx_restart))
+			return;
+
 		rcu_read_lock();
 		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
 					   tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..5007edece51a 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96850b1..958cedaff8b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared)
+		if (shared) {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_inc(&q->shared_hctx_restart);
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		else
+		} else {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_dec(&q->shared_hctx_restart);
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+					bool shared)
 {
 	struct request_queue *q;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b74a3edcb3da..1ddd36bd2173 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,6 +391,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
-- 
cgit v1.2.3


From d0ba52f1d7649a3f088e410e860559cf36d479d0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 21 Jun 2017 13:39:13 -0400
Subject: ftrace: Add missing comment for FTRACE_OPS_FL_RCU

All the enum flags for FTRACE_OPS has a comment except for the RCU one. Add
the comment for that.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 473f088aabea..1b6992e994e6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -119,6 +119,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
  *            for any of the functions that this ops will be registered for, then
  *            this ops will fail to register or set_filter_ip.
  * PID     - Is affected by set_ftrace_pid (allows filtering on those pids)
+ * RCU     - Set when the ops can only be called when RCU is watching.
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
-- 
cgit v1.2.3


From 852ec80983d682dc08a0573d37eeaa9814c4f6b1 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 21 Jun 2017 10:55:47 -0700
Subject: blk-mq: Make it safe to quiesce and unquiesce from an interrupt
 handler

Since blk_mq_quiesce_queue_nowait() can be called from interrupt
context, make this safe. Since this function is not in the hot
path, uninline it.

Fixes: commit f4560ffe8cec ("blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 20 ++++++++++++++++++--
 include/linux/blk-mq.h | 10 +---------
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c4f1f4978c6..2caac30e128a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -153,6 +153,20 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/*
+ * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
+ * mpt3sas driver such that this function can be removed.
+ */
+void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
+
 /**
  * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
  * @q: request queue.
@@ -190,9 +204,11 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
  */
 void blk_mq_unquiesce_queue(struct request_queue *q)
 {
-	spin_lock_irq(q->queue_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
 	queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
-	spin_unlock_irq(q->queue_lock);
+	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	/* dispatch requests which are inserted during quiescing */
 	blk_mq_run_hw_queues(q, true);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 366b83cee955..23d32ff0b462 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -266,15 +266,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
-/*
- * FIXME: this helper is just for working around mpt3sas.
- */
-static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
-{
-	spin_lock_irq(q->queue_lock);
-	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
-	spin_unlock_irq(q->queue_lock);
-}
+void blk_mq_quiesce_queue_nowait(struct request_queue *q);
 
 /*
  * Driver command data is immediately after the request. So subtract request
-- 
cgit v1.2.3


From 7003cdd6a121e7bdb8a05eb1931f9549a36ea723 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Wed, 21 Jun 2017 16:22:46 +0300
Subject: qed*: Rename qed_roce_if.h to qed_rdma_if.h

Rename the qed_roce_if file to qed_rdma_if as it
represents a common interface for RoCE and iWARP.

this commit affects RDMA/qedr as well.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/qedr.h          |   2 +-
 drivers/infiniband/hw/qedr/qedr_cm.c       |   2 +-
 drivers/net/ethernet/qlogic/qed/qed_rdma.c |   2 +-
 drivers/net/ethernet/qlogic/qed/qed_rdma.h |   2 +-
 drivers/net/ethernet/qlogic/qed/qed_roce.c |   2 +-
 include/linux/qed/qed_rdma_if.h            | 582 +++++++++++++++++++++++++++++
 include/linux/qed/qed_roce_if.h            | 582 -----------------------------
 7 files changed, 587 insertions(+), 587 deletions(-)
 create mode 100644 include/linux/qed/qed_rdma_if.h
 delete mode 100644 include/linux/qed/qed_roce_if.h

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 2376019a48ae..15365d5a0e19 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -36,7 +36,7 @@
 #include <rdma/ib_addr.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_chain.h>
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
 #include <linux/qed/qede_rdma.h>
 #include <linux/qed/roce_common.h>
 #include "qedr_hsi_rdma.h"
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index eb3dce72fc21..4689e802b332 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -44,7 +44,7 @@
 #include <rdma/ib_cache.h>
 
 #include <linux/qed/qed_if.h>
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
 #include "qedr.h"
 #include "verbs.h"
 #include <rdma/qedr-abi.h>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index cb620e008444..df76e212f86e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -53,7 +53,7 @@
 #include "qed_ll2.h"
 #include "qed_mcp.h"
 #include "qed_reg_addr.h"
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
 #include "qed_rdma.h"
 #include "qed_roce.h"
 #include "qed_sp.h"
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index 1836ff1810b4..d91e5c4069a6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/qed/qed_if.h>
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
 #include "qed.h"
 #include "qed_dev_api.h"
 #include "qed_hsi.h"
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index a8426936f837..e53adc3d009b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -53,7 +53,7 @@
 #include "qed_ll2.h"
 #include "qed_mcp.h"
 #include "qed_reg_addr.h"
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
 #include "qed_rdma.h"
 #include "qed_roce.h"
 #include "qed_sp.h"
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
new file mode 100644
index 000000000000..ff9be01b5f53
--- /dev/null
+++ b/include/linux/qed/qed_rdma_if.h
@@ -0,0 +1,582 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_RDMA_IF_H
+#define _QED_RDMA_IF_H
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qed_ll2_if.h>
+#include <linux/qed/rdma_common.h>
+
+enum qed_roce_ll2_tx_dest {
+	/* Light L2 TX Destination to the Network */
+	QED_ROCE_LL2_TX_DEST_NW,
+
+	/* Light L2 TX Destination to the Loopback */
+	QED_ROCE_LL2_TX_DEST_LB,
+	QED_ROCE_LL2_TX_DEST_MAX
+};
+
+#define QED_RDMA_MAX_CNQ_SIZE               (0xFFFF)
+
+/* rdma interface */
+
+enum qed_roce_qp_state {
+	QED_ROCE_QP_STATE_RESET,
+	QED_ROCE_QP_STATE_INIT,
+	QED_ROCE_QP_STATE_RTR,
+	QED_ROCE_QP_STATE_RTS,
+	QED_ROCE_QP_STATE_SQD,
+	QED_ROCE_QP_STATE_ERR,
+	QED_ROCE_QP_STATE_SQE
+};
+
+enum qed_rdma_tid_type {
+	QED_RDMA_TID_REGISTERED_MR,
+	QED_RDMA_TID_FMR,
+	QED_RDMA_TID_MW_TYPE1,
+	QED_RDMA_TID_MW_TYPE2A
+};
+
+struct qed_rdma_events {
+	void *context;
+	void (*affiliated_event)(void *context, u8 fw_event_code,
+				 void *fw_handle);
+	void (*unaffiliated_event)(void *context, u8 event_code);
+};
+
+struct qed_rdma_device {
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 fw_ver;
+
+	u64 node_guid;
+	u64 sys_image_guid;
+
+	u8 max_cnq;
+	u8 max_sge;
+	u8 max_srq_sge;
+	u16 max_inline;
+	u32 max_wqe;
+	u32 max_srq_wqe;
+	u8 max_qp_resp_rd_atomic_resc;
+	u8 max_qp_req_rd_atomic_resc;
+	u64 max_dev_resp_rd_atomic_resc;
+	u32 max_cq;
+	u32 max_qp;
+	u32 max_srq;
+	u32 max_mr;
+	u64 max_mr_size;
+	u32 max_cqe;
+	u32 max_mw;
+	u32 max_fmr;
+	u32 max_mr_mw_fmr_pbl;
+	u64 max_mr_mw_fmr_size;
+	u32 max_pd;
+	u32 max_ah;
+	u8 max_pkey;
+	u16 max_srq_wr;
+	u8 max_stats_queues;
+	u32 dev_caps;
+
+	/* Abilty to support RNR-NAK generation */
+
+#define QED_RDMA_DEV_CAP_RNR_NAK_MASK                           0x1
+#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT                  0
+	/* Abilty to support shutdown port */
+#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK                     0x1
+#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT                    1
+	/* Abilty to support port active event */
+#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK         0x1
+#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT                2
+	/* Abilty to support port change event */
+#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK         0x1
+#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT                3
+	/* Abilty to support system image GUID */
+#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK                 0x1
+#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT                        4
+	/* Abilty to support bad P_Key counter support */
+#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK                      0x1
+#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT                     5
+	/* Abilty to support atomic operations */
+#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK                 0x1
+#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT                        6
+#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK                 0x1
+#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT                        7
+	/* Abilty to support modifying the maximum number of
+	 * outstanding work requests per QP
+	 */
+#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK                     0x1
+#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT                    8
+	/* Abilty to support automatic path migration */
+#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK                     0x1
+#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT                    9
+	/* Abilty to support the base memory management extensions */
+#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK                   0x1
+#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT          10
+#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK                    0x1
+#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT                   11
+	/* Abilty to support multipile page sizes per memory region */
+#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK             0x1
+#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT            12
+	/* Abilty to support block list physical buffer list */
+#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK                        0x1
+#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT                       13
+	/* Abilty to support zero based virtual addresses */
+#define QED_RDMA_DEV_CAP_ZBVA_MASK                              0x1
+#define QED_RDMA_DEV_CAP_ZBVA_SHIFT                             14
+	/* Abilty to support local invalidate fencing */
+#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK                   0x1
+#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT          15
+	/* Abilty to support Loopback on QP */
+#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK                      0x1
+#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT                     16
+	u64 page_size_caps;
+	u8 dev_ack_delay;
+	u32 reserved_lkey;
+	u32 bad_pkey_counter;
+	struct qed_rdma_events events;
+};
+
+enum qed_port_state {
+	QED_RDMA_PORT_UP,
+	QED_RDMA_PORT_DOWN,
+};
+
+enum qed_roce_capability {
+	QED_ROCE_V1 = 1 << 0,
+	QED_ROCE_V2 = 1 << 1,
+};
+
+struct qed_rdma_port {
+	enum qed_port_state port_state;
+	int link_speed;
+	u64 max_msg_size;
+	u8 source_gid_table_len;
+	void *source_gid_table_ptr;
+	u8 pkey_table_len;
+	void *pkey_table_ptr;
+	u32 pkey_bad_counter;
+	enum qed_roce_capability capability;
+};
+
+struct qed_rdma_cnq_params {
+	u8 num_pbl_pages;
+	u64 pbl_ptr;
+};
+
+/* The CQ Mode affects the CQ doorbell transaction size.
+ * 64/32 bit machines should configure to 32/16 bits respectively.
+ */
+enum qed_rdma_cq_mode {
+	QED_RDMA_CQ_MODE_16_BITS,
+	QED_RDMA_CQ_MODE_32_BITS,
+};
+
+struct qed_roce_dcqcn_params {
+	u8 notification_point;
+	u8 reaction_point;
+
+	/* fields for notification point */
+	u32 cnp_send_timeout;
+
+	/* fields for reaction point */
+	u32 rl_bc_rate;
+	u16 rl_max_rate;
+	u16 rl_r_ai;
+	u16 rl_r_hai;
+	u16 dcqcn_g;
+	u32 dcqcn_k_us;
+	u32 dcqcn_timeout_us;
+};
+
+struct qed_rdma_start_in_params {
+	struct qed_rdma_events *events;
+	struct qed_rdma_cnq_params cnq_pbl_list[128];
+	u8 desired_cnq;
+	enum qed_rdma_cq_mode cq_mode;
+	struct qed_roce_dcqcn_params dcqcn_params;
+	u16 max_mtu;
+	u8 mac_addr[ETH_ALEN];
+	u8 iwarp_flags;
+};
+
+struct qed_rdma_add_user_out_params {
+	u16 dpi;
+	u64 dpi_addr;
+	u64 dpi_phys_addr;
+	u32 dpi_size;
+	u16 wid_count;
+};
+
+enum roce_mode {
+	ROCE_V1,
+	ROCE_V2_IPV4,
+	ROCE_V2_IPV6,
+	MAX_ROCE_MODE
+};
+
+union qed_gid {
+	u8 bytes[16];
+	u16 words[8];
+	u32 dwords[4];
+	u64 qwords[2];
+	u32 ipv4_addr;
+};
+
+struct qed_rdma_register_tid_in_params {
+	u32 itid;
+	enum qed_rdma_tid_type tid_type;
+	u8 key;
+	u16 pd;
+	bool local_read;
+	bool local_write;
+	bool remote_read;
+	bool remote_write;
+	bool remote_atomic;
+	bool mw_bind;
+	u64 pbl_ptr;
+	bool pbl_two_level;
+	u8 pbl_page_size_log;
+	u8 page_size_log;
+	u32 fbo;
+	u64 length;
+	u64 vaddr;
+	bool zbva;
+	bool phy_mr;
+	bool dma_mr;
+
+	bool dif_enabled;
+	u64 dif_error_addr;
+	u64 dif_runt_addr;
+};
+
+struct qed_rdma_create_cq_in_params {
+	u32 cq_handle_lo;
+	u32 cq_handle_hi;
+	u32 cq_size;
+	u16 dpi;
+	bool pbl_two_level;
+	u64 pbl_ptr;
+	u16 pbl_num_pages;
+	u8 pbl_page_size_log;
+	u8 cnq_id;
+	u16 int_timeout;
+};
+
+struct qed_rdma_create_srq_in_params {
+	u64 pbl_base_addr;
+	u64 prod_pair_addr;
+	u16 num_pages;
+	u16 pd_id;
+	u16 page_size;
+};
+
+struct qed_rdma_destroy_cq_in_params {
+	u16 icid;
+};
+
+struct qed_rdma_destroy_cq_out_params {
+	u16 num_cq_notif;
+};
+
+struct qed_rdma_create_qp_in_params {
+	u32 qp_handle_lo;
+	u32 qp_handle_hi;
+	u32 qp_handle_async_lo;
+	u32 qp_handle_async_hi;
+	bool use_srq;
+	bool signal_all;
+	bool fmr_and_reserved_lkey;
+	u16 pd;
+	u16 dpi;
+	u16 sq_cq_id;
+	u16 sq_num_pages;
+	u64 sq_pbl_ptr;
+	u8 max_sq_sges;
+	u16 rq_cq_id;
+	u16 rq_num_pages;
+	u64 rq_pbl_ptr;
+	u16 srq_id;
+	u8 stats_queue;
+};
+
+struct qed_rdma_create_qp_out_params {
+	u32 qp_id;
+	u16 icid;
+	void *rq_pbl_virt;
+	dma_addr_t rq_pbl_phys;
+	void *sq_pbl_virt;
+	dma_addr_t sq_pbl_phys;
+};
+
+struct qed_rdma_modify_qp_in_params {
+	u32 modify_flags;
+#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK               0x1
+#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT              0
+#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK                    0x1
+#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT                   1
+#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK             0x1
+#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT            2
+#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK                 0x1
+#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT                3
+#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK          0x1
+#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT         4
+#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK                  0x1
+#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT                 5
+#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK                  0x1
+#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT                 6
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK       0x1
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT      7
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK      0x1
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT     8
+#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK             0x1
+#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT            9
+#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK               0x1
+#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT              10
+#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK           0x1
+#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT          11
+#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK       0x1
+#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT      12
+#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK     0x1
+#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT    13
+#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK               0x1
+#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT              14
+
+	enum qed_roce_qp_state new_state;
+	u16 pkey;
+	bool incoming_rdma_read_en;
+	bool incoming_rdma_write_en;
+	bool incoming_atomic_en;
+	bool e2e_flow_control_en;
+	u32 dest_qp;
+	bool lb_indication;
+	u16 mtu;
+	u8 traffic_class_tos;
+	u8 hop_limit_ttl;
+	u32 flow_label;
+	union qed_gid sgid;
+	union qed_gid dgid;
+	u16 udp_src_port;
+
+	u16 vlan_id;
+
+	u32 rq_psn;
+	u32 sq_psn;
+	u8 max_rd_atomic_resp;
+	u8 max_rd_atomic_req;
+	u32 ack_timeout;
+	u8 retry_cnt;
+	u8 rnr_retry_cnt;
+	u8 min_rnr_nak_timer;
+	bool sqd_async;
+	u8 remote_mac_addr[6];
+	u8 local_mac_addr[6];
+	bool use_local_mac;
+	enum roce_mode roce_mode;
+};
+
+struct qed_rdma_query_qp_out_params {
+	enum qed_roce_qp_state state;
+	u32 rq_psn;
+	u32 sq_psn;
+	bool draining;
+	u16 mtu;
+	u32 dest_qp;
+	bool incoming_rdma_read_en;
+	bool incoming_rdma_write_en;
+	bool incoming_atomic_en;
+	bool e2e_flow_control_en;
+	union qed_gid sgid;
+	union qed_gid dgid;
+	u32 flow_label;
+	u8 hop_limit_ttl;
+	u8 traffic_class_tos;
+	u32 timeout;
+	u8 rnr_retry;
+	u8 retry_cnt;
+	u8 min_rnr_nak_timer;
+	u16 pkey_index;
+	u8 max_rd_atomic;
+	u8 max_dest_rd_atomic;
+	bool sqd_async;
+};
+
+struct qed_rdma_create_srq_out_params {
+	u16 srq_id;
+};
+
+struct qed_rdma_destroy_srq_in_params {
+	u16 srq_id;
+};
+
+struct qed_rdma_modify_srq_in_params {
+	u32 wqe_limit;
+	u16 srq_id;
+};
+
+struct qed_rdma_stats_out_params {
+	u64 sent_bytes;
+	u64 sent_pkts;
+	u64 rcv_bytes;
+	u64 rcv_pkts;
+};
+
+struct qed_rdma_counters_out_params {
+	u64 pd_count;
+	u64 max_pd;
+	u64 dpi_count;
+	u64 max_dpi;
+	u64 cq_count;
+	u64 max_cq;
+	u64 qp_count;
+	u64 max_qp;
+	u64 tid_count;
+	u64 max_tid;
+};
+
+#define QED_ROCE_TX_HEAD_FAILURE        (1)
+#define QED_ROCE_TX_FRAG_FAILURE        (2)
+
+struct qed_roce_ll2_header {
+	void *vaddr;
+	dma_addr_t baddr;
+	size_t len;
+};
+
+struct qed_roce_ll2_buffer {
+	dma_addr_t baddr;
+	size_t len;
+};
+
+struct qed_roce_ll2_packet {
+	struct qed_roce_ll2_header header;
+	int n_seg;
+	struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
+	int roce_mode;
+	enum qed_roce_ll2_tx_dest tx_dest;
+};
+
+enum qed_rdma_type {
+	QED_RDMA_TYPE_ROCE,
+};
+
+struct qed_dev_rdma_info {
+	struct qed_dev_info common;
+	enum qed_rdma_type rdma_type;
+	u8 user_dpm_enabled;
+};
+
+struct qed_rdma_ops {
+	const struct qed_common_ops *common;
+
+	int (*fill_dev_info)(struct qed_dev *cdev,
+			     struct qed_dev_rdma_info *info);
+	void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev);
+
+	int (*rdma_init)(struct qed_dev *dev,
+			 struct qed_rdma_start_in_params *iparams);
+
+	int (*rdma_add_user)(void *rdma_cxt,
+			     struct qed_rdma_add_user_out_params *oparams);
+
+	void (*rdma_remove_user)(void *rdma_cxt, u16 dpi);
+	int (*rdma_stop)(void *rdma_cxt);
+	struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt);
+	struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt);
+	int (*rdma_get_start_sb)(struct qed_dev *cdev);
+	int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev);
+	void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod);
+	int (*rdma_get_rdma_int)(struct qed_dev *cdev,
+				 struct qed_int_info *info);
+	int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt);
+	int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd);
+	void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd);
+	int (*rdma_create_cq)(void *rdma_cxt,
+			      struct qed_rdma_create_cq_in_params *params,
+			      u16 *icid);
+	int (*rdma_destroy_cq)(void *rdma_cxt,
+			       struct qed_rdma_destroy_cq_in_params *iparams,
+			       struct qed_rdma_destroy_cq_out_params *oparams);
+	struct qed_rdma_qp *
+	(*rdma_create_qp)(void *rdma_cxt,
+			  struct qed_rdma_create_qp_in_params *iparams,
+			  struct qed_rdma_create_qp_out_params *oparams);
+
+	int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp,
+			      struct qed_rdma_modify_qp_in_params *iparams);
+
+	int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp,
+			     struct qed_rdma_query_qp_out_params *oparams);
+	int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp);
+
+	int
+	(*rdma_register_tid)(void *rdma_cxt,
+			     struct qed_rdma_register_tid_in_params *iparams);
+
+	int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid);
+	int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid);
+	void (*rdma_free_tid)(void *rdma_cxt, u32 itid);
+
+	int (*ll2_acquire_connection)(void *rdma_cxt,
+				      struct qed_ll2_acquire_data *data);
+
+	int (*ll2_establish_connection)(void *rdma_cxt, u8 connection_handle);
+	int (*ll2_terminate_connection)(void *rdma_cxt, u8 connection_handle);
+	void (*ll2_release_connection)(void *rdma_cxt, u8 connection_handle);
+
+	int (*ll2_prepare_tx_packet)(void *rdma_cxt,
+				     u8 connection_handle,
+				     struct qed_ll2_tx_pkt_info *pkt,
+				     bool notify_fw);
+
+	int (*ll2_set_fragment_of_tx_packet)(void *rdma_cxt,
+					     u8 connection_handle,
+					     dma_addr_t addr,
+					     u16 nbytes);
+	int (*ll2_post_rx_buffer)(void *rdma_cxt, u8 connection_handle,
+				  dma_addr_t addr, u16 buf_len, void *cookie,
+				  u8 notify_fw);
+	int (*ll2_get_stats)(void *rdma_cxt,
+			     u8 connection_handle,
+			     struct qed_ll2_stats *p_stats);
+	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
+				  u8 *old_mac_address, u8 *new_mac_address);
+
+};
+
+const struct qed_rdma_ops *qed_get_rdma_ops(void);
+
+#endif
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
deleted file mode 100644
index 8e70f5ee05af..000000000000
--- a/include/linux/qed/qed_roce_if.h
+++ /dev/null
@@ -1,582 +0,0 @@
-/* QLogic qed NIC Driver
- * Copyright (c) 2015-2017  QLogic Corporation
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and /or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _QED_ROCE_IF_H
-#define _QED_ROCE_IF_H
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/qed/qed_if.h>
-#include <linux/qed/qed_ll2_if.h>
-#include <linux/qed/rdma_common.h>
-
-enum qed_roce_ll2_tx_dest {
-	/* Light L2 TX Destination to the Network */
-	QED_ROCE_LL2_TX_DEST_NW,
-
-	/* Light L2 TX Destination to the Loopback */
-	QED_ROCE_LL2_TX_DEST_LB,
-	QED_ROCE_LL2_TX_DEST_MAX
-};
-
-#define QED_RDMA_MAX_CNQ_SIZE               (0xFFFF)
-
-/* rdma interface */
-
-enum qed_roce_qp_state {
-	QED_ROCE_QP_STATE_RESET,
-	QED_ROCE_QP_STATE_INIT,
-	QED_ROCE_QP_STATE_RTR,
-	QED_ROCE_QP_STATE_RTS,
-	QED_ROCE_QP_STATE_SQD,
-	QED_ROCE_QP_STATE_ERR,
-	QED_ROCE_QP_STATE_SQE
-};
-
-enum qed_rdma_tid_type {
-	QED_RDMA_TID_REGISTERED_MR,
-	QED_RDMA_TID_FMR,
-	QED_RDMA_TID_MW_TYPE1,
-	QED_RDMA_TID_MW_TYPE2A
-};
-
-struct qed_rdma_events {
-	void *context;
-	void (*affiliated_event)(void *context, u8 fw_event_code,
-				 void *fw_handle);
-	void (*unaffiliated_event)(void *context, u8 event_code);
-};
-
-struct qed_rdma_device {
-	u32 vendor_id;
-	u32 vendor_part_id;
-	u32 hw_ver;
-	u64 fw_ver;
-
-	u64 node_guid;
-	u64 sys_image_guid;
-
-	u8 max_cnq;
-	u8 max_sge;
-	u8 max_srq_sge;
-	u16 max_inline;
-	u32 max_wqe;
-	u32 max_srq_wqe;
-	u8 max_qp_resp_rd_atomic_resc;
-	u8 max_qp_req_rd_atomic_resc;
-	u64 max_dev_resp_rd_atomic_resc;
-	u32 max_cq;
-	u32 max_qp;
-	u32 max_srq;
-	u32 max_mr;
-	u64 max_mr_size;
-	u32 max_cqe;
-	u32 max_mw;
-	u32 max_fmr;
-	u32 max_mr_mw_fmr_pbl;
-	u64 max_mr_mw_fmr_size;
-	u32 max_pd;
-	u32 max_ah;
-	u8 max_pkey;
-	u16 max_srq_wr;
-	u8 max_stats_queues;
-	u32 dev_caps;
-
-	/* Abilty to support RNR-NAK generation */
-
-#define QED_RDMA_DEV_CAP_RNR_NAK_MASK                           0x1
-#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT                  0
-	/* Abilty to support shutdown port */
-#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK                     0x1
-#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT                    1
-	/* Abilty to support port active event */
-#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK         0x1
-#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT                2
-	/* Abilty to support port change event */
-#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK         0x1
-#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT                3
-	/* Abilty to support system image GUID */
-#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK                 0x1
-#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT                        4
-	/* Abilty to support bad P_Key counter support */
-#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK                      0x1
-#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT                     5
-	/* Abilty to support atomic operations */
-#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK                 0x1
-#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT                        6
-#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK                 0x1
-#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT                        7
-	/* Abilty to support modifying the maximum number of
-	 * outstanding work requests per QP
-	 */
-#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK                     0x1
-#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT                    8
-	/* Abilty to support automatic path migration */
-#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK                     0x1
-#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT                    9
-	/* Abilty to support the base memory management extensions */
-#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK                   0x1
-#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT          10
-#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK                    0x1
-#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT                   11
-	/* Abilty to support multipile page sizes per memory region */
-#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK             0x1
-#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT            12
-	/* Abilty to support block list physical buffer list */
-#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK                        0x1
-#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT                       13
-	/* Abilty to support zero based virtual addresses */
-#define QED_RDMA_DEV_CAP_ZBVA_MASK                              0x1
-#define QED_RDMA_DEV_CAP_ZBVA_SHIFT                             14
-	/* Abilty to support local invalidate fencing */
-#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK                   0x1
-#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT          15
-	/* Abilty to support Loopback on QP */
-#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK                      0x1
-#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT                     16
-	u64 page_size_caps;
-	u8 dev_ack_delay;
-	u32 reserved_lkey;
-	u32 bad_pkey_counter;
-	struct qed_rdma_events events;
-};
-
-enum qed_port_state {
-	QED_RDMA_PORT_UP,
-	QED_RDMA_PORT_DOWN,
-};
-
-enum qed_roce_capability {
-	QED_ROCE_V1 = 1 << 0,
-	QED_ROCE_V2 = 1 << 1,
-};
-
-struct qed_rdma_port {
-	enum qed_port_state port_state;
-	int link_speed;
-	u64 max_msg_size;
-	u8 source_gid_table_len;
-	void *source_gid_table_ptr;
-	u8 pkey_table_len;
-	void *pkey_table_ptr;
-	u32 pkey_bad_counter;
-	enum qed_roce_capability capability;
-};
-
-struct qed_rdma_cnq_params {
-	u8 num_pbl_pages;
-	u64 pbl_ptr;
-};
-
-/* The CQ Mode affects the CQ doorbell transaction size.
- * 64/32 bit machines should configure to 32/16 bits respectively.
- */
-enum qed_rdma_cq_mode {
-	QED_RDMA_CQ_MODE_16_BITS,
-	QED_RDMA_CQ_MODE_32_BITS,
-};
-
-struct qed_roce_dcqcn_params {
-	u8 notification_point;
-	u8 reaction_point;
-
-	/* fields for notification point */
-	u32 cnp_send_timeout;
-
-	/* fields for reaction point */
-	u32 rl_bc_rate;
-	u16 rl_max_rate;
-	u16 rl_r_ai;
-	u16 rl_r_hai;
-	u16 dcqcn_g;
-	u32 dcqcn_k_us;
-	u32 dcqcn_timeout_us;
-};
-
-struct qed_rdma_start_in_params {
-	struct qed_rdma_events *events;
-	struct qed_rdma_cnq_params cnq_pbl_list[128];
-	u8 desired_cnq;
-	enum qed_rdma_cq_mode cq_mode;
-	struct qed_roce_dcqcn_params dcqcn_params;
-	u16 max_mtu;
-	u8 mac_addr[ETH_ALEN];
-	u8 iwarp_flags;
-};
-
-struct qed_rdma_add_user_out_params {
-	u16 dpi;
-	u64 dpi_addr;
-	u64 dpi_phys_addr;
-	u32 dpi_size;
-	u16 wid_count;
-};
-
-enum roce_mode {
-	ROCE_V1,
-	ROCE_V2_IPV4,
-	ROCE_V2_IPV6,
-	MAX_ROCE_MODE
-};
-
-union qed_gid {
-	u8 bytes[16];
-	u16 words[8];
-	u32 dwords[4];
-	u64 qwords[2];
-	u32 ipv4_addr;
-};
-
-struct qed_rdma_register_tid_in_params {
-	u32 itid;
-	enum qed_rdma_tid_type tid_type;
-	u8 key;
-	u16 pd;
-	bool local_read;
-	bool local_write;
-	bool remote_read;
-	bool remote_write;
-	bool remote_atomic;
-	bool mw_bind;
-	u64 pbl_ptr;
-	bool pbl_two_level;
-	u8 pbl_page_size_log;
-	u8 page_size_log;
-	u32 fbo;
-	u64 length;
-	u64 vaddr;
-	bool zbva;
-	bool phy_mr;
-	bool dma_mr;
-
-	bool dif_enabled;
-	u64 dif_error_addr;
-	u64 dif_runt_addr;
-};
-
-struct qed_rdma_create_cq_in_params {
-	u32 cq_handle_lo;
-	u32 cq_handle_hi;
-	u32 cq_size;
-	u16 dpi;
-	bool pbl_two_level;
-	u64 pbl_ptr;
-	u16 pbl_num_pages;
-	u8 pbl_page_size_log;
-	u8 cnq_id;
-	u16 int_timeout;
-};
-
-struct qed_rdma_create_srq_in_params {
-	u64 pbl_base_addr;
-	u64 prod_pair_addr;
-	u16 num_pages;
-	u16 pd_id;
-	u16 page_size;
-};
-
-struct qed_rdma_destroy_cq_in_params {
-	u16 icid;
-};
-
-struct qed_rdma_destroy_cq_out_params {
-	u16 num_cq_notif;
-};
-
-struct qed_rdma_create_qp_in_params {
-	u32 qp_handle_lo;
-	u32 qp_handle_hi;
-	u32 qp_handle_async_lo;
-	u32 qp_handle_async_hi;
-	bool use_srq;
-	bool signal_all;
-	bool fmr_and_reserved_lkey;
-	u16 pd;
-	u16 dpi;
-	u16 sq_cq_id;
-	u16 sq_num_pages;
-	u64 sq_pbl_ptr;
-	u8 max_sq_sges;
-	u16 rq_cq_id;
-	u16 rq_num_pages;
-	u64 rq_pbl_ptr;
-	u16 srq_id;
-	u8 stats_queue;
-};
-
-struct qed_rdma_create_qp_out_params {
-	u32 qp_id;
-	u16 icid;
-	void *rq_pbl_virt;
-	dma_addr_t rq_pbl_phys;
-	void *sq_pbl_virt;
-	dma_addr_t sq_pbl_phys;
-};
-
-struct qed_rdma_modify_qp_in_params {
-	u32 modify_flags;
-#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK               0x1
-#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT              0
-#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK                    0x1
-#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT                   1
-#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK             0x1
-#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT            2
-#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK                 0x1
-#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT                3
-#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK          0x1
-#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT         4
-#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK                  0x1
-#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT                 5
-#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK                  0x1
-#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT                 6
-#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK       0x1
-#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT      7
-#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK      0x1
-#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT     8
-#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK             0x1
-#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT            9
-#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK               0x1
-#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT              10
-#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK           0x1
-#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT          11
-#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK       0x1
-#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT      12
-#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK     0x1
-#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT    13
-#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK               0x1
-#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT              14
-
-	enum qed_roce_qp_state new_state;
-	u16 pkey;
-	bool incoming_rdma_read_en;
-	bool incoming_rdma_write_en;
-	bool incoming_atomic_en;
-	bool e2e_flow_control_en;
-	u32 dest_qp;
-	bool lb_indication;
-	u16 mtu;
-	u8 traffic_class_tos;
-	u8 hop_limit_ttl;
-	u32 flow_label;
-	union qed_gid sgid;
-	union qed_gid dgid;
-	u16 udp_src_port;
-
-	u16 vlan_id;
-
-	u32 rq_psn;
-	u32 sq_psn;
-	u8 max_rd_atomic_resp;
-	u8 max_rd_atomic_req;
-	u32 ack_timeout;
-	u8 retry_cnt;
-	u8 rnr_retry_cnt;
-	u8 min_rnr_nak_timer;
-	bool sqd_async;
-	u8 remote_mac_addr[6];
-	u8 local_mac_addr[6];
-	bool use_local_mac;
-	enum roce_mode roce_mode;
-};
-
-struct qed_rdma_query_qp_out_params {
-	enum qed_roce_qp_state state;
-	u32 rq_psn;
-	u32 sq_psn;
-	bool draining;
-	u16 mtu;
-	u32 dest_qp;
-	bool incoming_rdma_read_en;
-	bool incoming_rdma_write_en;
-	bool incoming_atomic_en;
-	bool e2e_flow_control_en;
-	union qed_gid sgid;
-	union qed_gid dgid;
-	u32 flow_label;
-	u8 hop_limit_ttl;
-	u8 traffic_class_tos;
-	u32 timeout;
-	u8 rnr_retry;
-	u8 retry_cnt;
-	u8 min_rnr_nak_timer;
-	u16 pkey_index;
-	u8 max_rd_atomic;
-	u8 max_dest_rd_atomic;
-	bool sqd_async;
-};
-
-struct qed_rdma_create_srq_out_params {
-	u16 srq_id;
-};
-
-struct qed_rdma_destroy_srq_in_params {
-	u16 srq_id;
-};
-
-struct qed_rdma_modify_srq_in_params {
-	u32 wqe_limit;
-	u16 srq_id;
-};
-
-struct qed_rdma_stats_out_params {
-	u64 sent_bytes;
-	u64 sent_pkts;
-	u64 rcv_bytes;
-	u64 rcv_pkts;
-};
-
-struct qed_rdma_counters_out_params {
-	u64 pd_count;
-	u64 max_pd;
-	u64 dpi_count;
-	u64 max_dpi;
-	u64 cq_count;
-	u64 max_cq;
-	u64 qp_count;
-	u64 max_qp;
-	u64 tid_count;
-	u64 max_tid;
-};
-
-#define QED_ROCE_TX_HEAD_FAILURE        (1)
-#define QED_ROCE_TX_FRAG_FAILURE        (2)
-
-struct qed_roce_ll2_header {
-	void *vaddr;
-	dma_addr_t baddr;
-	size_t len;
-};
-
-struct qed_roce_ll2_buffer {
-	dma_addr_t baddr;
-	size_t len;
-};
-
-struct qed_roce_ll2_packet {
-	struct qed_roce_ll2_header header;
-	int n_seg;
-	struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
-	int roce_mode;
-	enum qed_roce_ll2_tx_dest tx_dest;
-};
-
-enum qed_rdma_type {
-	QED_RDMA_TYPE_ROCE,
-};
-
-struct qed_dev_rdma_info {
-	struct qed_dev_info common;
-	enum qed_rdma_type rdma_type;
-	u8 user_dpm_enabled;
-};
-
-struct qed_rdma_ops {
-	const struct qed_common_ops *common;
-
-	int (*fill_dev_info)(struct qed_dev *cdev,
-			     struct qed_dev_rdma_info *info);
-	void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev);
-
-	int (*rdma_init)(struct qed_dev *dev,
-			 struct qed_rdma_start_in_params *iparams);
-
-	int (*rdma_add_user)(void *rdma_cxt,
-			     struct qed_rdma_add_user_out_params *oparams);
-
-	void (*rdma_remove_user)(void *rdma_cxt, u16 dpi);
-	int (*rdma_stop)(void *rdma_cxt);
-	struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt);
-	struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt);
-	int (*rdma_get_start_sb)(struct qed_dev *cdev);
-	int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev);
-	void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod);
-	int (*rdma_get_rdma_int)(struct qed_dev *cdev,
-				 struct qed_int_info *info);
-	int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt);
-	int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd);
-	void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd);
-	int (*rdma_create_cq)(void *rdma_cxt,
-			      struct qed_rdma_create_cq_in_params *params,
-			      u16 *icid);
-	int (*rdma_destroy_cq)(void *rdma_cxt,
-			       struct qed_rdma_destroy_cq_in_params *iparams,
-			       struct qed_rdma_destroy_cq_out_params *oparams);
-	struct qed_rdma_qp *
-	(*rdma_create_qp)(void *rdma_cxt,
-			  struct qed_rdma_create_qp_in_params *iparams,
-			  struct qed_rdma_create_qp_out_params *oparams);
-
-	int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp,
-			      struct qed_rdma_modify_qp_in_params *iparams);
-
-	int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp,
-			     struct qed_rdma_query_qp_out_params *oparams);
-	int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp);
-
-	int
-	(*rdma_register_tid)(void *rdma_cxt,
-			     struct qed_rdma_register_tid_in_params *iparams);
-
-	int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid);
-	int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid);
-	void (*rdma_free_tid)(void *rdma_cxt, u32 itid);
-
-	int (*ll2_acquire_connection)(void *rdma_cxt,
-				      struct qed_ll2_acquire_data *data);
-
-	int (*ll2_establish_connection)(void *rdma_cxt, u8 connection_handle);
-	int (*ll2_terminate_connection)(void *rdma_cxt, u8 connection_handle);
-	void (*ll2_release_connection)(void *rdma_cxt, u8 connection_handle);
-
-	int (*ll2_prepare_tx_packet)(void *rdma_cxt,
-				     u8 connection_handle,
-				     struct qed_ll2_tx_pkt_info *pkt,
-				     bool notify_fw);
-
-	int (*ll2_set_fragment_of_tx_packet)(void *rdma_cxt,
-					     u8 connection_handle,
-					     dma_addr_t addr,
-					     u16 nbytes);
-	int (*ll2_post_rx_buffer)(void *rdma_cxt, u8 connection_handle,
-				  dma_addr_t addr, u16 buf_len, void *cookie,
-				  u8 notify_fw);
-	int (*ll2_get_stats)(void *rdma_cxt,
-			     u8 connection_handle,
-			     struct qed_ll2_stats *p_stats);
-	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
-				  u8 *old_mac_address, u8 *new_mac_address);
-
-};
-
-const struct qed_rdma_ops *qed_get_rdma_ops(void);
-
-#endif
-- 
cgit v1.2.3


From 3708184afc77bb67709a67a35d9f367ebd32cbc4 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:37 +0300
Subject: device property: Move FW type specific functionality to FW specific
 files

The device and fwnode property API supports Devicetree, ACPI and pset
properties. The implementation of this functionality for each firmware
type was embedded in the fwnode property core. Move it out to firmware
type specific locations, making it easier to maintain.

Depends-on: ("of: Move OF property and graph API from base.c to property.c")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c |  68 ++++++++++++++++
 drivers/acpi/scan.c     |   1 +
 drivers/base/property.c | 208 +++++++++++++++++++-----------------------------
 drivers/of/property.c   |  90 +++++++++++++++++++++
 include/linux/acpi.h    |   4 +
 include/linux/fwnode.h  |  54 +++++++++++++
 include/linux/of.h      |   2 +
 7 files changed, 302 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 9364398204e9..14013f635db6 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -57,6 +57,7 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc,
 
 	dn->name = link->package.elements[0].string.pointer;
 	dn->fwnode.type = FWNODE_ACPI_DATA;
+	dn->fwnode.ops = &acpi_fwnode_ops;
 	dn->parent = parent;
 	INIT_LIST_HEAD(&dn->data.subnodes);
 
@@ -1119,3 +1120,70 @@ int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode,
 
 	return 0;
 }
+
+static bool acpi_fwnode_property_present(struct fwnode_handle *fwnode,
+					 const char *propname)
+{
+	return !acpi_node_prop_get(fwnode, propname, NULL);
+}
+
+static int acpi_fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					       const char *propname,
+					       unsigned int elem_size,
+					       void *val, size_t nval)
+{
+	enum dev_prop_type type;
+
+	switch (elem_size) {
+	case sizeof(u8):
+		type = DEV_PROP_U8;
+		break;
+	case sizeof(u16):
+		type = DEV_PROP_U16;
+		break;
+	case sizeof(u32):
+		type = DEV_PROP_U32;
+		break;
+	case sizeof(u64):
+		type = DEV_PROP_U64;
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	return acpi_node_prop_read(fwnode, propname, type, val, nval);
+}
+
+static int acpi_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  const char **val, size_t nval)
+{
+	return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING,
+				   val, nval);
+}
+
+static struct fwnode_handle *
+acpi_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
+				 const char *childname)
+{
+	struct fwnode_handle *child;
+
+	/*
+	 * Find first matching named child node of this fwnode.
+	 * For ACPI this will be a data only sub-node.
+	 */
+	fwnode_for_each_child_node(fwnode, child)
+		if (acpi_data_node_match(child, childname))
+			return child;
+
+	return NULL;
+}
+
+const struct fwnode_operations acpi_fwnode_ops = {
+	.property_present = acpi_fwnode_property_present,
+	.property_read_int_array = acpi_fwnode_property_read_int_array,
+	.property_read_string_array = acpi_fwnode_property_read_string_array,
+	.get_parent = acpi_node_get_parent,
+	.get_next_child_node = acpi_get_next_subnode,
+	.get_named_child_node = acpi_fwnode_get_named_child_node,
+};
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index c98f88b28948..bc2a525c495f 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1436,6 +1436,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	device->handle = handle;
 	device->parent = acpi_bus_get_parent(handle);
 	device->fwnode.type = FWNODE_ACPI;
+	device->fwnode.ops = &acpi_fwnode_ops;
 	acpi_set_device_status(device, sta);
 	acpi_device_get_busid(device);
 	acpi_set_pnp_ids(handle, &device->pnp, type);
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 149de311a10e..fee0705e40e9 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -187,6 +187,50 @@ struct fwnode_handle *dev_fwnode(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_fwnode);
 
+static bool pset_fwnode_property_present(struct fwnode_handle *fwnode,
+					 const char *propname)
+{
+	return !!pset_prop_get(to_pset_node(fwnode), propname);
+}
+
+static int pset_fwnode_read_int_array(struct fwnode_handle *fwnode,
+				      const char *propname,
+				      unsigned int elem_size, void *val,
+				      size_t nval)
+{
+	struct property_set *node = to_pset_node(fwnode);
+
+	if (!val)
+		return pset_prop_count_elems_of_size(node, propname, elem_size);
+
+	switch (elem_size) {
+	case sizeof(u8):
+		return pset_prop_read_u8_array(node, propname, val, nval);
+	case sizeof(u16):
+		return pset_prop_read_u16_array(node, propname, val, nval);
+	case sizeof(u32):
+		return pset_prop_read_u32_array(node, propname, val, nval);
+	case sizeof(u64):
+		return pset_prop_read_u64_array(node, propname, val, nval);
+	}
+
+	return -ENXIO;
+}
+
+static int pset_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  const char **val, size_t nval)
+{
+	return pset_prop_read_string_array(to_pset_node(fwnode), propname,
+					   val, nval);
+}
+
+static const struct fwnode_operations pset_fwnode_ops = {
+	.property_present = pset_fwnode_property_present,
+	.property_read_int_array = pset_fwnode_read_int_array,
+	.property_read_string_array = pset_fwnode_property_read_string_array,
+};
+
 /**
  * device_property_present - check if a property of a device is present
  * @dev: Device whose property is being checked
@@ -200,18 +244,6 @@ bool device_property_present(struct device *dev, const char *propname)
 }
 EXPORT_SYMBOL_GPL(device_property_present);
 
-static bool __fwnode_property_present(struct fwnode_handle *fwnode,
-				      const char *propname)
-{
-	if (is_of_node(fwnode))
-		return of_property_read_bool(to_of_node(fwnode), propname);
-	else if (is_acpi_node(fwnode))
-		return !acpi_node_prop_get(fwnode, propname, NULL);
-	else if (is_pset_node(fwnode))
-		return !!pset_prop_get(to_pset_node(fwnode), propname);
-	return false;
-}
-
 /**
  * fwnode_property_present - check if a property of a firmware node is present
  * @fwnode: Firmware node whose property to check
@@ -221,10 +253,11 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
 {
 	bool ret;
 
-	ret = __fwnode_property_present(fwnode, propname);
+	ret = fwnode_call_int_op(fwnode, property_present, propname);
 	if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = __fwnode_property_present(fwnode->secondary, propname);
+		ret = fwnode_call_int_op(fwnode->secondary, property_present,
+					 propname);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fwnode_property_present);
@@ -398,42 +431,23 @@ int device_property_match_string(struct device *dev, const char *propname,
 }
 EXPORT_SYMBOL_GPL(device_property_match_string);
 
-#define OF_DEV_PROP_READ_ARRAY(node, propname, type, val, nval)				\
-	(val) ? of_property_read_##type##_array((node), (propname), (val), (nval))	\
-	      : of_property_count_elems_of_size((node), (propname), sizeof(type))
-
-#define PSET_PROP_READ_ARRAY(node, propname, type, val, nval)				\
-	(val) ? pset_prop_read_##type##_array((node), (propname), (val), (nval))	\
-	      : pset_prop_count_elems_of_size((node), (propname), sizeof(type))
-
-#define FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_, _val_, _nval_)	\
-({											\
-	int _ret_;									\
-	if (is_of_node(_fwnode_))							\
-		_ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_,	\
-					       _type_, _val_, _nval_);			\
-	else if (is_acpi_node(_fwnode_))						\
-		_ret_ = acpi_node_prop_read(_fwnode_, _propname_, _proptype_,		\
-					    _val_, _nval_);				\
-	else if (is_pset_node(_fwnode_)) 						\
-		_ret_ = PSET_PROP_READ_ARRAY(to_pset_node(_fwnode_), _propname_,	\
-					     _type_, _val_, _nval_);			\
-	else										\
-		_ret_ = -ENXIO;								\
-	_ret_;										\
-})
-
-#define FWNODE_PROP_READ_ARRAY(_fwnode_, _propname_, _type_, _proptype_, _val_, _nval_)	\
-({											\
-	int _ret_;									\
-	_ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_,		\
-				 _val_, _nval_);					\
-	if (_ret_ == -EINVAL && !IS_ERR_OR_NULL(_fwnode_) &&				\
-	    !IS_ERR_OR_NULL(_fwnode_->secondary))					\
-		_ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_,	\
-				_proptype_, _val_, _nval_);				\
-	_ret_;										\
-})
+static int fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					  const char *propname,
+					  unsigned int elem_size, void *val,
+					  size_t nval)
+{
+	int ret;
+
+	ret = fwnode_call_int_op(fwnode, property_read_int_array, propname,
+				 elem_size, val, nval);
+	if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+	    !IS_ERR_OR_NULL(fwnode->secondary))
+		ret = fwnode_call_int_op(
+			fwnode->secondary, property_read_int_array, propname,
+			elem_size, val, nval);
+
+	return ret;
+}
 
 /**
  * fwnode_property_read_u8_array - return a u8 array property of firmware node
@@ -456,8 +470,8 @@ EXPORT_SYMBOL_GPL(device_property_match_string);
 int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
 				  const char *propname, u8 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u8, DEV_PROP_U8,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u8),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array);
 
@@ -482,8 +496,8 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array);
 int fwnode_property_read_u16_array(struct fwnode_handle *fwnode,
 				   const char *propname, u16 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u16, DEV_PROP_U16,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u16),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array);
 
@@ -508,8 +522,8 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array);
 int fwnode_property_read_u32_array(struct fwnode_handle *fwnode,
 				   const char *propname, u32 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u32, DEV_PROP_U32,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u32),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array);
 
@@ -534,29 +548,11 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array);
 int fwnode_property_read_u64_array(struct fwnode_handle *fwnode,
 				   const char *propname, u64 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u64, DEV_PROP_U64,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u64),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array);
 
-static int __fwnode_property_read_string_array(struct fwnode_handle *fwnode,
-					       const char *propname,
-					       const char **val, size_t nval)
-{
-	if (is_of_node(fwnode))
-		return val ?
-			of_property_read_string_array(to_of_node(fwnode),
-						      propname, val, nval) :
-			of_property_count_strings(to_of_node(fwnode), propname);
-	else if (is_acpi_node(fwnode))
-		return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING,
-					   val, nval);
-	else if (is_pset_node(fwnode))
-		return pset_prop_read_string_array(to_pset_node(fwnode),
-						   propname, val, nval);
-	return -ENXIO;
-}
-
 /**
  * fwnode_property_read_string_array - return string array property of a node
  * @fwnode: Firmware node to get the property of
@@ -581,11 +577,13 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
 {
 	int ret;
 
-	ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
+	ret = fwnode_call_int_op(fwnode, property_read_string_array, propname,
+				 val, nval);
 	if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = __fwnode_property_read_string_array(fwnode->secondary,
-							  propname, val, nval);
+		ret = fwnode_call_int_op(fwnode->secondary,
+					 property_read_string_array, propname,
+					 val, nval);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_string_array);
@@ -903,6 +901,7 @@ int device_add_properties(struct device *dev,
 		return PTR_ERR(p);
 
 	p->fwnode.type = FWNODE_PDATA;
+	p->fwnode.ops = &pset_fwnode_ops;
 	set_secondary_fwnode(dev, &p->fwnode);
 	return 0;
 }
@@ -938,19 +937,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_parent);
  */
 struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *parent = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_get_parent(to_of_node(fwnode));
-		if (node)
-			parent = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		parent = acpi_node_get_parent(fwnode);
-	}
-
-	return parent;
+	return fwnode_call_ptr_op(fwnode, get_parent);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_parent);
 
@@ -962,18 +949,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_parent);
 struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode,
 						 struct fwnode_handle *child)
 {
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_get_next_available_child(to_of_node(fwnode),
-						   to_of_node(child));
-		if (node)
-			return &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		return acpi_get_next_subnode(fwnode, child);
-	}
-
-	return NULL;
+	return fwnode_call_ptr_op(fwnode, get_next_child_node, child);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_next_child_node);
 
@@ -1005,23 +981,7 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node);
 struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 						  const char *childname)
 {
-	struct fwnode_handle *child;
-
-	/*
-	 * Find first matching named child node of this fwnode.
-	 * For ACPI this will be a data only sub-node.
-	 */
-	fwnode_for_each_child_node(fwnode, child) {
-		if (is_of_node(child)) {
-			if (!of_node_cmp(to_of_node(child)->name, childname))
-				return child;
-		} else if (is_acpi_data_node(child)) {
-			if (acpi_data_node_match(child, childname))
-				return child;
-		}
-	}
-
-	return NULL;
+	return fwnode_call_ptr_op(fwnode, get_named_child_node, childname);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_named_child_node);
 
@@ -1043,8 +1003,7 @@ EXPORT_SYMBOL_GPL(device_get_named_child_node);
  */
 void fwnode_handle_get(struct fwnode_handle *fwnode)
 {
-	if (is_of_node(fwnode))
-		of_node_get(to_of_node(fwnode));
+	fwnode_call_void_op(fwnode, get);
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_get);
 
@@ -1058,8 +1017,7 @@ EXPORT_SYMBOL_GPL(fwnode_handle_get);
  */
 void fwnode_handle_put(struct fwnode_handle *fwnode)
 {
-	if (is_of_node(fwnode))
-		of_node_put(to_of_node(fwnode));
+	fwnode_call_void_op(fwnode, put);
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_put);
 
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 457c313a8924..250859234fb0 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -764,3 +764,93 @@ struct device_node *of_graph_get_remote_node(const struct device_node *node,
 	return remote;
 }
 EXPORT_SYMBOL(of_graph_get_remote_node);
+
+static void of_fwnode_get(struct fwnode_handle *fwnode)
+{
+	of_node_get(to_of_node(fwnode));
+}
+
+static void of_fwnode_put(struct fwnode_handle *fwnode)
+{
+	of_node_put(to_of_node(fwnode));
+}
+
+static bool of_fwnode_property_present(struct fwnode_handle *fwnode,
+				       const char *propname)
+{
+	return of_property_read_bool(to_of_node(fwnode), propname);
+}
+
+static int of_fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					     const char *propname,
+					     unsigned int elem_size, void *val,
+					     size_t nval)
+{
+	struct device_node *node = to_of_node(fwnode);
+
+	if (!val)
+		return of_property_count_elems_of_size(node, propname,
+						       elem_size);
+
+	switch (elem_size) {
+	case sizeof(u8):
+		return of_property_read_u8_array(node, propname, val, nval);
+	case sizeof(u16):
+		return of_property_read_u16_array(node, propname, val, nval);
+	case sizeof(u32):
+		return of_property_read_u32_array(node, propname, val, nval);
+	case sizeof(u64):
+		return of_property_read_u64_array(node, propname, val, nval);
+	}
+
+	return -ENXIO;
+}
+
+static int of_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						const char *propname,
+						const char **val, size_t nval)
+{
+	struct device_node *node = to_of_node(fwnode);
+
+	return val ?
+		of_property_read_string_array(node, propname, val, nval) :
+		of_property_count_strings(node, propname);
+}
+
+static struct fwnode_handle *of_fwnode_get_parent(struct fwnode_handle *fwnode)
+{
+	return of_fwnode_handle(of_get_parent(to_of_node(fwnode)));
+}
+
+static struct fwnode_handle *
+of_fwnode_get_next_child_node(struct fwnode_handle *fwnode,
+			      struct fwnode_handle *child)
+{
+	return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode),
+							    to_of_node(child)));
+}
+
+static struct fwnode_handle *
+of_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
+			       const char *childname)
+{
+	struct device_node *node = to_of_node(fwnode);
+	struct device_node *child;
+
+	for_each_available_child_of_node(node, child)
+		if (!of_node_cmp(child->name, childname))
+			return of_fwnode_handle(child);
+
+	return NULL;
+}
+
+const struct fwnode_operations of_fwnode_ops = {
+	.get = of_fwnode_get,
+	.put = of_fwnode_put,
+	.property_present = of_fwnode_property_present,
+	.property_read_int_array = of_fwnode_property_read_int_array,
+	.property_read_string_array = of_fwnode_property_read_string_array,
+	.get_parent = of_fwnode_get_parent,
+	.get_next_child_node = of_fwnode_get_next_child_node,
+	.get_named_child_node = of_fwnode_get_named_child_node,
+};
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 137e4a3d89c5..b8f23c521b67 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -56,6 +56,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
 	acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+
+extern const struct fwnode_operations acpi_fwnode_ops;
+
 static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
 {
 	struct fwnode_handle *fwnode;
@@ -65,6 +68,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
 		return NULL;
 
 	fwnode->type = FWNODE_ACPI_STATIC;
+	fwnode->ops = &acpi_fwnode_ops;
 
 	return fwnode;
 }
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 3dff2398a5f0..8f64b3ae9c57 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -12,6 +12,8 @@
 #ifndef _LINUX_FWNODE_H_
 #define _LINUX_FWNODE_H_
 
+#include <linux/types.h>
+
 enum fwnode_type {
 	FWNODE_INVALID = 0,
 	FWNODE_OF,
@@ -22,9 +24,12 @@ enum fwnode_type {
 	FWNODE_IRQCHIP
 };
 
+struct fwnode_operations;
+
 struct fwnode_handle {
 	enum fwnode_type type;
 	struct fwnode_handle *secondary;
+	const struct fwnode_operations *ops;
 };
 
 /**
@@ -39,4 +44,53 @@ struct fwnode_endpoint {
 	const struct fwnode_handle *local_fwnode;
 };
 
+/**
+ * struct fwnode_operations - Operations for fwnode interface
+ * @get: Get a reference to an fwnode.
+ * @put: Put a reference to an fwnode.
+ * @property_present: Return true if a property is present.
+ * @property_read_integer_array: Read an array of integer properties. Return
+ *				 zero on success, a negative error code
+ *				 otherwise.
+ * @property_read_string_array: Read an array of string properties. Return zero
+ *				on success, a negative error code otherwise.
+ * @get_parent: Return the parent of an fwnode.
+ * @get_next_child_node: Return the next child node in an iteration.
+ * @get_named_child_node: Return a child node with a given name.
+ */
+struct fwnode_operations {
+	void (*get)(struct fwnode_handle *fwnode);
+	void (*put)(struct fwnode_handle *fwnode);
+	bool (*property_present)(struct fwnode_handle *fwnode,
+				 const char *propname);
+	int (*property_read_int_array)(struct fwnode_handle *fwnode,
+				       const char *propname,
+				       unsigned int elem_size, void *val,
+				       size_t nval);
+	int (*property_read_string_array)(struct fwnode_handle *fwnode_handle,
+					  const char *propname,
+					  const char **val, size_t nval);
+	struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode);
+	struct fwnode_handle *
+	(*get_next_child_node)(struct fwnode_handle *fwnode,
+			       struct fwnode_handle *child);
+	struct fwnode_handle *
+	(*get_named_child_node)(struct fwnode_handle *fwnode, const char *name);
+};
+
+#define fwnode_has_op(fwnode, op)				\
+	((fwnode) && (fwnode)->ops && (fwnode)->ops->op)
+#define fwnode_call_int_op(fwnode, op, ...)				\
+	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
+		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
+	 -EINVAL)
+#define fwnode_call_ptr_op(fwnode, op, ...)		\
+	(fwnode_has_op(fwnode, op) ?			\
+	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
+#define fwnode_call_void_op(fwnode, op, ...)				\
+	do {								\
+		if (fwnode_has_op(fwnode, op))				\
+			(fwnode)->ops->op(fwnode, ## __VA_ARGS__);	\
+	} while (false)
+
 #endif
diff --git a/include/linux/of.h b/include/linux/of.h
index 29b7b738b509..cdbfa88c32cf 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -100,10 +100,12 @@ struct of_reconfig_data {
 
 /* initialize a node */
 extern struct kobj_type of_node_ktype;
+extern const struct fwnode_operations of_fwnode_ops;
 static inline void of_node_init(struct device_node *node)
 {
 	kobject_init(&node->kobj, &of_node_ktype);
 	node->fwnode.type = FWNODE_OF;
+	node->fwnode.ops = &of_fwnode_ops;
 }
 
 /* true when node is initialized */
-- 
cgit v1.2.3


From 3b27d00e7b6d7c889d87fd00df600c495b968e30 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:38 +0300
Subject: device property: Move fwnode graph ops to firmware specific locations

Move firmware specific implementations of the fwnode graph operations to
firmware specific locations.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 40 ++++++++++++++++++++++
 drivers/base/property.c | 91 +++++--------------------------------------------
 drivers/of/property.c   | 52 ++++++++++++++++++++++++++++
 include/linux/fwnode.h  | 14 ++++++++
 4 files changed, 114 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 14013f635db6..a24ca61294eb 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1179,6 +1179,42 @@ acpi_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 	return NULL;
 }
 
+static struct fwnode_handle *
+acpi_fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
+				    struct fwnode_handle *prev)
+{
+	struct fwnode_handle *endpoint;
+
+	endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
+	if (IS_ERR(endpoint))
+		return NULL;
+
+	return endpoint;
+}
+
+static struct fwnode_handle *
+acpi_fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *endpoint = NULL;
+
+	acpi_graph_get_remote_endpoint(fwnode, NULL, NULL, &endpoint);
+
+	return endpoint;
+}
+
+static int acpi_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
+					    struct fwnode_endpoint *endpoint)
+{
+	struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode);
+
+	endpoint->local_fwnode = fwnode;
+
+	fwnode_property_read_u32(port_fwnode, "port", &endpoint->port);
+	fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id);
+
+	return 0;
+}
+
 const struct fwnode_operations acpi_fwnode_ops = {
 	.property_present = acpi_fwnode_property_present,
 	.property_read_int_array = acpi_fwnode_property_read_int_array,
@@ -1186,4 +1222,8 @@ const struct fwnode_operations acpi_fwnode_ops = {
 	.get_parent = acpi_node_get_parent,
 	.get_next_child_node = acpi_get_next_subnode,
 	.get_named_child_node = acpi_fwnode_get_named_child_node,
+	.graph_get_next_endpoint = acpi_fwnode_graph_get_next_endpoint,
+	.graph_get_remote_endpoint = acpi_fwnode_graph_get_remote_endpoint,
+	.graph_get_port_parent = acpi_node_get_parent,
+	.graph_parse_endpoint = acpi_fwnode_graph_parse_endpoint,
 };
diff --git a/drivers/base/property.c b/drivers/base/property.c
index fee0705e40e9..e1a58ac8840e 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1156,24 +1156,7 @@ struct fwnode_handle *
 fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
 			       struct fwnode_handle *prev)
 {
-	struct fwnode_handle *endpoint = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_graph_get_next_endpoint(to_of_node(fwnode),
-						  to_of_node(prev));
-
-		if (node)
-			endpoint = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
-		if (IS_ERR(endpoint))
-			endpoint = NULL;
-	}
-
-	return endpoint;
-
+	return fwnode_call_ptr_op(fwnode, graph_get_next_endpoint, prev);
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 
@@ -1186,22 +1169,12 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 struct fwnode_handle *
 fwnode_graph_get_remote_port_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *parent = NULL;
+	struct fwnode_handle *port, *parent;
 
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
+	port = fwnode_graph_get_remote_port(fwnode);
+	parent = fwnode_call_ptr_op(port, graph_get_port_parent);
 
-		node = of_graph_get_remote_port_parent(to_of_node(fwnode));
-		if (node)
-			parent = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, &parent, NULL,
-						     NULL);
-		if (ret)
-			return NULL;
-	}
+	fwnode_handle_put(port);
 
 	return parent;
 }
@@ -1215,23 +1188,7 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent);
  */
 struct fwnode_handle *fwnode_graph_get_remote_port(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *port = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_graph_get_remote_port(to_of_node(fwnode));
-		if (node)
-			port = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, NULL, &port, NULL);
-		if (ret)
-			return NULL;
-	}
-
-	return port;
+	return fwnode_get_next_parent(fwnode_graph_get_remote_endpoint(fwnode));
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);
 
@@ -1244,25 +1201,7 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);
 struct fwnode_handle *
 fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *endpoint = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_parse_phandle(to_of_node(fwnode), "remote-endpoint",
-					0);
-		if (node)
-			endpoint = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, NULL, NULL,
-						     &endpoint);
-		if (ret)
-			return NULL;
-	}
-
-	return endpoint;
+	return fwnode_call_ptr_op(fwnode, graph_get_remote_endpoint);
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 
@@ -1278,22 +1217,8 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint)
 {
-	struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode);
-
 	memset(endpoint, 0, sizeof(*endpoint));
 
-	endpoint->local_fwnode = fwnode;
-
-	if (is_acpi_node(port_fwnode)) {
-		fwnode_property_read_u32(port_fwnode, "port", &endpoint->port);
-		fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id);
-	} else {
-		fwnode_property_read_u32(port_fwnode, "reg", &endpoint->port);
-		fwnode_property_read_u32(fwnode, "reg", &endpoint->id);
-	}
-
-	fwnode_handle_put(port_fwnode);
-
-	return 0;
+	return fwnode_call_int_op(fwnode, graph_parse_endpoint, endpoint);
 }
 EXPORT_SYMBOL(fwnode_graph_parse_endpoint);
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 250859234fb0..e859e41e33f3 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -844,6 +844,54 @@ of_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 	return NULL;
 }
 
+static struct fwnode_handle *
+of_fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
+				  struct fwnode_handle *prev)
+{
+	return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode),
+							   to_of_node(prev)));
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
+{
+	return of_fwnode_handle(of_parse_phandle(to_of_node(fwnode),
+						 "remote-endpoint", 0));
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode)
+{
+	struct device_node *np;
+
+	/* Get the parent of the port */
+	np = of_get_next_parent(to_of_node(fwnode));
+	if (!np)
+		return NULL;
+
+	/* Is this the "ports" node? If not, it's the port parent. */
+	if (of_node_cmp(np->name, "ports"))
+		return of_fwnode_handle(np);
+
+	return of_fwnode_handle(of_get_next_parent(np));
+}
+
+static int of_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
+					  struct fwnode_endpoint *endpoint)
+{
+	struct device_node *node = to_of_node(fwnode);
+	struct device_node *port_node = of_get_parent(node);
+
+	endpoint->local_fwnode = fwnode;
+
+	of_property_read_u32(port_node, "reg", &endpoint->port);
+	of_property_read_u32(node, "reg", &endpoint->id);
+
+	of_node_put(port_node);
+
+	return 0;
+}
+
 const struct fwnode_operations of_fwnode_ops = {
 	.get = of_fwnode_get,
 	.put = of_fwnode_put,
@@ -853,4 +901,8 @@ const struct fwnode_operations of_fwnode_ops = {
 	.get_parent = of_fwnode_get_parent,
 	.get_next_child_node = of_fwnode_get_next_child_node,
 	.get_named_child_node = of_fwnode_get_named_child_node,
+	.graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint,
+	.graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint,
+	.graph_get_port_parent = of_fwnode_graph_get_port_parent,
+	.graph_parse_endpoint = of_fwnode_graph_parse_endpoint,
 };
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 8f64b3ae9c57..e315d867d631 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -57,6 +57,11 @@ struct fwnode_endpoint {
  * @get_parent: Return the parent of an fwnode.
  * @get_next_child_node: Return the next child node in an iteration.
  * @get_named_child_node: Return a child node with a given name.
+ * @graph_get_next_endpoint: Return an endpoint node in an iteration.
+ * @graph_get_remote_endpoint: Return the remote endpoint node of a local
+ *			       endpoint node.
+ * @graph_get_port_parent: Return the parent node of a port node.
+ * @graph_parse_endpoint: Parse endpoint for port and endpoint id.
  */
 struct fwnode_operations {
 	void (*get)(struct fwnode_handle *fwnode);
@@ -76,6 +81,15 @@ struct fwnode_operations {
 			       struct fwnode_handle *child);
 	struct fwnode_handle *
 	(*get_named_child_node)(struct fwnode_handle *fwnode, const char *name);
+	struct fwnode_handle *
+	(*graph_get_next_endpoint)(struct fwnode_handle *fwnode,
+				   struct fwnode_handle *prev);
+	struct fwnode_handle *
+	(*graph_get_remote_endpoint)(struct fwnode_handle *fwnode);
+	struct fwnode_handle *
+	(*graph_get_port_parent)(struct fwnode_handle *fwnode);
+	int (*graph_parse_endpoint)(struct fwnode_handle *fwnode,
+				    struct fwnode_endpoint *endpoint);
 };
 
 #define fwnode_has_op(fwnode, op)				\
-- 
cgit v1.2.3


From 2294b3af05e9b3fe0b84a78971e709037bd7593c Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:39 +0300
Subject: device property: Introduce fwnode_device_is_available()

Add fwnode_device_is_available() to tell whether the device corresponding
to a certain fwnode_handle is available for use.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c  |  9 +++++++++
 drivers/base/property.c  | 10 ++++++++++
 drivers/of/property.c    |  6 ++++++
 include/linux/fwnode.h   |  1 +
 include/linux/property.h |  1 +
 5 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index a24ca61294eb..917c789f953d 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1121,6 +1121,14 @@ int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode,
 	return 0;
 }
 
+static bool acpi_fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	if (!is_acpi_device_node(fwnode))
+		return false;
+
+	return acpi_device_is_present(to_acpi_device_node(fwnode));
+}
+
 static bool acpi_fwnode_property_present(struct fwnode_handle *fwnode,
 					 const char *propname)
 {
@@ -1216,6 +1224,7 @@ static int acpi_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 }
 
 const struct fwnode_operations acpi_fwnode_ops = {
+	.device_is_available = acpi_fwnode_device_is_available,
 	.property_present = acpi_fwnode_property_present,
 	.property_read_int_array = acpi_fwnode_property_read_int_array,
 	.property_read_string_array = acpi_fwnode_property_read_string_array,
diff --git a/drivers/base/property.c b/drivers/base/property.c
index e1a58ac8840e..b979f8a2f4fb 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1021,6 +1021,16 @@ void fwnode_handle_put(struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_put);
 
+/**
+ * fwnode_device_is_available - check if a device is available for use
+ * @fwnode: Pointer to the fwnode of the device.
+ */
+bool fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	return fwnode_call_int_op(fwnode, device_is_available);
+}
+EXPORT_SYMBOL_GPL(fwnode_device_is_available);
+
 /**
  * device_get_child_node_count - return the number of child nodes for device
  * @dev: Device to cound the child nodes for
diff --git a/drivers/of/property.c b/drivers/of/property.c
index e859e41e33f3..c96389b7c6b3 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -775,6 +775,11 @@ static void of_fwnode_put(struct fwnode_handle *fwnode)
 	of_node_put(to_of_node(fwnode));
 }
 
+static bool of_fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	return of_device_is_available(to_of_node(fwnode));
+}
+
 static bool of_fwnode_property_present(struct fwnode_handle *fwnode,
 				       const char *propname)
 {
@@ -895,6 +900,7 @@ static int of_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 const struct fwnode_operations of_fwnode_ops = {
 	.get = of_fwnode_get,
 	.put = of_fwnode_put,
+	.device_is_available = of_fwnode_device_is_available,
 	.property_present = of_fwnode_property_present,
 	.property_read_int_array = of_fwnode_property_read_int_array,
 	.property_read_string_array = of_fwnode_property_read_string_array,
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index e315d867d631..9ab375419189 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -66,6 +66,7 @@ struct fwnode_endpoint {
 struct fwnode_operations {
 	void (*get)(struct fwnode_handle *fwnode);
 	void (*put)(struct fwnode_handle *fwnode);
+	bool (*device_is_available)(struct fwnode_handle *fwnode);
 	bool (*property_present)(struct fwnode_handle *fwnode,
 				 const char *propname);
 	int (*property_read_int_array)(struct fwnode_handle *fwnode,
diff --git a/include/linux/property.h b/include/linux/property.h
index 2f482616a2f2..7be014af78ed 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -51,6 +51,7 @@ int device_property_read_string(struct device *dev, const char *propname,
 int device_property_match_string(struct device *dev,
 				 const char *propname, const char *string);
 
+bool fwnode_device_is_available(struct fwnode_handle *fwnode);
 bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname);
 int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
 				  const char *propname, u8 *val,
-- 
cgit v1.2.3


From 125ee6b3b0fa920c730b0991e6f083a9f5b1e4c3 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:40 +0300
Subject: device property: Add FW type agnostic fwnode_graph_get_remote_node

Add fwnode_graph_get_remote_node() function which is equivalent to
of_graph_get_remote_node() on OF.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/property.h |  2 ++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/property.c b/drivers/base/property.c
index b979f8a2f4fb..ac3590b1f93d 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1215,6 +1215,43 @@ fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 
+/**
+ * fwnode_graph_get_remote_node - get remote parent node for given port/endpoint
+ * @fwnode: pointer to parent fwnode_handle containing graph port/endpoint
+ * @port_id: identifier of the parent port node
+ * @endpoint_id: identifier of the endpoint node
+ *
+ * Return: Remote fwnode handle associated with remote endpoint node linked
+ *	   to @node. Use fwnode_node_put() on it when done.
+ */
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+						   u32 port_id, u32 endpoint_id)
+{
+	struct fwnode_handle *endpoint = NULL;
+
+	while ((endpoint = fwnode_graph_get_next_endpoint(fwnode, endpoint))) {
+		struct fwnode_endpoint fwnode_ep;
+		struct fwnode_handle *remote;
+		int ret;
+
+		ret = fwnode_graph_parse_endpoint(endpoint, &fwnode_ep);
+		if (ret < 0)
+			continue;
+
+		if (fwnode_ep.port != port_id || fwnode_ep.id != endpoint_id)
+			continue;
+
+		remote = fwnode_graph_get_remote_port_parent(endpoint);
+		if (!remote)
+			return NULL;
+
+		return fwnode_device_is_available(remote) ? remote : NULL;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_node);
+
 /**
  * fwnode_graph_parse_endpoint - parse common endpoint node properties
  * @fwnode: pointer to endpoint fwnode_handle
diff --git a/include/linux/property.h b/include/linux/property.h
index 7be014af78ed..0597a743aa66 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -281,6 +281,8 @@ struct fwnode_handle *fwnode_graph_get_remote_port(
 	struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_endpoint(
 	struct fwnode_handle *fwnode);
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+						   u32 port, u32 endpoint);
 
 int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint);
-- 
cgit v1.2.3


From 6a71d8d77795e0f7d887baa95bfc0d1d2bc74899 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Date: Tue, 6 Jun 2017 12:37:41 +0300
Subject: device property: Add fwnode_graph_get_port_parent

Provide a helper to obtain the parent device fwnode without first
parsing the remote-endpoint as per fwnode_graph_get_remote_port_parent.

Signed-off-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 28 ++++++++++++++++++++++++----
 include/linux/property.h |  2 ++
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/property.c b/drivers/base/property.c
index ac3590b1f93d..692007e5a94b 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1170,6 +1170,26 @@ fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 
+/**
+ * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint
+ * @endpoint: Endpoint firmware node of the port
+ *
+ * Return: the firmware node of the device the @endpoint belongs to.
+ */
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *endpoint)
+{
+	struct fwnode_handle *port, *parent;
+
+	port = fwnode_get_parent(endpoint);
+	parent = fwnode_call_ptr_op(port, graph_get_port_parent);
+
+	fwnode_handle_put(port);
+
+	return parent;
+}
+EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent);
+
 /**
  * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device
  * @fwnode: Endpoint firmware node pointing to the remote endpoint
@@ -1179,12 +1199,12 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 struct fwnode_handle *
 fwnode_graph_get_remote_port_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *port, *parent;
+	struct fwnode_handle *endpoint, *parent;
 
-	port = fwnode_graph_get_remote_port(fwnode);
-	parent = fwnode_call_ptr_op(port, graph_get_port_parent);
+	endpoint = fwnode_graph_get_remote_endpoint(fwnode);
+	parent = fwnode_graph_get_port_parent(endpoint);
 
-	fwnode_handle_put(port);
+	fwnode_handle_put(endpoint);
 
 	return parent;
 }
diff --git a/include/linux/property.h b/include/linux/property.h
index 0597a743aa66..7e77039e6b81 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -275,6 +275,8 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen);
 
 struct fwnode_handle *fwnode_graph_get_next_endpoint(
 	struct fwnode_handle *fwnode, struct fwnode_handle *prev);
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_port_parent(
 	struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_port(
-- 
cgit v1.2.3


From 3c85d6db5e5f05ae6c3d7f5a0ceceb43746a5ca7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 19 Jun 2017 04:12:00 +0200
Subject: sched/loadavg: Generalize "_idle" naming to "_nohz"

The loadavg naming code still assumes that nohz == idle whereas its code
is actually handling well both nohz idle and nohz full.

So lets fix the naming according to what the code actually does, to
unconfuse the reader.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1497838322-10913-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/trace/ftrace.txt |  2 +-
 include/linux/sched/nohz.h     |  8 +++----
 kernel/sched/loadavg.c         | 51 +++++++++++++++++++++---------------------
 kernel/time/tick-sched.c       |  4 ++--
 4 files changed, 33 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 94a987bd2bc5..fff8ff6d4893 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1609,7 +1609,7 @@ Doing the same with chrt -r 5 and function-trace set.
   <idle>-0       3dN.2   14us : sched_avg_update <-__cpu_load_update
   <idle>-0       3dN.2   14us : _raw_spin_unlock <-cpu_load_update_nohz
   <idle>-0       3dN.2   14us : sub_preempt_count <-_raw_spin_unlock
-  <idle>-0       3dN.1   15us : calc_load_exit_idle <-tick_nohz_idle_exit
+  <idle>-0       3dN.1   15us : calc_load_nohz_stop <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : hrtimer_cancel <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : hrtimer_try_to_cancel <-hrtimer_cancel
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 4995b717500b..7d3f75db23e5 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -23,11 +23,11 @@ static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_COMMON
-void calc_load_enter_idle(void);
-void calc_load_exit_idle(void);
+void calc_load_nohz_start(void);
+void calc_load_nohz_stop(void);
 #else
-static inline void calc_load_enter_idle(void) { }
-static inline void calc_load_exit_idle(void) { }
+static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_stop(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index f15fb2bdbc0d..f14716a3522f 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -117,7 +117,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  * load-average relies on per-cpu sampling from the tick, it is affected by
  * NO_HZ.
  *
- * The basic idea is to fold the nr_active delta into a global idle-delta upon
+ * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
  * entering NO_HZ state such that we can include this as an 'extra' cpu delta
  * when we read the global state.
  *
@@ -126,7 +126,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *  - When we go NO_HZ idle during the window, we can negate our sample
  *    contribution, causing under-accounting.
  *
- *    We avoid this by keeping two idle-delta counters and flipping them
+ *    We avoid this by keeping two NO_HZ-delta counters and flipping them
  *    when the window starts, thus separating old and new NO_HZ load.
  *
  *    The only trick is the slight shift in index flip for read vs write.
@@ -137,22 +137,22 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *    r:0 0 1           1 0           0 1           1 0
  *    w:0 1 1           0 0           1 1           0 0
  *
- *    This ensures we'll fold the old idle contribution in this window while
+ *    This ensures we'll fold the old NO_HZ contribution in this window while
  *    accumlating the new one.
  *
- *  - When we wake up from NO_HZ idle during the window, we push up our
+ *  - When we wake up from NO_HZ during the window, we push up our
  *    contribution, since we effectively move our sample point to a known
  *    busy state.
  *
  *    This is solved by pushing the window forward, and thus skipping the
- *    sample, for this cpu (effectively using the idle-delta for this cpu which
+ *    sample, for this cpu (effectively using the NO_HZ-delta for this cpu which
  *    was in effect at the time the window opened). This also solves the issue
- *    of having to deal with a cpu having been in NOHZ idle for multiple
- *    LOAD_FREQ intervals.
+ *    of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ
+ *    intervals.
  *
  * When making the ILB scale, we should try to pull this in as well.
  */
-static atomic_long_t calc_load_idle[2];
+static atomic_long_t calc_load_nohz[2];
 static int calc_load_idx;
 
 static inline int calc_load_write_idx(void)
@@ -167,7 +167,7 @@ static inline int calc_load_write_idx(void)
 
 	/*
 	 * If the folding window started, make sure we start writing in the
-	 * next idle-delta.
+	 * next NO_HZ-delta.
 	 */
 	if (!time_before(jiffies, READ_ONCE(calc_load_update)))
 		idx++;
@@ -180,24 +180,24 @@ static inline int calc_load_read_idx(void)
 	return calc_load_idx & 1;
 }
 
-void calc_load_enter_idle(void)
+void calc_load_nohz_start(void)
 {
 	struct rq *this_rq = this_rq();
 	long delta;
 
 	/*
-	 * We're going into NOHZ mode, if there's any pending delta, fold it
-	 * into the pending idle delta.
+	 * We're going into NO_HZ mode, if there's any pending delta, fold it
+	 * into the pending NO_HZ delta.
 	 */
 	delta = calc_load_fold_active(this_rq, 0);
 	if (delta) {
 		int idx = calc_load_write_idx();
 
-		atomic_long_add(delta, &calc_load_idle[idx]);
+		atomic_long_add(delta, &calc_load_nohz[idx]);
 	}
 }
 
-void calc_load_exit_idle(void)
+void calc_load_nohz_stop(void)
 {
 	struct rq *this_rq = this_rq();
 
@@ -217,13 +217,13 @@ void calc_load_exit_idle(void)
 		this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_fold_idle(void)
+static long calc_load_nohz_fold(void)
 {
 	int idx = calc_load_read_idx();
 	long delta = 0;
 
-	if (atomic_long_read(&calc_load_idle[idx]))
-		delta = atomic_long_xchg(&calc_load_idle[idx], 0);
+	if (atomic_long_read(&calc_load_nohz[idx]))
+		delta = atomic_long_xchg(&calc_load_nohz[idx], 0);
 
 	return delta;
 }
@@ -299,9 +299,9 @@ calc_load_n(unsigned long load, unsigned long exp,
 
 /*
  * NO_HZ can leave us missing all per-cpu ticks calling
- * calc_load_account_active(), but since an idle CPU folds its delta into
- * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
- * in the pending idle delta if our idle period crossed a load cycle boundary.
+ * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
+ * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
+ * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
  *
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
@@ -330,7 +330,7 @@ static void calc_global_nohz(void)
 	}
 
 	/*
-	 * Flip the idle index...
+	 * Flip the NO_HZ index...
 	 *
 	 * Make sure we first write the new time then flip the index, so that
 	 * calc_load_write_idx() will see the new time when it reads the new
@@ -341,7 +341,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_fold_idle(void) { return 0; }
+static inline long calc_load_nohz_fold(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -362,9 +362,9 @@ void calc_global_load(unsigned long ticks)
 		return;
 
 	/*
-	 * Fold the 'old' idle-delta to include all NO_HZ cpus.
+	 * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus.
 	 */
-	delta = calc_load_fold_idle();
+	delta = calc_load_nohz_fold();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);
 
@@ -378,7 +378,8 @@ void calc_global_load(unsigned long ticks)
 	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
 	/*
-	 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
+	 * In case we went to NO_HZ for multiple LOAD_FREQ intervals
+	 * catch up in bulk.
 	 */
 	calc_global_nohz();
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9c2dc64e31d8..b1b58a07e042 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -783,7 +783,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	 */
 	if (!ts->tick_stopped) {
 		nohz_balance_enter_idle(cpu);
-		calc_load_enter_idle();
+		calc_load_nohz_start();
 		cpu_load_update_nohz_start();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -823,7 +823,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	 */
 	timer_clear_idle();
 
-	calc_load_exit_idle();
+	calc_load_nohz_stop();
 	touch_softlockup_watchdog_sched();
 	/*
 	 * Cancel the scheduled timer and restore the tick
-- 
cgit v1.2.3


From a8ade55ffd1c1acef053a2d05f30e91a1c114f58 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 7 Jun 2017 17:49:56 +0300
Subject: net/mlx5e: Offload TC matching on ip ttl

Enable offloading of TC matching on ip ttl / hop-limit

As matching on ttl is supported only by newer HW brands (ConnectX-5),
we should do capability check before attempting to offload that.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 ++++++++---
 include/linux/mlx5/mlx5_ifc.h                   |  5 +++--
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 633a91a71218..382dede903a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -904,11 +904,16 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos  >> 2);
 
-		if (mask->tos)
-			*min_inline = MLX5_INLINE_MODE_IP;
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
 
-		if (mask->ttl) /* currently not supported */
+		if (mask->ttl &&
+		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+						ft_field_support.outer_ipv4_ttl))
 			return -EOPNOTSUPP;
+
+		if (mask->tos || mask->ttl)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e86ef880a149..694f51d388d9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -243,7 +243,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         outer_first_prio[0x1];
 	u8         outer_first_cfi[0x1];
 	u8         outer_first_vid[0x1];
-	u8         reserved_at_7[0x1];
+	u8         outer_ipv4_ttl[0x1];
 	u8         outer_second_prio[0x1];
 	u8         outer_second_cfi[0x1];
 	u8         outer_second_vid[0x1];
@@ -380,7 +380,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
 	u8         tcp_sport[0x10];
 	u8         tcp_dport[0x10];
 
-	u8         reserved_at_c0[0x20];
+	u8         reserved_at_c0[0x18];
+	u8         ttl_hoplimit[0x8];
 
 	u8         udp_sport[0x10];
 	u8         udp_dport[0x10];
-- 
cgit v1.2.3


From 0c0316f516f51e6fcd0b23d61c37f9f5f846f978 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 13 Jun 2017 11:09:57 +0300
Subject: net/mlx5e: Add header re-write offloading of IPv6 hop-limit

For environments where flow-based ipv6 router is offloaded.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 +
 include/linux/mlx5/mlx5_ifc.h                   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0318d6f6e1da..3c536f560dd2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1081,6 +1081,7 @@ static struct mlx5_fields fields[] = {
 	OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0),
 	OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0),
 	OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0),
+	OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
 
 	OFFLOAD(TCP_SPORT, 2, tcp.source,  0),
 	OFFLOAD(TCP_DPORT, 2, tcp.dest,    0),
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 694f51d388d9..4b547f551a2e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4620,6 +4620,7 @@ enum {
 	MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0    = 0x14,
 	MLX5_ACTION_IN_FIELD_OUT_SIPV4         = 0x15,
 	MLX5_ACTION_IN_FIELD_OUT_DIPV4         = 0x16,
+	MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
 };
 
 struct mlx5_ifc_alloc_modify_header_context_out_bits {
-- 
cgit v1.2.3


From 4717628938423fcba0aa8fa889e9fed4eb6a655f Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 18 Apr 2017 13:35:39 +0300
Subject: net/mlx5: Add MCC (Management Component Control) register definitions

MCC (Management Component Control) allows to control a firmware
component update.

MCDA (Management Component Data Access) allows to read and write
a firmware component.

MCQI (Management Component Query Information) allows to query
information about firmware components.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  3 ++
 include/linux/mlx5/mlx5_ifc.h | 82 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bf15e87da8fa..750701b3b863 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -131,6 +131,9 @@ enum {
 	MLX5_REG_MPCNT		 = 0x9051,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
+	MLX5_REG_MCQI		 = 0x9061,
+	MLX5_REG_MCC		 = 0x9062,
+	MLX5_REG_MCDA		 = 0x9063,
 	MLX5_REG_MCAM		 = 0x907f,
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4b547f551a2e..28468ad804be 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8165,6 +8165,85 @@ struct mlx5_ifc_mtppse_reg_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_mcqi_cap_bits {
+	u8         supported_info_bitmask[0x20];
+
+	u8         component_size[0x20];
+
+	u8         max_component_size[0x20];
+
+	u8         log_mcda_word_size[0x4];
+	u8         reserved_at_64[0xc];
+	u8         mcda_max_write_size[0x10];
+
+	u8         rd_en[0x1];
+	u8         reserved_at_81[0x1];
+	u8         match_chip_id[0x1];
+	u8         match_psid[0x1];
+	u8         check_user_timestamp[0x1];
+	u8         match_base_guid_mac[0x1];
+	u8         reserved_at_86[0x1a];
+};
+
+struct mlx5_ifc_mcqi_reg_bits {
+	u8         read_pending_component[0x1];
+	u8         reserved_at_1[0xf];
+	u8         component_index[0x10];
+
+	u8         reserved_at_20[0x20];
+
+	u8         reserved_at_40[0x1b];
+	u8         info_type[0x5];
+
+	u8         info_size[0x20];
+
+	u8         offset[0x20];
+
+	u8         reserved_at_a0[0x10];
+	u8         data_size[0x10];
+
+	u8         data[0][0x20];
+};
+
+struct mlx5_ifc_mcc_reg_bits {
+	u8         reserved_at_0[0x4];
+	u8         time_elapsed_since_last_cmd[0xc];
+	u8         reserved_at_10[0x8];
+	u8         instruction[0x8];
+
+	u8         reserved_at_20[0x10];
+	u8         component_index[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         update_handle[0x18];
+
+	u8         handle_owner_type[0x4];
+	u8         handle_owner_host_id[0x4];
+	u8         reserved_at_68[0x1];
+	u8         control_progress[0x7];
+	u8         error_code[0x8];
+	u8         reserved_at_78[0x4];
+	u8         control_state[0x4];
+
+	u8         component_size[0x20];
+
+	u8         reserved_at_a0[0x60];
+};
+
+struct mlx5_ifc_mcda_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         update_handle[0x18];
+
+	u8         offset[0x20];
+
+	u8         reserved_at_40[0x10];
+	u8         size[0x10];
+
+	u8         reserved_at_60[0x20];
+
+	u8         data[0][0x20];
+};
+
 union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
 	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -8214,6 +8293,9 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
 	struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits;
 	struct mlx5_ifc_fpga_cap_bits fpga_cap_bits;
+	struct mlx5_ifc_mcqi_reg_bits mcqi_reg;
+	struct mlx5_ifc_mcc_reg_bits mcc_reg;
+	struct mlx5_ifc_mcda_reg_bits mcda_reg;
 	u8         reserved_at_0[0x60e0];
 };
 
-- 
cgit v1.2.3


From 0ab87743cc8c5bcd482daf71961ed5fc45349e01 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 11 Jun 2017 15:25:38 +0300
Subject: net/mlx5: Enhance MCAM reg to allow query on access reg support

Enhance MCAM to allow the driver to query which access regs are
supported. For now, expose the regs needed for FW flashing.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |  3 +++
 include/linux/mlx5/mlx5_ifc.h | 13 +++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b26a478930eb..556e1c31b5d0 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1094,6 +1094,9 @@ enum mlx5_mcam_feature_groups {
 #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
 	MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
 
+#define MLX5_CAP_MCAM_REG(mdev, reg) \
+	MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg)
+
 #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \
 	MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld)
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 28468ad804be..8f197b070cea 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7745,6 +7745,18 @@ struct mlx5_ifc_mcam_enhanced_features_bits {
 	u8         pcie_performance_group[0x1];
 };
 
+struct mlx5_ifc_mcam_access_reg_bits {
+	u8         reserved_at_0[0x1c];
+	u8         mcda[0x1];
+	u8         mcc[0x1];
+	u8         mcqi[0x1];
+	u8         reserved_at_1f[0x1];
+
+	u8         regs_95_to_64[0x20];
+	u8         regs_63_to_32[0x20];
+	u8         regs_31_to_0[0x20];
+};
+
 struct mlx5_ifc_mcam_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         feature_group[0x8];
@@ -7754,6 +7766,7 @@ struct mlx5_ifc_mcam_reg_bits {
 	u8         reserved_at_20[0x20];
 
 	union {
+		struct mlx5_ifc_mcam_access_reg_bits access_regs;
 		u8         reserved_at_0[0x80];
 	} mng_access_reg_cap_mask;
 
-- 
cgit v1.2.3


From 137ffd15f71ec29ff1a57728081569698591225a Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 13 Jun 2017 18:12:13 +0300
Subject: net/mlx5: Fix offset of hca cap reserved field

The offending commit pushed fwd the field by two bits but
didn't increment the offset, fix that. Currently, no damage
was done b/c this is just a field name, but lets have it right.

Fixes: f32f5bd2eb7e ('net/mlx5: Configure cache line size for start and end padding')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 8f197b070cea..d6b99d5d0f24 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -824,7 +824,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         cc_modify_allowed[0x1];
 	u8         start_pad[0x1];
 	u8         cache_line_128byte[0x1];
-	u8         reserved_at_163[0xb];
+	u8         reserved_at_165[0xb];
 	u8         gid_table_size[0x10];
 
 	u8         out_of_seq_cnt[0x1];
-- 
cgit v1.2.3


From c07dfcb45877fbc6798fa042bab3c4b85378efd4 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Thu, 22 Jun 2017 10:29:53 -0400
Subject: mbcache: make mbcache naming more generic

Make names more generic so that mbcache usage is not limited to
block sharing. In a subsequent patch in the series
("ext4: xattr inode deduplication"), we start using the mbcache code
for sharing xattr inodes. With that patch, old mb_cache_entry.e_block
field could be holding either a block number or an inode number.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext2/xattr.c         | 18 +++++++++---------
 fs/ext4/xattr.c         | 10 +++++-----
 fs/mbcache.c            | 43 +++++++++++++++++++++----------------------
 include/linux/mbcache.h | 11 +++++------
 4 files changed, 40 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fbdb8f171893..1e5f76070580 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -493,8 +493,8 @@ bad_block:		ext2_error(sb, "ext2_xattr_set",
 			 * This must happen under buffer lock for
 			 * ext2_xattr_set2() to reliably detect modified block
 			 */
-			mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
-						    hash, bh->b_blocknr);
+			mb_cache_entry_delete(EXT2_SB(sb)->s_mb_cache, hash,
+					      bh->b_blocknr);
 
 			/* keep the buffer locked while modifying it. */
 		} else {
@@ -721,8 +721,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			 * This must happen under buffer lock for
 			 * ext2_xattr_set2() to reliably detect freed block
 			 */
-			mb_cache_entry_delete_block(ext2_mb_cache,
-						    hash, old_bh->b_blocknr);
+			mb_cache_entry_delete(ext2_mb_cache, hash,
+					      old_bh->b_blocknr);
 			/* Free the old block. */
 			ea_bdebug(old_bh, "freeing");
 			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
@@ -795,8 +795,8 @@ ext2_xattr_delete_inode(struct inode *inode)
 		 * This must happen under buffer lock for ext2_xattr_set2() to
 		 * reliably detect freed block
 		 */
-		mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
-					    hash, bh->b_blocknr);
+		mb_cache_entry_delete(EXT2_SB(inode->i_sb)->s_mb_cache, hash,
+				      bh->b_blocknr);
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		bforget(bh);
@@ -907,11 +907,11 @@ again:
 	while (ce) {
 		struct buffer_head *bh;
 
-		bh = sb_bread(inode->i_sb, ce->e_block);
+		bh = sb_bread(inode->i_sb, ce->e_value);
 		if (!bh) {
 			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
 				"inode %ld: block %ld read error",
-				inode->i_ino, (unsigned long) ce->e_block);
+				inode->i_ino, (unsigned long) ce->e_value);
 		} else {
 			lock_buffer(bh);
 			/*
@@ -931,7 +931,7 @@ again:
 			} else if (le32_to_cpu(HDR(bh)->h_refcount) >
 				   EXT2_XATTR_REFCOUNT_MAX) {
 				ea_idebug(inode, "block %ld refcount %d>%d",
-					  (unsigned long) ce->e_block,
+					  (unsigned long) ce->e_value,
 					  le32_to_cpu(HDR(bh)->h_refcount),
 					  EXT2_XATTR_REFCOUNT_MAX);
 			} else if (!ext2_xattr_cmp(header, HDR(bh))) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fed54001c9e6..85da7792afd0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -678,7 +678,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		 * This must happen under buffer lock for
 		 * ext4_xattr_block_set() to reliably detect freed block
 		 */
-		mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
+		mb_cache_entry_delete(ext4_mb_cache, hash, bh->b_blocknr);
 		get_bh(bh);
 		unlock_buffer(bh);
 		ext4_free_blocks(handle, inode, bh, 0, 1,
@@ -1113,8 +1113,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			 * ext4_xattr_block_set() to reliably detect modified
 			 * block
 			 */
-			mb_cache_entry_delete_block(ext4_mb_cache, hash,
-						    bs->bh->b_blocknr);
+			mb_cache_entry_delete(ext4_mb_cache, hash,
+					      bs->bh->b_blocknr);
 			ea_bdebug(bs->bh, "modifying in-place");
 			error = ext4_xattr_set_entry(i, s, handle, inode);
 			if (!error) {
@@ -2236,10 +2236,10 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
 	while (ce) {
 		struct buffer_head *bh;
 
-		bh = sb_bread(inode->i_sb, ce->e_block);
+		bh = sb_bread(inode->i_sb, ce->e_value);
 		if (!bh) {
 			EXT4_ERROR_INODE(inode, "block %lu read error",
-					 (unsigned long) ce->e_block);
+					 (unsigned long)ce->e_value);
 		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
 			*pce = ce;
 			return bh;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b19be429d655..45a8d52dc991 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -10,7 +10,7 @@
 /*
  * Mbcache is a simple key-value store. Keys need not be unique, however
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete_block()).
+ * mb_cache_entry_delete()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
  * They use hash of a block contents as a key and block number as a value.
@@ -62,15 +62,15 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
  * @cache - cache where the entry should be created
  * @mask - gfp mask with which the entry should be allocated
  * @key - key of the entry
- * @block - block that contains data
- * @reusable - is the block reusable by other inodes?
+ * @value - value of the entry
+ * @reusable - is the entry reusable by others?
  *
- * Creates entry in @cache with key @key and records that data is stored in
- * block @block. The function returns -EBUSY if entry with the same key
- * and for the same block already exists in cache. Otherwise 0 is returned.
+ * Creates entry in @cache with key @key and value @value. The function returns
+ * -EBUSY if entry with the same key and value already exists in cache.
+ * Otherwise 0 is returned.
  */
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
-			  sector_t block, bool reusable)
+			  u64 value, bool reusable)
 {
 	struct mb_cache_entry *entry, *dup;
 	struct hlist_bl_node *dup_node;
@@ -91,12 +91,12 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 	/* One ref for hash, one ref returned */
 	atomic_set(&entry->e_refcnt, 1);
 	entry->e_key = key;
-	entry->e_block = block;
+	entry->e_value = value;
 	entry->e_reusable = reusable;
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
-		if (dup->e_key == key && dup->e_block == block) {
+		if (dup->e_key == key && dup->e_value == value) {
 			hlist_bl_unlock(head);
 			kmem_cache_free(mb_entry_cache, entry);
 			return -EBUSY;
@@ -187,13 +187,13 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 
 /*
- * mb_cache_entry_get - get a cache entry by block number (and key)
+ * mb_cache_entry_get - get a cache entry by value (and key)
  * @cache - cache we work with
- * @key - key of block number @block
- * @block - block number
+ * @key - key
+ * @value - value
  */
 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
-					  sector_t block)
+					  u64 value)
 {
 	struct hlist_bl_node *node;
 	struct hlist_bl_head *head;
@@ -202,7 +202,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_block == block) {
+		if (entry->e_key == key && entry->e_value == value) {
 			atomic_inc(&entry->e_refcnt);
 			goto out;
 		}
@@ -214,15 +214,14 @@ out:
 }
 EXPORT_SYMBOL(mb_cache_entry_get);
 
-/* mb_cache_entry_delete_block - remove information about block from cache
+/* mb_cache_entry_delete - remove a cache entry
  * @cache - cache we work with
- * @key - key of block @block
- * @block - block number
+ * @key - key
+ * @value - value
  *
- * Remove entry from cache @cache with key @key with data stored in @block.
+ * Remove entry from cache @cache with key @key and value @value.
  */
-void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
-				 sector_t block)
+void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
 {
 	struct hlist_bl_node *node;
 	struct hlist_bl_head *head;
@@ -231,7 +230,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_block == block) {
+		if (entry->e_key == key && entry->e_value == value) {
 			/* We keep hash list reference to keep entry alive */
 			hlist_bl_del_init(&entry->e_hash_list);
 			hlist_bl_unlock(head);
@@ -248,7 +247,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
 	}
 	hlist_bl_unlock(head);
 }
-EXPORT_SYMBOL(mb_cache_entry_delete_block);
+EXPORT_SYMBOL(mb_cache_entry_delete);
 
 /* mb_cache_entry_touch - cache entry got used
  * @cache - cache the entry belongs to
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 86c9a8b480c5..e1bc73414983 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -19,15 +19,15 @@ struct mb_cache_entry {
 	u32			e_key;
 	u32			e_referenced:1;
 	u32			e_reusable:1;
-	/* Block number of hashed block - stable during lifetime of the entry */
-	sector_t		e_block;
+	/* User provided value - stable during lifetime of the entry */
+	u64			e_value;
 };
 
 struct mb_cache *mb_cache_create(int bucket_bits);
 void mb_cache_destroy(struct mb_cache *cache);
 
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
-			  sector_t block, bool reusable);
+			  u64 value, bool reusable);
 void __mb_cache_entry_free(struct mb_cache_entry *entry);
 static inline int mb_cache_entry_put(struct mb_cache *cache,
 				     struct mb_cache_entry *entry)
@@ -38,10 +38,9 @@ static inline int mb_cache_entry_put(struct mb_cache *cache,
 	return 1;
 }
 
-void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
-				  sector_t block);
+void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
-					  sector_t block);
+					  u64 value);
 struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
 						 u32 key);
 struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
-- 
cgit v1.2.3


From 2f74f09bce4f8d0236f20174a6daae63e10fe733 Mon Sep 17 00:00:00 2001
From: Tyler Baicar <tbaicar@codeaurora.org>
Date: Wed, 21 Jun 2017 12:17:07 -0600
Subject: efi: parse ARM processor error

Add support for ARM Common Platform Error Record (CPER).
UEFI 2.6 specification adds support for ARM specific
processor error information to be reported as part of the
CPER records. This provides more detail on for processor error logs.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
CC: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/firmware/efi/cper.c | 129 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/cper.h        |  54 +++++++++++++++++++
 2 files changed, 183 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 229cf9277524..eac08548d233 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -110,12 +110,15 @@ void cper_print_bits(const char *pfx, unsigned int bits,
 static const char * const proc_type_strs[] = {
 	"IA32/X64",
 	"IA64",
+	"ARM",
 };
 
 static const char * const proc_isa_strs[] = {
 	"IA32",
 	"IA64",
 	"X64",
+	"ARM A32/T32",
+	"ARM A64",
 };
 
 static const char * const proc_error_type_strs[] = {
@@ -184,6 +187,122 @@ static void cper_print_proc_generic(const char *pfx,
 		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
 }
 
+#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+static const char * const arm_reg_ctx_strs[] = {
+	"AArch32 general purpose registers",
+	"AArch32 EL1 context registers",
+	"AArch32 EL2 context registers",
+	"AArch32 secure context registers",
+	"AArch64 general purpose registers",
+	"AArch64 EL1 context registers",
+	"AArch64 EL2 context registers",
+	"AArch64 EL3 context registers",
+	"Misc. system register structure",
+};
+
+static void cper_print_proc_arm(const char *pfx,
+				const struct cper_sec_proc_arm *proc)
+{
+	int i, len, max_ctx_type;
+	struct cper_arm_err_info *err_info;
+	struct cper_arm_ctx_info *ctx_info;
+	char newpfx[64];
+
+	printk("%sMIDR: 0x%016llx\n", pfx, proc->midr);
+
+	len = proc->section_length - (sizeof(*proc) +
+		proc->err_info_num * (sizeof(*err_info)));
+	if (len < 0) {
+		printk("%ssection length: %d\n", pfx, proc->section_length);
+		printk("%ssection length is too small\n", pfx);
+		printk("%sfirmware-generated error record is incorrect\n", pfx);
+		printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num);
+		return;
+	}
+
+	if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
+		printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n",
+			pfx, proc->mpidr);
+
+	if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
+		printk("%serror affinity level: %d\n", pfx,
+			proc->affinity_level);
+
+	if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
+		printk("%srunning state: 0x%x\n", pfx, proc->running_state);
+		printk("%sPower State Coordination Interface state: %d\n",
+			pfx, proc->psci_state);
+	}
+
+	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+
+	err_info = (struct cper_arm_err_info *)(proc + 1);
+	for (i = 0; i < proc->err_info_num; i++) {
+		printk("%sError info structure %d:\n", pfx, i);
+
+		printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1);
+
+		if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) {
+			if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST)
+				printk("%sfirst error captured\n", newpfx);
+			if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST)
+				printk("%slast error captured\n", newpfx);
+			if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED)
+				printk("%spropagated error captured\n",
+				       newpfx);
+			if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW)
+				printk("%soverflow occurred, error info is incomplete\n",
+				       newpfx);
+		}
+
+		printk("%serror_type: %d, %s\n", newpfx, err_info->type,
+			err_info->type < ARRAY_SIZE(proc_error_type_strs) ?
+			proc_error_type_strs[err_info->type] : "unknown");
+		if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO)
+			printk("%serror_info: 0x%016llx\n", newpfx,
+			       err_info->error_info);
+		if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR)
+			printk("%svirtual fault address: 0x%016llx\n",
+				newpfx, err_info->virt_fault_addr);
+		if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR)
+			printk("%sphysical fault address: 0x%016llx\n",
+				newpfx, err_info->physical_fault_addr);
+		err_info += 1;
+	}
+
+	ctx_info = (struct cper_arm_ctx_info *)err_info;
+	max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1;
+	for (i = 0; i < proc->context_info_num; i++) {
+		int size = sizeof(*ctx_info) + ctx_info->size;
+
+		printk("%sContext info structure %d:\n", pfx, i);
+		if (len < size) {
+			printk("%ssection length is too small\n", newpfx);
+			printk("%sfirmware-generated error record is incorrect\n", pfx);
+			return;
+		}
+		if (ctx_info->type > max_ctx_type) {
+			printk("%sInvalid context type: %d (max: %d)\n",
+				newpfx, ctx_info->type, max_ctx_type);
+			return;
+		}
+		printk("%sregister context type: %s\n", newpfx,
+			arm_reg_ctx_strs[ctx_info->type]);
+		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4,
+				(ctx_info + 1), ctx_info->size, 0);
+		len -= size;
+		ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size);
+	}
+
+	if (len > 0) {
+		printk("%sVendor specific error info has %u bytes:\n", pfx,
+		       len);
+		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info,
+				len, true);
+	}
+}
+#endif
+
 static const char * const mem_err_type_strs[] = {
 	"unknown",
 	"no error",
@@ -456,6 +575,16 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
 			cper_print_pcie(newpfx, pcie, gdata);
 		else
 			goto err_section_too_small;
+#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
+		struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
+
+		printk("%ssection_type: ARM processor error\n", newpfx);
+		if (gdata->error_data_length >= sizeof(*arm_err))
+			cper_print_proc_arm(newpfx, arm_err);
+		else
+			goto err_section_too_small;
+#endif
 	} else
 		printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
 
diff --git a/include/linux/cper.h b/include/linux/cper.h
index dcacb1a72e26..4c671fc2081e 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -180,6 +180,10 @@ enum {
 #define CPER_SEC_PROC_IPF						\
 	UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00,	\
 		0x80, 0xC7, 0x3C, 0x88, 0x81)
+/* Processor Specific: ARM */
+#define CPER_SEC_PROC_ARM						\
+	UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05,	\
+		0x1D, 0x5D, 0x46, 0xB0)
 /* Platform Memory */
 #define CPER_SEC_PLATFORM_MEM						\
 	UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83,	\
@@ -255,6 +259,22 @@ enum {
 
 #define CPER_PCIE_SLOT_SHIFT			3
 
+#define CPER_ARM_VALID_MPIDR			BIT(0)
+#define CPER_ARM_VALID_AFFINITY_LEVEL		BIT(1)
+#define CPER_ARM_VALID_RUNNING_STATE		BIT(2)
+#define CPER_ARM_VALID_VENDOR_INFO		BIT(3)
+
+#define CPER_ARM_INFO_VALID_MULTI_ERR		BIT(0)
+#define CPER_ARM_INFO_VALID_FLAGS		BIT(1)
+#define CPER_ARM_INFO_VALID_ERR_INFO		BIT(2)
+#define CPER_ARM_INFO_VALID_VIRT_ADDR		BIT(3)
+#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR	BIT(4)
+
+#define CPER_ARM_INFO_FLAGS_FIRST		BIT(0)
+#define CPER_ARM_INFO_FLAGS_LAST		BIT(1)
+#define CPER_ARM_INFO_FLAGS_PROPAGATED		BIT(2)
+#define CPER_ARM_INFO_FLAGS_OVERFLOW		BIT(3)
+
 /*
  * All tables and structs must be byte-packed to match CPER
  * specification, since the tables are provided by the system BIOS
@@ -340,6 +360,40 @@ struct cper_ia_proc_ctx {
 	__u64	mm_reg_addr;
 };
 
+/* ARM Processor Error Section */
+struct cper_sec_proc_arm {
+	__u32	validation_bits;
+	__u16	err_info_num;		/* Number of Processor Error Info */
+	__u16	context_info_num;	/* Number of Processor Context Info Records*/
+	__u32	section_length;
+	__u8	affinity_level;
+	__u8	reserved[3];		/* must be zero */
+	__u64	mpidr;
+	__u64	midr;
+	__u32	running_state;		/* Bit 0 set - Processor running. PSCI = 0 */
+	__u32	psci_state;
+};
+
+/* ARM Processor Error Information Structure */
+struct cper_arm_err_info {
+	__u8	version;
+	__u8	length;
+	__u16	validation_bits;
+	__u8	type;
+	__u16	multiple_error;
+	__u8	flags;
+	__u64	error_info;
+	__u64	virt_fault_addr;
+	__u64	physical_fault_addr;
+};
+
+/* ARM Processor Context Information Structure */
+struct cper_arm_ctx_info {
+	__u16	version;
+	__u16	type;
+	__u32	size;
+};
+
 /* Old Memory Error Section UEFI 2.1, 2.2 */
 struct cper_sec_mem_err_old {
 	__u64	validation_bits;
-- 
cgit v1.2.3


From 7a9ca53aea10ad4677a0f347ad7639c304b80194 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Thu, 22 Jun 2017 11:46:48 -0400
Subject: quota: add get_inode_usage callback to transfer multi-inode charges

Ext4 ea_inode feature allows storing xattr values in external inodes to
be able to store values that are bigger than a block in size. Ext4 also
has deduplication support for these type of inodes. With deduplication,
the actual storage waste is eliminated but the users of such inodes are
still charged full quota for the inodes as if there was no sharing
happening in the background.

This design requires ext4 to manually charge the users because the
inodes are shared.

An implication of this is that, if someone calls chown on a file that
has such references we need to transfer the quota for the file and xattr
inodes. Current dquot_transfer() function implicitly transfers one inode
charge. With ea_inode feature, we would like to transfer multiple inode
charges.

Add get_inode_usage callback which can interrogate the total number of
inodes that were charged for a given inode.

[ Applied fix from Colin King to make sure the 'ret' variable is
  initialized on the successful return path.  Detected by
  CoverityScan, CID#1446616 ("Uninitialized scalar variable") --tytso]

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/inode.c       |  7 +++++++
 fs/ext4/ioctl.c       |  6 ++++++
 fs/ext4/super.c       | 21 ++++++++++----------
 fs/ext4/xattr.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/xattr.h       |  2 ++
 fs/quota/dquot.c      | 16 +++++++++++----
 include/linux/quota.h |  2 ++
 7 files changed, 95 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 962f28a0e176..d9733aa955e9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5295,7 +5295,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
+
+		/* dquot_transfer() calls back ext4_get_inode_usage() which
+		 * counts xattr inode references.
+		 */
+		down_read(&EXT4_I(inode)->xattr_sem);
 		error = dquot_transfer(inode, attr);
+		up_read(&EXT4_I(inode)->xattr_sem);
+
 		if (error) {
 			ext4_journal_stop(handle);
 			return error;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index dde8deb11e59..42b3a73143cf 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -373,7 +373,13 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
 
 	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
 	if (!IS_ERR(transfer_to[PRJQUOTA])) {
+
+		/* __dquot_transfer() calls back ext4_get_inode_usage() which
+		 * counts xattr inode references.
+		 */
+		down_read(&EXT4_I(inode)->xattr_sem);
 		err = __dquot_transfer(inode, transfer_to);
+		up_read(&EXT4_I(inode)->xattr_sem);
 		dqput(transfer_to[PRJQUOTA]);
 		if (err)
 			goto out_dirty;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d501f8256dc4..5ac76e8d4013 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1263,16 +1263,17 @@ static struct dquot **ext4_get_dquots(struct inode *inode)
 }
 
 static const struct dquot_operations ext4_quota_operations = {
-	.get_reserved_space = ext4_get_reserved_space,
-	.write_dquot	= ext4_write_dquot,
-	.acquire_dquot	= ext4_acquire_dquot,
-	.release_dquot	= ext4_release_dquot,
-	.mark_dirty	= ext4_mark_dquot_dirty,
-	.write_info	= ext4_write_info,
-	.alloc_dquot	= dquot_alloc,
-	.destroy_dquot	= dquot_destroy,
-	.get_projid	= ext4_get_projid,
-	.get_next_id	= ext4_get_next_id,
+	.get_reserved_space	= ext4_get_reserved_space,
+	.write_dquot		= ext4_write_dquot,
+	.acquire_dquot		= ext4_acquire_dquot,
+	.release_dquot		= ext4_release_dquot,
+	.mark_dirty		= ext4_mark_dquot_dirty,
+	.write_info		= ext4_write_info,
+	.alloc_dquot		= dquot_alloc,
+	.destroy_dquot		= dquot_destroy,
+	.get_projid		= ext4_get_projid,
+	.get_inode_usage	= ext4_get_inode_usage,
+	.get_next_id		= ext4_get_next_id,
 };
 
 static const struct quotactl_ops ext4_qctl_operations = {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a4c8fe3692a2..22bfb6221a2d 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -733,6 +733,61 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 	}
 }
 
+int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
+{
+	struct ext4_iloc iloc = { .bh = NULL };
+	struct buffer_head *bh = NULL;
+	struct ext4_inode *raw_inode;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_xattr_entry *entry;
+	qsize_t ea_inode_refs = 0;
+	void *end;
+	int ret;
+
+	lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem);
+
+	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
+		ret = ext4_get_inode_loc(inode, &iloc);
+		if (ret)
+			goto out;
+		raw_inode = ext4_raw_inode(&iloc);
+		header = IHDR(inode, raw_inode);
+		end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+		ret = xattr_check_inode(inode, header, end);
+		if (ret)
+			goto out;
+
+		for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
+		     entry = EXT4_XATTR_NEXT(entry))
+			if (entry->e_value_inum)
+				ea_inode_refs++;
+	}
+
+	if (EXT4_I(inode)->i_file_acl) {
+		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+		if (!bh) {
+			ret = -EIO;
+			goto out;
+		}
+
+		if (ext4_xattr_check_block(inode, bh)) {
+			ret = -EFSCORRUPTED;
+			goto out;
+		}
+
+		for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
+		     entry = EXT4_XATTR_NEXT(entry))
+			if (entry->e_value_inum)
+				ea_inode_refs++;
+	}
+	*usage = ea_inode_refs + 1;
+	ret = 0;
+out:
+	brelse(iloc.bh);
+	brelse(bh);
+	return ret;
+}
+
 static inline size_t round_up_cluster(struct inode *inode, size_t length)
 {
 	struct super_block *sb = inode->i_sb;
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 67616cb9a059..26119a67c8c3 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -193,3 +193,5 @@ extern void ext4_xattr_inode_set_class(struct inode *ea_inode);
 #else
 static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { }
 #endif
+
+extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 48813aeaab80..53a17496c5c5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1910,6 +1910,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 {
 	qsize_t space, cur_space;
 	qsize_t rsv_space = 0;
+	qsize_t inode_usage = 1;
 	struct dquot *transfer_from[MAXQUOTAS] = {};
 	int cnt, ret = 0;
 	char is_valid[MAXQUOTAS] = {};
@@ -1919,6 +1920,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 
 	if (IS_NOQUOTA(inode))
 		return 0;
+
+	if (inode->i_sb->dq_op->get_inode_usage) {
+		ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage);
+		if (ret)
+			return ret;
+	}
+
 	/* Initialize the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		warn_to[cnt].w_type = QUOTA_NL_NOWARN;
@@ -1946,7 +1954,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 			continue;
 		is_valid[cnt] = 1;
 		transfer_from[cnt] = i_dquot(inode)[cnt];
-		ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
+		ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]);
 		if (ret)
 			goto over_quota;
 		ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
@@ -1963,7 +1971,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		/* Due to IO error we might not have transfer_from[] structure */
 		if (transfer_from[cnt]) {
 			int wtype;
-			wtype = info_idq_free(transfer_from[cnt], 1);
+			wtype = info_idq_free(transfer_from[cnt], inode_usage);
 			if (wtype != QUOTA_NL_NOWARN)
 				prepare_warning(&warn_from_inodes[cnt],
 						transfer_from[cnt], wtype);
@@ -1971,13 +1979,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 			if (wtype != QUOTA_NL_NOWARN)
 				prepare_warning(&warn_from_space[cnt],
 						transfer_from[cnt], wtype);
-			dquot_decr_inodes(transfer_from[cnt], 1);
+			dquot_decr_inodes(transfer_from[cnt], inode_usage);
 			dquot_decr_space(transfer_from[cnt], cur_space);
 			dquot_free_reserved_space(transfer_from[cnt],
 						  rsv_space);
 		}
 
-		dquot_incr_inodes(transfer_to[cnt], 1);
+		dquot_incr_inodes(transfer_to[cnt], inode_usage);
 		dquot_incr_space(transfer_to[cnt], cur_space);
 		dquot_resv_space(transfer_to[cnt], rsv_space);
 
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 3434eef2a5aa..bfd077ca6ac3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -332,6 +332,8 @@ struct dquot_operations {
 	 * quota code only */
 	qsize_t *(*get_reserved_space) (struct inode *);
 	int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */
+	/* Get number of inodes that were charged for a given inode */
+	int (*get_inode_usage) (struct inode *, qsize_t *);
 	/* Get next ID with active quota structure */
 	int (*get_next_id) (struct super_block *sb, struct kqid *qid);
 };
-- 
cgit v1.2.3


From 92af08990cc49408119ca2549dfe9e37235864d8 Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand@sony.com>
Date: Tue, 20 Jun 2017 16:38:28 -0700
Subject: of: make of_fdt_is_compatible() static

The callers of of_fdt_is_compatible() are all in fdt.c so
make it static.

Signed-off-by: Frank Rowand <frank.rowand@sony.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/fdt.c       | 2 +-
 include/linux/of_fdt.h | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 3080d9dd031d..5c71cb933426 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -91,7 +91,7 @@ void of_fdt_limit_memory(int limit)
  * On match, returns a non-zero value with smaller values returned for more
  * specific compatible values.
  */
-int of_fdt_is_compatible(const void *blob,
+static int of_fdt_is_compatible(const void *blob,
 		      unsigned long node, const char *compat)
 {
 	const char *cp;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 1dfbfd0d8040..013c5418aeec 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -31,9 +31,6 @@ extern void *of_fdt_get_property(const void *blob,
 				 unsigned long node,
 				 const char *name,
 				 int *size);
-extern int of_fdt_is_compatible(const void *blob,
-				unsigned long node,
-				const char *compat);
 extern bool of_fdt_is_big_endian(const void *blob,
 				 unsigned long node);
 extern int of_fdt_match(const void *blob, unsigned long node,
-- 
cgit v1.2.3


From d59f6617eef0f76e34f7a9993f5645c5ef467e42 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:05 +0200
Subject: genirq: Allow fwnode to carry name information only

In order to provide proper debug interface it's required to have domain
names available when the domain is added. Non fwnode based architectures
like x86 have no way to do so.

It's not possible to use domain ops or host data for this as domain ops
might be the same for several instances, but the names have to be unique.

Extend the irqchip fwnode to allow transporting the domain name. If no node
is supplied, create a 'unknown-N' placeholder.

Warn if an invalid node is supplied and treat it like no node. This happens
e.g. with i2 devices on x86 which hand in an ACPI type node which has no
interface for retrieving the name.

[ Folded a fix from Marc to make DT name parsing work ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235443.588784933@linutronix.de
---
 include/linux/irqdomain.h |  31 +++++++++++++-
 kernel/irq/irqdomain.c    | 105 ++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 122 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 9f3616085423..9cf32a2fbe69 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -189,6 +189,9 @@ enum {
 	/* Irq domain implements MSI remapping */
 	IRQ_DOMAIN_FLAG_MSI_REMAP	= (1 << 5),
 
+	/* Irq domain name was allocated in __irq_domain_add() */
+	IRQ_DOMAIN_NAME_ALLOCATED	= (1 << 6),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -203,7 +206,33 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
 }
 
 #ifdef CONFIG_IRQ_DOMAIN
-struct fwnode_handle *irq_domain_alloc_fwnode(void *data);
+struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
+						const char *name, void *data);
+
+enum {
+	IRQCHIP_FWNODE_REAL,
+	IRQCHIP_FWNODE_NAMED,
+	IRQCHIP_FWNODE_NAMED_ID,
+};
+
+static inline
+struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name)
+{
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL);
+}
+
+static inline
+struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
+{
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name,
+					 NULL);
+}
+
+static inline struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
+{
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, data);
+}
+
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
 struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 				    irq_hw_number_t hwirq_max, int direct_max,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 70b9da72018b..e1b925bea205 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -26,39 +26,61 @@ static struct irq_domain *irq_default_domain;
 static void irq_domain_check_hierarchy(struct irq_domain *domain);
 
 struct irqchip_fwid {
-	struct fwnode_handle fwnode;
-	char *name;
-	void *data;
+	struct fwnode_handle	fwnode;
+	unsigned int		type;
+	char			*name;
+	void			*data;
 };
 
 /**
  * irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for
  *                           identifying an irq domain
- * @data: optional user-provided data
+ * @type:	Type of irqchip_fwnode. See linux/irqdomain.h
+ * @name:	Optional user provided domain name
+ * @id:		Optional user provided id if name != NULL
+ * @data:	Optional user-provided data
  *
- * Allocate a struct device_node, and return a poiner to the embedded
+ * Allocate a struct irqchip_fwid, and return a poiner to the embedded
  * fwnode_handle (or NULL on failure).
+ *
+ * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are
+ * solely to transport name information to irqdomain creation code. The
+ * node is not stored. For other types the pointer is kept in the irq
+ * domain struct.
  */
-struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
+struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
+						const char *name, void *data)
 {
 	struct irqchip_fwid *fwid;
-	char *name;
+	char *n;
 
 	fwid = kzalloc(sizeof(*fwid), GFP_KERNEL);
-	name = kasprintf(GFP_KERNEL, "irqchip@%p", data);
 
-	if (!fwid || !name) {
+	switch (type) {
+	case IRQCHIP_FWNODE_NAMED:
+		n = kasprintf(GFP_KERNEL, "%s", name);
+		break;
+	case IRQCHIP_FWNODE_NAMED_ID:
+		n = kasprintf(GFP_KERNEL, "%s-%d", name, id);
+		break;
+	default:
+		n = kasprintf(GFP_KERNEL, "irqchip@%p", data);
+		break;
+	}
+
+	if (!fwid || !n) {
 		kfree(fwid);
-		kfree(name);
+		kfree(n);
 		return NULL;
 	}
 
-	fwid->name = name;
+	fwid->type = type;
+	fwid->name = n;
 	fwid->data = data;
 	fwid->fwnode.type = FWNODE_IRQCHIP;
 	return &fwid->fwnode;
 }
-EXPORT_SYMBOL_GPL(irq_domain_alloc_fwnode);
+EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode);
 
 /**
  * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
@@ -97,20 +119,75 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 				    void *host_data)
 {
 	struct device_node *of_node = to_of_node(fwnode);
+	struct irqchip_fwid *fwid;
 	struct irq_domain *domain;
 
+	static atomic_t unknown_domains;
+
 	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
 			      GFP_KERNEL, of_node_to_nid(of_node));
 	if (WARN_ON(!domain))
 		return NULL;
 
+	if (fwnode && is_fwnode_irqchip(fwnode)) {
+		fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+
+		switch (fwid->type) {
+		case IRQCHIP_FWNODE_NAMED:
+		case IRQCHIP_FWNODE_NAMED_ID:
+			domain->name = kstrdup(fwid->name, GFP_KERNEL);
+			if (!domain->name) {
+				kfree(domain);
+				return NULL;
+			}
+			domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+			break;
+		default:
+			domain->fwnode = fwnode;
+			domain->name = fwid->name;
+			break;
+		}
+	} else if (of_node) {
+		char *name;
+
+		/*
+		 * DT paths contain '/', which debugfs is legitimately
+		 * unhappy about. Replace them with ':', which does
+		 * the trick and is not as offensive as '\'...
+		 */
+		name = kstrdup(of_node_full_name(of_node), GFP_KERNEL);
+		if (!name) {
+			kfree(domain);
+			return NULL;
+		}
+
+		strreplace(name, '/', ':');
+
+		domain->name = name;
+		domain->fwnode = fwnode;
+		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+	}
+
+	if (!domain->name) {
+		if (fwnode) {
+			pr_err("Invalid fwnode type (%d) for irqdomain\n",
+			       fwnode->type);
+		}
+		domain->name = kasprintf(GFP_KERNEL, "unknown-%d",
+					 atomic_inc_return(&unknown_domains));
+		if (!domain->name) {
+			kfree(domain);
+			return NULL;
+		}
+		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+	}
+
 	of_node_get(of_node);
 
 	/* Fill structure */
 	INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
 	domain->ops = ops;
 	domain->host_data = host_data;
-	domain->fwnode = fwnode;
 	domain->hwirq_max = hwirq_max;
 	domain->revmap_size = size;
 	domain->revmap_direct_max_irq = direct_max;
@@ -152,6 +229,8 @@ void irq_domain_remove(struct irq_domain *domain)
 	pr_debug("Removed domain %s\n", domain->name);
 
 	of_node_put(irq_domain_get_of_node(domain));
+	if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+		kfree(domain->name);
 	kfree(domain);
 }
 EXPORT_SYMBOL_GPL(irq_domain_remove);
-- 
cgit v1.2.3


From 9dc6be3d419398eae9a19cd09b7969ceff8eaf10 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:16 +0200
Subject: genirq/irqdomain: Add map counter

Add a map counter instead of counting radix tree entries for
diagnosis. That also gives correct information for linear domains.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235444.459397746@linutronix.de
---
 include/linux/irqdomain.h | 2 ++
 kernel/irq/irqdomain.c    | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 9cf32a2fbe69..17ccd54d936d 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -130,6 +130,7 @@ struct irq_domain_chip_generic;
  * @host_data: private data pointer for use by owner.  Not touched by irq_domain
  *             core code.
  * @flags: host per irq_domain flags
+ * @mapcount: The number of mapped interrupts
  *
  * Optional elements
  * @of_node: Pointer to device tree nodes associated with the irq_domain. Used
@@ -152,6 +153,7 @@ struct irq_domain {
 	const struct irq_domain_ops *ops;
 	void *host_data;
 	unsigned int flags;
+	unsigned int mapcount;
 
 	/* Optional data */
 	struct fwnode_handle *fwnode;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e1b925bea205..8d5805c655b6 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -423,6 +423,7 @@ void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
 
 	irq_data->domain = NULL;
 	irq_data->hwirq = 0;
+	domain->mapcount--;
 
 	/* Clear reverse map for this hwirq */
 	if (hwirq < domain->revmap_size) {
@@ -474,6 +475,7 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
 			domain->name = irq_data->chip->name;
 	}
 
+	domain->mapcount++;
 	if (hwirq < domain->revmap_size) {
 		domain->linear_revmap[hwirq] = virq;
 	} else {
@@ -1081,6 +1083,7 @@ static void irq_domain_insert_irq(int virq)
 		struct irq_domain *domain = data->domain;
 		irq_hw_number_t hwirq = data->hwirq;
 
+		domain->mapcount++;
 		if (hwirq < domain->revmap_size) {
 			domain->linear_revmap[hwirq] = virq;
 		} else {
@@ -1110,6 +1113,7 @@ static void irq_domain_remove_irq(int virq)
 		struct irq_domain *domain = data->domain;
 		irq_hw_number_t hwirq = data->hwirq;
 
+		domain->mapcount--;
 		if (hwirq < domain->revmap_size) {
 			domain->linear_revmap[hwirq] = 0;
 		} else {
-- 
cgit v1.2.3


From 087cdfb662ae50e3826e7cd2e54b6519d07b60f0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:17 +0200
Subject: genirq/debugfs: Add proper debugfs interface

Debugging (hierarchical) interupt domains is tedious as there is no
information about the hierarchy and no information about states of
interrupts in the various domain levels.

Add a debugfs directory 'irq' and subdirectories 'domains' and 'irqs'.

The domains directory contains the domain files. The content is information
about the domain. If the domain is part of a hierarchy then the parent
domains are printed as well.

# ls /sys/kernel/debug/irq/domains/
default     INTEL-IR-2	    INTEL-IR-MSI-2  IO-APIC-IR-2  PCI-MSI
DMAR-MSI    INTEL-IR-3	    INTEL-IR-MSI-3  IO-APIC-IR-3  unknown-1
INTEL-IR-0  INTEL-IR-MSI-0  IO-APIC-IR-0    IO-APIC-IR-4  VECTOR
INTEL-IR-1  INTEL-IR-MSI-1  IO-APIC-IR-1    PCI-HT

# cat /sys/kernel/debug/irq/domains/VECTOR
name:   VECTOR
 size:   0
 mapped: 216
 flags:  0x00000041

# cat /sys/kernel/debug/irq/domains/IO-APIC-IR-0
name:   IO-APIC-IR-0
 size:   24
 mapped: 19
 flags:  0x00000041
 parent: INTEL-IR-3
    name:   INTEL-IR-3
     size:   65536
     mapped: 167
     flags:  0x00000041
     parent: VECTOR
        name:   VECTOR
         size:   0
         mapped: 216
         flags:  0x00000041

Unfortunately there is no per cpu information about the VECTOR domain (yet).

The irqs directory contains detailed information about mapped interrupts.

# cat /sys/kernel/debug/irq/irqs/3
handler:  handle_edge_irq
status:   0x00004000
istate:   0x00000000
ddepth:   1
wdepth:   0
dstate:   0x01018000
            IRQD_IRQ_DISABLED
            IRQD_SINGLE_TARGET
            IRQD_MOVE_PCNTXT
node:     0
affinity: 0-143
effectiv: 0
pending:
domain:  IO-APIC-IR-0
 hwirq:   0x3
 chip:    IR-IO-APIC
  flags:   0x10
             IRQCHIP_SKIP_SET_WAKE
 parent:
    domain:  INTEL-IR-3
     hwirq:   0x20000
     chip:    INTEL-IR
      flags:   0x0
     parent:
        domain:  VECTOR
         hwirq:   0x3
         chip:    APIC
          flags:   0x0

This was developed to simplify the debugging of the managed affinity
changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235444.537566163@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h   |   4 +
 include/linux/irqdomain.h |   4 +
 kernel/irq/Kconfig        |  11 +++
 kernel/irq/Makefile       |   1 +
 kernel/irq/debugfs.c      | 215 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/irq/internals.h    |  22 +++++
 kernel/irq/irqdesc.c      |   1 +
 kernel/irq/irqdomain.c    |  87 ++++++++++++++++++-
 kernel/irq/manage.c       |   1 +
 9 files changed, 345 insertions(+), 1 deletion(-)
 create mode 100644 kernel/irq/debugfs.c

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c9be57931b58..d425a3a09722 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -46,6 +46,7 @@ struct pt_regs;
  * @rcu:		rcu head for delayed free
  * @kobj:		kobject used to represent this struct in sysfs
  * @dir:		/proc/irq/ procfs entry
+ * @debugfs_file:	dentry for the debugfs file
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
@@ -88,6 +89,9 @@ struct irq_desc {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*dir;
 #endif
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	struct dentry		*debugfs_file;
+#endif
 #ifdef CONFIG_SPARSE_IRQ
 	struct rcu_head		rcu;
 	struct kobject		kobj;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 17ccd54d936d..914b0c31d233 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -139,6 +139,7 @@ struct irq_domain_chip_generic;
  *      setting up one or more generic chips for interrupt controllers
  *      drivers using the generic chip library which uses this pointer.
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
+ * @debugfs_file: dentry for the domain debugfs file
  *
  * Revmap data, used internally by irq_domain
  * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
@@ -162,6 +163,9 @@ struct irq_domain {
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	struct irq_domain *parent;
 #endif
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	struct dentry		*debugfs_file;
+#endif
 
 	/* reverse map data. The linear map gets appended to the irq_domain */
 	irq_hw_number_t hwirq_max;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3bbfd6a9c475..8d9498e51585 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -108,4 +108,15 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say N.
 
+config GENERIC_IRQ_DEBUGFS
+	bool "Expose irq internals in debugfs"
+	depends on DEBUG_FS
+	default n
+	---help---
+
+	  Exposes internal state information through debugfs. Mostly for
+	  developers and debugging of hard to diagnose interrupt problems.
+
+	  If you don't know what to do here, say N.
+
 endmenu
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 1d3ee3169202..c61fc9c2d1f7 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_PM_SLEEP) += pm.o
 obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
 obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
 obj-$(CONFIG_SMP) += affinity.o
+obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
new file mode 100644
index 000000000000..50ee2f6593e8
--- /dev/null
+++ b/kernel/irq/debugfs.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2017 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * This file is licensed under the GPL V2.
+ */
+#include <linux/debugfs.h>
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+#include "internals.h"
+
+static struct dentry *irq_dir;
+
+struct irq_bit_descr {
+	unsigned int	mask;
+	char		*name;
+};
+#define BIT_MASK_DESCR(m)	{ .mask = m, .name = #m }
+
+static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+				const struct irq_bit_descr *sd, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++, sd++) {
+		if (state & sd->mask)
+			seq_printf(m, "%*s%s\n", ind + 12, "", sd->name);
+	}
+}
+
+#ifdef CONFIG_SMP
+static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	struct cpumask *msk;
+
+	msk = irq_data_get_affinity_mask(data);
+	seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk));
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	msk = desc->pending_mask;
+	seq_printf(m, "pending:  %*pbl\n", cpumask_pr_args(msk));
+#endif
+}
+#else
+static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { }
+#endif
+
+static const struct irq_bit_descr irqchip_flags[] = {
+	BIT_MASK_DESCR(IRQCHIP_SET_TYPE_MASKED),
+	BIT_MASK_DESCR(IRQCHIP_EOI_IF_HANDLED),
+	BIT_MASK_DESCR(IRQCHIP_MASK_ON_SUSPEND),
+	BIT_MASK_DESCR(IRQCHIP_ONOFFLINE_ENABLED),
+	BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE),
+	BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
+	BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
+};
+
+static void
+irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
+{
+	struct irq_chip *chip = data->chip;
+
+	if (!chip) {
+		seq_printf(m, "chip: None\n");
+		return;
+	}
+	seq_printf(m, "%*schip:    %s\n", ind, "", chip->name);
+	seq_printf(m, "%*sflags:   0x%lx\n", ind + 1, "", chip->flags);
+	irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
+			    ARRAY_SIZE(irqchip_flags));
+}
+
+static void
+irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind)
+{
+	seq_printf(m, "%*sdomain:  %s\n", ind, "",
+		   data->domain ? data->domain->name : "");
+	seq_printf(m, "%*shwirq:   0x%lx\n", ind + 1, "", data->hwirq);
+	irq_debug_show_chip(m, data, ind + 1);
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	if (!data->parent_data)
+		return;
+	seq_printf(m, "%*sparent:\n", ind + 1, "");
+	irq_debug_show_data(m, data->parent_data, ind + 4);
+#endif
+}
+
+static const struct irq_bit_descr irqdata_states[] = {
+	BIT_MASK_DESCR(IRQ_TYPE_EDGE_RISING),
+	BIT_MASK_DESCR(IRQ_TYPE_EDGE_FALLING),
+	BIT_MASK_DESCR(IRQ_TYPE_LEVEL_HIGH),
+	BIT_MASK_DESCR(IRQ_TYPE_LEVEL_LOW),
+	BIT_MASK_DESCR(IRQD_LEVEL),
+
+	BIT_MASK_DESCR(IRQD_ACTIVATED),
+	BIT_MASK_DESCR(IRQD_IRQ_STARTED),
+	BIT_MASK_DESCR(IRQD_IRQ_DISABLED),
+	BIT_MASK_DESCR(IRQD_IRQ_MASKED),
+	BIT_MASK_DESCR(IRQD_IRQ_INPROGRESS),
+
+	BIT_MASK_DESCR(IRQD_PER_CPU),
+	BIT_MASK_DESCR(IRQD_NO_BALANCING),
+
+	BIT_MASK_DESCR(IRQD_MOVE_PCNTXT),
+	BIT_MASK_DESCR(IRQD_AFFINITY_SET),
+	BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
+	BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
+	BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+
+	BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
+
+	BIT_MASK_DESCR(IRQD_WAKEUP_STATE),
+	BIT_MASK_DESCR(IRQD_WAKEUP_ARMED),
+};
+
+static const struct irq_bit_descr irqdesc_states[] = {
+	BIT_MASK_DESCR(_IRQ_NOPROBE),
+	BIT_MASK_DESCR(_IRQ_NOREQUEST),
+	BIT_MASK_DESCR(_IRQ_NOTHREAD),
+	BIT_MASK_DESCR(_IRQ_NOAUTOEN),
+	BIT_MASK_DESCR(_IRQ_NESTED_THREAD),
+	BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID),
+	BIT_MASK_DESCR(_IRQ_IS_POLLED),
+	BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY),
+};
+
+static const struct irq_bit_descr irqdesc_istates[] = {
+	BIT_MASK_DESCR(IRQS_AUTODETECT),
+	BIT_MASK_DESCR(IRQS_SPURIOUS_DISABLED),
+	BIT_MASK_DESCR(IRQS_POLL_INPROGRESS),
+	BIT_MASK_DESCR(IRQS_ONESHOT),
+	BIT_MASK_DESCR(IRQS_REPLAY),
+	BIT_MASK_DESCR(IRQS_WAITING),
+	BIT_MASK_DESCR(IRQS_PENDING),
+	BIT_MASK_DESCR(IRQS_SUSPENDED),
+};
+
+
+static int irq_debug_show(struct seq_file *m, void *p)
+{
+	struct irq_desc *desc = m->private;
+	struct irq_data *data;
+
+	raw_spin_lock_irq(&desc->lock);
+	data = irq_desc_get_irq_data(desc);
+	seq_printf(m, "handler:  %pf\n", desc->handle_irq);
+	seq_printf(m, "status:   0x%08x\n", desc->status_use_accessors);
+	irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states,
+			    ARRAY_SIZE(irqdesc_states));
+	seq_printf(m, "istate:   0x%08x\n", desc->istate);
+	irq_debug_show_bits(m, 0, desc->istate, irqdesc_istates,
+			    ARRAY_SIZE(irqdesc_istates));
+	seq_printf(m, "ddepth:   %u\n", desc->depth);
+	seq_printf(m, "wdepth:   %u\n", desc->wake_depth);
+	seq_printf(m, "dstate:   0x%08x\n", irqd_get(data));
+	irq_debug_show_bits(m, 0, irqd_get(data), irqdata_states,
+			    ARRAY_SIZE(irqdata_states));
+	seq_printf(m, "node:     %d\n", irq_data_get_node(data));
+	irq_debug_show_masks(m, desc);
+	irq_debug_show_data(m, data, 0);
+	raw_spin_unlock_irq(&desc->lock);
+	return 0;
+}
+
+static int irq_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_irq_ops = {
+	.open		= irq_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc)
+{
+	char name [10];
+
+	if (!irq_dir || !desc || desc->debugfs_file)
+		return;
+
+	sprintf(name, "%d", irq);
+	desc->debugfs_file = debugfs_create_file(name, 0444, irq_dir, desc,
+						 &dfs_irq_ops);
+}
+
+void irq_remove_debugfs_entry(struct irq_desc *desc)
+{
+	if (desc->debugfs_file)
+		debugfs_remove(desc->debugfs_file);
+}
+
+static int __init irq_debugfs_init(void)
+{
+	struct dentry *root_dir;
+	int irq;
+
+	root_dir = debugfs_create_dir("irq", NULL);
+	if (!root_dir)
+		return -ENOMEM;
+
+	irq_domain_debugfs_init(root_dir);
+
+	irq_dir = debugfs_create_dir("irqs", root_dir);
+
+	irq_lock_sparse();
+	for_each_active_irq(irq)
+		irq_add_debugfs_entry(irq, irq_to_desc(irq));
+	irq_unlock_sparse();
+
+	return 0;
+}
+__initcall(irq_debugfs_init);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 921a2419720c..094db5bfb83f 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -169,6 +169,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
 
+static inline unsigned int irqd_get(struct irq_data *d)
+{
+	return __irqd_to_state(d);
+}
+
 /*
  * Manipulation functions for irq_data.state
  */
@@ -237,3 +242,20 @@ irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
 		      int num_ct, unsigned int irq_base,
 		      void __iomem *reg_base, irq_flow_handler_t handler) { }
 #endif /* CONFIG_GENERIC_IRQ_CHIP */
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
+void irq_remove_debugfs_entry(struct irq_desc *desc);
+# ifdef CONFIG_IRQ_DOMAIN
+void irq_domain_debugfs_init(struct dentry *root);
+# else
+static inline void irq_domain_debugfs_init(struct dentry *root);
+# endif
+#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
+static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
+{
+}
+static inline void irq_remove_debugfs_entry(struct irq_desc *d)
+{
+}
+#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 09abce2ea8f0..feade536b6d1 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -394,6 +394,7 @@ static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
+	irq_remove_debugfs_entry(desc);
 	unregister_irq_proc(irq, desc);
 
 	/*
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8d5805c655b6..75e1f0851c33 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -29,9 +29,17 @@ struct irqchip_fwid {
 	struct fwnode_handle	fwnode;
 	unsigned int		type;
 	char			*name;
-	void			*data;
+	void *data;
 };
 
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static void debugfs_add_domain_dir(struct irq_domain *d);
+static void debugfs_remove_domain_dir(struct irq_domain *d);
+#else
+static inline void debugfs_add_domain_dir(struct irq_domain *d) { }
+static inline void debugfs_remove_domain_dir(struct irq_domain *d) { }
+#endif
+
 /**
  * irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for
  *                           identifying an irq domain
@@ -194,6 +202,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 	irq_domain_check_hierarchy(domain);
 
 	mutex_lock(&irq_domain_mutex);
+	debugfs_add_domain_dir(domain);
 	list_add(&domain->link, &irq_domain_list);
 	mutex_unlock(&irq_domain_mutex);
 
@@ -213,6 +222,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_add);
 void irq_domain_remove(struct irq_domain *domain)
 {
 	mutex_lock(&irq_domain_mutex);
+	debugfs_remove_domain_dir(domain);
 
 	WARN_ON(!radix_tree_empty(&domain->revmap_tree));
 
@@ -1599,3 +1609,78 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
 {
 }
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static struct dentry *domain_dir;
+
+static void
+irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
+{
+	seq_printf(m, "%*sname:   %s\n", ind, "", d->name);
+	seq_printf(m, "%*ssize:   %u\n", ind + 1, "",
+		   d->revmap_size + d->revmap_direct_max_irq);
+	seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
+	seq_printf(m, "%*sflags:  0x%08x\n", ind +1 , "", d->flags);
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	if (!d->parent)
+		return;
+	seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name);
+	irq_domain_debug_show_one(m, d->parent, ind + 4);
+#endif
+}
+
+static int irq_domain_debug_show(struct seq_file *m, void *p)
+{
+	struct irq_domain *d = m->private;
+
+	/* Default domain? Might be NULL */
+	if (!d) {
+		if (!irq_default_domain)
+			return 0;
+		d = irq_default_domain;
+	}
+	irq_domain_debug_show_one(m, d, 0);
+	return 0;
+}
+
+static int irq_domain_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_domain_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_domain_ops = {
+	.open		= irq_domain_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void debugfs_add_domain_dir(struct irq_domain *d)
+{
+	if (!d->name || !domain_dir || d->debugfs_file)
+		return;
+	d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
+					      &dfs_domain_ops);
+}
+
+static void debugfs_remove_domain_dir(struct irq_domain *d)
+{
+	if (d->debugfs_file)
+		debugfs_remove(d->debugfs_file);
+}
+
+void __init irq_domain_debugfs_init(struct dentry *root)
+{
+	struct irq_domain *d;
+
+	domain_dir = debugfs_create_dir("domains", root);
+	if (!domain_dir)
+		return;
+
+	debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+	mutex_lock(&irq_domain_mutex);
+	list_for_each_entry(d, &irq_domain_list, link)
+		debugfs_add_domain_dir(d);
+	mutex_unlock(&irq_domain_mutex);
+}
+#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4c34696ca575..284f4eb1ffbe 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1398,6 +1398,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		wake_up_process(new->secondary->thread);
 
 	register_irq_proc(irq, desc);
+	irq_add_debugfs_entry(irq, desc);
 	new->dir = NULL;
 	register_handler_proc(irq, new);
 	free_cpumask_var(mask);
-- 
cgit v1.2.3


From 1bb0401680da156ce1549e915e711bf5b2534cc5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:18 +0200
Subject: genirq: Add missing comment for IRQD_STARTED

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235444.614913014@linutronix.de
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index d996314b6522..7e62e10e5856 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -200,6 +200,7 @@ struct irq_data {
  * IRQD_WAKEUP_ARMED		- Wakeup mode armed
  * IRQD_FORWARDED_TO_VCPU	- The interrupt is forwarded to a VCPU
  * IRQD_AFFINITY_MANAGED	- Affinity is auto-managed by the kernel
+ * IRQD_IRQ_STARTED		- Startup state of the interrupt
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
-- 
cgit v1.2.3


From cdd16365b0bd7c0cd19e2cc768b6bdc8021f32c3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:19 +0200
Subject: genirq: Provide irq_fixup_move_pending()

If an CPU goes offline, the interrupts are migrated away, but a eventually
pending interrupt move, which has not yet been made effective is kept
pending even if the outgoing CPU is the sole target of the pending affinity
mask. What's worse is, that the pending affinity mask is discarded even if
it would contain a valid subset of the online CPUs.

Implement a helper function which allows to avoid these issues.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235444.691345468@linutronix.de
---
 include/linux/irq.h    |  5 +++++
 kernel/irq/migration.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7e62e10e5856..d008065e2f4d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -491,9 +491,14 @@ extern void irq_migrate_all_off_this_cpu(void);
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
 void irq_move_masked_irq(struct irq_data *data);
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
+static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
+{
+	return false;
+}
 #endif
 
 extern int no_irq_affinity;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 37ddb7bda651..6ca054a3f91d 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,6 +4,36 @@
 
 #include "internals.h"
 
+/**
+ * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU
+ * @desc:		Interrupt descpriptor to clean up
+ * @force_clear:	If set clear the move pending bit unconditionally.
+ *			If not set, clear it only when the dying CPU is the
+ *			last one in the pending mask.
+ *
+ * Returns true if the pending bit was set and the pending mask contains an
+ * online CPU other than the dying CPU.
+ */
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+
+	if (!irqd_is_setaffinity_pending(data))
+		return false;
+
+	/*
+	 * The outgoing CPU might be the last online target in a pending
+	 * interrupt move. If that's the case clear the pending move bit.
+	 */
+	if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) {
+		irqd_clr_move_pending(data);
+		return false;
+	}
+	if (force_clear)
+		irqd_clr_move_pending(data);
+	return true;
+}
+
 void irq_move_masked_irq(struct irq_data *idata)
 {
 	struct irq_desc *desc = irq_data_to_desc(idata);
-- 
cgit v1.2.3


From f0383c24b4855f6a4b5a358c7b2d2c16e0437e9b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:29 +0200
Subject: genirq/cpuhotplug: Add support for cleaning up move in progress

In order to move x86 to the generic hotplug migration code, add support for
cleaning up move in progress bits.

On architectures which have this x86 specific (mis)feature not enabled,
this is optimized out by the compiler.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235445.525817311@linutronix.de
---
 arch/x86/include/asm/irq.h |  1 -
 include/linux/irq.h        |  2 ++
 kernel/irq/cpuhotplug.c    | 28 ++++++++++++++++++++++++++--
 kernel/irq/internals.h     | 10 +++++++++-
 4 files changed, 37 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 16d3fa211962..668cca540025 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -29,7 +29,6 @@ struct irq_desc;
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d008065e2f4d..299271a4953c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -491,10 +491,12 @@ extern void irq_migrate_all_off_this_cpu(void);
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
 void irq_move_masked_irq(struct irq_data *data);
+void irq_force_complete_move(struct irq_desc *desc);
 bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
 static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 {
 	return false;
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 09b20e127aee..4be4bd669d81 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -18,7 +18,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
 	struct irq_chip *chip = irq_data_get_irq_chip(d);
-	const struct cpumask *affinity = d->common->affinity;
+	const struct cpumask *affinity;
 	bool brokeaff = false;
 	int err;
 
@@ -41,9 +41,33 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	 * Note: Do not check desc->action as this might be a chained
 	 * interrupt.
 	 */
+	affinity = irq_data_get_affinity_mask(d);
 	if (irqd_is_per_cpu(d) || !irqd_is_started(d) ||
-	    !cpumask_test_cpu(smp_processor_id(), affinity))
+	    !cpumask_test_cpu(smp_processor_id(), affinity)) {
+		/*
+		 * If an irq move is pending, abort it if the dying CPU is
+		 * the sole target.
+		 */
+		irq_fixup_move_pending(desc, false);
 		return false;
+	}
+
+	/*
+	 * Complete an eventually pending irq move cleanup. If this
+	 * interrupt was moved in hard irq context, then the vectors need
+	 * to be cleaned up. It can't wait until this interrupt actually
+	 * happens and this CPU was involved.
+	 */
+	irq_force_complete_move(desc);
+
+	/*
+	 * If there is a setaffinity pending, then try to reuse the pending
+	 * mask, so the last change of the affinity does not get lost. If
+	 * there is no move pending or the pending mask does not contain
+	 * any online CPU, use the current affinity mask.
+	 */
+	if (irq_fixup_move_pending(desc, true))
+		affinity = irq_desc_get_pending_mask(desc);
 
 	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 		affinity = cpu_online_mask;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 20b197f0a7b5..fd4fa8382b8f 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -268,6 +268,10 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
 {
 	cpumask_copy(mask, desc->pending_mask);
 }
+static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
+{
+	return desc->pending_mask;
+}
 #else /* CONFIG_GENERIC_PENDING_IRQ */
 static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
@@ -285,7 +289,11 @@ static inline void
 irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
 {
 }
-#endif /* CONFIG_GENERIC_PENDING_IRQ */
+static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
+{
+	return NULL;
+}
+#endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
 void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
-- 
cgit v1.2.3


From 36d84fb45140f151fa4e145381dbce5e5ffed24d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:34 +0200
Subject: genirq: Move irq_fixup_move_pending() to core

Now that x86 uses the generic code, the function declaration and inline
stub can move to the core internal header.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235445.928156166@linutronix.de
---
 include/linux/irq.h    | 5 -----
 kernel/irq/internals.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 299271a4953c..2b7e5a70d05f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -492,15 +492,10 @@ extern void irq_migrate_all_off_this_cpu(void);
 void irq_move_irq(struct irq_data *data);
 void irq_move_masked_irq(struct irq_data *data);
 void irq_force_complete_move(struct irq_desc *desc);
-bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
 static inline void irq_force_complete_move(struct irq_desc *desc) { }
-static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
-{
-	return false;
-}
 #endif
 
 extern int no_irq_affinity;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index fd4fa8382b8f..040806f1124c 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -272,6 +272,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
 {
 	return desc->pending_mask;
 }
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
 #else /* CONFIG_GENERIC_PENDING_IRQ */
 static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
@@ -293,6 +294,10 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
 {
 	return NULL;
 }
+static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
+{
+	return false;
+}
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-- 
cgit v1.2.3


From 0d3f54257dc300f2db480d6a46b34bdb87f18c1b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:38 +0200
Subject: genirq: Introduce effective affinity mask

There is currently no way to evaluate the effective affinity mask of a
given interrupt. Many irq chips allow only a single target CPU or a subset
of CPUs in the affinity mask.

Updating the mask at the time of setting the affinity to the subset would
be counterproductive because information for cpu hotplug about assigned
interrupt affinities gets lost. On CPU hotplug it's also pointless to force
migrate an interrupt, which is not targeted at the CPU effectively. But
currently the information is not available.

Provide a seperate mask to be updated by the irq_chip->irq_set_affinity()
implementations. Implement the read only proc files so the user can see the
effective mask as well w/o trying to deduce it from /proc/interrupts.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235446.247834245@linutronix.de
---
 include/linux/irq.h  | 29 +++++++++++++++++
 kernel/irq/Kconfig   |  4 +++
 kernel/irq/debugfs.c |  4 +++
 kernel/irq/irqdesc.c | 14 ++++++++
 kernel/irq/proc.c    | 90 ++++++++++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 134 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2b7e5a70d05f..4087ef268ba9 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -137,6 +137,9 @@ struct irq_domain;
  * @affinity:		IRQ affinity on SMP. If this is an IPI
  *			related irq, then this is the mask of the
  *			CPUs to which an IPI can be sent.
+ * @effective_affinity:	The effective IRQ affinity on SMP as some irq
+ *			chips do not allow multi CPU destinations.
+ *			A subset of @affinity.
  * @msi_desc:		MSI descriptor
  * @ipi_offset:		Offset of first IPI target cpu in @affinity. Optional.
  */
@@ -148,6 +151,9 @@ struct irq_common_data {
 	void			*handler_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	cpumask_var_t		effective_affinity;
+#endif
 #ifdef CONFIG_GENERIC_IRQ_IPI
 	unsigned int		ipi_offset;
 #endif
@@ -737,6 +743,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 	return d->common->affinity;
 }
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+	return d->common->effective_affinity;
+}
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+						      const struct cpumask *m)
+{
+	cpumask_copy(d->common->effective_affinity, m);
+}
+#else
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+						      const struct cpumask *m)
+{
+}
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+	return d->common->affinity;
+}
+#endif
+
 unsigned int arch_dynirq_lower_bound(unsigned int from);
 
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 8d9498e51585..fcbb1d6d51cb 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW
 config GENERIC_IRQ_SHOW_LEVEL
        bool
 
+# Supports effective affinity mask
+config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+       bool
+
 # Facility to allocate a hardware interrupt. This is legacy support
 # and should not be used in new code. Use irq domains instead.
 config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 50ee2f6593e8..edbef252d0c4 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -36,6 +36,10 @@ static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc)
 
 	msk = irq_data_get_affinity_mask(data);
 	seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk));
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	msk = irq_data_get_effective_affinity_mask(data);
+	seq_printf(m, "effectiv: %*pbl\n", cpumask_pr_args(msk));
+#endif
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	msk = desc->pending_mask;
 	seq_printf(m, "pending:  %*pbl\n", cpumask_pr_args(msk));
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 48d4f0365e52..35a95fadcfda 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -60,8 +60,19 @@ static int alloc_masks(struct irq_desc *desc, int node)
 				     GFP_KERNEL, node))
 		return -ENOMEM;
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity,
+				     GFP_KERNEL, node)) {
+		free_cpumask_var(desc->irq_common_data.affinity);
+		return -ENOMEM;
+	}
+#endif
+
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) {
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+		free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
 		free_cpumask_var(desc->irq_common_data.affinity);
 		return -ENOMEM;
 	}
@@ -324,6 +335,9 @@ static void free_masks(struct irq_desc *desc)
 	free_cpumask_var(desc->pending_mask);
 #endif
 	free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
 }
 #else
 static inline void free_masks(struct irq_desc *desc) { }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index cbc4c5e377ec..7f9642a1e267 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -37,19 +37,47 @@ static struct proc_dir_entry *root_irq_dir;
 
 #ifdef CONFIG_SMP
 
+enum {
+	AFFINITY,
+	AFFINITY_LIST,
+	EFFECTIVE,
+	EFFECTIVE_LIST,
+};
+
 static int show_irq_affinity(int type, struct seq_file *m)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	const struct cpumask *mask = desc->irq_common_data.affinity;
+	const struct cpumask *mask;
 
+	switch (type) {
+	case AFFINITY:
+	case AFFINITY_LIST:
+		mask = desc->irq_common_data.affinity;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (irqd_is_setaffinity_pending(&desc->irq_data))
-		mask = desc->pending_mask;
+		if (irqd_is_setaffinity_pending(&desc->irq_data))
+			mask = desc->pending_mask;
 #endif
-	if (type)
+		break;
+	case EFFECTIVE:
+	case EFFECTIVE_LIST:
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+		mask = desc->irq_common_data.effective_affinity;
+		break;
+#else
+		return -EINVAL;
+#endif
+	};
+
+	switch (type) {
+	case AFFINITY_LIST:
+	case EFFECTIVE_LIST:
 		seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
-	else
+		break;
+	case AFFINITY:
+	case EFFECTIVE:
 		seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
+		break;
+	}
 	return 0;
 }
 
@@ -80,12 +108,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
 int no_irq_affinity;
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
-	return show_irq_affinity(0, m);
+	return show_irq_affinity(AFFINITY, m);
 }
 
 static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
 {
-	return show_irq_affinity(1, m);
+	return show_irq_affinity(AFFINITY_LIST, m);
 }
 
 
@@ -185,6 +213,44 @@ static const struct file_operations irq_affinity_list_proc_fops = {
 	.write		= irq_affinity_list_proc_write,
 };
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static int irq_effective_aff_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(EFFECTIVE, m);
+}
+
+static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(EFFECTIVE_LIST, m);
+}
+
+static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
+}
+
+static int irq_effective_aff_list_proc_open(struct inode *inode,
+					    struct file *file)
+{
+	return single_open(file, irq_effective_aff_list_proc_show,
+			   PDE_DATA(inode));
+}
+
+static const struct file_operations irq_effective_aff_proc_fops = {
+	.open		= irq_effective_aff_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations irq_effective_aff_list_proc_fops = {
+	.open		= irq_effective_aff_list_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity));
@@ -364,6 +430,12 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 
 	proc_create_data("node", 0444, desc->dir,
 			 &irq_node_proc_fops, irqp);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	proc_create_data("effective_affinity", 0444, desc->dir,
+			 &irq_effective_aff_proc_fops, irqp);
+	proc_create_data("effective_affinity_list", 0444, desc->dir,
+			 &irq_effective_aff_list_proc_fops, irqp);
+# endif
 #endif
 	proc_create_data("spurious", 0444, desc->dir,
 			 &irq_spurious_proc_fops, (void *)(long)irq);
@@ -383,6 +455,10 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
 	remove_proc_entry("affinity_hint", desc->dir);
 	remove_proc_entry("smp_affinity_list", desc->dir);
 	remove_proc_entry("node", desc->dir);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	remove_proc_entry("effective_affinity", desc->dir);
+	remove_proc_entry("effective_affinity_list", desc->dir);
+# endif
 #endif
 	remove_proc_entry("spurious", desc->dir);
 
-- 
cgit v1.2.3


From 54fdf6a0875ca380647ac1cc9b5b8f2dbbbfa131 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:47 +0200
Subject: genirq: Introduce IRQD_MANAGED_SHUTDOWN

Affinity managed interrupts should keep their assigned affinity accross CPU
hotplug. To avoid magic hackery in device drivers, the core code shall
manage them transparently. This will set these interrupts into a managed
shutdown state when the last CPU of the assigned affinity mask goes
offline. The interrupt will be restarted when one of the CPUs in the
assigned affinity mask comes back online.

Introduce the necessary state flag and the accessor functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235446.954523476@linutronix.de
---
 include/linux/irq.h    |  8 ++++++++
 kernel/irq/internals.h | 10 ++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4087ef268ba9..0e37276c5315 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -207,6 +207,8 @@ struct irq_data {
  * IRQD_FORWARDED_TO_VCPU	- The interrupt is forwarded to a VCPU
  * IRQD_AFFINITY_MANAGED	- Affinity is auto-managed by the kernel
  * IRQD_IRQ_STARTED		- Startup state of the interrupt
+ * IRQD_MANAGED_SHUTDOWN	- Interrupt was shutdown due to empty affinity
+ *				  mask. Applies only to affinity managed irqs.
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -225,6 +227,7 @@ enum {
 	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 	IRQD_AFFINITY_MANAGED		= (1 << 21),
 	IRQD_IRQ_STARTED		= (1 << 22),
+	IRQD_MANAGED_SHUTDOWN		= (1 << 23),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -343,6 +346,11 @@ static inline bool irqd_is_started(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_IRQ_STARTED;
 }
 
+static inline bool irqd_is_managed_shutdown(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 040806f1124c..ca4666b4cd39 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -193,6 +193,16 @@ static inline void irqd_clr_move_pending(struct irq_data *d)
 	__irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING;
 }
 
+static inline void irqd_set_managed_shutdown(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN;
+}
+
+static inline void irqd_clr_managed_shutdown(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN;
+}
+
 static inline void irqd_clear(struct irq_data *d, unsigned int mask)
 {
 	__irqd_to_state(d) &= ~mask;
-- 
cgit v1.2.3


From 761ea388e8c4e3ac883a94e16bcc8c51fa419d4f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:50 +0200
Subject: genirq: Handle managed irqs gracefully in irq_startup()

Affinity managed interrupts should keep their assigned affinity accross CPU
hotplug. To avoid magic hackery in device drivers, the core code shall
manage them transparently and set these interrupts into a managed shutdown
state when the last CPU of the assigned affinity mask goes offline. The
interrupt will be restarted when one of the CPUs in the assigned affinity
mask comes back online.

Add the necessary logic to irq_startup(). If an interrupt is requested and
started up, the code checks whether it is affinity managed and if so, it
checks whether a CPU in the interrupts affinity mask is online. If not, it
puts the interrupt into managed shutdown state.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235447.189851170@linutronix.de
---
 include/linux/irq.h |  2 +-
 kernel/irq/chip.c   | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 62 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0e37276c5315..807042b46af1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -346,7 +346,7 @@ static inline bool irqd_is_started(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_IRQ_STARTED;
 }
 
-static inline bool irqd_is_managed_shutdown(struct irq_data *d)
+static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
 {
 	return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b7599e952d3b..fc89eeb8a6b4 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -195,6 +195,52 @@ static void irq_state_set_started(struct irq_desc *desc)
 	irqd_set(&desc->irq_data, IRQD_IRQ_STARTED);
 }
 
+enum {
+	IRQ_STARTUP_NORMAL,
+	IRQ_STARTUP_MANAGED,
+	IRQ_STARTUP_ABORT,
+};
+
+#ifdef CONFIG_SMP
+static int
+__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+
+	if (!irqd_affinity_is_managed(d))
+		return IRQ_STARTUP_NORMAL;
+
+	irqd_clr_managed_shutdown(d);
+
+	if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+		/*
+		 * Catch code which fiddles with enable_irq() on a managed
+		 * and potentially shutdown IRQ. Chained interrupt
+		 * installment or irq auto probing should not happen on
+		 * managed irqs either. Emit a warning, break the affinity
+		 * and start it up as a normal interrupt.
+		 */
+		if (WARN_ON_ONCE(force))
+			return IRQ_STARTUP_NORMAL;
+		/*
+		 * The interrupt was requested, but there is no online CPU
+		 * in it's affinity mask. Put it into managed shutdown
+		 * state and let the cpu hotplug mechanism start it up once
+		 * a CPU in the mask becomes available.
+		 */
+		irqd_set_managed_shutdown(d);
+		return IRQ_STARTUP_ABORT;
+	}
+	return IRQ_STARTUP_MANAGED;
+}
+#else
+static int
+__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+{
+	return IRQ_STARTUP_NORMAL;
+}
+#endif
+
 static int __irq_startup(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
@@ -214,15 +260,27 @@ static int __irq_startup(struct irq_desc *desc)
 
 int irq_startup(struct irq_desc *desc, bool resend, bool force)
 {
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	struct cpumask *aff = irq_data_get_affinity_mask(d);
 	int ret = 0;
 
 	desc->depth = 0;
 
-	if (irqd_is_started(&desc->irq_data)) {
+	if (irqd_is_started(d)) {
 		irq_enable(desc);
 	} else {
-		ret = __irq_startup(desc);
-		irq_setup_affinity(desc);
+		switch (__irq_startup_managed(desc, aff, force)) {
+		case IRQ_STARTUP_NORMAL:
+			ret = __irq_startup(desc);
+			irq_setup_affinity(desc);
+			break;
+		case IRQ_STARTUP_MANAGED:
+			ret = __irq_startup(desc);
+			irq_set_affinity_locked(d, aff, false);
+			break;
+		case IRQ_STARTUP_ABORT:
+			return 0;
+		}
 	}
 	if (resend)
 		check_irq_resend(desc);
-- 
cgit v1.2.3


From c5cb83bb337c25caae995d992d1cdf9b317f83de Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:51 +0200
Subject: genirq/cpuhotplug: Handle managed IRQs on CPU hotplug

If a CPU goes offline, interrupts affine to the CPU are moved away. If the
outgoing CPU is the last CPU in the affinity mask the migration code breaks
the affinity and sets it it all online cpus.

This is a problem for affinity managed interrupts as CPU hotplug is often
used for power management purposes. If the affinity is broken, the
interrupt is not longer affine to the CPUs to which it was allocated.

The affinity spreading allows to lay out multi queue devices in a way that
they are assigned to a single CPU or a group of CPUs. If the last CPU goes
offline, then the queue is not longer used, so the interrupt can be
shutdown gracefully and parked until one of the assigned CPUs comes online
again.

Add a graceful shutdown mechanism into the irq affinity breaking code path,
mark the irq as MANAGED_SHUTDOWN and leave the affinity mask unmodified.

In the online path, scan the active interrupts for managed interrupts and
if the interrupt is functional and the newly online CPU is part of the
affinity mask, restart the interrupt if it is marked MANAGED_SHUTDOWN or if
the interrupts is started up, try to add the CPU back to the effective
affinity mask.

Originally-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170619235447.273417334@linutronix.de
---
 include/linux/cpuhotplug.h |  1 +
 include/linux/irq.h        |  5 +++++
 kernel/cpu.c               |  5 +++++
 kernel/irq/cpuhotplug.c    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f2a80377520..c15f22c54535 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -124,6 +124,7 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE_IDLE,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
+	CPUHP_AP_IRQ_AFFINITY_ONLINE,
 	CPUHP_AP_PERF_ONLINE,
 	CPUHP_AP_PERF_X86_ONLINE,
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 807042b46af1..19cea6326599 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data,
 				   const struct cpumask *cpumask, bool force);
 extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION)
 extern void irq_migrate_all_off_this_cpu(void);
+extern int irq_affinity_online_cpu(unsigned int cpu);
+#else
+# define irq_affinity_online_cpu	NULL
+#endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cb5103413bd8..b86b32ebb3b2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.startup.single		= smpboot_unpark_threads,
 		.teardown.single	= NULL,
 	},
+	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
+		.name			= "irq/affinity:online",
+		.startup.single		= irq_affinity_online_cpu,
+		.teardown.single	= NULL,
+	},
 	[CPUHP_AP_PERF_ONLINE] = {
 		.name			= "perf:online",
 		.startup.single		= perf_event_init_cpu,
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 0b093db3336b..b7964e72ded7 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -83,6 +83,15 @@ static bool migrate_one_irq(struct irq_desc *desc)
 		chip->irq_mask(d);
 
 	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		/*
+		 * If the interrupt is managed, then shut it down and leave
+		 * the affinity untouched.
+		 */
+		if (irqd_affinity_is_managed(d)) {
+			irqd_set_managed_shutdown(d);
+			irq_shutdown(desc);
+			return false;
+		}
 		affinity = cpu_online_mask;
 		brokeaff = true;
 	}
@@ -129,3 +138,39 @@ void irq_migrate_all_off_this_cpu(void)
 		}
 	}
 }
+
+static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = irq_data_get_affinity_mask(data);
+
+	if (!irqd_affinity_is_managed(data) || !desc->action ||
+	    !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
+		return;
+
+	if (irqd_is_managed_and_shutdown(data))
+		irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+	else
+		irq_set_affinity_locked(data, affinity, false);
+}
+
+/**
+ * irq_affinity_online_cpu - Restore affinity for managed interrupts
+ * @cpu:	Upcoming CPU for which interrupts should be restored
+ */
+int irq_affinity_online_cpu(unsigned int cpu)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+
+	irq_lock_sparse();
+	for_each_active_irq(irq) {
+		desc = irq_to_desc(irq);
+		raw_spin_lock_irq(&desc->lock);
+		irq_restore_affinity_of_irq(desc, cpu);
+		raw_spin_unlock_irq(&desc->lock);
+	}
+	irq_unlock_sparse();
+
+	return 0;
+}
-- 
cgit v1.2.3


From d52dd44175bd27ad9d8e34a994fb80877c1f6d61 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jun 2017 01:37:52 +0200
Subject: genirq: Introduce IRQD_SINGLE_TARGET flag

Many interrupt chips allow only a single CPU as interrupt target. The core
code has no knowledge about that. That's unfortunate as it could avoid
trying to readd a newly online CPU to the effective affinity mask.

Add the status flag and the necessary accessors.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20170619235447.352343969@linutronix.de
---
 include/linux/irq.h  | 16 ++++++++++++++++
 kernel/irq/debugfs.c |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 19cea6326599..00db35b61e9e 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -209,6 +209,7 @@ struct irq_data {
  * IRQD_IRQ_STARTED		- Startup state of the interrupt
  * IRQD_MANAGED_SHUTDOWN	- Interrupt was shutdown due to empty affinity
  *				  mask. Applies only to affinity managed irqs.
+ * IRQD_SINGLE_TARGET		- IRQ allows only a single affinity target
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -228,6 +229,7 @@ enum {
 	IRQD_AFFINITY_MANAGED		= (1 << 21),
 	IRQD_IRQ_STARTED		= (1 << 22),
 	IRQD_MANAGED_SHUTDOWN		= (1 << 23),
+	IRQD_SINGLE_TARGET		= (1 << 24),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -276,6 +278,20 @@ static inline bool irqd_is_level_type(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_LEVEL;
 }
 
+/*
+ * Must only be called of irqchip.irq_set_affinity() or low level
+ * hieararchy domain allocation functions.
+ */
+static inline void irqd_set_single_target(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_SINGLE_TARGET;
+}
+
+static inline bool irqd_is_single_target(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_SINGLE_TARGET;
+}
+
 static inline bool irqd_is_wakeup_set(struct irq_data *d)
 {
 	return __irqd_to_state(d) & IRQD_WAKEUP_STATE;
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index edbef252d0c4..dbd6e78db213 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -105,6 +105,7 @@ static const struct irq_bit_descr irqdata_states[] = {
 	BIT_MASK_DESCR(IRQD_PER_CPU),
 	BIT_MASK_DESCR(IRQD_NO_BALANCING),
 
+	BIT_MASK_DESCR(IRQD_SINGLE_TARGET),
 	BIT_MASK_DESCR(IRQD_MOVE_PCNTXT),
 	BIT_MASK_DESCR(IRQD_AFFINITY_SET),
 	BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
-- 
cgit v1.2.3


From 61d0a000b7746665c7cfcff766532f6f2a922a61 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 22 Jun 2017 11:34:57 +0100
Subject: genirq/irqdomain: Add irq_domain_update_bus_token helper

We can have irq domains that are identified by the same fwnode
(because they are serviced by the same HW), and yet have different
functionnality (because they serve different busses, for example).
This is what we use the bus_token field.

Since we don't use this field when generating the domain name,
all the aliasing domains will get the same name, and the debugfs
file creation fails. Also, bus_token is updated by individual drivers,
and the core code is unaware of that update.

In order to sort this mess, let's introduce a helper that takes care
of updating bus_token, and regenerate the debugfs file.

A separate patch will update all the individual users.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h |  3 +++
 kernel/irq/irqdomain.c    | 31 +++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 914b0c31d233..222f47af12f4 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -273,6 +273,9 @@ static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
 	return fwnode && fwnode->type == FWNODE_IRQCHIP;
 }
 
+extern void irq_domain_update_bus_token(struct irq_domain *domain,
+					enum irq_domain_bus_token bus_token);
+
 static inline
 struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
 					    enum irq_domain_bus_token bus_token)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 75e1f0851c33..f6adeaeb4c16 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -245,6 +245,37 @@ void irq_domain_remove(struct irq_domain *domain)
 }
 EXPORT_SYMBOL_GPL(irq_domain_remove);
 
+void irq_domain_update_bus_token(struct irq_domain *domain,
+				 enum irq_domain_bus_token bus_token)
+{
+	char *name;
+
+	if (domain->bus_token == bus_token)
+		return;
+
+	mutex_lock(&irq_domain_mutex);
+
+	domain->bus_token = bus_token;
+
+	name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token);
+	if (!name) {
+		mutex_unlock(&irq_domain_mutex);
+		return;
+	}
+
+	debugfs_remove_domain_dir(domain);
+
+	if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+		kfree(domain->name);
+	else
+		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+
+	domain->name = name;
+	debugfs_add_domain_dir(domain);
+
+	mutex_unlock(&irq_domain_mutex);
+}
+
 /**
  * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs
  * @of_node: pointer to interrupt controller's device tree node.
-- 
cgit v1.2.3


From 6a6544e520abecd484ab8b67fb50d1fc003f3275 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 20 Jun 2017 22:17:44 +0100
Subject: genirq/irqdomain: Remove auto-recursive hierarchy support

It did seem like a good idea at the time, but it never really
caught on, and auto-recursive domains remain unused 3 years after
having been introduced.

Oh well, time for a late spring cleanup.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h |  9 +++-----
 kernel/irq/irqdomain.c    | 55 +++++++++++------------------------------------
 kernel/irq/msi.c          |  2 +-
 3 files changed, 17 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 222f47af12f4..cac77a5c5555 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -180,8 +180,8 @@ enum {
 	/* Irq domain is hierarchical */
 	IRQ_DOMAIN_FLAG_HIERARCHY	= (1 << 0),
 
-	/* Core calls alloc/free recursive through the domain hierarchy. */
-	IRQ_DOMAIN_FLAG_AUTO_RECURSIVE	= (1 << 1),
+	/* Irq domain name was allocated in __irq_domain_add() */
+	IRQ_DOMAIN_NAME_ALLOCATED	= (1 << 6),
 
 	/* Irq domain is an IPI domain with virq per cpu */
 	IRQ_DOMAIN_FLAG_IPI_PER_CPU	= (1 << 2),
@@ -195,9 +195,6 @@ enum {
 	/* Irq domain implements MSI remapping */
 	IRQ_DOMAIN_FLAG_MSI_REMAP	= (1 << 5),
 
-	/* Irq domain name was allocated in __irq_domain_add() */
-	IRQ_DOMAIN_NAME_ALLOCATED	= (1 << 6),
-
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -448,7 +445,7 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
 				       NULL);
 }
 
-extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 					   unsigned int irq_base,
 					   unsigned int nr_irqs, void *arg);
 extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index f6adeaeb4c16..14fe862aa2e3 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1342,43 +1342,18 @@ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq,
 	irq_domain_free_irqs_common(domain, virq, nr_irqs);
 }
 
-static bool irq_domain_is_auto_recursive(struct irq_domain *domain)
-{
-	return domain->flags & IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
-}
-
-static void irq_domain_free_irqs_recursive(struct irq_domain *domain,
+static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain,
 					   unsigned int irq_base,
 					   unsigned int nr_irqs)
 {
 	domain->ops->free(domain, irq_base, nr_irqs);
-	if (irq_domain_is_auto_recursive(domain)) {
-		BUG_ON(!domain->parent);
-		irq_domain_free_irqs_recursive(domain->parent, irq_base,
-					       nr_irqs);
-	}
 }
 
-int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 				    unsigned int irq_base,
 				    unsigned int nr_irqs, void *arg)
 {
-	int ret = 0;
-	struct irq_domain *parent = domain->parent;
-	bool recursive = irq_domain_is_auto_recursive(domain);
-
-	BUG_ON(recursive && !parent);
-	if (recursive)
-		ret = irq_domain_alloc_irqs_recursive(parent, irq_base,
-						      nr_irqs, arg);
-	if (ret < 0)
-		return ret;
-
-	ret = domain->ops->alloc(domain, irq_base, nr_irqs, arg);
-	if (ret < 0 && recursive)
-		irq_domain_free_irqs_recursive(parent, irq_base, nr_irqs);
-
-	return ret;
+	return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
 }
 
 /**
@@ -1439,7 +1414,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 	}
 
 	mutex_lock(&irq_domain_mutex);
-	ret = irq_domain_alloc_irqs_recursive(domain, virq, nr_irqs, arg);
+	ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
 	if (ret < 0) {
 		mutex_unlock(&irq_domain_mutex);
 		goto out_free_irq_data;
@@ -1474,7 +1449,7 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
 	mutex_lock(&irq_domain_mutex);
 	for (i = 0; i < nr_irqs; i++)
 		irq_domain_remove_irq(virq + i);
-	irq_domain_free_irqs_recursive(data->domain, virq, nr_irqs);
+	irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs);
 	mutex_unlock(&irq_domain_mutex);
 
 	irq_domain_free_irq_data(virq, nr_irqs);
@@ -1494,15 +1469,11 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
 				 unsigned int irq_base, unsigned int nr_irqs,
 				 void *arg)
 {
-	/* irq_domain_alloc_irqs_recursive() has called parent's alloc() */
-	if (irq_domain_is_auto_recursive(domain))
-		return 0;
+	if (!domain->parent)
+		return -ENOSYS;
 
-	domain = domain->parent;
-	if (domain)
-		return irq_domain_alloc_irqs_recursive(domain, irq_base,
-						       nr_irqs, arg);
-	return -ENOSYS;
+	return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base,
+					       nr_irqs, arg);
 }
 EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
 
@@ -1517,10 +1488,10 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
 void irq_domain_free_irqs_parent(struct irq_domain *domain,
 				 unsigned int irq_base, unsigned int nr_irqs)
 {
-	/* irq_domain_free_irqs_recursive() will call parent's free */
-	if (!irq_domain_is_auto_recursive(domain) && domain->parent)
-		irq_domain_free_irqs_recursive(domain->parent, irq_base,
-					       nr_irqs);
+	if (!domain->parent)
+		return;
+
+	irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs);
 }
 EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
 
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 9e3f1857c6bd..48eadf416c24 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -315,7 +315,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 
 		ops->set_desc(arg, desc);
 		/* Assumes the domain mutex is held! */
-		ret = irq_domain_alloc_irqs_recursive(domain, virq, 1, arg);
+		ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
 		if (ret)
 			break;
 
-- 
cgit v1.2.3


From 297b64c74385fc7ea5dfff66105ab6465f2df49a Mon Sep 17 00:00:00 2001
From: Tyler Baicar <tbaicar@codeaurora.org>
Date: Wed, 21 Jun 2017 12:17:12 -0600
Subject: ras: acpi / apei: generate trace event for unrecognized CPER section

The UEFI spec includes non-standard section type support in the
Common Platform Error Record. This is defined in section N.2.3 of
UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match any
section type that the kernel knows how to parse, a trace event is
not generated.

Generate a trace event which contains the raw error data for
non-standard section type error records.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
CC: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
Tested-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/apei/ghes.c | 18 ++++++++++++++++++
 drivers/ras/ras.c        | 10 +++++++++-
 include/linux/ras.h      | 12 ++++++++++++
 include/linux/uuid.h     |  4 +++-
 include/ras/ras_event.h  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 7a91ac7d6b75..ab36ad628c68 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -45,11 +45,14 @@
 #include <linux/aer.h>
 #include <linux/nmi.h>
 #include <linux/sched/clock.h>
+#include <linux/uuid.h>
+#include <linux/ras.h>
 
 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
 #include <acpi/apei.h>
 #include <asm/tlbflush.h>
+#include <ras/ras_event.h>
 
 #include "apei-internal.h"
 
@@ -461,11 +464,19 @@ static void ghes_do_proc(struct ghes *ghes,
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
 	guid_t *sec_type;
+	guid_t *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_type = (guid_t *)gdata->section_type;
 		sec_sev = ghes_severity(gdata->error_severity);
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (guid_t *)gdata->fru_id;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
 		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 
@@ -506,6 +517,13 @@ static void ghes_do_proc(struct ghes *ghes,
 
 		}
 #endif
+		else {
+			void *err = acpi_hest_get_payload(gdata);
+
+			log_non_standard_event(sec_type, fru_id, fru_text,
+					       sec_sev, err,
+					       gdata->error_data_length);
+		}
 	}
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 94f8038864b4..e87fd9e32ee2 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -7,11 +7,19 @@
 
 #include <linux/init.h>
 #include <linux/ras.h>
+#include <linux/uuid.h>
 
 #define CREATE_TRACE_POINTS
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
+void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
+			    const char *fru_text, const u8 sev, const u8 *err,
+			    const u32 len)
+{
+	trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
+}
+
 static int __init ras_init(void)
 {
 	int rc = 0;
@@ -27,7 +35,7 @@ subsys_initcall(ras_init);
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
-
+EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event);
 
 int __init parse_ras_param(char *str)
 {
diff --git a/include/linux/ras.h b/include/linux/ras.h
index ffb147185e8d..62fac3042dce 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -2,6 +2,7 @@
 #define __RAS_H__
 
 #include <asm/errno.h>
+#include <linux/uuid.h>
 
 #ifdef CONFIG_DEBUG_FS
 int ras_userspace_consumers(void);
@@ -22,4 +23,15 @@ static inline void __init cec_init(void)	{ }
 static inline int cec_add_elem(u64 pfn)		{ return -ENODEV; }
 #endif
 
+#ifdef CONFIG_RAS
+void log_non_standard_event(const guid_t *sec_type,
+			    const guid_t *fru_id, const char *fru_text,
+			    const u8 sev, const u8 *err, const u32 len);
+#else
+static void log_non_standard_event(const guid_t *sec_type,
+				   const guid_t *fru_id, const char *fru_text,
+				   const u8 sev, const u8 *err,
+				   const u32 len) { return; }
+#endif
+
 #endif /* __RAS_H__ */
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 75f7182d5360..61641faca38b 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,8 +18,10 @@
 
 #include <uapi/linux/uuid.h>
 
+#define UUID_SIZE 16
+
 typedef struct {
-	__u8 b[16];
+	__u8 b[UUID_SIZE];
 } uuid_t;
 
 #define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12cfa85..4f79ba94fa6b 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -161,6 +161,51 @@ TRACE_EVENT(mc_event,
 		  __get_str(driver_detail))
 );
 
+/*
+ * Non-Standard Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(non_standard_event,
+
+	TP_PROTO(const uuid_le *sec_type,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 const u8 sev,
+		 const u8 *err,
+		 const u32 len),
+
+	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+	TP_STRUCT__entry(
+		__array(char, sec_type, UUID_SIZE)
+		__array(char, fru_id, UUID_SIZE)
+		__string(fru_text, fru_text)
+		__field(u8, sev)
+		__field(u32, len)
+		__dynamic_array(u8, buf, len)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->sec_type, sec_type, UUID_SIZE);
+		memcpy(__entry->fru_id, fru_id, UUID_SIZE);
+		__assign_str(fru_text, fru_text);
+		__entry->sev = sev;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), err, len);
+	),
+
+	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+		  __entry->sev, __entry->sec_type,
+		  __entry->fru_id, __get_str(fru_text),
+		  __entry->len,
+		  __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
 /*
  * PCIe AER Trace event
  *
-- 
cgit v1.2.3


From e9279e83ad1f4b5af541a522a81888f828210b40 Mon Sep 17 00:00:00 2001
From: Tyler Baicar <tbaicar@codeaurora.org>
Date: Wed, 21 Jun 2017 12:17:13 -0600
Subject: trace, ras: add ARM processor error trace event

Currently there are trace events for the various RAS
errors with the exception of ARM processor type errors.
Add a new trace event for such errors so that the user
will know when they occur. These trace events are
consistent with the ARM processor error section type
defined in UEFI 2.6 spec section N.2.4.4.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/apei/ghes.c    |  6 +++++-
 drivers/firmware/efi/cper.c |  1 +
 drivers/ras/ras.c           |  6 ++++++
 include/linux/ras.h         |  3 +++
 include/ras/ras_event.h     | 45 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ab36ad628c68..5073d035bb6e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -517,7 +517,11 @@ static void ghes_do_proc(struct ghes *ghes,
 
 		}
 #endif
-		else {
+		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
+			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+
+			log_arm_hw_error(err);
+		} else {
 			void *err = acpi_hest_get_payload(gdata);
 
 			log_non_standard_event(sec_type, fru_id, fru_text,
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index d5a5855906d6..48a8f69da42a 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -35,6 +35,7 @@
 #include <linux/printk.h>
 #include <linux/bcd.h>
 #include <acpi/ghes.h>
+#include <ras/ras_event.h>
 
 #define INDENT_SP	" "
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index e87fd9e32ee2..39701a5c3f49 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -20,6 +20,11 @@ void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
 	trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
 }
 
+void log_arm_hw_error(struct cper_sec_proc_arm *err)
+{
+	trace_arm_event(err);
+}
+
 static int __init ras_init(void)
 {
 	int rc = 0;
@@ -36,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event);
 
 int __init parse_ras_param(char *str)
 {
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 62fac3042dce..7d61863ff265 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -3,6 +3,7 @@
 
 #include <asm/errno.h>
 #include <linux/uuid.h>
+#include <linux/cper.h>
 
 #ifdef CONFIG_DEBUG_FS
 int ras_userspace_consumers(void);
@@ -27,11 +28,13 @@ static inline int cec_add_elem(u64 pfn)		{ return -ENODEV; }
 void log_non_standard_event(const guid_t *sec_type,
 			    const guid_t *fru_id, const char *fru_text,
 			    const u8 sev, const u8 *err, const u32 len);
+void log_arm_hw_error(struct cper_sec_proc_arm *err);
 #else
 static void log_non_standard_event(const guid_t *sec_type,
 				   const guid_t *fru_id, const char *fru_text,
 				   const u8 sev, const u8 *err,
 				   const u32 len) { return; }
+static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
 #endif
 
 #endif /* __RAS_H__ */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 4f79ba94fa6b..429f46fb61e4 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -161,6 +161,51 @@ TRACE_EVENT(mc_event,
 		  __get_str(driver_detail))
 );
 
+/*
+ * ARM Processor Events Report
+ *
+ * This event is generated when hardware detects an ARM processor error
+ * has occurred. UEFI 2.6 spec section N.2.4.4.
+ */
+TRACE_EVENT(arm_event,
+
+	TP_PROTO(const struct cper_sec_proc_arm *proc),
+
+	TP_ARGS(proc),
+
+	TP_STRUCT__entry(
+		__field(u64, mpidr)
+		__field(u64, midr)
+		__field(u32, running_state)
+		__field(u32, psci_state)
+		__field(u8, affinity)
+	),
+
+	TP_fast_assign(
+		if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
+			__entry->affinity = proc->affinity_level;
+		else
+			__entry->affinity = ~0;
+		if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
+			__entry->mpidr = proc->mpidr;
+		else
+			__entry->mpidr = 0ULL;
+		__entry->midr = proc->midr;
+		if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
+			__entry->running_state = proc->running_state;
+			__entry->psci_state = proc->psci_state;
+		} else {
+			__entry->running_state = ~0;
+			__entry->psci_state = ~0;
+		}
+	),
+
+	TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+		  "running state: %d; PSCI state: %d",
+		  __entry->affinity, __entry->mpidr, __entry->midr,
+		  __entry->running_state, __entry->psci_state)
+);
+
 /*
  * Non-Standard Section Report
  *
-- 
cgit v1.2.3


From 1a0915be192606fee64830b9c5d70b7ed59426b6 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:46 +0200
Subject: mtd: partitions: add support for partition parsers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices have partitions that are kind of containers with extra
subpartitions / volumes instead of e.g. a simple filesystem data. To
support such cases we need to first create normal flash device
partitions and then take care of these special ones.

It's very common case for home routers. Depending on the vendor there
are formats like TRX, Seama, TP-Link, WRGG & more. All of them are used
to embed few partitions into a single one / single firmware file.

Ideally all vendors would use some well documented / standardized format
like UBI (and some probably start doing so), but there are still
countless devices on the market using these poor vendor specific
formats.

This patch extends MTD subsystem by allowing to specify list of parsers
that should be tried for a given partition. Supporting such poor formats
is highly unlikely to be the top priority so these changes try to
minimize maintenance cost to the minimum. It reuses existing code for
these new parsers and just adds a one property and one new function.

This implementation requires setting partition parsers in a flash
parser. A proper change of bcm47xxpart will follow and in the future we
will hopefully also find a solution for doing it with ofpart
("fixed-partitions").

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c          | 31 +++++++++++++++++++++++++++++++
 include/linux/mtd/partitions.h |  7 +++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index c0d464d192ee..2ad9493703f9 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -369,6 +369,35 @@ static inline void free_partition(struct mtd_part *p)
 	kfree(p);
 }
 
+/**
+ * mtd_parse_part - parse MTD partition looking for subpartitions
+ *
+ * @slave: part that is supposed to be a container and should be parsed
+ * @types: NULL-terminated array with names of partition parsers to try
+ *
+ * Some partitions are kind of containers with extra subpartitions (volumes).
+ * There can be various formats of such containers. This function tries to use
+ * specified parsers to analyze given partition and registers found
+ * subpartitions on success.
+ */
+static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
+{
+	struct mtd_partitions parsed;
+	int err;
+
+	err = parse_mtd_partitions(&slave->mtd, types, &parsed, NULL);
+	if (err)
+		return err;
+	else if (!parsed.nr_parts)
+		return -ENOENT;
+
+	err = add_mtd_partitions(&slave->mtd, parsed.parts, parsed.nr_parts);
+
+	mtd_part_parser_cleanup(&parsed);
+
+	return err;
+}
+
 static struct mtd_part *allocate_partition(struct mtd_info *parent,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
@@ -758,6 +787,8 @@ int add_mtd_partitions(struct mtd_info *master,
 
 		add_mtd_device(&slave->mtd);
 		mtd_add_partition_attrs(slave);
+		if (parts[i].types)
+			mtd_parse_part(slave, parts[i].types);
 
 		cur_offset = slave->offset + slave->mtd.size;
 	}
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 06df1e06b6e0..c4beb70dacbd 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -20,6 +20,12 @@
  *
  * For each partition, these fields are available:
  * name: string that will be used to label the partition's MTD device.
+ * types: some partitions can be containers using specific format to describe
+ *	embedded subpartitions / volumes. E.g. many home routers use "firmware"
+ *	partition that contains at least kernel and rootfs. In such case an
+ *	extra parser is needed that will detect these dynamic partitions and
+ *	report them to the MTD subsystem. If set this property stores an array
+ *	of parser names to use when looking for subpartitions.
  * size: the partition size; if defined as MTDPART_SIZ_FULL, the partition
  * 	will extend to the end of the master MTD device.
  * offset: absolute starting position within the master MTD device; if
@@ -38,6 +44,7 @@
 
 struct mtd_partition {
 	const char *name;		/* identifier string */
+	const char *const *types;	/* names of parsers to use if any */
 	uint64_t size;			/* partition size */
 	uint64_t offset;		/* offset within the master MTD space */
 	uint32_t mask_flags;		/* master MTD flags to mask out for this partition */
-- 
cgit v1.2.3


From 61a04101c8a486dec586b2657bffede1b3b979f3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 19 Jun 2017 13:08:52 +0300
Subject: NFC: st-nci: Get rid of platform data

Legacy platform data must go away. We are on the safe side here since
there are no users of it in the kernel.

If anyone by any odd reason needs it the GPIO lookup tables and
built-in device properties at your service.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 MAINTAINERS                          |  1 -
 drivers/nfc/st-nci/i2c.c             | 43 ++----------------------------------
 drivers/nfc/st-nci/spi.c             | 43 ++----------------------------------
 include/linux/platform_data/st-nci.h | 31 --------------------------
 4 files changed, 4 insertions(+), 114 deletions(-)
 delete mode 100644 include/linux/platform_data/st-nci.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 1616d8f10767..f83b9b4d452e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9071,7 +9071,6 @@ F:	include/linux/platform_data/nfcmrvl.h
 F:	include/linux/platform_data/nxp-nci.h
 F:	include/linux/platform_data/pn544.h
 F:	include/linux/platform_data/st21nfca.h
-F:	include/linux/platform_data/st-nci.h
 F:	Documentation/devicetree/bindings/net/nfc/
 
 NFS, SUNRPC, AND LOCKD CLIENTS
diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 9dfae0efa922..6d3d8e43fa50 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -27,7 +27,6 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/nfc.h>
-#include <linux/platform_data/st-nci.h>
 
 #include "st-nci.h"
 
@@ -40,6 +39,7 @@
 #define ST_NCI_I2C_MIN_SIZE 4   /* PCB(1) + NCI Packet header(3) */
 #define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */
 
+#define ST_NCI_DRIVER_NAME "st_nci"
 #define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c"
 
 #define ST_NCI_GPIO_NAME_RESET "reset"
@@ -281,41 +281,10 @@ static int st_nci_i2c_of_request_resources(struct i2c_client *client)
 	return 0;
 }
 
-static int st_nci_i2c_request_resources(struct i2c_client *client)
-{
-	struct st_nci_nfc_platform_data *pdata;
-	struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-	int r;
-
-	pdata = client->dev.platform_data;
-	if (pdata == NULL) {
-		nfc_err(&client->dev, "No platform data\n");
-		return -EINVAL;
-	}
-
-	/* store for later use */
-	phy->gpio_reset = pdata->gpio_reset;
-	phy->irq_polarity = pdata->irq_polarity;
-
-	r = devm_gpio_request_one(&client->dev,
-			phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
-			ST_NCI_GPIO_NAME_RESET);
-	if (r) {
-		pr_err("%s : reset gpio_request failed\n", __FILE__);
-		return r;
-	}
-
-	phy->se_status.is_ese_present = pdata->is_ese_present;
-	phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
-	return 0;
-}
-
 static int st_nci_i2c_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct st_nci_i2c_phy *phy;
-	struct st_nci_nfc_platform_data *pdata;
 	int r;
 
 	dev_dbg(&client->dev, "%s\n", __func__);
@@ -335,20 +304,12 @@ static int st_nci_i2c_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, phy);
 
-	pdata = client->dev.platform_data;
-	if (!pdata && client->dev.of_node) {
+	if (client->dev.of_node) {
 		r = st_nci_i2c_of_request_resources(client);
 		if (r) {
 			nfc_err(&client->dev, "No platform data\n");
 			return r;
 		}
-	} else if (pdata) {
-		r = st_nci_i2c_request_resources(client);
-		if (r) {
-			nfc_err(&client->dev,
-				"Cannot get platform resources\n");
-			return r;
-		}
 	} else if (ACPI_HANDLE(&client->dev)) {
 		r = st_nci_i2c_acpi_request_resources(client);
 		if (r) {
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 89e341eba3eb..ee8ea708d5b7 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -28,7 +28,6 @@
 #include <linux/delay.h>
 #include <linux/nfc.h>
 #include <net/nfc/nci.h>
-#include <linux/platform_data/st-nci.h>
 
 #include "st-nci.h"
 
@@ -41,6 +40,7 @@
 #define ST_NCI_SPI_MIN_SIZE 4   /* PCB(1) + NCI Packet header(3) */
 #define ST_NCI_SPI_MAX_SIZE 250 /* req 4.2.1 */
 
+#define ST_NCI_DRIVER_NAME "st_nci"
 #define ST_NCI_SPI_DRIVER_NAME "st_nci_spi"
 
 #define ST_NCI_GPIO_NAME_RESET "reset"
@@ -296,40 +296,9 @@ static int st_nci_spi_of_request_resources(struct spi_device *dev)
 	return 0;
 }
 
-static int st_nci_spi_request_resources(struct spi_device *dev)
-{
-	struct st_nci_nfc_platform_data *pdata;
-	struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
-	int r;
-
-	pdata = dev->dev.platform_data;
-	if (pdata == NULL) {
-		nfc_err(&dev->dev, "No platform data\n");
-		return -EINVAL;
-	}
-
-	/* store for later use */
-	phy->gpio_reset = pdata->gpio_reset;
-	phy->irq_polarity = pdata->irq_polarity;
-
-	r = devm_gpio_request_one(&dev->dev,
-			phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
-			ST_NCI_GPIO_NAME_RESET);
-	if (r) {
-		pr_err("%s : reset gpio_request failed\n", __FILE__);
-		return r;
-	}
-
-	phy->se_status.is_ese_present = pdata->is_ese_present;
-	phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
-	return 0;
-}
-
 static int st_nci_spi_probe(struct spi_device *dev)
 {
 	struct st_nci_spi_phy *phy;
-	struct st_nci_nfc_platform_data *pdata;
 	int r;
 
 	dev_dbg(&dev->dev, "%s\n", __func__);
@@ -351,20 +320,12 @@ static int st_nci_spi_probe(struct spi_device *dev)
 
 	spi_set_drvdata(dev, phy);
 
-	pdata = dev->dev.platform_data;
-	if (!pdata && dev->dev.of_node) {
+	if (dev->dev.of_node) {
 		r = st_nci_spi_of_request_resources(dev);
 		if (r) {
 			nfc_err(&dev->dev, "No platform data\n");
 			return r;
 		}
-	} else if (pdata) {
-		r = st_nci_spi_request_resources(dev);
-		if (r) {
-			nfc_err(&dev->dev,
-				"Cannot get platform resources\n");
-			return r;
-		}
 	} else if (ACPI_HANDLE(&dev->dev)) {
 		r = st_nci_spi_acpi_request_resources(dev);
 		if (r) {
diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h
deleted file mode 100644
index f6494b347c06..000000000000
--- a/include/linux/platform_data/st-nci.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Driver include for ST NCI NFC chip family.
- *
- * Copyright (C) 2014-2015  STMicroelectronics SAS. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ST_NCI_H_
-#define _ST_NCI_H_
-
-#define ST_NCI_DRIVER_NAME "st_nci"
-
-struct st_nci_nfc_platform_data {
-	unsigned int gpio_reset;
-	unsigned int irq_polarity;
-	bool is_ese_present;
-	bool is_uicc_present;
-};
-
-#endif /* _ST_NCI_H_ */
-- 
cgit v1.2.3


From 313dd1b629219db50cad532dba6a3b3b22ffe622 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 May 2017 23:37:45 -0700
Subject: gcc-plugins: Add the randstruct plugin

This randstruct plugin is modified from Brad Spengler/PaX Team's code
in the last public patch of grsecurity/PaX based on my understanding
of the code. Changes or omissions from the original code are mine and
don't reflect the original grsecurity/PaX code.

The randstruct GCC plugin randomizes the layout of selected structures
at compile time, as a probabilistic defense against attacks that need to
know the layout of structures within the kernel. This is most useful for
"in-house" kernel builds where neither the randomization seed nor other
build artifacts are made available to an attacker. While less useful for
distribution kernels (where the randomization seed must be exposed for
third party kernel module builds), it still has some value there since now
all kernel builds would need to be tracked by an attacker.

In more performance sensitive scenarios, GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
can be selected to make a best effort to restrict randomization to
cacheline-sized groups of elements, and will not randomize bitfields. This
comes at the cost of reduced randomization.

Two annotations are defined,__randomize_layout and __no_randomize_layout,
which respectively tell the plugin to either randomize or not to
randomize instances of the struct in question. Follow-on patches enable
the auto-detection logic for selecting structures for randomization
that contain only function pointers. It is disabled here to assist with
bisection.

Since any randomized structs must be initialized using designated
initializers, __randomize_layout includes the __designated_init annotation
even when the plugin is disabled so that all builds will require
the needed initialization. (With the plugin enabled, annotations for
automatically chosen structures are marked as well.)

The main differences between this implemenation and grsecurity are:
- disable automatic struct selection (to be enabled in follow-up patch)
- add designated_init attribute at runtime and for manual marking
- clarify debugging output to differentiate bad cast warnings
- add whitelisting infrastructure
- support gcc 7's DECL_ALIGN and DECL_MODE changes (Laura Abbott)
- raise minimum required GCC version to 4.7

Earlier versions of this patch series were ported by Michael Leibowitz.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/dontdiff                        |    2 +
 arch/Kconfig                                  |   39 +
 include/linux/compiler-gcc.h                  |    5 +
 include/linux/compiler.h                      |    8 +
 include/linux/vermagic.h                      |    9 +-
 scripts/Makefile.gcc-plugins                  |    4 +
 scripts/gcc-plugins/.gitignore                |    1 +
 scripts/gcc-plugins/Makefile                  |    8 +
 scripts/gcc-plugins/gcc-common.h              |    5 +
 scripts/gcc-plugins/gen-random-seed.sh        |    8 +
 scripts/gcc-plugins/randomize_layout_plugin.c | 1020 +++++++++++++++++++++++++
 11 files changed, 1108 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gcc-plugins/.gitignore
 create mode 100644 scripts/gcc-plugins/gen-random-seed.sh
 create mode 100644 scripts/gcc-plugins/randomize_layout_plugin.c

(limited to 'include/linux')

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 77b92221f951..e10a484629e4 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -207,6 +207,8 @@ r200_reg_safe.h
 r300_reg_safe.h
 r420_reg_safe.h
 r600_reg_safe.h
+randomize_layout_hash.h
+randomize_layout_seed.h
 recordmcount
 relocs
 rlim_names.h
diff --git a/arch/Kconfig b/arch/Kconfig
index 6c00e5b00f8b..810bf206f221 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -443,6 +443,45 @@ config GCC_PLUGIN_STRUCTLEAK_VERBOSE
 	  initialized. Since not all existing initializers are detected
 	  by the plugin, this can produce false positive warnings.
 
+config GCC_PLUGIN_RANDSTRUCT
+	bool "Randomize layout of sensitive kernel structures"
+	depends on GCC_PLUGINS
+	select MODVERSIONS if MODULES
+	help
+	  If you say Y here, the layouts of structures explicitly
+	  marked by __randomize_layout will be randomized at
+	  compile-time.  This can introduce the requirement of an
+	  additional information exposure vulnerability for exploits
+	  targeting these structure types.
+
+	  Enabling this feature will introduce some performance impact,
+	  slightly increase memory usage, and prevent the use of forensic
+	  tools like Volatility against the system (unless the kernel
+	  source tree isn't cleaned after kernel installation).
+
+	  The seed used for compilation is located at
+	  scripts/gcc-plgins/randomize_layout_seed.h.  It remains after
+	  a make clean to allow for external modules to be compiled with
+	  the existing seed and will be removed by a make mrproper or
+	  make distclean.
+
+	  Note that the implementation requires gcc 4.7 or newer.
+
+	  This plugin was ported from grsecurity/PaX. More information at:
+	   * https://grsecurity.net/
+	   * https://pax.grsecurity.net/
+
+config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
+	bool "Use cacheline-aware structure randomization"
+	depends on GCC_PLUGIN_RANDSTRUCT
+	depends on !COMPILE_TEST
+	help
+	  If you say Y here, the RANDSTRUCT randomization will make a
+	  best effort at restricting randomization to cacheline-sized
+	  groups of elements.  It will further not randomize bitfields
+	  in structures.  This reduces the performance hit of RANDSTRUCT
+	  at the cost of weakened randomization.
+
 config HAVE_CC_STACKPROTECTOR
 	bool
 	help
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 386caf6771ed..7deaae3dc87d 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -223,6 +223,11 @@
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
 
+#ifdef RANDSTRUCT_PLUGIN
+#define __randomize_layout __attribute__((randomize_layout))
+#define __no_randomize_layout __attribute__((no_randomize_layout))
+#endif
+
 #endif /* GCC_VERSION >= 40500 */
 
 #if GCC_VERSION >= 40600
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 80a1dea36cbe..55ee9ee814f8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -448,6 +448,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __latent_entropy
 #endif
 
+#ifndef __randomize_layout
+# define __randomize_layout __designated_init
+#endif
+
+#ifndef __no_randomize_layout
+# define __no_randomize_layout
+#endif
+
 /*
  * Tell gcc if a function is cold. The compiler will assume any path
  * directly leading to the call is unlikely.
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 6f8fbcf10dfb..af6c03f7f986 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -24,10 +24,17 @@
 #ifndef MODULE_ARCH_VERMAGIC
 #define MODULE_ARCH_VERMAGIC ""
 #endif
+#ifdef RANDSTRUCT_PLUGIN
+#include <generated/randomize_layout_hash.h>
+#define MODULE_RANDSTRUCT_PLUGIN "RANDSTRUCT_PLUGIN_" RANDSTRUCT_HASHED_SEED
+#else
+#define MODULE_RANDSTRUCT_PLUGIN
+#endif
 
 #define VERMAGIC_STRING 						\
 	UTS_RELEASE " "							\
 	MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT 			\
 	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS	\
-	MODULE_ARCH_VERMAGIC
+	MODULE_ARCH_VERMAGIC						\
+	MODULE_RANDSTRUCT_PLUGIN
 
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 82335533620e..2e0e2eaa397f 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -29,6 +29,10 @@ ifdef CONFIG_GCC_PLUGINS
   gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE)	+= -fplugin-arg-structleak_plugin-verbose
   gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK)	+= -DSTRUCTLEAK_PLUGIN
 
+  gcc-plugin-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)	+= randomize_layout_plugin.so
+  gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)	+= -DRANDSTRUCT_PLUGIN
+  gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE)	+= -fplugin-arg-randomize_layout_plugin-performance-mode
+
   GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
 
   export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR
diff --git a/scripts/gcc-plugins/.gitignore b/scripts/gcc-plugins/.gitignore
new file mode 100644
index 000000000000..de92ed9e3d83
--- /dev/null
+++ b/scripts/gcc-plugins/.gitignore
@@ -0,0 +1 @@
+randomize_layout_seed.h
diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile
index 8b29dc17c73c..214eb2335c31 100644
--- a/scripts/gcc-plugins/Makefile
+++ b/scripts/gcc-plugins/Makefile
@@ -18,6 +18,14 @@ endif
 
 export HOSTLIBS
 
+$(obj)/randomize_layout_plugin.o: $(objtree)/$(obj)/randomize_layout_seed.h
+quiet_cmd_create_randomize_layout_seed = GENSEED $@
+cmd_create_randomize_layout_seed = \
+  $(CONFIG_SHELL) $(srctree)/$(src)/gen-random-seed.sh $@ $(objtree)/include/generated/randomize_layout_hash.h
+$(objtree)/$(obj)/randomize_layout_seed.h: FORCE
+	$(call if_changed,create_randomize_layout_seed)
+targets = randomize_layout_seed.h randomize_layout_hash.h
+
 $(HOSTLIBS)-y := $(foreach p,$(GCC_PLUGIN),$(if $(findstring /,$(p)),,$(p)))
 always := $($(HOSTLIBS)-y)
 
diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 918953ca4527..6948898b3cdf 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -953,4 +953,9 @@ static inline void debug_gimple_stmt(const_gimple s)
 	get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep)
 #endif
 
+#if BUILDING_GCC_VERSION < 7000
+#define SET_DECL_ALIGN(decl, align)	DECL_ALIGN(decl) = (align)
+#define SET_DECL_MODE(decl, mode)	DECL_MODE(decl) = (mode)
+#endif
+
 #endif
diff --git a/scripts/gcc-plugins/gen-random-seed.sh b/scripts/gcc-plugins/gen-random-seed.sh
new file mode 100644
index 000000000000..7514850f4815
--- /dev/null
+++ b/scripts/gcc-plugins/gen-random-seed.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+if [ ! -f "$1" ]; then
+	SEED=`od -A n -t x8 -N 32 /dev/urandom | tr -d ' \n'`
+	echo "const char *randstruct_seed = \"$SEED\";" > "$1"
+	HASH=`echo -n "$SEED" | sha256sum | cut -d" " -f1 | tr -d ' \n'`
+	echo "#define RANDSTRUCT_HASHED_SEED \"$HASH\"" > "$2"
+fi
diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c
new file mode 100644
index 000000000000..e1d1ba28739f
--- /dev/null
+++ b/scripts/gcc-plugins/randomize_layout_plugin.c
@@ -0,0 +1,1020 @@
+/*
+ * Copyright 2014-2016 by Open Source Security, Inc., Brad Spengler <spender@grsecurity.net>
+ *                   and PaX Team <pageexec@freemail.hu>
+ * Licensed under the GPL v2
+ *
+ * Note: the choice of the license means that the compilation process is
+ *       NOT 'eligible' as defined by gcc's library exception to the GPL v3,
+ *       but for the kernel it doesn't matter since it doesn't link against
+ *       any of the gcc libraries
+ *
+ * Usage:
+ * $ # for 4.5/4.6/C based 4.7
+ * $ gcc -I`gcc -print-file-name=plugin`/include -I`gcc -print-file-name=plugin`/include/c-family -fPIC -shared -O2 -o randomize_layout_plugin.so randomize_layout_plugin.c
+ * $ # for C++ based 4.7/4.8+
+ * $ g++ -I`g++ -print-file-name=plugin`/include -I`g++ -print-file-name=plugin`/include/c-family -fPIC -shared -O2 -o randomize_layout_plugin.so randomize_layout_plugin.c
+ * $ gcc -fplugin=./randomize_layout_plugin.so test.c -O2
+ */
+
+#include "gcc-common.h"
+#include "randomize_layout_seed.h"
+
+#if BUILDING_GCC_MAJOR < 4 || (BUILDING_GCC_MAJOR == 4 && BUILDING_GCC_MINOR < 7)
+#error "The RANDSTRUCT plugin requires GCC 4.7 or newer."
+#endif
+
+#define ORIG_TYPE_NAME(node) \
+	(TYPE_NAME(TYPE_MAIN_VARIANT(node)) != NULL_TREE ? ((const unsigned char *)IDENTIFIER_POINTER(TYPE_NAME(TYPE_MAIN_VARIANT(node)))) : (const unsigned char *)"anonymous")
+
+#define INFORM(loc, msg, ...)	inform(loc, "randstruct: " msg, ##__VA_ARGS__)
+#define MISMATCH(loc, how, ...)	INFORM(loc, "casting between randomized structure pointer types (" how "): %qT and %qT\n", __VA_ARGS__)
+
+__visible int plugin_is_GPL_compatible;
+
+static int performance_mode;
+
+static struct plugin_info randomize_layout_plugin_info = {
+	.version	= "201402201816vanilla",
+	.help		= "disable\t\t\tdo not activate plugin\n"
+			  "performance-mode\tenable cacheline-aware layout randomization\n"
+};
+
+struct whitelist_entry {
+	const char *pathname;
+	const char *lhs;
+	const char *rhs;
+};
+
+static const struct whitelist_entry whitelist[] = {
+	{ }
+};
+
+/* from old Linux dcache.h */
+static inline unsigned long
+partial_name_hash(unsigned long c, unsigned long prevhash)
+{
+	return (prevhash + (c << 4) + (c >> 4)) * 11;
+}
+static inline unsigned int
+name_hash(const unsigned char *name)
+{
+	unsigned long hash = 0;
+	unsigned int len = strlen((const char *)name);
+	while (len--)
+		hash = partial_name_hash(*name++, hash);
+	return (unsigned int)hash;
+}
+
+static tree handle_randomize_layout_attr(tree *node, tree name, tree args, int flags, bool *no_add_attrs)
+{
+	tree type;
+
+	*no_add_attrs = true;
+	if (TREE_CODE(*node) == FUNCTION_DECL) {
+		error("%qE attribute does not apply to functions (%qF)", name, *node);
+		return NULL_TREE;
+	}
+
+	if (TREE_CODE(*node) == PARM_DECL) {
+		error("%qE attribute does not apply to function parameters (%qD)", name, *node);
+		return NULL_TREE;
+	}
+
+	if (TREE_CODE(*node) == VAR_DECL) {
+		error("%qE attribute does not apply to variables (%qD)", name, *node);
+		return NULL_TREE;
+	}
+
+	if (TYPE_P(*node)) {
+		type = *node;
+	} else {
+		gcc_assert(TREE_CODE(*node) == TYPE_DECL);
+		type = TREE_TYPE(*node);
+	}
+
+	if (TREE_CODE(type) != RECORD_TYPE) {
+		error("%qE attribute used on %qT applies to struct types only", name, type);
+		return NULL_TREE;
+	}
+
+	if (lookup_attribute(IDENTIFIER_POINTER(name), TYPE_ATTRIBUTES(type))) {
+		error("%qE attribute is already applied to the type %qT", name, type);
+		return NULL_TREE;
+	}
+
+	*no_add_attrs = false;
+
+	return NULL_TREE;
+}
+
+/* set on complete types that we don't need to inspect further at all */
+static tree handle_randomize_considered_attr(tree *node, tree name, tree args, int flags, bool *no_add_attrs)
+{
+	*no_add_attrs = false;
+	return NULL_TREE;
+}
+
+/*
+ * set on types that we've performed a shuffle on, to prevent re-shuffling
+ * this does not preclude us from inspecting its fields for potential shuffles
+ */
+static tree handle_randomize_performed_attr(tree *node, tree name, tree args, int flags, bool *no_add_attrs)
+{
+	*no_add_attrs = false;
+	return NULL_TREE;
+}
+
+/*
+ * 64bit variant of Bob Jenkins' public domain PRNG
+ * 256 bits of internal state
+ */
+
+typedef unsigned long long u64;
+
+typedef struct ranctx { u64 a; u64 b; u64 c; u64 d; } ranctx;
+
+#define rot(x,k) (((x)<<(k))|((x)>>(64-(k))))
+static u64 ranval(ranctx *x) {
+	u64 e = x->a - rot(x->b, 7);
+	x->a = x->b ^ rot(x->c, 13);
+	x->b = x->c + rot(x->d, 37);
+	x->c = x->d + e;
+	x->d = e + x->a;
+	return x->d;
+}
+
+static void raninit(ranctx *x, u64 *seed) {
+	int i;
+
+	x->a = seed[0];
+	x->b = seed[1];
+	x->c = seed[2];
+	x->d = seed[3];
+
+	for (i=0; i < 30; ++i)
+		(void)ranval(x);
+}
+
+static u64 shuffle_seed[4];
+
+struct partition_group {
+	tree tree_start;
+	unsigned long start;
+	unsigned long length;
+};
+
+static void partition_struct(tree *fields, unsigned long length, struct partition_group *size_groups, unsigned long *num_groups)
+{
+	unsigned long i;
+	unsigned long accum_size = 0;
+	unsigned long accum_length = 0;
+	unsigned long group_idx = 0;
+
+	gcc_assert(length < INT_MAX);
+
+	memset(size_groups, 0, sizeof(struct partition_group) * length);
+
+	for (i = 0; i < length; i++) {
+		if (size_groups[group_idx].tree_start == NULL_TREE) {
+			size_groups[group_idx].tree_start = fields[i];
+			size_groups[group_idx].start = i;
+			accum_length = 0;
+			accum_size = 0;
+		}
+		accum_size += (unsigned long)int_size_in_bytes(TREE_TYPE(fields[i]));
+		accum_length++;
+		if (accum_size >= 64) {
+			size_groups[group_idx].length = accum_length;
+			accum_length = 0;
+			group_idx++;
+		}
+	}
+
+	if (size_groups[group_idx].tree_start != NULL_TREE &&
+	    !size_groups[group_idx].length) {
+		size_groups[group_idx].length = accum_length;
+		group_idx++;
+	}
+
+	*num_groups = group_idx;
+}
+
+static void performance_shuffle(tree *newtree, unsigned long length, ranctx *prng_state)
+{
+	unsigned long i, x;
+	struct partition_group size_group[length];
+	unsigned long num_groups = 0;
+	unsigned long randnum;
+
+	partition_struct(newtree, length, (struct partition_group *)&size_group, &num_groups);
+	for (i = num_groups - 1; i > 0; i--) {
+		struct partition_group tmp;
+		randnum = ranval(prng_state) % (i + 1);
+		tmp = size_group[i];
+		size_group[i] = size_group[randnum];
+		size_group[randnum] = tmp;
+	}
+
+	for (x = 0; x < num_groups; x++) {
+		for (i = size_group[x].start + size_group[x].length - 1; i > size_group[x].start; i--) {
+			tree tmp;
+			if (DECL_BIT_FIELD_TYPE(newtree[i]))
+				continue;
+			randnum = ranval(prng_state) % (i + 1);
+			// we could handle this case differently if desired
+			if (DECL_BIT_FIELD_TYPE(newtree[randnum]))
+				continue;
+			tmp = newtree[i];
+			newtree[i] = newtree[randnum];
+			newtree[randnum] = tmp;
+		}
+	}
+}
+
+static void full_shuffle(tree *newtree, unsigned long length, ranctx *prng_state)
+{
+	unsigned long i, randnum;
+
+	for (i = length - 1; i > 0; i--) {
+		tree tmp;
+		randnum = ranval(prng_state) % (i + 1);
+		tmp = newtree[i];
+		newtree[i] = newtree[randnum];
+		newtree[randnum] = tmp;
+	}
+}
+
+/* modern in-place Fisher-Yates shuffle */
+static void shuffle(const_tree type, tree *newtree, unsigned long length)
+{
+	unsigned long i;
+	u64 seed[4];
+	ranctx prng_state;
+	const unsigned char *structname;
+
+	if (length == 0)
+		return;
+
+	gcc_assert(TREE_CODE(type) == RECORD_TYPE);
+
+	structname = ORIG_TYPE_NAME(type);
+
+#ifdef __DEBUG_PLUGIN
+	fprintf(stderr, "Shuffling struct %s %p\n", (const char *)structname, type);
+#ifdef __DEBUG_VERBOSE
+	debug_tree((tree)type);
+#endif
+#endif
+
+	for (i = 0; i < 4; i++) {
+		seed[i] = shuffle_seed[i];
+		seed[i] ^= name_hash(structname);
+	}
+
+	raninit(&prng_state, (u64 *)&seed);
+
+	if (performance_mode)
+		performance_shuffle(newtree, length, &prng_state);
+	else
+		full_shuffle(newtree, length, &prng_state);
+}
+
+static bool is_flexible_array(const_tree field)
+{
+	const_tree fieldtype;
+	const_tree typesize;
+	const_tree elemtype;
+	const_tree elemsize;
+
+	fieldtype = TREE_TYPE(field);
+	typesize = TYPE_SIZE(fieldtype);
+
+	if (TREE_CODE(fieldtype) != ARRAY_TYPE)
+		return false;
+
+	elemtype = TREE_TYPE(fieldtype);
+	elemsize = TYPE_SIZE(elemtype);
+
+	/* size of type is represented in bits */
+
+	if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE &&
+	    TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE)
+		return true;
+
+	if (typesize != NULL_TREE &&
+	    (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) ||
+	     tree_to_uhwi(typesize) == tree_to_uhwi(elemsize))))
+		return true;
+
+	return false;
+}
+
+static int relayout_struct(tree type)
+{
+	unsigned long num_fields = (unsigned long)list_length(TYPE_FIELDS(type));
+	unsigned long shuffle_length = num_fields;
+	tree field;
+	tree newtree[num_fields];
+	unsigned long i;
+	tree list;
+	tree variant;
+	tree main_variant;
+	expanded_location xloc;
+	bool has_flexarray = false;
+
+	if (TYPE_FIELDS(type) == NULL_TREE)
+		return 0;
+
+	if (num_fields < 2)
+		return 0;
+
+	gcc_assert(TREE_CODE(type) == RECORD_TYPE);
+
+	gcc_assert(num_fields < INT_MAX);
+
+	if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(type)) ||
+	    lookup_attribute("no_randomize_layout", TYPE_ATTRIBUTES(TYPE_MAIN_VARIANT(type))))
+		return 0;
+
+	/* Workaround for 3rd-party VirtualBox source that we can't modify ourselves */
+	if (!strcmp((const char *)ORIG_TYPE_NAME(type), "INTNETTRUNKFACTORY") ||
+	    !strcmp((const char *)ORIG_TYPE_NAME(type), "RAWPCIFACTORY"))
+		return 0;
+
+	/* throw out any structs in uapi */
+	xloc = expand_location(DECL_SOURCE_LOCATION(TYPE_FIELDS(type)));
+
+	if (strstr(xloc.file, "/uapi/"))
+		error(G_("attempted to randomize userland API struct %s"), ORIG_TYPE_NAME(type));
+
+	for (field = TYPE_FIELDS(type), i = 0; field; field = TREE_CHAIN(field), i++) {
+		gcc_assert(TREE_CODE(field) == FIELD_DECL);
+		newtree[i] = field;
+	}
+
+	/*
+	 * enforce that we don't randomize the layout of the last
+	 * element of a struct if it's a 0 or 1-length array
+	 * or a proper flexible array
+	 */
+	if (is_flexible_array(newtree[num_fields - 1])) {
+		has_flexarray = true;
+		shuffle_length--;
+	}
+
+	shuffle(type, (tree *)newtree, shuffle_length);
+
+	/*
+	 * set up a bogus anonymous struct field designed to error out on unnamed struct initializers
+	 * as gcc provides no other way to detect such code
+	 */
+	list = make_node(FIELD_DECL);
+	TREE_CHAIN(list) = newtree[0];
+	TREE_TYPE(list) = void_type_node;
+	DECL_SIZE(list) = bitsize_zero_node;
+	DECL_NONADDRESSABLE_P(list) = 1;
+	DECL_FIELD_BIT_OFFSET(list) = bitsize_zero_node;
+	DECL_SIZE_UNIT(list) = size_zero_node;
+	DECL_FIELD_OFFSET(list) = size_zero_node;
+	DECL_CONTEXT(list) = type;
+	// to satisfy the constify plugin
+	TREE_READONLY(list) = 1;
+
+	for (i = 0; i < num_fields - 1; i++)
+		TREE_CHAIN(newtree[i]) = newtree[i+1];
+	TREE_CHAIN(newtree[num_fields - 1]) = NULL_TREE;
+
+	main_variant = TYPE_MAIN_VARIANT(type);
+	for (variant = main_variant; variant; variant = TYPE_NEXT_VARIANT(variant)) {
+		TYPE_FIELDS(variant) = list;
+		TYPE_ATTRIBUTES(variant) = copy_list(TYPE_ATTRIBUTES(variant));
+		TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("randomize_performed"), NULL_TREE, TYPE_ATTRIBUTES(variant));
+		TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("designated_init"), NULL_TREE, TYPE_ATTRIBUTES(variant));
+		if (has_flexarray)
+			TYPE_ATTRIBUTES(type) = tree_cons(get_identifier("has_flexarray"), NULL_TREE, TYPE_ATTRIBUTES(type));
+	}
+
+	/*
+	 * force a re-layout of the main variant
+	 * the TYPE_SIZE for all variants will be recomputed
+	 * by finalize_type_size()
+	 */
+	TYPE_SIZE(main_variant) = NULL_TREE;
+	layout_type(main_variant);
+	gcc_assert(TYPE_SIZE(main_variant) != NULL_TREE);
+
+	return 1;
+}
+
+/* from constify plugin */
+static const_tree get_field_type(const_tree field)
+{
+	return strip_array_types(TREE_TYPE(field));
+}
+
+/* from constify plugin */
+static bool is_fptr(const_tree fieldtype)
+{
+	if (TREE_CODE(fieldtype) != POINTER_TYPE)
+		return false;
+
+	return TREE_CODE(TREE_TYPE(fieldtype)) == FUNCTION_TYPE;
+}
+
+/* derived from constify plugin */
+static int is_pure_ops_struct(const_tree node)
+{
+	const_tree field;
+
+	gcc_assert(TREE_CODE(node) == RECORD_TYPE || TREE_CODE(node) == UNION_TYPE);
+
+	/* XXX: Do not apply randomization to all-ftpr structs yet. */
+	return 0;
+
+	for (field = TYPE_FIELDS(node); field; field = TREE_CHAIN(field)) {
+		const_tree fieldtype = get_field_type(field);
+		enum tree_code code = TREE_CODE(fieldtype);
+
+		if (node == fieldtype)
+			continue;
+
+		if (!is_fptr(fieldtype))
+			return 0;
+
+		if (code != RECORD_TYPE && code != UNION_TYPE)
+			continue;
+
+		if (!is_pure_ops_struct(fieldtype))
+			return 0;
+	}
+
+	return 1;
+}
+
+static void randomize_type(tree type)
+{
+	tree variant;
+
+	gcc_assert(TREE_CODE(type) == RECORD_TYPE);
+
+	if (lookup_attribute("randomize_considered", TYPE_ATTRIBUTES(type)))
+		return;
+
+	if (lookup_attribute("randomize_layout", TYPE_ATTRIBUTES(TYPE_MAIN_VARIANT(type))) || is_pure_ops_struct(type))
+		relayout_struct(type);
+
+	for (variant = TYPE_MAIN_VARIANT(type); variant; variant = TYPE_NEXT_VARIANT(variant)) {
+		TYPE_ATTRIBUTES(type) = copy_list(TYPE_ATTRIBUTES(type));
+		TYPE_ATTRIBUTES(type) = tree_cons(get_identifier("randomize_considered"), NULL_TREE, TYPE_ATTRIBUTES(type));
+	}
+#ifdef __DEBUG_PLUGIN
+	fprintf(stderr, "Marking randomize_considered on struct %s\n", ORIG_TYPE_NAME(type));
+#ifdef __DEBUG_VERBOSE
+	debug_tree(type);
+#endif
+#endif
+}
+
+static void update_decl_size(tree decl)
+{
+	tree lastval, lastidx, field, init, type, flexsize;
+	unsigned HOST_WIDE_INT len;
+
+	type = TREE_TYPE(decl);
+
+	if (!lookup_attribute("has_flexarray", TYPE_ATTRIBUTES(type)))
+		return;
+
+	init = DECL_INITIAL(decl);
+	if (init == NULL_TREE || init == error_mark_node)
+		return;
+
+	if (TREE_CODE(init) != CONSTRUCTOR)
+		return;
+
+	len = CONSTRUCTOR_NELTS(init);
+        if (!len)
+		return;
+
+	lastval = CONSTRUCTOR_ELT(init, CONSTRUCTOR_NELTS(init) - 1)->value;
+	lastidx = CONSTRUCTOR_ELT(init, CONSTRUCTOR_NELTS(init) - 1)->index;
+
+	for (field = TYPE_FIELDS(TREE_TYPE(decl)); TREE_CHAIN(field); field = TREE_CHAIN(field))
+		;
+
+	if (lastidx != field)
+		return;
+
+	if (TREE_CODE(lastval) != STRING_CST) {
+		error("Only string constants are supported as initializers "
+		      "for randomized structures with flexible arrays");
+		return;
+	}
+
+	flexsize = bitsize_int(TREE_STRING_LENGTH(lastval) *
+		tree_to_uhwi(TYPE_SIZE(TREE_TYPE(TREE_TYPE(lastval)))));
+
+	DECL_SIZE(decl) = size_binop(PLUS_EXPR, TYPE_SIZE(type), flexsize);
+
+	return;
+}
+
+
+static void randomize_layout_finish_decl(void *event_data, void *data)
+{
+	tree decl = (tree)event_data;
+	tree type;
+
+	if (decl == NULL_TREE || decl == error_mark_node)
+		return;
+
+	type = TREE_TYPE(decl);
+
+	if (TREE_CODE(decl) != VAR_DECL)
+		return;
+
+	if (TREE_CODE(type) != RECORD_TYPE && TREE_CODE(type) != UNION_TYPE)
+		return;
+
+	if (!lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(type)))
+		return;
+
+	DECL_SIZE(decl) = 0;
+	DECL_SIZE_UNIT(decl) = 0;
+	SET_DECL_ALIGN(decl, 0);
+	SET_DECL_MODE (decl, VOIDmode);
+	SET_DECL_RTL(decl, 0);
+	update_decl_size(decl);
+	layout_decl(decl, 0);
+}
+
+static void finish_type(void *event_data, void *data)
+{
+	tree type = (tree)event_data;
+
+	if (type == NULL_TREE || type == error_mark_node)
+		return;
+
+	if (TREE_CODE(type) != RECORD_TYPE)
+		return;
+
+	if (TYPE_FIELDS(type) == NULL_TREE)
+		return;
+
+	if (lookup_attribute("randomize_considered", TYPE_ATTRIBUTES(type)))
+		return;
+
+#ifdef __DEBUG_PLUGIN
+	fprintf(stderr, "Calling randomize_type on %s\n", ORIG_TYPE_NAME(type));
+#endif
+#ifdef __DEBUG_VERBOSE
+	debug_tree(type);
+#endif
+	randomize_type(type);
+
+	return;
+}
+
+static struct attribute_spec randomize_layout_attr = {
+	.name		= "randomize_layout",
+	// related to args
+	.min_length	= 0,
+	.max_length	= 0,
+	.decl_required	= false,
+	// need type declaration
+	.type_required	= true,
+	.function_type_required = false,
+	.handler		= handle_randomize_layout_attr,
+#if BUILDING_GCC_VERSION >= 4007
+	.affects_type_identity  = true
+#endif
+};
+
+static struct attribute_spec no_randomize_layout_attr = {
+	.name		= "no_randomize_layout",
+	// related to args
+	.min_length	= 0,
+	.max_length	= 0,
+	.decl_required	= false,
+	// need type declaration
+	.type_required	= true,
+	.function_type_required = false,
+	.handler		= handle_randomize_layout_attr,
+#if BUILDING_GCC_VERSION >= 4007
+	.affects_type_identity  = true
+#endif
+};
+
+static struct attribute_spec randomize_considered_attr = {
+	.name		= "randomize_considered",
+	// related to args
+	.min_length	= 0,
+	.max_length	= 0,
+	.decl_required	= false,
+	// need type declaration
+	.type_required	= true,
+	.function_type_required = false,
+	.handler		= handle_randomize_considered_attr,
+#if BUILDING_GCC_VERSION >= 4007
+	.affects_type_identity  = false
+#endif
+};
+
+static struct attribute_spec randomize_performed_attr = {
+	.name		= "randomize_performed",
+	// related to args
+	.min_length	= 0,
+	.max_length	= 0,
+	.decl_required	= false,
+	// need type declaration
+	.type_required	= true,
+	.function_type_required = false,
+	.handler		= handle_randomize_performed_attr,
+#if BUILDING_GCC_VERSION >= 4007
+	.affects_type_identity  = false
+#endif
+};
+
+static void register_attributes(void *event_data, void *data)
+{
+	register_attribute(&randomize_layout_attr);
+	register_attribute(&no_randomize_layout_attr);
+	register_attribute(&randomize_considered_attr);
+	register_attribute(&randomize_performed_attr);
+}
+
+static void check_bad_casts_in_constructor(tree var, tree init)
+{
+	unsigned HOST_WIDE_INT idx;
+	tree field, val;
+	tree field_type, val_type;
+
+	FOR_EACH_CONSTRUCTOR_ELT(CONSTRUCTOR_ELTS(init), idx, field, val) {
+		if (TREE_CODE(val) == CONSTRUCTOR) {
+			check_bad_casts_in_constructor(var, val);
+			continue;
+		}
+
+		/* pipacs' plugin creates franken-arrays that differ from those produced by
+		   normal code which all have valid 'field' trees. work around this */
+		if (field == NULL_TREE)
+			continue;
+		field_type = TREE_TYPE(field);
+		val_type = TREE_TYPE(val);
+
+		if (TREE_CODE(field_type) != POINTER_TYPE || TREE_CODE(val_type) != POINTER_TYPE)
+			continue;
+
+		if (field_type == val_type)
+			continue;
+
+		field_type = TYPE_MAIN_VARIANT(strip_array_types(TYPE_MAIN_VARIANT(TREE_TYPE(field_type))));
+		val_type = TYPE_MAIN_VARIANT(strip_array_types(TYPE_MAIN_VARIANT(TREE_TYPE(val_type))));
+
+		if (field_type == void_type_node)
+			continue;
+		if (field_type == val_type)
+			continue;
+		if (TREE_CODE(val_type) != RECORD_TYPE)
+			continue;
+
+		if (!lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(val_type)))
+			continue;
+		MISMATCH(DECL_SOURCE_LOCATION(var), "constructor\n", TYPE_MAIN_VARIANT(field_type), TYPE_MAIN_VARIANT(val_type));
+	}
+}
+
+/* derived from the constify plugin */
+static void check_global_variables(void *event_data, void *data)
+{
+	struct varpool_node *node;
+	tree init;
+
+	FOR_EACH_VARIABLE(node) {
+		tree var = NODE_DECL(node);
+		init = DECL_INITIAL(var);
+		if (init == NULL_TREE)
+			continue;
+
+		if (TREE_CODE(init) != CONSTRUCTOR)
+			continue;
+
+		check_bad_casts_in_constructor(var, init);
+	}
+}
+
+static bool dominated_by_is_err(const_tree rhs, basic_block bb)
+{
+	basic_block dom;
+	gimple dom_stmt;
+	gimple call_stmt;
+	const_tree dom_lhs;
+	const_tree poss_is_err_cond;
+	const_tree poss_is_err_func;
+	const_tree is_err_arg;
+
+	dom = get_immediate_dominator(CDI_DOMINATORS, bb);
+	if (!dom)
+		return false;
+
+	dom_stmt = last_stmt(dom);
+	if (!dom_stmt)
+		return false;
+
+	if (gimple_code(dom_stmt) != GIMPLE_COND)
+		return false;
+
+	if (gimple_cond_code(dom_stmt) != NE_EXPR)
+		return false;
+
+	if (!integer_zerop(gimple_cond_rhs(dom_stmt)))
+		return false;
+
+	poss_is_err_cond = gimple_cond_lhs(dom_stmt);
+
+	if (TREE_CODE(poss_is_err_cond) != SSA_NAME)
+		return false;
+
+	call_stmt = SSA_NAME_DEF_STMT(poss_is_err_cond);
+
+	if (gimple_code(call_stmt) != GIMPLE_CALL)
+		return false;
+
+	dom_lhs = gimple_get_lhs(call_stmt);
+	poss_is_err_func = gimple_call_fndecl(call_stmt);
+	if (!poss_is_err_func)
+		return false;
+	if (dom_lhs != poss_is_err_cond)
+		return false;
+	if (strcmp(DECL_NAME_POINTER(poss_is_err_func), "IS_ERR"))
+		return false;
+
+	is_err_arg = gimple_call_arg(call_stmt, 0);
+	if (!is_err_arg)
+		return false;
+
+	if (is_err_arg != rhs)
+		return false;
+
+	return true;
+}
+
+static void handle_local_var_initializers(void)
+{
+	tree var;
+	unsigned int i;
+
+	FOR_EACH_LOCAL_DECL(cfun, i, var) {
+		tree init = DECL_INITIAL(var);
+		if (!init)
+			continue;
+		if (TREE_CODE(init) != CONSTRUCTOR)
+			continue;
+		check_bad_casts_in_constructor(var, init);
+	}
+}
+
+static bool type_name_eq(gimple stmt, const_tree type_tree, const char *wanted_name)
+{
+	const char *type_name;
+
+	if (type_tree == NULL_TREE)
+		return false;
+
+	switch (TREE_CODE(type_tree)) {
+	case RECORD_TYPE:
+		type_name = TYPE_NAME_POINTER(type_tree);
+		break;
+	case INTEGER_TYPE:
+		if (TYPE_PRECISION(type_tree) == CHAR_TYPE_SIZE)
+			type_name = "char";
+		else {
+			INFORM(gimple_location(stmt), "found non-char INTEGER_TYPE cast comparison: %qT\n", type_tree);
+			debug_tree(type_tree);
+			return false;
+		}
+		break;
+	case POINTER_TYPE:
+		if (TREE_CODE(TREE_TYPE(type_tree)) == VOID_TYPE) {
+			type_name = "void *";
+			break;
+		} else {
+			INFORM(gimple_location(stmt), "found non-void POINTER_TYPE cast comparison %qT\n", type_tree);
+			debug_tree(type_tree);
+			return false;
+		}
+	default:
+		INFORM(gimple_location(stmt), "unhandled cast comparison: %qT\n", type_tree);
+		debug_tree(type_tree);
+		return false;
+	}
+
+	return strcmp(type_name, wanted_name) == 0;
+}
+
+static bool whitelisted_cast(gimple stmt, const_tree lhs_tree, const_tree rhs_tree)
+{
+	const struct whitelist_entry *entry;
+	expanded_location xloc = expand_location(gimple_location(stmt));
+
+	for (entry = whitelist; entry->pathname; entry++) {
+		if (!strstr(xloc.file, entry->pathname))
+			continue;
+
+		if (type_name_eq(stmt, lhs_tree, entry->lhs) && type_name_eq(stmt, rhs_tree, entry->rhs))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * iterate over all statements to find "bad" casts:
+ * those where the address of the start of a structure is cast
+ * to a pointer of a structure of a different type, or a
+ * structure pointer type is cast to a different structure pointer type
+ */
+static unsigned int find_bad_casts_execute(void)
+{
+	basic_block bb;
+
+	handle_local_var_initializers();
+
+	FOR_EACH_BB_FN(bb, cfun) {
+		gimple_stmt_iterator gsi;
+
+		for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
+			gimple stmt;
+			const_tree lhs;
+			const_tree lhs_type;
+			const_tree rhs1;
+			const_tree rhs_type;
+			const_tree ptr_lhs_type;
+			const_tree ptr_rhs_type;
+			const_tree op0;
+			const_tree op0_type;
+			enum tree_code rhs_code;
+
+			stmt = gsi_stmt(gsi);
+
+#ifdef __DEBUG_PLUGIN
+#ifdef __DEBUG_VERBOSE
+			debug_gimple_stmt(stmt);
+			debug_tree(gimple_get_lhs(stmt));
+#endif
+#endif
+
+			if (gimple_code(stmt) != GIMPLE_ASSIGN)
+				continue;
+
+#ifdef __DEBUG_PLUGIN
+#ifdef __DEBUG_VERBOSE
+			debug_tree(gimple_assign_rhs1(stmt));
+#endif
+#endif
+
+
+			rhs_code = gimple_assign_rhs_code(stmt);
+
+			if (rhs_code != ADDR_EXPR && rhs_code != SSA_NAME)
+				continue;
+
+			lhs = gimple_get_lhs(stmt);
+			lhs_type = TREE_TYPE(lhs);
+			rhs1 = gimple_assign_rhs1(stmt);
+			rhs_type = TREE_TYPE(rhs1);
+
+			if (TREE_CODE(rhs_type) != POINTER_TYPE ||
+			    TREE_CODE(lhs_type) != POINTER_TYPE)
+				continue;
+
+			ptr_lhs_type = TYPE_MAIN_VARIANT(strip_array_types(TYPE_MAIN_VARIANT(TREE_TYPE(lhs_type))));
+			ptr_rhs_type = TYPE_MAIN_VARIANT(strip_array_types(TYPE_MAIN_VARIANT(TREE_TYPE(rhs_type))));
+
+			if (ptr_rhs_type == void_type_node)
+				continue;
+
+			if (ptr_lhs_type == void_type_node)
+				continue;
+
+			if (dominated_by_is_err(rhs1, bb))
+				continue;
+
+			if (TREE_CODE(ptr_rhs_type) != RECORD_TYPE) {
+#ifndef __DEBUG_PLUGIN
+				if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(ptr_lhs_type)))
+#endif
+				{
+					if (!whitelisted_cast(stmt, ptr_lhs_type, ptr_rhs_type))
+						MISMATCH(gimple_location(stmt), "rhs", ptr_lhs_type, ptr_rhs_type);
+				}
+				continue;
+			}
+
+			if (rhs_code == SSA_NAME && ptr_lhs_type == ptr_rhs_type)
+				continue;
+
+			if (rhs_code == ADDR_EXPR) {
+				op0 = TREE_OPERAND(rhs1, 0);
+
+				if (op0 == NULL_TREE)
+					continue;
+
+				if (TREE_CODE(op0) != VAR_DECL)
+					continue;
+
+				op0_type = TYPE_MAIN_VARIANT(strip_array_types(TYPE_MAIN_VARIANT(TREE_TYPE(op0))));
+				if (op0_type == ptr_lhs_type)
+					continue;
+
+#ifndef __DEBUG_PLUGIN
+				if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(op0_type)))
+#endif
+				{
+					if (!whitelisted_cast(stmt, ptr_lhs_type, op0_type))
+						MISMATCH(gimple_location(stmt), "op0", ptr_lhs_type, op0_type);
+				}
+			} else {
+				const_tree ssa_name_var = SSA_NAME_VAR(rhs1);
+				/* skip bogus type casts introduced by container_of */
+				if (ssa_name_var != NULL_TREE && DECL_NAME(ssa_name_var) && 
+				    !strcmp((const char *)DECL_NAME_POINTER(ssa_name_var), "__mptr"))
+					continue;
+#ifndef __DEBUG_PLUGIN
+				if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(ptr_rhs_type)))
+#endif
+				{
+					if (!whitelisted_cast(stmt, ptr_lhs_type, ptr_rhs_type))
+						MISMATCH(gimple_location(stmt), "ssa", ptr_lhs_type, ptr_rhs_type);
+				}
+			}
+
+		}
+	}
+	return 0;
+}
+
+#define PASS_NAME find_bad_casts
+#define NO_GATE
+#define TODO_FLAGS_FINISH TODO_dump_func
+#include "gcc-generate-gimple-pass.h"
+
+__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
+{
+	int i;
+	const char * const plugin_name = plugin_info->base_name;
+	const int argc = plugin_info->argc;
+	const struct plugin_argument * const argv = plugin_info->argv;
+	bool enable = true;
+	int obtained_seed = 0;
+	struct register_pass_info find_bad_casts_pass_info;
+
+	find_bad_casts_pass_info.pass			= make_find_bad_casts_pass();
+	find_bad_casts_pass_info.reference_pass_name	= "ssa";
+	find_bad_casts_pass_info.ref_pass_instance_number	= 1;
+	find_bad_casts_pass_info.pos_op			= PASS_POS_INSERT_AFTER;
+
+	if (!plugin_default_version_check(version, &gcc_version)) {
+		error(G_("incompatible gcc/plugin versions"));
+		return 1;
+	}
+
+	if (strncmp(lang_hooks.name, "GNU C", 5) && !strncmp(lang_hooks.name, "GNU C+", 6)) {
+		inform(UNKNOWN_LOCATION, G_("%s supports C only, not %s"), plugin_name, lang_hooks.name);
+		enable = false;
+	}
+
+	for (i = 0; i < argc; ++i) {
+		if (!strcmp(argv[i].key, "disable")) {
+			enable = false;
+			continue;
+		}
+		if (!strcmp(argv[i].key, "performance-mode")) {
+			performance_mode = 1;
+			continue;
+		}
+		error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
+	}
+
+	if (strlen(randstruct_seed) != 64) {
+		error(G_("invalid seed value supplied for %s plugin"), plugin_name);
+		return 1;
+	}
+	obtained_seed = sscanf(randstruct_seed, "%016llx%016llx%016llx%016llx",
+		&shuffle_seed[0], &shuffle_seed[1], &shuffle_seed[2], &shuffle_seed[3]);
+	if (obtained_seed != 4) {
+		error(G_("Invalid seed supplied for %s plugin"), plugin_name);
+		return 1;
+	}
+
+	register_callback(plugin_name, PLUGIN_INFO, NULL, &randomize_layout_plugin_info);
+	if (enable) {
+		register_callback(plugin_name, PLUGIN_ALL_IPA_PASSES_START, check_global_variables, NULL);
+		register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &find_bad_casts_pass_info);
+		register_callback(plugin_name, PLUGIN_FINISH_TYPE, finish_type, NULL);
+		register_callback(plugin_name, PLUGIN_FINISH_DECL, randomize_layout_finish_decl, NULL);
+	}
+	register_callback(plugin_name, PLUGIN_ATTRIBUTES, register_attributes, NULL);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 6932ec60cc0a71689150b16b71427cfdc6575602 Mon Sep 17 00:00:00 2001
From: Andreas Färber <afaerber@suse.de>
Date: Mon, 5 Jun 2017 21:04:21 +0200
Subject: soc: actions: owl-sps: Factor out owl_sps_set_pg() for power-gating
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow the SMP code to reuse PM domain code for CPU2/CPU3 wakeup.

Signed-off-by: Andreas Färber <afaerber@suse.de>
---
 drivers/soc/actions/Kconfig          |  4 +++
 drivers/soc/actions/Makefile         |  1 +
 drivers/soc/actions/owl-sps-helper.c | 51 ++++++++++++++++++++++++++++++++++++
 drivers/soc/actions/owl-sps.c        | 34 +++---------------------
 include/linux/soc/actions/owl-sps.h  | 11 ++++++++
 5 files changed, 70 insertions(+), 31 deletions(-)
 create mode 100644 drivers/soc/actions/owl-sps-helper.c
 create mode 100644 include/linux/soc/actions/owl-sps.h

(limited to 'include/linux')

diff --git a/drivers/soc/actions/Kconfig b/drivers/soc/actions/Kconfig
index bdf827d5ce78..9d68b5a771c3 100644
--- a/drivers/soc/actions/Kconfig
+++ b/drivers/soc/actions/Kconfig
@@ -1,8 +1,12 @@
 if ARCH_ACTIONS || COMPILE_TEST
 
+config OWL_PM_DOMAINS_HELPER
+	bool
+
 config OWL_PM_DOMAINS
 	bool "Actions Semi SPS power domains"
 	depends on PM
+	select OWL_PM_DOMAINS_HELPER
 	select PM_GENERIC_DOMAINS
 	help
 	  Say 'y' here to enable support for Smart Power System (SPS)
diff --git a/drivers/soc/actions/Makefile b/drivers/soc/actions/Makefile
index 720c34ed16e4..1e101b06bab1 100644
--- a/drivers/soc/actions/Makefile
+++ b/drivers/soc/actions/Makefile
@@ -1 +1,2 @@
+obj-$(CONFIG_OWL_PM_DOMAINS_HELPER) += owl-sps-helper.o
 obj-$(CONFIG_OWL_PM_DOMAINS) += owl-sps.o
diff --git a/drivers/soc/actions/owl-sps-helper.c b/drivers/soc/actions/owl-sps-helper.c
new file mode 100644
index 000000000000..9d7a2c2b44ec
--- /dev/null
+++ b/drivers/soc/actions/owl-sps-helper.c
@@ -0,0 +1,51 @@
+/*
+ * Actions Semi Owl Smart Power System (SPS) shared helpers
+ *
+ * Copyright 2012 Actions Semi Inc.
+ * Author: Actions Semi, Inc.
+ *
+ * Copyright (c) 2017 Andreas Färber
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#define OWL_SPS_PG_CTL	0x0
+
+int owl_sps_set_pg(void __iomem *base, u32 pwr_mask, u32 ack_mask, bool enable)
+{
+	u32 val;
+	bool ack;
+	int timeout;
+
+	val = readl(base + OWL_SPS_PG_CTL);
+	ack = val & ack_mask;
+	if (ack == enable)
+		return 0;
+
+	if (enable)
+		val |= pwr_mask;
+	else
+		val &= ~pwr_mask;
+
+	writel(val, base + OWL_SPS_PG_CTL);
+
+	for (timeout = 5000; timeout > 0; timeout -= 50) {
+		val = readl(base + OWL_SPS_PG_CTL);
+		if ((val & ack_mask) == (enable ? ack_mask : 0))
+			break;
+		udelay(50);
+	}
+	if (timeout <= 0)
+		return -ETIMEDOUT;
+
+	udelay(10);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(owl_sps_set_pg);
diff --git a/drivers/soc/actions/owl-sps.c b/drivers/soc/actions/owl-sps.c
index 8abb72f01929..875225bfa21c 100644
--- a/drivers/soc/actions/owl-sps.c
+++ b/drivers/soc/actions/owl-sps.c
@@ -12,15 +12,12 @@
  * option) any later version.
  */
 
-#include <linux/delay.h>
-#include <linux/io.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/pm_domain.h>
+#include <linux/soc/actions/owl-sps.h>
 #include <dt-bindings/power/owl-s500-powergate.h>
 
-#define OWL_SPS_PG_CTL	0x0
-
 struct owl_sps_domain_info {
 	const char *name;
 	int pwr_bit;
@@ -51,37 +48,12 @@ struct owl_sps_domain {
 
 static int owl_sps_set_power(struct owl_sps_domain *pd, bool enable)
 {
-	u32 val, pwr_mask, ack_mask;
-	int timeout;
-	bool ack;
+	u32 pwr_mask, ack_mask;
 
 	ack_mask = BIT(pd->info->ack_bit);
 	pwr_mask = BIT(pd->info->pwr_bit);
-	val = readl(pd->sps->base + OWL_SPS_PG_CTL);
-	ack = val & ack_mask;
-
-	if (ack == enable)
-		return 0;
-
-	if (enable)
-		val |= pwr_mask;
-	else
-		val &= ~pwr_mask;
-
-	writel(val, pd->sps->base + OWL_SPS_PG_CTL);
 
-	for (timeout = 5000; timeout > 0; timeout -= 50) {
-		val = readl(pd->sps->base + OWL_SPS_PG_CTL);
-		if ((val & ack_mask) == (enable ? ack_mask : 0))
-			break;
-		udelay(50);
-	}
-	if (timeout <= 0)
-		return -ETIMEDOUT;
-
-	udelay(10);
-
-	return 0;
+	return owl_sps_set_pg(pd->sps->base, pwr_mask, ack_mask, enable);
 }
 
 static int owl_sps_power_on(struct generic_pm_domain *domain)
diff --git a/include/linux/soc/actions/owl-sps.h b/include/linux/soc/actions/owl-sps.h
new file mode 100644
index 000000000000..33d0dbeceb55
--- /dev/null
+++ b/include/linux/soc/actions/owl-sps.h
@@ -0,0 +1,11 @@
+/*
+ * Copyright (c) 2017 Andreas Färber
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+#ifndef SOC_ACTIONS_OWL_SPS_H
+#define SOC_ACTIONS_OWL_SPS_H
+
+int owl_sps_set_pg(void __iomem *base, u32 pwr_mask, u32 ack_mask, bool enable);
+
+#endif
-- 
cgit v1.2.3


From 32d602771b624e3a2fc86d5e220e9fa7dced767a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Jun 2017 18:25:03 -0700
Subject: xdp: pass XDP flags into install handlers

Pass XDP flags to the xdp ndo.  This will allow drivers to look
at the mode flags and make decisions about offload.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c7118b3bd69..b194817631de 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -820,6 +820,7 @@ struct netdev_xdp {
 	union {
 		/* XDP_SETUP_PROG */
 		struct {
+			u32 flags;
 			struct bpf_prog *prog;
 			struct netlink_ext_ack *extack;
 		};
diff --git a/net/core/dev.c b/net/core/dev.c
index df7637733e3c..09f9e99f4a3e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6951,7 +6951,7 @@ bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op,
 }
 
 static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
-			   struct netlink_ext_ack *extack,
+			   struct netlink_ext_ack *extack, u32 flags,
 			   struct bpf_prog *prog)
 {
 	struct netdev_xdp xdp;
@@ -6959,6 +6959,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
 	memset(&xdp, 0, sizeof(xdp));
 	xdp.command = XDP_SETUP_PROG;
 	xdp.extack = extack;
+	xdp.flags = flags;
 	xdp.prog = prog;
 
 	return xdp_op(dev, &xdp);
@@ -7003,7 +7004,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			return PTR_ERR(prog);
 	}
 
-	err = dev_xdp_install(dev, xdp_op, extack, prog);
+	err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
 
-- 
cgit v1.2.3


From ee5d032f7d032e2cea354522a46b211de84c4e8c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Jun 2017 18:25:04 -0700
Subject: xdp: add HW offload mode flag for installing programs

Add an installation-time flag for requesting that the program
be installed only if it can be offloaded to HW.

Internally new command for ndo_xdp is added, this way we avoid
putting checks into drivers since they all return -EINVAL on
an unknown command.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    | 1 +
 include/uapi/linux/if_link.h | 7 +++++--
 net/core/dev.c               | 7 +++++--
 net/core/rtnetlink.c         | 4 ++--
 4 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b194817631de..a838591aad28 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -807,6 +807,7 @@ enum xdp_netdev_command {
 	 * when it is no longer used.
 	 */
 	XDP_SETUP_PROG,
+	XDP_SETUP_PROG_HW,
 	/* Check if a bpf program is set on the device.  The callee should
 	 * return true if a program is currently attached and running.
 	 */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index dd88375a6580..ce777ec88e1e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -891,9 +891,12 @@ enum {
 #define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
 #define XDP_FLAGS_SKB_MODE		(1U << 1)
 #define XDP_FLAGS_DRV_MODE		(1U << 2)
+#define XDP_FLAGS_HW_MODE		(1U << 3)
+#define XDP_FLAGS_MODES			(XDP_FLAGS_SKB_MODE | \
+					 XDP_FLAGS_DRV_MODE | \
+					 XDP_FLAGS_HW_MODE)
 #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
-					 XDP_FLAGS_SKB_MODE | \
-					 XDP_FLAGS_DRV_MODE)
+					 XDP_FLAGS_MODES)
 
 /* These are stored into IFLA_XDP_ATTACHED on dump. */
 enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index 09f9e99f4a3e..cd885e9e3363 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6957,7 +6957,10 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
 	struct netdev_xdp xdp;
 
 	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = XDP_SETUP_PROG;
+	if (flags & XDP_FLAGS_HW_MODE)
+		xdp.command = XDP_SETUP_PROG_HW;
+	else
+		xdp.command = XDP_SETUP_PROG;
 	xdp.extack = extack;
 	xdp.flags = flags;
 	xdp.prog = prog;
@@ -6985,7 +6988,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	ASSERT_RTNL();
 
 	xdp_op = xdp_chk = ops->ndo_xdp;
-	if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
+	if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
 		return -EOPNOTSUPP;
 	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
 		xdp_op = generic_xdp_install;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8da89c1136e5..a5bedd03a63e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -16,6 +16,7 @@
  *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -2253,8 +2254,7 @@ static int do_setlink(const struct sk_buff *skb,
 				err = -EINVAL;
 				goto errout;
 			}
-			if ((xdp_flags & XDP_FLAGS_SKB_MODE) &&
-			    (xdp_flags & XDP_FLAGS_DRV_MODE)) {
+			if (hweight32(xdp_flags & XDP_FLAGS_MODES) > 1) {
 				err = -EINVAL;
 				goto errout;
 			}
-- 
cgit v1.2.3


From ce158e580a5bdc93286a3b630638bdd47d4ec663 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Jun 2017 18:25:09 -0700
Subject: xdp: add reporting of offload mode

Extend the XDP_ATTACHED_* values to include offloaded mode.
Let drivers report whether program is installed in the driver
or the HW by changing the prog_attached field from bool to
u8 (type of the netlink attribute).

Exploit the fact that the value of XDP_ATTACHED_DRV is 1,
therefore since all drivers currently assign the mode with
double negation:
       mode = !!xdp_prog;
no drivers have to be modified.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    | 7 ++++---
 include/uapi/linux/if_link.h | 1 +
 net/core/dev.c               | 3 +--
 net/core/rtnetlink.c         | 6 +++---
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a838591aad28..68f5d899d1e6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -809,7 +809,8 @@ enum xdp_netdev_command {
 	XDP_SETUP_PROG,
 	XDP_SETUP_PROG_HW,
 	/* Check if a bpf program is set on the device.  The callee should
-	 * return true if a program is currently attached and running.
+	 * set @prog_attached to one of XDP_ATTACHED_* values, note that "true"
+	 * is equivalent to XDP_ATTACHED_DRV.
 	 */
 	XDP_QUERY_PROG,
 };
@@ -827,7 +828,7 @@ struct netdev_xdp {
 		};
 		/* XDP_QUERY_PROG */
 		struct {
-			bool prog_attached;
+			u8 prog_attached;
 			u32 prog_id;
 		};
 	};
@@ -3307,7 +3308,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
-bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id);
+u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ce777ec88e1e..8d062c58d5cb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -903,6 +903,7 @@ enum {
 	XDP_ATTACHED_NONE = 0,
 	XDP_ATTACHED_DRV,
 	XDP_ATTACHED_SKB,
+	XDP_ATTACHED_HW,
 };
 
 enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index cd885e9e3363..a91572aa73d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6934,8 +6934,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op,
-			u32 *prog_id)
+u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
 {
 	struct netdev_xdp xdp;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a5bedd03a63e..9a1bd510c812 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1265,10 +1265,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 		*prog_id = generic_xdp_prog->aux->id;
 		return XDP_ATTACHED_SKB;
 	}
-	if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp, prog_id))
-		return XDP_ATTACHED_DRV;
+	if (!ops->ndo_xdp)
+		return XDP_ATTACHED_NONE;
 
-	return XDP_ATTACHED_NONE;
+	return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
-- 
cgit v1.2.3


From 239946314e57711d7da546b67964d0b387a3ee42 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 22 Jun 2017 15:07:39 -0700
Subject: bpf: possibly avoid extra masking for narrower load in verifier

Commit 31fd85816dbe ("bpf: permits narrower load from bpf program
context fields") permits narrower load for certain ctx fields.
The commit however will already generate a masking even if
the prog-specific ctx conversion produces the result with
narrower size.

For example, for __sk_buff->protocol, the ctx conversion
loads the data into register with 2-byte load.
A narrower 2-byte load should not generate masking.
For __sk_buff->vlan_present, the conversion function
set the result as either 0 or 1, essentially a byte.
The narrower 2-byte or 1-byte load should not generate masking.

To avoid unnecessary masking, prog-specific *_is_valid_access
now passes converted_op_size back to verifier, which indicates
the valid data width after perceived future conversion.
Based on this information, verifier is able to avoid
unnecessary marking.

Since we want more information back from prog-specific
*_is_valid_access checking, all of them are packed into
one data structure for more clarity.

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h          | 11 +++++-
 include/linux/bpf_verifier.h |  3 +-
 kernel/bpf/verifier.c        | 29 ++++++++++----
 kernel/trace/bpf_trace.c     | 17 +++++---
 net/core/filter.c            | 92 +++++++++++++++++++++++++-------------------
 5 files changed, 97 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1bcbf0a71f75..deca4e7f2845 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -149,6 +149,15 @@ enum bpf_reg_type {
 
 struct bpf_prog;
 
+/* The information passed from prog-specific *_is_valid_access
+ * back to the verifier.
+ */
+struct bpf_insn_access_aux {
+	enum bpf_reg_type reg_type;
+	int ctx_field_size;
+	int converted_op_size;
+};
+
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -157,7 +166,7 @@ struct bpf_verifier_ops {
 	 * with 'type' (read or write) is allowed
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
-				enum bpf_reg_type *reg_type, int *ctx_field_size);
+				struct bpf_insn_access_aux *info);
 	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
 			    const struct bpf_prog *prog);
 	u32 (*convert_ctx_access)(enum bpf_access_type type,
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 189741c0da85..621076f56251 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -73,7 +73,8 @@ struct bpf_insn_aux_data {
 		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
 		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
 	};
-	int ctx_field_size; /* the ctx field size for load/store insns, maybe 0 */
+	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+	int converted_op_size; /* the valid value width after perceived conversion */
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 44b97d958fb7..74ea96ea391b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -761,22 +761,34 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
-	int ctx_field_size = 0;
+	struct bpf_insn_access_aux info = { .reg_type = *reg_type };
 
 	/* for analyzer ctx accesses are already validated and converted */
 	if (env->analyzer_ops)
 		return 0;
 
 	if (env->prog->aux->ops->is_valid_access &&
-	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type, &ctx_field_size)) {
-		/* a non zero ctx_field_size indicates:
+	    env->prog->aux->ops->is_valid_access(off, size, t, &info)) {
+		/* a non zero info.ctx_field_size indicates:
 		 * . For this field, the prog type specific ctx conversion algorithm
 		 *   only supports whole field access.
 		 * . This ctx access is a candiate for later verifier transformation
 		 *   to load the whole field and then apply a mask to get correct result.
+		 * a non zero info.converted_op_size indicates perceived actual converted
+		 * value width in convert_ctx_access.
 		 */
-		if (ctx_field_size)
-			env->insn_aux_data[insn_idx].ctx_field_size = ctx_field_size;
+		if ((info.ctx_field_size && !info.converted_op_size) ||
+		    (!info.ctx_field_size &&  info.converted_op_size)) {
+			verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n",
+				env->prog->type, off, size);
+			return -EACCES;
+		}
+
+		if (info.ctx_field_size) {
+			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+			env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size;
+		}
+		*reg_type = info.reg_type;
 
 		/* remember the offset of last byte accessed in ctx */
 		if (env->prog->aux->max_ctx_offset < off + size)
@@ -3388,7 +3400,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i, cnt, off, size, ctx_field_size, is_narrower_load, delta = 0;
+	int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0;
 
 	if (ops->gen_prologue) {
 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -3431,7 +3443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		off = insn->off;
 		size = bpf_size_to_bytes(BPF_SIZE(insn->code));
 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
-		is_narrower_load = (type == BPF_READ && size < ctx_field_size);
+		converted_op_size = env->insn_aux_data[i + delta].converted_op_size;
+		is_narrower_load = type == BPF_READ && size < ctx_field_size;
 
 		/* If the read access is a narrower load of the field,
 		 * convert to a 4/8-byte load, to minimum program type specific
@@ -3453,7 +3466,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
 		}
-		if (is_narrower_load) {
+		if (is_narrower_load && size < converted_op_size) {
 			if (ctx_field_size <= 4)
 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
 							(1 << size * 8) - 1);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9d3ec8253131..97c46b440cd6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -479,7 +479,7 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 
 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-					enum bpf_reg_type *reg_type, int *ctx_field_size)
+					struct bpf_insn_access_aux *info)
 {
 	if (off < 0 || off >= sizeof(struct pt_regs))
 		return false;
@@ -562,7 +562,7 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
 }
 
 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-				    enum bpf_reg_type *reg_type, int *ctx_field_size)
+				    struct bpf_insn_access_aux *info)
 {
 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
 		return false;
@@ -581,7 +581,7 @@ const struct bpf_verifier_ops tracepoint_prog_ops = {
 };
 
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-				    enum bpf_reg_type *reg_type, int *ctx_field_size)
+				    struct bpf_insn_access_aux *info)
 {
 	int sample_period_off;
 
@@ -595,12 +595,17 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 	/* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */
 	sample_period_off = offsetof(struct bpf_perf_event_data, sample_period);
 	if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) {
-		*ctx_field_size = 8;
+		int allowed;
+
 #ifdef __LITTLE_ENDIAN
-		return (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
+		allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
 #else
-		return ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
+		allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
 #endif
+		if (!allowed)
+			return false;
+		info->ctx_field_size = 8;
+		info->converted_op_size = 8;
 	} else {
 		if (size != sizeof(long))
 			return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index 60ed6f343a63..4b788007415f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2856,8 +2856,37 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
 	}
 }
 
+static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info)
+{
+	info->ctx_field_size = 4;
+	switch (off) {
+	case offsetof(struct __sk_buff, pkt_type) ...
+	     offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, vlan_present) ...
+	     offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1:
+		info->converted_op_size = 1;
+		break;
+	case offsetof(struct __sk_buff, queue_mapping) ...
+	     offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, protocol) ...
+	     offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, vlan_tci) ...
+	     offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, vlan_proto) ...
+	     offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, tc_index) ...
+	     offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1:
+	case offsetof(struct __sk_buff, tc_classid) ...
+	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+		info->converted_op_size = 2;
+		break;
+	default:
+		info->converted_op_size = 4;
+	}
+}
+
 static bool __is_valid_access(int off, int size, enum bpf_access_type type,
-			      int *ctx_field_size)
+			      struct bpf_insn_access_aux *info)
 {
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
@@ -2875,24 +2904,32 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type,
 		break;
 	case offsetof(struct __sk_buff, data) ...
 	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
+		if (size != sizeof(__u32))
+			return false;
+		info->reg_type = PTR_TO_PACKET;
+		break;
 	case offsetof(struct __sk_buff, data_end) ...
 	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
 		if (size != sizeof(__u32))
 			return false;
+		info->reg_type = PTR_TO_PACKET_END;
 		break;
 	default:
-		/* permit narrower load for not cb/data/data_end fields */
-		*ctx_field_size = 4;
 		if (type == BPF_WRITE) {
 			if (size != sizeof(__u32))
 				return false;
 		} else {
-			if (size != sizeof(__u32))
+			int allowed;
+
+			/* permit narrower load for not cb/data/data_end fields */
 #ifdef __LITTLE_ENDIAN
-				return (off & 0x3) == 0 && (size == 1 || size == 2);
+			allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0;
 #else
-				return (off & 0x3) + size == 4 && (size == 1 || size == 2);
+			allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0;
 #endif
+			if (!allowed)
+				return false;
+			__set_access_aux_info(off, info);
 		}
 	}
 
@@ -2901,8 +2938,7 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type,
 
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type,
-				      enum bpf_reg_type *reg_type,
-				      int *ctx_field_size)
+				      struct bpf_insn_access_aux *info)
 {
 	switch (off) {
 	case offsetof(struct __sk_buff, tc_classid) ...
@@ -2924,13 +2960,12 @@ static bool sk_filter_is_valid_access(int off, int size,
 		}
 	}
 
-	return __is_valid_access(off, size, type, ctx_field_size);
+	return __is_valid_access(off, size, type, info);
 }
 
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
-				enum bpf_reg_type *reg_type,
-				int *ctx_field_size)
+				struct bpf_insn_access_aux *info)
 {
 	switch (off) {
 	case offsetof(struct __sk_buff, tc_classid) ...
@@ -2950,22 +2985,12 @@ static bool lwt_is_valid_access(int off, int size,
 		}
 	}
 
-	switch (off) {
-	case offsetof(struct __sk_buff, data):
-		*reg_type = PTR_TO_PACKET;
-		break;
-	case offsetof(struct __sk_buff, data_end):
-		*reg_type = PTR_TO_PACKET_END;
-		break;
-	}
-
-	return __is_valid_access(off, size, type, ctx_field_size);
+	return __is_valid_access(off, size, type, info);
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
 					enum bpf_access_type type,
-					enum bpf_reg_type *reg_type,
-					int *ctx_field_size)
+					struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
@@ -3028,8 +3053,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
-				       enum bpf_reg_type *reg_type,
-				       int *ctx_field_size)
+				       struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
@@ -3045,16 +3069,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		}
 	}
 
-	switch (off) {
-	case offsetof(struct __sk_buff, data):
-		*reg_type = PTR_TO_PACKET;
-		break;
-	case offsetof(struct __sk_buff, data_end):
-		*reg_type = PTR_TO_PACKET_END;
-		break;
-	}
-
-	return __is_valid_access(off, size, type, ctx_field_size);
+	return __is_valid_access(off, size, type, info);
 }
 
 static bool __is_valid_xdp_access(int off, int size)
@@ -3071,18 +3086,17 @@ static bool __is_valid_xdp_access(int off, int size)
 
 static bool xdp_is_valid_access(int off, int size,
 				enum bpf_access_type type,
-				enum bpf_reg_type *reg_type,
-				int *ctx_field_size)
+				struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE)
 		return false;
 
 	switch (off) {
 	case offsetof(struct xdp_md, data):
-		*reg_type = PTR_TO_PACKET;
+		info->reg_type = PTR_TO_PACKET;
 		break;
 	case offsetof(struct xdp_md, data_end):
-		*reg_type = PTR_TO_PACKET_END;
+		info->reg_type = PTR_TO_PACKET_END;
 		break;
 	}
 
-- 
cgit v1.2.3


From 735d8a18433e8d953e4e2b92883bfcc566e382c2 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 23 Jun 2017 10:33:14 -0700
Subject: net: phy: Support "internal" PHY interface

Now that the Device Tree binding has been updated, update the PHY
library phy_interface_t and phy_modes to support the "internal" PHY
interface type.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 23d2e46dd322..1d8d70193782 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -64,6 +64,7 @@
 /* Interface Mode definitions */
 typedef enum {
 	PHY_INTERFACE_MODE_NA,
+	PHY_INTERFACE_MODE_INTERNAL,
 	PHY_INTERFACE_MODE_MII,
 	PHY_INTERFACE_MODE_GMII,
 	PHY_INTERFACE_MODE_SGMII,
@@ -114,6 +115,8 @@ static inline const char *phy_modes(phy_interface_t interface)
 	switch (interface) {
 	case PHY_INTERFACE_MODE_NA:
 		return "";
+	case PHY_INTERFACE_MODE_INTERNAL:
+		return "internal";
 	case PHY_INTERFACE_MODE_MII:
 		return "mii";
 	case PHY_INTERFACE_MODE_GMII:
-- 
cgit v1.2.3


From bce70fef7279243d62adbf5f53998b8d3d016144 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: platform/chrome: cros_ec_lpc: Add R/W helpers to LPC protocol
 variants

Call common functions for read / write to prepare support for future
LPC protocol variants which use different I/O ops than inb / outb.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Makefile          |  3 +-
 drivers/platform/chrome/cros_ec_lpc.c     | 88 +++++++++++++------------------
 drivers/platform/chrome/cros_ec_lpc_reg.c | 64 ++++++++++++++++++++++
 include/linux/mfd/cros_ec_lpc_reg.h       | 47 +++++++++++++++++
 4 files changed, 151 insertions(+), 51 deletions(-)
 create mode 100644 drivers/platform/chrome/cros_ec_lpc_reg.c
 create mode 100644 include/linux/mfd/cros_ec_lpc_reg.h

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 3870afeefcf4..61182fd8f597 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -5,6 +5,7 @@ cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
 					   cros_ec_lightbar.o cros_ec_vbc.o \
 					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
-obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpc.o
+cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_reg.o
+obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index f9a245465fd0..6a782a695eb6 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -26,19 +26,22 @@
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_reg.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 
-#define DRV_NAME "cros_ec_lpc"
+#define DRV_NAME "cros_ec_lpcs"
 
 static int ec_response_timed_out(void)
 {
 	unsigned long one_second = jiffies + HZ;
+	u8 data;
 
 	usleep_range(200, 300);
 	do {
-		if (!(inb(EC_LPC_ADDR_HOST_CMD) & EC_LPC_STATUS_BUSY_MASK))
+		if (!(cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_CMD, 1, &data) &
+		    EC_LPC_STATUS_BUSY_MASK))
 			return 0;
 		usleep_range(100, 200);
 	} while (time_before(jiffies, one_second));
@@ -51,21 +54,20 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 {
 	struct ec_host_request *request;
 	struct ec_host_response response;
-	u8 sum = 0;
-	int i;
+	u8 sum;
 	int ret = 0;
 	u8 *dout;
 
 	ret = cros_ec_prepare_tx(ec, msg);
 
 	/* Write buffer */
-	for (i = 0; i < ret; i++)
-		outb(ec->dout[i], EC_LPC_ADDR_HOST_PACKET + i);
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PACKET, ret, ec->dout);
 
 	request = (struct ec_host_request *)ec->dout;
 
 	/* Here we go */
-	outb(EC_COMMAND_PROTOCOL_3, EC_LPC_ADDR_HOST_CMD);
+	sum = EC_COMMAND_PROTOCOL_3;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
 
 	if (ec_response_timed_out()) {
 		dev_warn(ec->dev, "EC responsed timed out\n");
@@ -74,17 +76,15 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Check result */
-	msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+	msg->result = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_DATA, 1, &sum);
 	ret = cros_ec_check_result(ec, msg);
 	if (ret)
 		goto done;
 
 	/* Read back response */
 	dout = (u8 *)&response;
-	for (i = 0; i < sizeof(response); i++) {
-		dout[i] = inb(EC_LPC_ADDR_HOST_PACKET + i);
-		sum += dout[i];
-	}
+	sum = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PACKET, sizeof(response),
+				     dout);
 
 	msg->result = response.result;
 
@@ -97,11 +97,9 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Read response and process checksum */
-	for (i = 0; i < response.data_len; i++) {
-		msg->data[i] =
-			inb(EC_LPC_ADDR_HOST_PACKET + sizeof(response) + i);
-		sum += msg->data[i];
-	}
+	sum += cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PACKET +
+				      sizeof(response), response.data_len,
+				      msg->data);
 
 	if (sum) {
 		dev_err(ec->dev,
@@ -121,8 +119,7 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 				struct cros_ec_command *msg)
 {
 	struct ec_lpc_host_args args;
-	int csum;
-	int i;
+	u8 sum;
 	int ret = 0;
 
 	if (msg->outsize > EC_PROTO2_MAX_PARAM_SIZE ||
@@ -139,24 +136,20 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	args.data_size = msg->outsize;
 
 	/* Initialize checksum */
-	csum = msg->command + args.flags +
-		args.command_version + args.data_size;
+	sum = msg->command + args.flags + args.command_version + args.data_size;
 
 	/* Copy data and update checksum */
-	for (i = 0; i < msg->outsize; i++) {
-		outb(msg->data[i], EC_LPC_ADDR_HOST_PARAM + i);
-		csum += msg->data[i];
-	}
+	sum += cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PARAM, msg->outsize,
+				       msg->data);
 
 	/* Finalize checksum and write args */
-	args.checksum = csum & 0xFF;
-	outb(args.flags, EC_LPC_ADDR_HOST_ARGS);
-	outb(args.command_version, EC_LPC_ADDR_HOST_ARGS + 1);
-	outb(args.data_size, EC_LPC_ADDR_HOST_ARGS + 2);
-	outb(args.checksum, EC_LPC_ADDR_HOST_ARGS + 3);
+	args.checksum = sum;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_ARGS, sizeof(args),
+				(u8 *)&args);
 
 	/* Here we go */
-	outb(msg->command, EC_LPC_ADDR_HOST_CMD);
+	sum = msg->command;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
 
 	if (ec_response_timed_out()) {
 		dev_warn(ec->dev, "EC responsed timed out\n");
@@ -165,16 +158,14 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Check result */
-	msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+	msg->result = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_DATA, 1, &sum);
 	ret = cros_ec_check_result(ec, msg);
 	if (ret)
 		goto done;
 
 	/* Read back args */
-	args.flags = inb(EC_LPC_ADDR_HOST_ARGS);
-	args.command_version = inb(EC_LPC_ADDR_HOST_ARGS + 1);
-	args.data_size = inb(EC_LPC_ADDR_HOST_ARGS + 2);
-	args.checksum = inb(EC_LPC_ADDR_HOST_ARGS + 3);
+	cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_ARGS, sizeof(args),
+			       (u8 *)&args);
 
 	if (args.data_size > msg->insize) {
 		dev_err(ec->dev,
@@ -185,20 +176,17 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Start calculating response checksum */
-	csum = msg->command + args.flags +
-		args.command_version + args.data_size;
+	sum = msg->command + args.flags + args.command_version + args.data_size;
 
 	/* Read response and update checksum */
-	for (i = 0; i < args.data_size; i++) {
-		msg->data[i] = inb(EC_LPC_ADDR_HOST_PARAM + i);
-		csum += msg->data[i];
-	}
+	sum += cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PARAM, args.data_size,
+				      msg->data);
 
 	/* Verify checksum */
-	if (args.checksum != (csum & 0xFF)) {
+	if (args.checksum != sum) {
 		dev_err(ec->dev,
 			"bad packet checksum, expected %02x, got %02x\n",
-			args.checksum, csum & 0xFF);
+			args.checksum, sum);
 		ret = -EBADMSG;
 		goto done;
 	}
@@ -222,14 +210,13 @@ static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
 
 	/* fixed length */
 	if (bytes) {
-		for (; cnt < bytes; i++, s++, cnt++)
-			*s = inb(EC_LPC_ADDR_MEMMAP + i);
-		return cnt;
+		cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + offset, bytes, s);
+		return bytes;
 	}
 
 	/* string */
 	for (; i < EC_MEMMAP_SIZE; i++, s++) {
-		*s = inb(EC_LPC_ADDR_MEMMAP + i);
+		cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + i, 1, s);
 		cnt++;
 		if (!*s)
 			break;
@@ -242,6 +229,7 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct cros_ec_device *ec_dev;
+	u8 buf[2];
 	int ret;
 
 	if (!devm_request_region(dev, EC_LPC_ADDR_MEMMAP, EC_MEMMAP_SIZE,
@@ -250,8 +238,8 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	if ((inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID) != 'E') ||
-	    (inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID + 1) != 'C')) {
+	cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID, 2, buf);
+	if (buf[0] != 'E' || buf[1] != 'C') {
 		dev_err(dev, "EC ID not detected\n");
 		return -ENODEV;
 	}
diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c
new file mode 100644
index 000000000000..03c97813171e
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.c
@@ -0,0 +1,64 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/io.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+
+static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	int i;
+	int sum = 0;
+
+	for (i = 0; i < length; ++i) {
+		dest[i] = inb(offset + i);
+		sum += dest[i];
+	}
+
+	/* Return checksum of all bytes read */
+	return sum;
+}
+
+static u8 lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	int i;
+	int sum = 0;
+
+	for (i = 0; i < length; ++i) {
+		outb(msg[i], offset + i);
+		sum += msg[i];
+	}
+
+	/* Return checksum of all bytes written */
+	return sum;
+}
+
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	return lpc_read_bytes(offset, length, dest);
+}
+
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	return lpc_write_bytes(offset, length, msg);
+}
diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
new file mode 100644
index 000000000000..4089bd5c8313
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_reg.h
@@ -0,0 +1,47 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_REG_H
+#define __LINUX_MFD_CROS_EC_REG_H
+
+/**
+ * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes read.
+ *
+ * @offset: Base read address
+ * @length: Number of bytes to read
+ * @dest: Destination buffer
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
+
+/**
+ * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes written.
+ *
+ * @offset: Base write address
+ * @length: Number of bytes to write
+ * @msg: Write data buffer
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
+
+#endif /* __LINUX_MFD_CROS_EC_REG_H */
-- 
cgit v1.2.3


From 8d4a3dc423a2695be51ac864eefb8ba7688b1240 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: platform/chrome: cros_ec_lpc: Add support for mec1322 EC

This adds support for the ChromeOS LPC Microchip Embedded Controller
(mec1322) variant.

mec1322 accesses I/O region [800h, 9ffh] through embedded memory
interface (EMI) rather than LPC.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Kconfig           |  12 +++
 drivers/platform/chrome/Makefile          |   1 +
 drivers/platform/chrome/cros_ec_lpc.c     |   5 ++
 drivers/platform/chrome/cros_ec_lpc_mec.c | 140 ++++++++++++++++++++++++++++++
 drivers/platform/chrome/cros_ec_lpc_reg.c |  69 +++++++++++++++
 include/linux/mfd/cros_ec_lpc_mec.h       |  90 +++++++++++++++++++
 include/linux/mfd/cros_ec_lpc_reg.h       |  14 +++
 7 files changed, 331 insertions(+)
 create mode 100644 drivers/platform/chrome/cros_ec_lpc_mec.c
 create mode 100644 include/linux/mfd/cros_ec_lpc_mec.h

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 76bdae1a93bb..6d80fb5076b6 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -59,6 +59,18 @@ config CROS_EC_LPC
           To compile this driver as a module, choose M here: the
           module will be called cros_ec_lpc.
 
+config CROS_EC_LPC_MEC
+	bool "ChromeOS Embedded Controller LPC Microchip EC (MEC) variant"
+	depends on CROS_EC_LPC
+	default n
+	help
+	  If you say Y here, a variant LPC protocol for the Microchip EC
+	  will be used. Note that this variant is not backward compatible
+	  with non-Microchip ECs.
+
+	  If you have a ChromeOS Embedded Controller Microchip EC variant
+	  choose Y here.
+
 config CROS_EC_PROTO
         bool
         help
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 61182fd8f597..66c345ca35fc 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -6,6 +6,7 @@ cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
 					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
 cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_reg.o
+cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)	+= cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 6a782a695eb6..bc2dc6210d7b 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -346,10 +346,13 @@ static int __init cros_ec_lpc_init(void)
 		return -ENODEV;
 	}
 
+	cros_ec_lpc_reg_init();
+
 	/* Register the driver */
 	ret = platform_driver_register(&cros_ec_lpc_driver);
 	if (ret) {
 		pr_err(DRV_NAME ": can't register driver: %d\n", ret);
+		cros_ec_lpc_reg_destroy();
 		return ret;
 	}
 
@@ -358,6 +361,7 @@ static int __init cros_ec_lpc_init(void)
 	if (ret) {
 		pr_err(DRV_NAME ": can't register device: %d\n", ret);
 		platform_driver_unregister(&cros_ec_lpc_driver);
+		cros_ec_lpc_reg_destroy();
 		return ret;
 	}
 
@@ -368,6 +372,7 @@ static void __exit cros_ec_lpc_exit(void)
 {
 	platform_device_unregister(&cros_ec_lpc_device);
 	platform_driver_unregister(&cros_ec_lpc_driver);
+	cros_ec_lpc_reg_destroy();
 }
 
 module_init(cros_ec_lpc_init);
diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c b/drivers/platform/chrome/cros_ec_lpc_mec.c
new file mode 100644
index 000000000000..2eda2c2fc210
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.c
@@ -0,0 +1,140 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_mec.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/*
+ * This mutex must be held while accessing the EMI unit. We can't rely on the
+ * EC mutex because memmap data may be accessed without it being held.
+ */
+static struct mutex io_mutex;
+
+/*
+ * cros_ec_lpc_mec_emi_write_address
+ *
+ * Initialize EMI read / write at a given address.
+ *
+ * @addr:        Starting read / write address
+ * @access_type: Type of access, typically 32-bit auto-increment
+ */
+static void cros_ec_lpc_mec_emi_write_address(u16 addr,
+			enum cros_ec_lpc_mec_emi_access_mode access_type)
+{
+	/* Address relative to start of EMI range */
+	addr -= MEC_EMI_RANGE_START;
+	outb((addr & 0xfc) | access_type, MEC_EMI_EC_ADDRESS_B0);
+	outb((addr >> 8) & 0x7f, MEC_EMI_EC_ADDRESS_B1);
+}
+
+/*
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length,
+			    u8 *buf)
+{
+	int i = 0;
+	int io_addr;
+	u8 sum = 0;
+	enum cros_ec_lpc_mec_emi_access_mode access, new_access;
+
+	/*
+	 * Long access cannot be used on misaligned data since reading B0 loads
+	 * the data register and writing B3 flushes.
+	 */
+	if (offset & 0x3 || length < 4)
+		access = ACCESS_TYPE_BYTE;
+	else
+		access = ACCESS_TYPE_LONG_AUTO_INCREMENT;
+
+	mutex_lock(&io_mutex);
+
+	/* Initialize I/O at desired address */
+	cros_ec_lpc_mec_emi_write_address(offset, access);
+
+	/* Skip bytes in case of misaligned offset */
+	io_addr = MEC_EMI_EC_DATA_B0 + (offset & 0x3);
+	while (i < length) {
+		while (io_addr <= MEC_EMI_EC_DATA_B3) {
+			if (io_type == MEC_IO_READ)
+				buf[i] = inb(io_addr++);
+			else
+				outb(buf[i], io_addr++);
+
+			sum += buf[i++];
+			offset++;
+
+			/* Extra bounds check in case of misaligned length */
+			if (i == length)
+				goto done;
+		}
+
+		/*
+		 * Use long auto-increment access except for misaligned write,
+		 * since writing B3 triggers the flush.
+		 */
+		if (length - i < 4 && io_type == MEC_IO_WRITE)
+			new_access = ACCESS_TYPE_BYTE;
+		else
+			new_access = ACCESS_TYPE_LONG_AUTO_INCREMENT;
+
+		if (new_access != access ||
+		    access != ACCESS_TYPE_LONG_AUTO_INCREMENT) {
+			access = new_access;
+			cros_ec_lpc_mec_emi_write_address(offset, access);
+		}
+
+		/* Access [B0, B3] on each loop pass */
+		io_addr = MEC_EMI_EC_DATA_B0;
+	}
+
+done:
+	mutex_unlock(&io_mutex);
+
+	return sum;
+}
+EXPORT_SYMBOL(cros_ec_lpc_io_bytes_mec);
+
+void cros_ec_lpc_mec_init(void)
+{
+	mutex_init(&io_mutex);
+}
+EXPORT_SYMBOL(cros_ec_lpc_mec_init);
+
+void cros_ec_lpc_mec_destroy(void)
+{
+	mutex_destroy(&io_mutex);
+}
+EXPORT_SYMBOL(cros_ec_lpc_mec_destroy);
diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c
index 03c97813171e..dcc7a3e30604 100644
--- a/drivers/platform/chrome/cros_ec_lpc_reg.c
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.c
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_mec.h>
 
 static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
 {
@@ -53,12 +54,80 @@ static u8 lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
 	return sum;
 }
 
+#ifdef CONFIG_CROS_EC_LPC_MEC
+
 u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
 {
+	if (length == 0)
+		return 0;
+
+	/* Access desired range through EMI interface */
+	if (offset >= MEC_EMI_RANGE_START && offset <= MEC_EMI_RANGE_END) {
+		/* Ensure we don't straddle EMI region */
+		if (WARN_ON(offset + length - 1 > MEC_EMI_RANGE_END))
+			return 0;
+
+		return cros_ec_lpc_io_bytes_mec(MEC_IO_READ, offset, length,
+						dest);
+	}
+
+	if (WARN_ON(offset + length > MEC_EMI_RANGE_START &&
+		    offset < MEC_EMI_RANGE_START))
+		return 0;
+
 	return lpc_read_bytes(offset, length, dest);
 }
 
 u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
 {
+	if (length == 0)
+		return 0;
+
+	/* Access desired range through EMI interface */
+	if (offset >= MEC_EMI_RANGE_START && offset <= MEC_EMI_RANGE_END) {
+		/* Ensure we don't straddle EMI region */
+		if (WARN_ON(offset + length - 1 > MEC_EMI_RANGE_END))
+			return 0;
+
+		return cros_ec_lpc_io_bytes_mec(MEC_IO_WRITE, offset, length,
+						msg);
+	}
+
+	if (WARN_ON(offset + length > MEC_EMI_RANGE_START &&
+		    offset < MEC_EMI_RANGE_START))
+		return 0;
+
 	return lpc_write_bytes(offset, length, msg);
 }
+
+void cros_ec_lpc_reg_init(void)
+{
+	cros_ec_lpc_mec_init();
+}
+
+void cros_ec_lpc_reg_destroy(void)
+{
+	cros_ec_lpc_mec_destroy();
+}
+
+#else /* CONFIG_CROS_EC_LPC_MEC */
+
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	return lpc_read_bytes(offset, length, dest);
+}
+
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	return lpc_write_bytes(offset, length, msg);
+}
+
+void cros_ec_lpc_reg_init(void)
+{
+}
+
+void cros_ec_lpc_reg_destroy(void)
+{
+}
+
+#endif /* CONFIG_CROS_EC_LPC_MEC */
diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h
new file mode 100644
index 000000000000..176496ddc66c
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_mec.h
@@ -0,0 +1,90 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_MEC_H
+#define __LINUX_MFD_CROS_EC_MEC_H
+
+#include <linux/mfd/cros_ec_commands.h>
+
+enum cros_ec_lpc_mec_emi_access_mode {
+	/* 8-bit access */
+	ACCESS_TYPE_BYTE = 0x0,
+	/* 16-bit access */
+	ACCESS_TYPE_WORD = 0x1,
+	/* 32-bit access */
+	ACCESS_TYPE_LONG = 0x2,
+	/*
+	 * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the
+	 * EC data register to be incremented.
+	 */
+	ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3,
+};
+
+enum cros_ec_lpc_mec_io_type {
+	MEC_IO_READ,
+	MEC_IO_WRITE,
+};
+
+/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
+#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0
+#define MEC_EMI_RANGE_END   (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE)
+
+/* EMI registers are relative to base */
+#define MEC_EMI_BASE 0x800
+#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0)
+#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1)
+#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2)
+#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3)
+#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4)
+#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5)
+#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6)
+#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7)
+
+/*
+ * cros_ec_lpc_mec_init
+ *
+ * Initialize MEC I/O.
+ */
+void cros_ec_lpc_mec_init(void);
+
+/*
+ * cros_ec_lpc_mec_destroy
+ *
+ * Cleanup MEC I/O.
+ */
+void cros_ec_lpc_mec_destroy(void);
+
+/**
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length, u8 *buf);
+
+#endif /* __LINUX_MFD_CROS_EC_MEC_H */
diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
index 4089bd5c8313..5560bef63c2b 100644
--- a/include/linux/mfd/cros_ec_lpc_reg.h
+++ b/include/linux/mfd/cros_ec_lpc_reg.h
@@ -44,4 +44,18 @@ u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
  */
 u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
 
+/**
+ * cros_ec_lpc_reg_init
+ *
+ * Initialize register I/O.
+ */
+void cros_ec_lpc_reg_init(void);
+
+/**
+ * cros_ec_lpc_reg_destroy
+ *
+ * Cleanup reg I/O.
+ */
+void cros_ec_lpc_reg_destroy(void);
+
 #endif /* __LINUX_MFD_CROS_EC_REG_H */
-- 
cgit v1.2.3


From be3ebebf4377fe924f0419f78fc82cf01a31e692 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: platform/chrome: cros_ec_lightbar - Add lightbar program feature to
 sysfs

Add a program feature so we can upload and run programs for lightbar
sequences. We should be able to use this to shift sequences out of the
EC and save space there.

  $ cat <suitable program bin> > /sys/devices/.../cros_ec/program
  $ echo program > /sys/devices/.../cros_ec/sequence

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_lightbar.c | 69 +++++++++++++++++++++++++++++-
 include/linux/mfd/cros_ec_commands.h       | 12 +++++-
 2 files changed, 79 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 8df3d447cacf..26675059707e 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -295,7 +295,8 @@ exit:
 
 static char const *seqname[] = {
 	"ERROR", "S5", "S3", "S0", "S5S3", "S3S0",
-	"S0S3", "S3S5", "STOP", "RUN", "PULSE", "TEST", "KONAMI",
+	"S0S3", "S3S5", "STOP", "RUN", "KONAMI",
+	"TAP", "PROGRAM",
 };
 
 static ssize_t sequence_show(struct device *dev,
@@ -390,6 +391,69 @@ exit:
 	return ret;
 }
 
+static ssize_t program_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int extra_bytes, max_size, ret;
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
+					      class_dev);
+
+	/*
+	 * We might need to reject the program for size reasons. The EC
+	 * enforces a maximum program size, but we also don't want to try
+	 * and send a program that is too big for the protocol. In order
+	 * to ensure the latter, we also need to ensure we have extra bytes
+	 * to represent the rest of the packet.
+	 */
+	extra_bytes = sizeof(*param) - sizeof(param->set_program.data);
+	max_size = min(EC_LB_PROG_LEN, ec->ec_dev->max_request - extra_bytes);
+	if (count > max_size) {
+		dev_err(dev, "Program is %u bytes, too long to send (max: %u)",
+			(unsigned int)count, max_size);
+
+		return -EINVAL;
+	}
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = lb_throttle();
+	if (ret)
+		goto exit;
+
+	dev_info(dev, "Copying %zu byte program to EC", count);
+
+	param = (struct ec_params_lightbar *)msg->data;
+	param->cmd = LIGHTBAR_CMD_SET_PROGRAM;
+
+	param->set_program.size = count;
+	memcpy(param->set_program.data, buf, count);
+
+	/*
+	 * We need to set the message size manually or else it will use
+	 * EC_LB_PROG_LEN. This might be too long, and the program
+	 * is unlikely to use all of the space.
+	 */
+	msg->outsize = count + extra_bytes;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto exit;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = count;
+exit:
+	kfree(msg);
+
+	return ret;
+}
+
 /* Module initialization */
 
 static DEVICE_ATTR_RW(interval_msec);
@@ -397,12 +461,15 @@ static DEVICE_ATTR_RO(version);
 static DEVICE_ATTR_WO(brightness);
 static DEVICE_ATTR_WO(led_rgb);
 static DEVICE_ATTR_RW(sequence);
+static DEVICE_ATTR_WO(program);
+
 static struct attribute *__lb_cmds_attrs[] = {
 	&dev_attr_interval_msec.attr,
 	&dev_attr_version.attr,
 	&dev_attr_brightness.attr,
 	&dev_attr_led_rgb.attr,
 	&dev_attr_sequence.attr,
+	&dev_attr_program.attr,
 	NULL,
 };
 
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 1b19e424e1cf..dbea5802e83b 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1162,6 +1162,13 @@ struct lightbar_params_v1 {
 	struct rgb_s color[8];			/* 0-3 are Google colors */
 } __packed;
 
+/* Lightbar program */
+#define EC_LB_PROG_LEN 192
+struct lightbar_program {
+	uint8_t size;
+	uint8_t data[EC_LB_PROG_LEN];
+};
+
 struct ec_params_lightbar {
 	uint8_t cmd;		      /* Command (see enum lightbar_command) */
 	union {
@@ -1188,6 +1195,7 @@ struct ec_params_lightbar {
 
 		struct lightbar_params_v0 set_params_v0;
 		struct lightbar_params_v1 set_params_v1;
+		struct lightbar_program set_program;
 	};
 } __packed;
 
@@ -1220,7 +1228,8 @@ struct ec_response_lightbar {
 		struct {
 			/* no return params */
 		} off, on, init, set_brightness, seq, reg, set_rgb,
-			demo, set_params_v0, set_params_v1;
+			demo, set_params_v0, set_params_v1,
+			set_program;
 	};
 } __packed;
 
@@ -1244,6 +1253,7 @@ enum lightbar_command {
 	LIGHTBAR_CMD_GET_DEMO = 15,
 	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
 	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
+	LIGHTBAR_CMD_SET_PROGRAM = 18,
 	LIGHTBAR_NUM_CMDS
 };
 
-- 
cgit v1.2.3


From 405c84308c4335ee7cb58b9304b77b85e61f7129 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: platform/chrome: cros_ec_lightbar - Control of suspend/resume
 lightbar sequence

Don't let EC control suspend/resume sequence. If the EC controls the
lightbar and sets the sequence when it notices the chipset transitioning
between states, we can't make exceptions for cases where we don't want
to activate the lightbar. Instead, let's move the suspend/resume
notifications into the kernel so we can selectively play the sequences.

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_dev.c      | 37 +++++++++++++
 drivers/platform/chrome/cros_ec_dev.h      |  6 +++
 drivers/platform/chrome/cros_ec_lightbar.c | 85 ++++++++++++++++++++++++++++--
 include/linux/mfd/cros_ec_commands.h       | 11 +++-
 4 files changed, 134 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 20ce1c23fb5c..7c2622319211 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -21,6 +21,7 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
@@ -435,6 +436,10 @@ static int ec_device_probe(struct platform_device *pdev)
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
 		cros_ec_sensors_register(ec);
 
+	/* Take control of the lightbar from the EC. */
+	if (ec_has_lightbar(ec))
+		lb_manual_suspend_ctrl(ec, 1);
+
 	return 0;
 
 failed:
@@ -446,6 +451,10 @@ static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
 
+	/* Let the EC take over the lightbar again. */
+	if (ec_has_lightbar(ec))
+		lb_manual_suspend_ctrl(ec, 0);
+
 	cros_ec_debugfs_remove(ec);
 
 	cdev_del(&ec->cdev);
@@ -459,9 +468,37 @@ static const struct platform_device_id cros_ec_id[] = {
 };
 MODULE_DEVICE_TABLE(platform, cros_ec_id);
 
+static int ec_device_suspend(struct device *dev)
+{
+	struct cros_ec_dev *ec = dev_get_drvdata(dev);
+
+	if (ec_has_lightbar(ec))
+		lb_suspend(ec);
+
+	return 0;
+}
+
+static int ec_device_resume(struct device *dev)
+{
+	struct cros_ec_dev *ec = dev_get_drvdata(dev);
+
+	if (ec_has_lightbar(ec))
+		lb_resume(ec);
+
+	return 0;
+}
+
+static const struct dev_pm_ops cros_ec_dev_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
+	.suspend = ec_device_suspend,
+	.resume = ec_device_resume,
+#endif
+};
+
 static struct platform_driver cros_ec_dev_driver = {
 	.driver = {
 		.name = "cros-ec-ctl",
+		.pm = &cros_ec_dev_pm_ops,
 	},
 	.probe = ec_device_probe,
 	.remove = ec_device_remove,
diff --git a/drivers/platform/chrome/cros_ec_dev.h b/drivers/platform/chrome/cros_ec_dev.h
index bfd2c84c3571..45e9453608c5 100644
--- a/drivers/platform/chrome/cros_ec_dev.h
+++ b/drivers/platform/chrome/cros_ec_dev.h
@@ -43,4 +43,10 @@ struct cros_ec_readmem {
 #define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
 #define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
 
+/* Lightbar utilities */
+extern bool ec_has_lightbar(struct cros_ec_dev *ec);
+extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable);
+extern int lb_suspend(struct cros_ec_dev *ec);
+extern int lb_resume(struct cros_ec_dev *ec);
+
 #endif /* _CROS_EC_DEV_H_ */
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 26675059707e..4df379dc4bb9 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -341,6 +341,80 @@ exit:
 	return ret;
 }
 
+static int lb_send_empty_cmd(struct cros_ec_dev *ec, uint8_t cmd)
+{
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	param = (struct ec_params_lightbar *)msg->data;
+	param->cmd = cmd;
+
+	ret = lb_throttle();
+	if (ret)
+		goto error;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto error;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto error;
+	}
+	ret = 0;
+error:
+	kfree(msg);
+
+	return ret;
+}
+
+int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
+{
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	param = (struct ec_params_lightbar *)msg->data;
+
+	param->cmd = LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL;
+	param->manual_suspend_ctrl.enable = enable;
+
+	ret = lb_throttle();
+	if (ret)
+		goto error;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto error;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto error;
+	}
+	ret = 0;
+error:
+	kfree(msg);
+
+	return ret;
+}
+
+int lb_suspend(struct cros_ec_dev *ec)
+{
+	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND);
+}
+
+int lb_resume(struct cros_ec_dev *ec)
+{
+	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME);
+}
+
 static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -473,6 +547,11 @@ static struct attribute *__lb_cmds_attrs[] = {
 	NULL,
 };
 
+bool ec_has_lightbar(struct cros_ec_dev *ec)
+{
+	return !!get_lightbar_version(ec, NULL, NULL);
+}
+
 static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 						  struct attribute *a, int n)
 {
@@ -489,10 +568,10 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 		return 0;
 
 	/* Only instantiate this stuff if the EC has a lightbar */
-	if (get_lightbar_version(ec, NULL, NULL))
+	if (ec_has_lightbar(ec))
 		return a->mode;
-	else
-		return 0;
+
+	return 0;
 }
 
 struct attribute_group cros_ec_lightbar_attr_group = {
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index dbea5802e83b..190c8f4afa02 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1175,7 +1175,7 @@ struct ec_params_lightbar {
 		struct {
 			/* no args */
 		} dump, off, on, init, get_seq, get_params_v0, get_params_v1,
-			version, get_brightness, get_demo;
+			version, get_brightness, get_demo, suspend, resume;
 
 		struct {
 			uint8_t num;
@@ -1193,6 +1193,10 @@ struct ec_params_lightbar {
 			uint8_t led;
 		} get_rgb;
 
+		struct {
+			uint8_t enable;
+		} manual_suspend_ctrl;
+
 		struct lightbar_params_v0 set_params_v0;
 		struct lightbar_params_v1 set_params_v1;
 		struct lightbar_program set_program;
@@ -1229,7 +1233,7 @@ struct ec_response_lightbar {
 			/* no return params */
 		} off, on, init, set_brightness, seq, reg, set_rgb,
 			demo, set_params_v0, set_params_v1,
-			set_program;
+			set_program, manual_suspend_ctrl, suspend, resume;
 	};
 } __packed;
 
@@ -1254,6 +1258,9 @@ enum lightbar_command {
 	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
 	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
 	LIGHTBAR_CMD_SET_PROGRAM = 18,
+	LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19,
+	LIGHTBAR_CMD_SUSPEND = 20,
+	LIGHTBAR_CMD_RESUME = 21,
 	LIGHTBAR_NUM_CMDS
 };
 
-- 
cgit v1.2.3


From 3b7b314053d021601940c50b07f5f1423ae67e21 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 23 Jun 2017 15:08:52 -0700
Subject: slub: make sysfs file removal asynchronous

Commit bf5eb3de3847 ("slub: separate out sysfs_slab_release() from
sysfs_slab_remove()") made slub sysfs file removals synchronous to
kmem_cache shutdown.

Unfortunately, this created a possible ABBA deadlock between slab_mutex
and sysfs draining mechanism triggering the following lockdep warning.

  ======================================================
  [ INFO: possible circular locking dependency detected ]
  4.10.0-test+ #48 Not tainted
  -------------------------------------------------------
  rmmod/1211 is trying to acquire lock:
   (s_active#120){++++.+}, at: [<ffffffff81308073>] kernfs_remove+0x23/0x40

  but task is already holding lock:
   (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (slab_mutex){+.+.+.}:
	 lock_acquire+0xf6/0x1f0
	 __mutex_lock+0x75/0x950
	 mutex_lock_nested+0x1b/0x20
	 slab_attr_store+0x75/0xd0
	 sysfs_kf_write+0x45/0x60
	 kernfs_fop_write+0x13c/0x1c0
	 __vfs_write+0x28/0x120
	 vfs_write+0xc8/0x1e0
	 SyS_write+0x49/0xa0
	 entry_SYSCALL_64_fastpath+0x1f/0xc2

  -> #0 (s_active#120){++++.+}:
	 __lock_acquire+0x10ed/0x1260
	 lock_acquire+0xf6/0x1f0
	 __kernfs_remove+0x254/0x320
	 kernfs_remove+0x23/0x40
	 sysfs_remove_dir+0x51/0x80
	 kobject_del+0x18/0x50
	 __kmem_cache_shutdown+0x3e6/0x460
	 kmem_cache_destroy+0x1fb/0x2d0
	 kvm_exit+0x2d/0x80 [kvm]
	 vmx_exit+0x19/0xa1b [kvm_intel]
	 SyS_delete_module+0x198/0x1f0
	 entry_SYSCALL_64_fastpath+0x1f/0xc2

  other info that might help us debug this:

   Possible unsafe locking scenario:

	 CPU0                    CPU1
	 ----                    ----
    lock(slab_mutex);
				 lock(s_active#120);
				 lock(slab_mutex);
    lock(s_active#120);

   *** DEADLOCK ***

  2 locks held by rmmod/1211:
   #0:  (cpu_hotplug.dep_map){++++++}, at: [<ffffffff810a7877>] get_online_cpus+0x37/0x80
   #1:  (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0

  stack backtrace:
  CPU: 3 PID: 1211 Comm: rmmod Not tainted 4.10.0-test+ #48
  Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
  Call Trace:
   print_circular_bug+0x1be/0x210
   __lock_acquire+0x10ed/0x1260
   lock_acquire+0xf6/0x1f0
   __kernfs_remove+0x254/0x320
   kernfs_remove+0x23/0x40
   sysfs_remove_dir+0x51/0x80
   kobject_del+0x18/0x50
   __kmem_cache_shutdown+0x3e6/0x460
   kmem_cache_destroy+0x1fb/0x2d0
   kvm_exit+0x2d/0x80 [kvm]
   vmx_exit+0x19/0xa1b [kvm_intel]
   SyS_delete_module+0x198/0x1f0
   ? SyS_delete_module+0x5/0x1f0
   entry_SYSCALL_64_fastpath+0x1f/0xc2

It'd be the cleanest to deal with the issue by removing sysfs files
without holding slab_mutex before the rest of shutdown; however, given
the current code structure, it is pretty difficult to do so.

This patch punts sysfs file removal to a work item.  Before commit
bf5eb3de3847, the removal was punted to a RCU delayed work item which is
executed after release.  Now, we're punting to a different work item on
shutdown which still maintains the goal removing the sysfs files earlier
when destroying kmem_caches.

Link: http://lkml.kernel.org/r/20170620204512.GI21326@htj.duckdns.org
Fixes: bf5eb3de3847 ("slub: separate out sysfs_slab_release() from sysfs_slab_remove()")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h |  1 +
 mm/slub.c                | 40 ++++++++++++++++++++++++++--------------
 2 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..93315d6b21a8 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
 	int red_left_pad;	/* Left redzone padding size */
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
+	struct work_struct kobj_remove_work;
 #endif
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
diff --git a/mm/slub.c b/mm/slub.c
index 7449593fca72..8addc535bcdc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
 	return name;
 }
 
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+	struct kmem_cache *s =
+		container_of(work, struct kmem_cache, kobj_remove_work);
+
+	if (!s->kobj.state_in_sysfs)
+		/*
+		 * For a memcg cache, this may be called during
+		 * deactivation and again on shutdown.  Remove only once.
+		 * A cache is never shut down before deactivation is
+		 * complete, so no need to worry about synchronization.
+		 */
+		return;
+
+#ifdef CONFIG_MEMCG
+	kset_unregister(s->memcg_kset);
+#endif
+	kobject_uevent(&s->kobj, KOBJ_REMOVE);
+	kobject_del(&s->kobj);
+	kobject_put(&s->kobj);
+}
+
 static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
@@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	struct kset *kset = cache_kset(s);
 	int unmergeable = slab_unmergeable(s);
 
+	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
 	if (!kset) {
 		kobject_init(&s->kobj, &slab_ktype);
 		return 0;
@@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
 		 */
 		return;
 
-	if (!s->kobj.state_in_sysfs)
-		/*
-		 * For a memcg cache, this may be called during
-		 * deactivation and again on shutdown.  Remove only once.
-		 * A cache is never shut down before deactivation is
-		 * complete, so no need to worry about synchronization.
-		 */
-		return;
-
-#ifdef CONFIG_MEMCG
-	kset_unregister(s->memcg_kset);
-#endif
-	kobject_uevent(&s->kobj, KOBJ_REMOVE);
-	kobject_del(&s->kobj);
+	kobject_get(&s->kobj);
+	schedule_work(&s->kobj_remove_work);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
-- 
cgit v1.2.3


From 829a4e8c0e9aab17bcfe2ddb070388b8ada26292 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 21 Jun 2017 10:29:13 +0530
Subject: PM / OPP: Add dev_pm_opp_{set|put}_clkname()

In order to support OPP switching, OPP layer needs to get pointer to the
clock for the device. Simple cases work fine without using the routines
added by this patch (i.e.  by passing connection-id as NULL), but for a
device with multiple clocks available, the OPP core needs to know the
exact name of the clk to use.

Add a new set of APIs to get that done.

Tested-by: Rajendra Nayak <rnayak@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/opp/core.c | 67 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_opp.h        |  9 ++++++
 2 files changed, 76 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 5ee7aadf0abf..a8cc14fd8ae4 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1362,6 +1362,73 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators);
 
+/**
+ * dev_pm_opp_set_clkname() - Set clk name for the device
+ * @dev: Device for which clk name is being set.
+ * @name: Clk name.
+ *
+ * In order to support OPP switching, OPP layer needs to get pointer to the
+ * clock for the device. Simple cases work fine without using this routine (i.e.
+ * by passing connection-id as NULL), but for a device with multiple clocks
+ * available, the OPP core needs to know the exact name of the clk to use.
+ *
+ * This must be called before any OPPs are initialized for the device.
+ */
+struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name)
+{
+	struct opp_table *opp_table;
+	int ret;
+
+	opp_table = dev_pm_opp_get_opp_table(dev);
+	if (!opp_table)
+		return ERR_PTR(-ENOMEM);
+
+	/* This should be called before OPPs are initialized */
+	if (WARN_ON(!list_empty(&opp_table->opp_list))) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	/* Already have default clk set, free it */
+	if (!IS_ERR(opp_table->clk))
+		clk_put(opp_table->clk);
+
+	/* Find clk for the device */
+	opp_table->clk = clk_get(dev, name);
+	if (IS_ERR(opp_table->clk)) {
+		ret = PTR_ERR(opp_table->clk);
+		if (ret != -EPROBE_DEFER) {
+			dev_err(dev, "%s: Couldn't find clock: %d\n", __func__,
+				ret);
+		}
+		goto err;
+	}
+
+	return opp_table;
+
+err:
+	dev_pm_opp_put_opp_table(opp_table);
+
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_clkname);
+
+/**
+ * dev_pm_opp_put_clkname() - Releases resources blocked for clk.
+ * @opp_table: OPP table returned from dev_pm_opp_set_clkname().
+ */
+void dev_pm_opp_put_clkname(struct opp_table *opp_table)
+{
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
+
+	clk_put(opp_table->clk);
+	opp_table->clk = ERR_PTR(-EINVAL);
+
+	dev_pm_opp_put_opp_table(opp_table);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_clkname);
+
 /**
  * dev_pm_opp_register_set_opp_helper() - Register custom set OPP helper
  * @dev: Device for which the helper is getting registered.
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index a6685b3dde26..51ec727b4824 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -121,6 +121,8 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 void dev_pm_opp_put_prop_name(struct opp_table *opp_table);
 struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count);
 void dev_pm_opp_put_regulators(struct opp_table *opp_table);
+struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name);
+void dev_pm_opp_put_clkname(struct opp_table *opp_table);
 struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
 void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
@@ -257,6 +259,13 @@ static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, co
 
 static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {}
 
+static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {}
+
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
 	return -ENOTSUPP;
-- 
cgit v1.2.3


From 27e47a6342e21b005a15a1f0afea0b6f179e0a71 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 May 2017 18:14:06 -0700
Subject: fscrypt: inline fscrypt_free_filename()

fscrypt_free_filename() only needs to do a kfree() of crypto_buf.name,
which works well as an inline function.  We can skip setting the various
pointers to NULL, since no user cares about it (the name is always freed
just before it goes out of scope).

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: David Gstir <david@sigma-star.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/fname.c            | 9 ---------
 include/linux/fscrypt_supp.h | 7 ++++++-
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d1bb02b1ee58..ad9f814fdead 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -453,12 +453,3 @@ errout:
 	return ret;
 }
 EXPORT_SYMBOL(fscrypt_setup_filename);
-
-void fscrypt_free_filename(struct fscrypt_name *fname)
-{
-	kfree(fname->crypto_buf.name);
-	fname->crypto_buf.name = NULL;
-	fname->usr_fname = NULL;
-	fname->disk_name.name = NULL;
-}
-EXPORT_SYMBOL(fscrypt_free_filename);
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index cd4e82c17304..32e2fcf13b01 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -47,7 +47,12 @@ extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
 /* fname.c */
 extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
 				int lookup, struct fscrypt_name *);
-extern void fscrypt_free_filename(struct fscrypt_name *);
+
+static inline void fscrypt_free_filename(struct fscrypt_name *fname)
+{
+	kfree(fname->crypto_buf.name);
+}
+
 extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32);
 extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
 				struct fscrypt_str *);
-- 
cgit v1.2.3


From b7e7cf7a66a27e62c5f873a0068cee34094bf5d7 Mon Sep 17 00:00:00 2001
From: Daniel Walter <dwalter@sigma-star.at>
Date: Mon, 19 Jun 2017 09:27:58 +0200
Subject: fscrypt: add support for AES-128-CBC

fscrypt provides facilities to use different encryption algorithms which
are selectable by userspace when setting the encryption policy. Currently,
only AES-256-XTS for file contents and AES-256-CBC-CTS for file names are
implemented. This is a clear case of kernel offers the mechanism and
userspace selects a policy. Similar to what dm-crypt and ecryptfs have.

This patch adds support for using AES-128-CBC for file contents and
AES-128-CBC-CTS for file name encryption. To mitigate watermarking
attacks, IVs are generated using the ESSIV algorithm. While AES-CBC is
actually slightly less secure than AES-XTS from a security point of view,
there is more widespread hardware support. Using AES-CBC gives us the
acceptable performance while still providing a moderate level of security
for persistent storage.

Especially low-powered embedded devices with crypto accelerators such as
CAAM or CESA often only support AES-CBC. Since using AES-CBC over AES-XTS
is basically thought of a last resort, we use AES-128-CBC over AES-256-CBC
since it has less encryption rounds and yields noticeable better
performance starting from a file size of just a few kB.

Signed-off-by: Daniel Walter <dwalter@sigma-star.at>
[david@sigma-star.at: addressed review comments]
Signed-off-by: David Gstir <david@sigma-star.at>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/Kconfig              |   1 +
 fs/crypto/crypto.c             |  23 ++++--
 fs/crypto/fscrypt_private.h    |   9 ++-
 fs/crypto/keyinfo.c            | 173 ++++++++++++++++++++++++++++++++---------
 fs/crypto/policy.c             |   8 +-
 include/linux/fscrypt_common.h |  16 ++--
 include/uapi/linux/fs.h        |   2 +
 7 files changed, 174 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index 08b46e6e3995..02b7d91c9231 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -7,6 +7,7 @@ config FS_ENCRYPTION
 	select CRYPTO_XTS
 	select CRYPTO_CTS
 	select CRYPTO_CTR
+	select CRYPTO_SHA256
 	select KEYS
 	help
 	  Enable encryption of files and directories.  This
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 6d6eca394d4d..c7835df7e7b8 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -26,6 +26,7 @@
 #include <linux/ratelimit.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
+#include <crypto/aes.h>
 #include "fscrypt_private.h"
 
 static unsigned int num_prealloc_crypto_pages = 32;
@@ -147,8 +148,8 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
 {
 	struct {
 		__le64 index;
-		u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)];
-	} xts_tweak;
+		u8 padding[FS_IV_SIZE - sizeof(__le64)];
+	} iv;
 	struct skcipher_request *req = NULL;
 	DECLARE_FS_COMPLETION_RESULT(ecr);
 	struct scatterlist dst, src;
@@ -158,6 +159,16 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
 
 	BUG_ON(len == 0);
 
+	BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE);
+	BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE);
+	iv.index = cpu_to_le64(lblk_num);
+	memset(iv.padding, 0, sizeof(iv.padding));
+
+	if (ci->ci_essiv_tfm != NULL) {
+		crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv,
+					  (u8 *)&iv);
+	}
+
 	req = skcipher_request_alloc(tfm, gfp_flags);
 	if (!req) {
 		printk_ratelimited(KERN_ERR
@@ -170,15 +181,11 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
 		req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 		page_crypt_complete, &ecr);
 
-	BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
-	xts_tweak.index = cpu_to_le64(lblk_num);
-	memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
-
 	sg_init_table(&dst, 1);
 	sg_set_page(&dst, dest_page, len, offs);
 	sg_init_table(&src, 1);
 	sg_set_page(&src, src_page, len, offs);
-	skcipher_request_set_crypt(req, &src, &dst, len, &xts_tweak);
+	skcipher_request_set_crypt(req, &src, &dst, len, &iv);
 	if (rw == FS_DECRYPT)
 		res = crypto_skcipher_decrypt(req);
 	else
@@ -477,6 +484,8 @@ static void __exit fscrypt_exit(void)
 		destroy_workqueue(fscrypt_read_workqueue);
 	kmem_cache_destroy(fscrypt_ctx_cachep);
 	kmem_cache_destroy(fscrypt_info_cachep);
+
+	fscrypt_essiv_cleanup();
 }
 module_exit(fscrypt_exit);
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 1e1f8a361b75..a1d5021c31ef 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -12,10 +12,13 @@
 #define _FSCRYPT_PRIVATE_H
 
 #include <linux/fscrypt_supp.h>
+#include <crypto/hash.h>
 
 /* Encryption parameters */
-#define FS_XTS_TWEAK_SIZE		16
+#define FS_IV_SIZE			16
 #define FS_AES_128_ECB_KEY_SIZE		16
+#define FS_AES_128_CBC_KEY_SIZE		16
+#define FS_AES_128_CTS_KEY_SIZE		16
 #define FS_AES_256_GCM_KEY_SIZE		32
 #define FS_AES_256_CBC_KEY_SIZE		32
 #define FS_AES_256_CTS_KEY_SIZE		32
@@ -54,6 +57,7 @@ struct fscrypt_info {
 	u8 ci_filename_mode;
 	u8 ci_flags;
 	struct crypto_skcipher *ci_ctfm;
+	struct crypto_cipher *ci_essiv_tfm;
 	u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
 };
 
@@ -87,4 +91,7 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
 extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
 					      gfp_t gfp_flags);
 
+/* keyinfo.c */
+extern void __exit fscrypt_essiv_cleanup(void);
+
 #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 179e578b875b..018c588c7ac3 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -10,8 +10,13 @@
 
 #include <keys/user-type.h>
 #include <linux/scatterlist.h>
+#include <linux/ratelimit.h>
+#include <crypto/aes.h>
+#include <crypto/sha.h>
 #include "fscrypt_private.h"
 
+static struct crypto_shash *essiv_hash_tfm;
+
 static void derive_crypt_complete(struct crypto_async_request *req, int rc)
 {
 	struct fscrypt_completion_result *ecr = req->data;
@@ -27,13 +32,13 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
  * derive_key_aes() - Derive a key using AES-128-ECB
  * @deriving_key: Encryption key used for derivation.
  * @source_key:   Source key to which to apply derivation.
- * @derived_key:  Derived key.
+ * @derived_raw_key:  Derived raw key.
  *
  * Return: Zero on success; non-zero otherwise.
  */
 static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
-				u8 source_key[FS_AES_256_XTS_KEY_SIZE],
-				u8 derived_key[FS_AES_256_XTS_KEY_SIZE])
+				const struct fscrypt_key *source_key,
+				u8 derived_raw_key[FS_MAX_KEY_SIZE])
 {
 	int res = 0;
 	struct skcipher_request *req = NULL;
@@ -60,10 +65,10 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
 	if (res < 0)
 		goto out;
 
-	sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE);
-	sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE);
-	skcipher_request_set_crypt(req, &src_sg, &dst_sg,
-					FS_AES_256_XTS_KEY_SIZE, NULL);
+	sg_init_one(&src_sg, source_key->raw, source_key->size);
+	sg_init_one(&dst_sg, derived_raw_key, source_key->size);
+	skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
+				   NULL);
 	res = crypto_skcipher_encrypt(req);
 	if (res == -EINPROGRESS || res == -EBUSY) {
 		wait_for_completion(&ecr.completion);
@@ -77,7 +82,7 @@ out:
 
 static int validate_user_key(struct fscrypt_info *crypt_info,
 			struct fscrypt_context *ctx, u8 *raw_key,
-			const char *prefix)
+			const char *prefix, int min_keysize)
 {
 	char *description;
 	struct key *keyring_key;
@@ -111,50 +116,60 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 	master_key = (struct fscrypt_key *)ukp->data;
 	BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
 
-	if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
+	if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
+	    || master_key->size % AES_BLOCK_SIZE != 0) {
 		printk_once(KERN_WARNING
 				"%s: key size incorrect: %d\n",
 				__func__, master_key->size);
 		res = -ENOKEY;
 		goto out;
 	}
-	res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
+	res = derive_key_aes(ctx->nonce, master_key, raw_key);
 out:
 	up_read(&keyring_key->sem);
 	key_put(keyring_key);
 	return res;
 }
 
+static const struct {
+	const char *cipher_str;
+	int keysize;
+} available_modes[] = {
+	[FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)",
+					     FS_AES_256_XTS_KEY_SIZE },
+	[FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))",
+					     FS_AES_256_CTS_KEY_SIZE },
+	[FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)",
+					     FS_AES_128_CBC_KEY_SIZE },
+	[FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))",
+					     FS_AES_128_CTS_KEY_SIZE },
+};
+
 static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
 				 const char **cipher_str_ret, int *keysize_ret)
 {
-	if (S_ISREG(inode->i_mode)) {
-		if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) {
-			*cipher_str_ret = "xts(aes)";
-			*keysize_ret = FS_AES_256_XTS_KEY_SIZE;
-			return 0;
-		}
-		pr_warn_once("fscrypto: unsupported contents encryption mode "
-			     "%d for inode %lu\n",
-			     ci->ci_data_mode, inode->i_ino);
-		return -ENOKEY;
+	u32 mode;
+
+	if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
+		pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
+				    inode->i_ino,
+				    ci->ci_data_mode, ci->ci_filename_mode);
+		return -EINVAL;
 	}
 
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-		if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) {
-			*cipher_str_ret = "cts(cbc(aes))";
-			*keysize_ret = FS_AES_256_CTS_KEY_SIZE;
-			return 0;
-		}
-		pr_warn_once("fscrypto: unsupported filenames encryption mode "
-			     "%d for inode %lu\n",
-			     ci->ci_filename_mode, inode->i_ino);
-		return -ENOKEY;
+	if (S_ISREG(inode->i_mode)) {
+		mode = ci->ci_data_mode;
+	} else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+		mode = ci->ci_filename_mode;
+	} else {
+		WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+			  inode->i_ino, (inode->i_mode & S_IFMT));
+		return -EINVAL;
 	}
 
-	pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n",
-		     (inode->i_mode & S_IFMT), inode->i_ino);
-	return -ENOKEY;
+	*cipher_str_ret = available_modes[mode].cipher_str;
+	*keysize_ret = available_modes[mode].keysize;
+	return 0;
 }
 
 static void put_crypt_info(struct fscrypt_info *ci)
@@ -163,9 +178,76 @@ static void put_crypt_info(struct fscrypt_info *ci)
 		return;
 
 	crypto_free_skcipher(ci->ci_ctfm);
+	crypto_free_cipher(ci->ci_essiv_tfm);
 	kmem_cache_free(fscrypt_info_cachep, ci);
 }
 
+static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
+{
+	struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
+
+	/* init hash transform on demand */
+	if (unlikely(!tfm)) {
+		struct crypto_shash *prev_tfm;
+
+		tfm = crypto_alloc_shash("sha256", 0, 0);
+		if (IS_ERR(tfm)) {
+			pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
+					    PTR_ERR(tfm));
+			return PTR_ERR(tfm);
+		}
+		prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
+		if (prev_tfm) {
+			crypto_free_shash(tfm);
+			tfm = prev_tfm;
+		}
+	}
+
+	{
+		SHASH_DESC_ON_STACK(desc, tfm);
+		desc->tfm = tfm;
+		desc->flags = 0;
+
+		return crypto_shash_digest(desc, key, keysize, salt);
+	}
+}
+
+static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
+				int keysize)
+{
+	int err;
+	struct crypto_cipher *essiv_tfm;
+	u8 salt[SHA256_DIGEST_SIZE];
+
+	essiv_tfm = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(essiv_tfm))
+		return PTR_ERR(essiv_tfm);
+
+	ci->ci_essiv_tfm = essiv_tfm;
+
+	err = derive_essiv_salt(raw_key, keysize, salt);
+	if (err)
+		goto out;
+
+	/*
+	 * Using SHA256 to derive the salt/key will result in AES-256 being
+	 * used for IV generation. File contents encryption will still use the
+	 * configured keysize (AES-128) nevertheless.
+	 */
+	err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt));
+	if (err)
+		goto out;
+
+out:
+	memzero_explicit(salt, sizeof(salt));
+	return err;
+}
+
+void __exit fscrypt_essiv_cleanup(void)
+{
+	crypto_free_shash(essiv_hash_tfm);
+}
+
 int fscrypt_get_encryption_info(struct inode *inode)
 {
 	struct fscrypt_info *crypt_info;
@@ -212,6 +294,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
 	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
 	crypt_info->ci_ctfm = NULL;
+	crypt_info->ci_essiv_tfm = NULL;
 	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
 				sizeof(crypt_info->ci_master_key));
 
@@ -228,10 +311,12 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	if (!raw_key)
 		goto out;
 
-	res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX);
+	res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
+				keysize);
 	if (res && inode->i_sb->s_cop->key_prefix) {
 		int res2 = validate_user_key(crypt_info, &ctx, raw_key,
-					     inode->i_sb->s_cop->key_prefix);
+					     inode->i_sb->s_cop->key_prefix,
+					     keysize);
 		if (res2) {
 			if (res2 == -ENOKEY)
 				res = -ENOKEY;
@@ -243,18 +328,30 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
 	if (!ctfm || IS_ERR(ctfm)) {
 		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
-		printk(KERN_DEBUG
-		       "%s: error %d (inode %u) allocating crypto tfm\n",
-		       __func__, res, (unsigned) inode->i_ino);
+		pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
+			 __func__, res, inode->i_ino);
 		goto out;
 	}
 	crypt_info->ci_ctfm = ctfm;
 	crypto_skcipher_clear_flags(ctfm, ~0);
 	crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	/*
+	 * if the provided key is longer than keysize, we use the first
+	 * keysize bytes of the derived key only
+	 */
 	res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
 	if (res)
 		goto out;
 
+	if (S_ISREG(inode->i_mode) &&
+	    crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
+		res = init_essiv_generator(crypt_info, raw_key, keysize);
+		if (res) {
+			pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
+				 __func__, res, inode->i_ino);
+			goto out;
+		}
+	}
 	if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
 		crypt_info = NULL;
 out:
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 210976e7a269..9914d51dff86 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -38,12 +38,8 @@ static int create_encryption_context_from_policy(struct inode *inode,
 	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
 					FS_KEY_DESCRIPTOR_SIZE);
 
-	if (!fscrypt_valid_contents_enc_mode(
-				policy->contents_encryption_mode))
-		return -EINVAL;
-
-	if (!fscrypt_valid_filenames_enc_mode(
-				policy->filenames_encryption_mode))
+	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
+				     policy->filenames_encryption_mode))
 		return -EINVAL;
 
 	if (policy->flags & ~FS_POLICY_FLAGS_VALID)
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
index 0a30c106c1e5..4022c61f7e9b 100644
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -91,14 +91,18 @@ static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
 	return false;
 }
 
-static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
+static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
+					u32 filenames_mode)
 {
-	return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
-}
+	if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
+	    filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
+		return true;
 
-static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
-{
-	return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
+	if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
+	    filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
+		return true;
+
+	return false;
 }
 
 static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a54feaa..a2a3ffb06038 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -272,6 +272,8 @@ struct fsxattr {
 #define FS_ENCRYPTION_MODE_AES_256_GCM		2
 #define FS_ENCRYPTION_MODE_AES_256_CBC		3
 #define FS_ENCRYPTION_MODE_AES_256_CTS		4
+#define FS_ENCRYPTION_MODE_AES_128_CBC		5
+#define FS_ENCRYPTION_MODE_AES_128_CTS		6
 
 struct fscrypt_policy {
 	__u8 version;
-- 
cgit v1.2.3


From c250b7dd8e73b5f7d88d231fbaac92e3360a7234 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 22 Jun 2017 12:14:40 -0700
Subject: fscrypt: make ->dummy_context() return bool

This makes it consistent with ->is_encrypted(), ->empty_dir(), and
fscrypt_dummy_context_enabled().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c                | 2 +-
 include/linux/fscrypt_common.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0b177da9ea82..f01d2c5bc3fa 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1205,7 +1205,7 @@ retry:
 	return res;
 }
 
-static int ext4_dummy_context(struct inode *inode)
+static bool ext4_dummy_context(struct inode *inode)
 {
 	return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
 }
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
index 4022c61f7e9b..e3e1208e0f54 100644
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -77,7 +77,7 @@ struct fscrypt_operations {
 	const char *key_prefix;
 	int (*get_context)(struct inode *, void *, size_t);
 	int (*set_context)(struct inode *, const void *, size_t, void *);
-	int (*dummy_context)(struct inode *);
+	bool (*dummy_context)(struct inode *);
 	bool (*is_encrypted)(struct inode *);
 	bool (*empty_dir)(struct inode *);
 	unsigned (*max_namelen)(struct inode *);
-- 
cgit v1.2.3


From b2d3d61adb7b73cfe5f82404f7a130a76fc64232 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 23 Jun 2017 16:11:07 +0200
Subject: genirq/timings: Add infrastructure to track the interrupt timings

The interrupt framework gives a lot of information about each interrupt. It
does not keep track of when those interrupts occur though, which is a
prerequisite for estimating the next interrupt arrival for power management
purposes.

Add a mechanism to record the timestamp for each interrupt occurrences in a
per-CPU circular buffer to help with the prediction of the next occurrence
using a statistical model.

Each CPU can store up to IRQ_TIMINGS_SIZE events <irq, timestamp>, the
current value of IRQ_TIMINGS_SIZE is 32.

Each event is encoded into a single u64, where the high 48 bits are used
for the timestamp and the low 16 bits are for the irq number.

A static key is introduced so when the irq prediction is switched off at
runtime, the overhead is near to zero.

It results in most of the code in internals.h for inline reasons and a very
few in the new file timings.c. The latter will contain more in the next patch
which will provide the statistical model for the next event prediction.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: "Rafael J . Wysocki" <rafael@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Link: http://lkml.kernel.org/r/1498227072-5980-1-git-send-email-daniel.lezcano@linaro.org
---
 include/linux/interrupt.h |  5 +++
 kernel/irq/Kconfig        |  3 ++
 kernel/irq/Makefile       |  1 +
 kernel/irq/handle.c       |  2 ++
 kernel/irq/internals.h    | 90 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/irq/manage.c       |  3 ++
 kernel/irq/timings.c      | 30 ++++++++++++++++
 7 files changed, 134 insertions(+)
 create mode 100644 kernel/irq/timings.c

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a6fba4804672..9f617238a2f7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -703,6 +703,11 @@ static inline void init_irq_proc(void)
 }
 #endif
 
+#ifdef CONFIG_IRQ_TIMINGS
+void irq_timings_enable(void);
+void irq_timings_disable(void);
+#endif
+
 struct seq_file;
 int show_interrupts(struct seq_file *p, void *v);
 int arch_show_interrupts(struct seq_file *p, int prec);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index fcbb1d6d51cb..27c4e774071c 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -85,6 +85,9 @@ config GENERIC_MSI_IRQ_DOMAIN
 config HANDLE_DOMAIN_IRQ
 	bool
 
+config IRQ_TIMINGS
+	bool
+
 config IRQ_DOMAIN_DEBUG
 	bool "Expose hardware/virtual IRQ mapping via debugfs"
 	depends on IRQ_DOMAIN && DEBUG_FS
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index c61fc9c2d1f7..e4aef7351f2b 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,6 @@
 
 obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
+obj-$(CONFIG_IRQ_TIMINGS) += timings.o
 obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d3f24905852c..eb4d3e8945b8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -138,6 +138,8 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
 	unsigned int irq = desc->irq_data.irq;
 	struct irqaction *action;
 
+	record_irq_time(desc);
+
 	for_each_action_of_desc(desc, action) {
 		irqreturn_t res;
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index a573e0771baf..b95b74920433 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -8,6 +8,7 @@
 #include <linux/irqdesc.h>
 #include <linux/kernel_stat.h>
 #include <linux/pm_runtime.h>
+#include <linux/sched/clock.h>
 
 #ifdef CONFIG_SPARSE_IRQ
 # define IRQ_BITMAP_BITS	(NR_IRQS + 8196)
@@ -57,6 +58,7 @@ enum {
 	IRQS_WAITING		= 0x00000080,
 	IRQS_PENDING		= 0x00000200,
 	IRQS_SUSPENDED		= 0x00000800,
+	IRQS_TIMINGS		= 0x00001000,
 };
 
 #include "debug.h"
@@ -255,6 +257,94 @@ static inline void
 irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { }
 #endif
 
+#ifdef CONFIG_IRQ_TIMINGS
+
+#define IRQ_TIMINGS_SHIFT	5
+#define IRQ_TIMINGS_SIZE	(1 << IRQ_TIMINGS_SHIFT)
+#define IRQ_TIMINGS_MASK	(IRQ_TIMINGS_SIZE - 1)
+
+/**
+ * struct irq_timings - irq timings storing structure
+ * @values: a circular buffer of u64 encoded <timestamp,irq> values
+ * @count: the number of elements in the array
+ */
+struct irq_timings {
+	u64	values[IRQ_TIMINGS_SIZE];
+	int	count;
+};
+
+DECLARE_PER_CPU(struct irq_timings, irq_timings);
+
+static inline void irq_remove_timings(struct irq_desc *desc)
+{
+	desc->istate &= ~IRQS_TIMINGS;
+}
+
+static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act)
+{
+	/*
+	 * We don't need the measurement because the idle code already
+	 * knows the next expiry event.
+	 */
+	if (act->flags & __IRQF_TIMER)
+		return;
+
+	desc->istate |= IRQS_TIMINGS;
+}
+
+extern void irq_timings_enable(void);
+extern void irq_timings_disable(void);
+
+DECLARE_STATIC_KEY_FALSE(irq_timing_enabled);
+
+/*
+ * The interrupt number and the timestamp are encoded into a single
+ * u64 variable to optimize the size.
+ * 48 bit time stamp and 16 bit IRQ number is way sufficient.
+ *  Who cares an IRQ after 78 hours of idle time?
+ */
+static inline u64 irq_timing_encode(u64 timestamp, int irq)
+{
+	return (timestamp << 16) | irq;
+}
+
+static inline int irq_timing_decode(u64 value, u64 *timestamp)
+{
+	*timestamp = value >> 16;
+	return value & U16_MAX;
+}
+
+/*
+ * The function record_irq_time is only called in one place in the
+ * interrupts handler. We want this function always inline so the code
+ * inside is embedded in the function and the static key branching
+ * code can act at the higher level. Without the explicit
+ * __always_inline we can end up with a function call and a small
+ * overhead in the hotpath for nothing.
+ */
+static __always_inline void record_irq_time(struct irq_desc *desc)
+{
+	if (!static_branch_likely(&irq_timing_enabled))
+		return;
+
+	if (desc->istate & IRQS_TIMINGS) {
+		struct irq_timings *timings = this_cpu_ptr(&irq_timings);
+
+		timings->values[timings->count & IRQ_TIMINGS_MASK] =
+			irq_timing_encode(local_clock(),
+					  irq_desc_get_irq(desc));
+
+		timings->count++;
+	}
+}
+#else
+static inline void irq_remove_timings(struct irq_desc *desc) {}
+static inline void irq_setup_timings(struct irq_desc *desc,
+				     struct irqaction *act) {};
+static inline void record_irq_time(struct irq_desc *desc) {}
+#endif /* CONFIG_IRQ_TIMINGS */
+
+
 #ifdef CONFIG_GENERIC_IRQ_CHIP
 void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
 			   int num_ct, unsigned int irq_base,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3577c091ac7b..5c11c1730ba5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1348,6 +1348,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
+	irq_setup_timings(desc, new);
+
 	/*
 	 * Strictly no need to wake it up, but hung_task complains
 	 * when no hard interrupt wakes the thread up.
@@ -1474,6 +1476,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		irq_settings_clr_disable_unlazy(desc);
 		irq_shutdown(desc);
 		irq_release_resources(desc);
+		irq_remove_timings(desc);
 	}
 
 #ifdef CONFIG_SMP
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
new file mode 100644
index 000000000000..56cf6870fa26
--- /dev/null
+++ b/kernel/irq/timings.c
@@ -0,0 +1,30 @@
+/*
+ * linux/kernel/irq/timings.c
+ *
+ * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/percpu.h>
+#include <linux/static_key.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include "internals.h"
+
+DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
+
+DEFINE_PER_CPU(struct irq_timings, irq_timings);
+
+void irq_timings_enable(void)
+{
+	static_branch_enable(&irq_timing_enabled);
+}
+
+void irq_timings_disable(void)
+{
+	static_branch_disable(&irq_timing_enabled);
+}
-- 
cgit v1.2.3


From e1c921495534002d727b15a76a2f8c20b6b108b5 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 23 Jun 2017 16:11:08 +0200
Subject: genirq/timings: Add infrastructure for estimating the next interrupt
 arrival time

An interrupt behaves with a burst of activity with periodic interval of time
followed by one or two peaks of longer interval.

As the time intervals are periodic, statistically speaking they follow a normal
distribution and each interrupts can be tracked individually.

Add a mechanism to compute the statistics on all interrupts, except the
timers which are deterministic from a prediction point of view, as their
expiry time is known.

The goal is to extract the periodicity for each interrupt, with the last
timestamp and sum them, so the next event can be predicted to a certain
extent.

Taking the earliest prediction gives the expected wakeup on the system
(assuming a timer won't expire before).

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: "Rafael J . Wysocki" <rafael@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Link: http://lkml.kernel.org/r/1498227072-5980-2-git-send-email-daniel.lezcano@linaro.org
---
 include/linux/interrupt.h |   1 +
 kernel/irq/internals.h    |  19 +++
 kernel/irq/timings.c      | 339 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 359 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9f617238a2f7..37f8e354f564 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -706,6 +706,7 @@ static inline void init_irq_proc(void)
 #ifdef CONFIG_IRQ_TIMINGS
 void irq_timings_enable(void);
 void irq_timings_disable(void);
+u64 irq_timings_next_event(u64 now);
 #endif
 
 struct seq_file;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b95b74920433..9da14d125df4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -275,13 +275,21 @@ struct irq_timings {
 
 DECLARE_PER_CPU(struct irq_timings, irq_timings);
 
+extern void irq_timings_free(int irq);
+extern int irq_timings_alloc(int irq);
+
 static inline void irq_remove_timings(struct irq_desc *desc)
 {
 	desc->istate &= ~IRQS_TIMINGS;
+
+	irq_timings_free(irq_desc_get_irq(desc));
 }
 
 static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act)
 {
+	int irq = irq_desc_get_irq(desc);
+	int ret;
+
 	/*
 	 * We don't need the measurement because the idle code already
 	 * knows the next expiry event.
@@ -289,6 +297,17 @@ static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *ac
 	if (act->flags & __IRQF_TIMER)
 		return;
 
+	/*
+	 * In case the timing allocation fails, we just want to warn,
+	 * not fail, so letting the system boot anyway.
+	 */
+	ret = irq_timings_alloc(irq);
+	if (ret) {
+		pr_warn("Failed to allocate irq timing stats for irq%d (%d)",
+			irq, ret);
+		return;
+	}
+
 	desc->istate |= IRQS_TIMINGS;
 }
 
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 56cf6870fa26..c8c1d073fbf1 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -8,10 +8,16 @@
  * published by the Free Software Foundation.
  *
  */
+#include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/slab.h>
 #include <linux/static_key.h>
 #include <linux/interrupt.h>
+#include <linux/idr.h>
 #include <linux/irq.h>
+#include <linux/math64.h>
+
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
@@ -19,6 +25,18 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
 
 DEFINE_PER_CPU(struct irq_timings, irq_timings);
 
+struct irqt_stat {
+	u64	next_evt;
+	u64	last_ts;
+	u64	variance;
+	u32	avg;
+	u32	nr_samples;
+	int	anomalies;
+	int	valid;
+};
+
+static DEFINE_IDR(irqt_stats);
+
 void irq_timings_enable(void)
 {
 	static_branch_enable(&irq_timing_enabled);
@@ -28,3 +46,324 @@ void irq_timings_disable(void)
 {
 	static_branch_disable(&irq_timing_enabled);
 }
+
+/**
+ * irqs_update - update the irq timing statistics with a new timestamp
+ *
+ * @irqs: an irqt_stat struct pointer
+ * @ts: the new timestamp
+ *
+ * The statistics are computed online, in other words, the code is
+ * designed to compute the statistics on a stream of values rather
+ * than doing multiple passes on the values to compute the average,
+ * then the variance. The integer division introduces a loss of
+ * precision but with an acceptable error margin regarding the results
+ * we would have with the double floating precision: we are dealing
+ * with nanosec, so big numbers, consequently the mantisse is
+ * negligeable, especially when converting the time in usec
+ * afterwards.
+ *
+ * The computation happens at idle time. When the CPU is not idle, the
+ * interrupts' timestamps are stored in the circular buffer, when the
+ * CPU goes idle and this routine is called, all the buffer's values
+ * are injected in the statistical model continuying to extend the
+ * statistics from the previous busy-idle cycle.
+ *
+ * The observations showed a device will trigger a burst of periodic
+ * interrupts followed by one or two peaks of longer time, for
+ * instance when a SD card device flushes its cache, then the periodic
+ * intervals occur again. A one second inactivity period resets the
+ * stats, that gives us the certitude the statistical values won't
+ * exceed 1x10^9, thus the computation won't overflow.
+ *
+ * Basically, the purpose of the algorithm is to watch the periodic
+ * interrupts and eliminate the peaks.
+ *
+ * An interrupt is considered periodically stable if the interval of
+ * its occurences follow the normal distribution, thus the values
+ * comply with:
+ *
+ *      avg - 3 x stddev < value < avg + 3 x stddev
+ *
+ * Which can be simplified to:
+ *
+ *      -3 x stddev < value - avg < 3 x stddev
+ *
+ *      abs(value - avg) < 3 x stddev
+ *
+ * In order to save a costly square root computation, we use the
+ * variance. For the record, stddev = sqrt(variance). The equation
+ * above becomes:
+ *
+ *      abs(value - avg) < 3 x sqrt(variance)
+ *
+ * And finally we square it:
+ *
+ *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
+ *
+ *      (value - avg) x (value - avg) < 9 x variance
+ *
+ * Statistically speaking, any values out of this interval is
+ * considered as an anomaly and is discarded. However, a normal
+ * distribution appears when the number of samples is 30 (it is the
+ * rule of thumb in statistics, cf. "30 samples" on Internet). When
+ * there are three consecutive anomalies, the statistics are resetted.
+ *
+ */
+static void irqs_update(struct irqt_stat *irqs, u64 ts)
+{
+	u64 old_ts = irqs->last_ts;
+	u64 variance = 0;
+	u64 interval;
+	s64 diff;
+
+	/*
+	 * The timestamps are absolute time values, we need to compute
+	 * the timing interval between two interrupts.
+	 */
+	irqs->last_ts = ts;
+
+	/*
+	 * The interval type is u64 in order to deal with the same
+	 * type in our computation, that prevent mindfuck issues with
+	 * overflow, sign and division.
+	 */
+	interval = ts - old_ts;
+
+	/*
+	 * The interrupt triggered more than one second apart, that
+	 * ends the sequence as predictible for our purpose. In this
+	 * case, assume we have the beginning of a sequence and the
+	 * timestamp is the first value. As it is impossible to
+	 * predict anything at this point, return.
+	 *
+	 * Note the first timestamp of the sequence will always fall
+	 * in this test because the old_ts is zero. That is what we
+	 * want as we need another timestamp to compute an interval.
+	 */
+	if (interval >= NSEC_PER_SEC) {
+		memset(irqs, 0, sizeof(*irqs));
+		irqs->last_ts = ts;
+		return;
+	}
+
+	/*
+	 * Pre-compute the delta with the average as the result is
+	 * used several times in this function.
+	 */
+	diff = interval - irqs->avg;
+
+	/*
+	 * Increment the number of samples.
+	 */
+	irqs->nr_samples++;
+
+	/*
+	 * Online variance divided by the number of elements if there
+	 * is more than one sample.  Normally the formula is division
+	 * by nr_samples - 1 but we assume the number of element will be
+	 * more than 32 and dividing by 32 instead of 31 is enough
+	 * precise.
+	 */
+	if (likely(irqs->nr_samples > 1))
+		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
+
+	/*
+	 * The rule of thumb in statistics for the normal distribution
+	 * is having at least 30 samples in order to have the model to
+	 * apply. Values outside the interval are considered as an
+	 * anomaly.
+	 */
+	if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
+		/*
+		 * After three consecutive anomalies, we reset the
+		 * stats as it is no longer stable enough.
+		 */
+		if (irqs->anomalies++ >= 3) {
+			memset(irqs, 0, sizeof(*irqs));
+			irqs->last_ts = ts;
+			return;
+		}
+	} else {
+		/*
+		 * The anomalies must be consecutives, so at this
+		 * point, we reset the anomalies counter.
+		 */
+		irqs->anomalies = 0;
+	}
+
+	/*
+	 * The interrupt is considered stable enough to try to predict
+	 * the next event on it.
+	 */
+	irqs->valid = 1;
+
+	/*
+	 * Online average algorithm:
+	 *
+	 *  new_average = average + ((value - average) / count)
+	 *
+	 * The variance computation depends on the new average
+	 * to be computed here first.
+	 *
+	 */
+	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
+
+	/*
+	 * Online variance algorithm:
+	 *
+	 *  new_variance = variance + (value - average) x (value - new_average)
+	 *
+	 * Warning: irqs->avg is updated with the line above, hence
+	 * 'interval - irqs->avg' is no longer equal to 'diff'
+	 */
+	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
+
+	/*
+	 * Update the next event
+	 */
+	irqs->next_evt = ts + irqs->avg;
+}
+
+/**
+ * irq_timings_next_event - Return when the next event is supposed to arrive
+ *
+ * During the last busy cycle, the number of interrupts is incremented
+ * and stored in the irq_timings structure. This information is
+ * necessary to:
+ *
+ * - know if the index in the table wrapped up:
+ *
+ *      If more than the array size interrupts happened during the
+ *      last busy/idle cycle, the index wrapped up and we have to
+ *      begin with the next element in the array which is the last one
+ *      in the sequence, otherwise it is a the index 0.
+ *
+ * - have an indication of the interrupts activity on this CPU
+ *   (eg. irq/sec)
+ *
+ * The values are 'consumed' after inserting in the statistical model,
+ * thus the count is reinitialized.
+ *
+ * The array of values **must** be browsed in the time direction, the
+ * timestamp must increase between an element and the next one.
+ *
+ * Returns a nanosec time based estimation of the earliest interrupt,
+ * U64_MAX otherwise.
+ */
+u64 irq_timings_next_event(u64 now)
+{
+	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+	struct irqt_stat *irqs;
+	struct irqt_stat __percpu *s;
+	u64 ts, next_evt = U64_MAX;
+	int i, irq = 0;
+
+	/*
+	 * This function must be called with the local irq disabled in
+	 * order to prevent the timings circular buffer to be updated
+	 * while we are reading it.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+
+	/*
+	 * Number of elements in the circular buffer: If it happens it
+	 * was flushed before, then the number of elements could be
+	 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
+	 * otherwise the array size is used as we wrapped. The index
+	 * begins from zero when we did not wrap. That could be done
+	 * in a nicer way with the proper circular array structure
+	 * type but with the cost of extra computation in the
+	 * interrupt handler hot path. We choose efficiency.
+	 *
+	 * Inject measured irq/timestamp to the statistical model
+	 * while decrementing the counter because we consume the data
+	 * from our circular buffer.
+	 */
+	for (i = irqts->count & IRQ_TIMINGS_MASK,
+		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
+	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+
+		irq = irq_timing_decode(irqts->values[i], &ts);
+
+		s = idr_find(&irqt_stats, irq);
+		if (s) {
+			irqs = this_cpu_ptr(s);
+			irqs_update(irqs, ts);
+		}
+	}
+
+	/*
+	 * Look in the list of interrupts' statistics, the earliest
+	 * next event.
+	 */
+	idr_for_each_entry(&irqt_stats, s, i) {
+
+		irqs = this_cpu_ptr(s);
+
+		if (!irqs->valid)
+			continue;
+
+		if (irqs->next_evt <= now) {
+			irq = i;
+			next_evt = now;
+
+			/*
+			 * This interrupt mustn't use in the future
+			 * until new events occur and update the
+			 * statistics.
+			 */
+			irqs->valid = 0;
+			break;
+		}
+
+		if (irqs->next_evt < next_evt) {
+			irq = i;
+			next_evt = irqs->next_evt;
+		}
+	}
+
+	return next_evt;
+}
+
+void irq_timings_free(int irq)
+{
+	struct irqt_stat __percpu *s;
+
+	s = idr_find(&irqt_stats, irq);
+	if (s) {
+		free_percpu(s);
+		idr_remove(&irqt_stats, irq);
+	}
+}
+
+int irq_timings_alloc(int irq)
+{
+	struct irqt_stat __percpu *s;
+	int id;
+
+	/*
+	 * Some platforms can have the same private interrupt per cpu,
+	 * so this function may be be called several times with the
+	 * same interrupt number. Just bail out in case the per cpu
+	 * stat structure is already allocated.
+	 */
+	s = idr_find(&irqt_stats, irq);
+	if (s)
+		return 0;
+
+	s = alloc_percpu(*s);
+	if (!s)
+		return -ENOMEM;
+
+	idr_preload(GFP_KERNEL);
+	id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
+	idr_preload_end();
+
+	if (id < 0) {
+		free_percpu(s);
+		return id;
+	}
+
+	return 0;
+}
-- 
cgit v1.2.3


From cbf4b3867875206aa548a8c6d7c886f3299d619e Mon Sep 17 00:00:00 2001
From: Okash Khawaja <okash.khawaja@gmail.com>
Date: Sat, 17 Jun 2017 22:32:55 +0100
Subject: tty: define tty_open_by_driver when CONFIG_TTY is not defined

This patch adds definition of tty_open_by_driver when CONFIG_TTY is not
defined. This was supposed to have been included in commit
12e84c71b7d4ee38d51377fd494ac748ee4e6912 ("tty: export
tty_open_by_driver"). The patch follows convention for other such
functions and returns NULL.

Signed-off-by: Okash Khawaja <okash.khawaja@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5c3f01f49b10..b75b2d51ba2b 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -422,6 +422,9 @@ static inline int __init tty_init(void)
 { return 0; }
 static inline const char *tty_name(const struct tty_struct *tty)
 { return "(none)"; }
+static inline struct tty_struct *tty_open_by_driver(dev_t device,
+		struct inode *inode, struct file *filp)
+{ return NULL; }
 #endif
 
 extern struct ktermios tty_std_termios;
-- 
cgit v1.2.3


From f3ecab38240b624b37e003b5089a93682b109699 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 25 Jun 2017 11:09:12 +0300
Subject: net: Remove ndo_dfwd_start_xmit

Looks like commit f663dd9aaf9e ("net: core: explicitly select a txq before doing l2 forwarding")
has removed the need for this dedicated xmit function [it even explicitly
states so in its commit log message] but it hasn't removed the definition
of the ndo.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
CC: Jason Wang <jasowang@redhat.com>
CC: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 68f5d899d1e6..85f01d673340 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1114,12 +1114,6 @@ struct xfrmdev_ops {
  *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
  *	the station and priv is the structure returned by the add
  *	operation.
- * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
- *				      struct net_device *dev,
- *				      void *priv);
- *	Callback to use for xmit over the accelerated station. This
- *	is used in place of ndo_start_xmit on accelerated net
- *	devices.
  * int (*ndo_set_tx_maxrate)(struct net_device *dev,
  *			     int queue_index, u32 maxrate);
  *	Called when a user wants to set a max-rate limitation of specific
@@ -1316,9 +1310,6 @@ struct net_device_ops {
 	void			(*ndo_dfwd_del_station)(struct net_device *pdev,
 							void *priv);
 
-	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
-							struct net_device *dev,
-							void *priv);
 	int			(*ndo_get_lock_subclass)(struct net_device *dev);
 	int			(*ndo_set_tx_maxrate)(struct net_device *dev,
 						      int queue_index,
-- 
cgit v1.2.3


From f59dd9c886acb3abb188e8e94a99436560976835 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 24 Jun 2017 11:45:02 -0700
Subject: time: add get_timespec64 and put_timespec64

Add helper functions to convert between struct timespec64 and
struct timespec at userspace boundaries.

This is a preparatory patch to use timespec64 as the basic type
internally in the kernel as timespec is not y2038 safe on 32 bit systems.
The patch helps the cause by containing all data conversions at the
userspace boundaries within these functions.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h |  2 ++
 include/linux/time.h   |  5 +++++
 kernel/compat.c        | 44 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/time/time.c     | 28 ++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 425563c7647b..3eb04016ffa9 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -164,6 +164,8 @@ extern int compat_get_timespec(struct timespec *, const void __user *);
 extern int compat_put_timespec(const struct timespec *, void __user *);
 extern int compat_get_timeval(struct timeval *, const void __user *);
 extern int compat_put_timeval(const struct timeval *, void __user *);
+extern int compat_get_timespec64(struct timespec64 *, const void __user *);
+extern int compat_put_timespec64(const struct timespec64 *, void __user *);
 
 /*
  * This function convert a timespec if necessary and returns a *user
diff --git a/include/linux/time.h b/include/linux/time.h
index c0543f5f25de..36afb579495f 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -8,6 +8,11 @@
 
 extern struct timezone sys_tz;
 
+int get_timespec64(struct timespec64 *ts,
+		const struct timespec __user *uts);
+int put_timespec64(const struct timespec64 *ts,
+		struct timespec __user *uts);
+
 #define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
 static inline int timespec_equal(const struct timespec *a,
diff --git a/kernel/compat.c b/kernel/compat.c
index ebd8bdc3fd68..73f26ba44a8a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -120,6 +120,50 @@ static int __compat_put_timespec(const struct timespec *ts, struct compat_timesp
 			__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
+static int __compat_get_timespec64(struct timespec64 *ts64,
+				   const struct compat_timespec __user *cts)
+{
+	struct compat_timespec ts;
+	int ret;
+
+	ret = copy_from_user(&ts, cts, sizeof(ts));
+	if (ret)
+		return -EFAULT;
+
+	ts64->tv_sec = ts.tv_sec;
+	ts64->tv_nsec = ts.tv_nsec;
+
+	return 0;
+}
+
+static int __compat_put_timespec64(const struct timespec64 *ts64,
+				   struct compat_timespec __user *cts)
+{
+	struct compat_timespec ts = {
+		.tv_sec = ts64->tv_sec,
+		.tv_nsec = ts64->tv_nsec
+	};
+	return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+
+int compat_get_timespec64(struct timespec64 *ts, const void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
+	else
+		return __compat_get_timespec64(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_get_timespec64);
+
+int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
+	else
+		return __compat_put_timespec64(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_put_timespec64);
+
 int compat_get_timeval(struct timeval *tv, const void __user *utv)
 {
 	if (COMPAT_USE_64BIT_TIME)
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 7c89e437c4d7..adb9853ca6b0 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -890,3 +890,31 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
 
 	return res;
 }
+
+int get_timespec64(struct timespec64 *ts,
+		   const struct timespec __user *uts)
+{
+	struct timespec kts;
+	int ret;
+
+	ret = copy_from_user(&kts, uts, sizeof(kts));
+	if (ret)
+		return -EFAULT;
+
+	ts->tv_sec = kts.tv_sec;
+	ts->tv_nsec = kts.tv_nsec;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_timespec64);
+
+int put_timespec64(const struct timespec64 *ts,
+		   struct timespec __user *uts)
+{
+	struct timespec kts = {
+		.tv_sec = ts->tv_sec,
+		.tv_nsec = ts->tv_nsec
+	};
+	return copy_to_user(uts, &kts, sizeof(kts)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(put_timespec64);
-- 
cgit v1.2.3


From d5b7ffbfbdacc29e4db035f90665951668fa9c58 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 24 Jun 2017 11:45:03 -0700
Subject: time: introduce {get,put}_itimerspec64

As we change the user space type for the timerfd and posix timer
functions to newer data types, we need some form of conversion
helpers to avoid duplicating that logic.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h       |  4 ++++
 include/linux/posix-timers.h |  1 -
 include/linux/time.h         | 13 +++++++++++++
 kernel/compat.c              | 21 +++++++++++++++++++++
 kernel/time/time.c           | 30 ++++++++++++++++++++++++++++++
 5 files changed, 68 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 3eb04016ffa9..2ed54020ace0 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -166,6 +166,10 @@ extern int compat_get_timeval(struct timeval *, const void __user *);
 extern int compat_put_timeval(const struct timeval *, void __user *);
 extern int compat_get_timespec64(struct timespec64 *, const void __user *);
 extern int compat_put_timespec64(const struct timespec64 *, void __user *);
+extern int get_compat_itimerspec64(struct itimerspec64 *its,
+			const struct compat_itimerspec __user *uits);
+extern int put_compat_itimerspec64(const struct itimerspec64 *its,
+			struct compat_itimerspec __user *uits);
 
 /*
  * This function convert a timespec if necessary and returns a *user
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 29f1b7f09ced..62839fd04dce 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -113,5 +113,4 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
 void posixtimer_rearm(struct siginfo *info);
-
 #endif
diff --git a/include/linux/time.h b/include/linux/time.h
index 36afb579495f..f9858d7e6361 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -12,6 +12,10 @@ int get_timespec64(struct timespec64 *ts,
 		const struct timespec __user *uts);
 int put_timespec64(const struct timespec64 *ts,
 		struct timespec __user *uts);
+int get_itimerspec64(struct itimerspec64 *it,
+			const struct itimerspec __user *uit);
+int put_itimerspec64(const struct itimerspec64 *it,
+			struct itimerspec __user *uit);
 
 #define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
@@ -275,4 +279,13 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns)
 	a->tv_nsec = ns;
 }
 
+static inline bool itimerspec64_valid(const struct itimerspec64 *its)
+{
+	if (!timespec64_valid(&(its->it_interval)) ||
+		!timespec64_valid(&(its->it_value)))
+		return false;
+
+	return true;
+}
+
 #endif
diff --git a/kernel/compat.c b/kernel/compat.c
index 73f26ba44a8a..a350deda503a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -586,6 +586,27 @@ int put_compat_itimerspec(struct compat_itimerspec __user *dst,
 	return 0;
 }
 
+int get_compat_itimerspec64(struct itimerspec64 *its,
+			const struct compat_itimerspec __user *uits)
+{
+
+	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
+	    __compat_get_timespec64(&its->it_value, &uits->it_value))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
+
+int put_compat_itimerspec64(const struct itimerspec64 *its,
+			struct compat_itimerspec __user *uits)
+{
+	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
+	    __compat_put_timespec64(&its->it_value, &uits->it_value))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
+
 /*
  * We currently only need the following fields from the sigevent
  * structure: sigev_value, sigev_signo, sig_notify and (sometimes
diff --git a/kernel/time/time.c b/kernel/time/time.c
index adb9853ca6b0..44a8c1402133 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -918,3 +918,33 @@ int put_timespec64(const struct timespec64 *ts,
 	return copy_to_user(uts, &kts, sizeof(kts)) ? -EFAULT : 0;
 }
 EXPORT_SYMBOL_GPL(put_timespec64);
+
+int get_itimerspec64(struct itimerspec64 *it,
+			const struct itimerspec __user *uit)
+{
+	int ret;
+
+	ret = get_timespec64(&it->it_interval, &uit->it_interval);
+	if (ret)
+		return ret;
+
+	ret = get_timespec64(&it->it_value, &uit->it_value);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(get_itimerspec64);
+
+int put_itimerspec64(const struct itimerspec64 *it,
+			struct itimerspec __user *uit)
+{
+	int ret;
+
+	ret = put_timespec64(&it->it_interval, &uit->it_interval);
+	if (ret)
+		return ret;
+
+	ret = put_timespec64(&it->it_value, &uit->it_value);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(put_itimerspec64);
-- 
cgit v1.2.3


From 9902747ec57d11b27c98e53d66112ecceed43c82 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 26 Jun 2017 10:24:27 +0200
Subject: Revert "ktime: Simplify ktime_compare implementation"

Thierry bisected boot failures to this simplification commit.

Reverts: 3f1d472055bb ("ktime: Simplify ktime_compare implementation")
Reported-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mariusz Skamra <mariuszx.skamra@intel.com>
---
 include/linux/ktime.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 04817b1ca019..0c8bd45c8206 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -108,7 +108,11 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
  */
 static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
 {
-	return ktime_sub(cmp1, cmp2);
+	if (cmp1 < cmp2)
+		return -1;
+	if (cmp1 > cmp2)
+		return 1;
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 5985ea8bd5d1b820b909af49fbc2767a990080a6 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 23 Jun 2017 16:05:11 -0400
Subject: ftrace: Have the cached module list show in set_ftrace_filter

When writing in a module filter into set_ftrace_filter for a module that is
not yet loaded, it it cached, and will be executed when the module is loaded
(although that is not implemented yet at this commit). Display the list of
cached modules to be traced.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |   3 +-
 kernel/trace/ftrace.c  | 112 +++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 102 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1b6992e994e6..9fb9a67dc9d4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -446,7 +446,8 @@ enum {
 	FTRACE_ITER_PRINTALL	= (1 << 2),
 	FTRACE_ITER_DO_PROBES	= (1 << 3),
 	FTRACE_ITER_PROBE	= (1 << 4),
-	FTRACE_ITER_ENABLED	= (1 << 5),
+	FTRACE_ITER_MOD		= (1 << 5),
+	FTRACE_ITER_ENABLED	= (1 << 6),
 };
 
 void arch_ftrace_update_code(int command);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1867edec6269..bfdbce78064b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3112,6 +3112,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
 struct ftrace_iterator {
 	loff_t				pos;
 	loff_t				func_pos;
+	loff_t				mod_pos;
 	struct ftrace_page		*pg;
 	struct dyn_ftrace		*func;
 	struct ftrace_func_probe	*probe;
@@ -3119,6 +3120,8 @@ struct ftrace_iterator {
 	struct trace_parser		parser;
 	struct ftrace_hash		*hash;
 	struct ftrace_ops		*ops;
+	struct trace_array		*tr;
+	struct list_head		*mod_list;
 	int				pidx;
 	int				idx;
 	unsigned			flags;
@@ -3203,13 +3206,13 @@ static void *t_probe_start(struct seq_file *m, loff_t *pos)
 	if (!(iter->flags & FTRACE_ITER_DO_PROBES))
 		return NULL;
 
-	if (iter->func_pos > *pos)
+	if (iter->mod_pos > *pos)
 		return NULL;
 
 	iter->probe = NULL;
 	iter->probe_entry = NULL;
 	iter->pidx = 0;
-	for (l = 0; l <= (*pos - iter->func_pos); ) {
+	for (l = 0; l <= (*pos - iter->mod_pos); ) {
 		p = t_probe_next(m, &l);
 		if (!p)
 			break;
@@ -3247,6 +3250,82 @@ t_probe_show(struct seq_file *m, struct ftrace_iterator *iter)
 	return 0;
 }
 
+static void *
+t_mod_next(struct seq_file *m, loff_t *pos)
+{
+	struct ftrace_iterator *iter = m->private;
+	struct trace_array *tr = iter->tr;
+
+	(*pos)++;
+	iter->pos = *pos;
+
+	iter->mod_list = iter->mod_list->next;
+
+	if (iter->mod_list == &tr->mod_trace ||
+	    iter->mod_list == &tr->mod_notrace) {
+		iter->flags &= ~FTRACE_ITER_MOD;
+		return NULL;
+	}
+
+	iter->mod_pos = *pos;
+
+	return iter;
+}
+
+static void *t_mod_start(struct seq_file *m, loff_t *pos)
+{
+	struct ftrace_iterator *iter = m->private;
+	void *p = NULL;
+	loff_t l;
+
+	if (iter->func_pos > *pos)
+		return NULL;
+
+	iter->mod_pos = iter->func_pos;
+
+	/* probes are only available if tr is set */
+	if (!iter->tr)
+		return NULL;
+
+	for (l = 0; l <= (*pos - iter->func_pos); ) {
+		p = t_mod_next(m, &l);
+		if (!p)
+			break;
+	}
+	if (!p) {
+		iter->flags &= ~FTRACE_ITER_MOD;
+		return t_probe_start(m, pos);
+	}
+
+	/* Only set this if we have an item */
+	iter->flags |= FTRACE_ITER_MOD;
+
+	return iter;
+}
+
+static int
+t_mod_show(struct seq_file *m, struct ftrace_iterator *iter)
+{
+	struct ftrace_mod_load *ftrace_mod;
+	struct trace_array *tr = iter->tr;
+
+	if (WARN_ON_ONCE(!iter->mod_list) ||
+			 iter->mod_list == &tr->mod_trace ||
+			 iter->mod_list == &tr->mod_notrace)
+		return -EIO;
+
+	ftrace_mod = list_entry(iter->mod_list, struct ftrace_mod_load, list);
+
+	if (ftrace_mod->func)
+		seq_printf(m, "%s", ftrace_mod->func);
+	else
+		seq_putc(m, '*');
+
+	seq_printf(m, ":mod:%s\n", ftrace_mod->module);
+
+	return 0;
+}
+
 static void *
 t_func_next(struct seq_file *m, loff_t *pos)
 {
@@ -3288,7 +3367,7 @@ static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	loff_t l = *pos; /* t_hash_start() must use original pos */
+	loff_t l = *pos; /* t_probe_start() must use original pos */
 	void *ret;
 
 	if (unlikely(ftrace_disabled))
@@ -3297,16 +3376,19 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 	if (iter->flags & FTRACE_ITER_PROBE)
 		return t_probe_next(m, pos);
 
+	if (iter->flags & FTRACE_ITER_MOD)
+		return t_mod_next(m, pos);
+
 	if (iter->flags & FTRACE_ITER_PRINTALL) {
 		/* next must increment pos, and t_probe_start does not */
 		(*pos)++;
-		return t_probe_start(m, &l);
+		return t_mod_start(m, &l);
 	}
 
 	ret = t_func_next(m, pos);
 
 	if (!ret)
-		return t_probe_start(m, &l);
+		return t_mod_start(m, &l);
 
 	return ret;
 }
@@ -3315,7 +3397,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 {
 	iter->pos = 0;
 	iter->func_pos = 0;
-	iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE);
+	iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE | FTRACE_ITER_MOD);
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
@@ -3344,15 +3426,15 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	    ftrace_hash_empty(iter->hash)) {
 		iter->func_pos = 1; /* Account for the message */
 		if (*pos > 0)
-			return t_probe_start(m, pos);
+			return t_mod_start(m, pos);
 		iter->flags |= FTRACE_ITER_PRINTALL;
 		/* reset in case of seek/pread */
 		iter->flags &= ~FTRACE_ITER_PROBE;
 		return iter;
 	}
 
-	if (iter->flags & FTRACE_ITER_PROBE)
-		return t_probe_start(m, pos);
+	if (iter->flags & FTRACE_ITER_MOD)
+		return t_mod_start(m, pos);
 
 	/*
 	 * Unfortunately, we need to restart at ftrace_pages_start
@@ -3368,7 +3450,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	}
 
 	if (!p)
-		return t_probe_start(m, pos);
+		return t_mod_start(m, pos);
 
 	return iter;
 }
@@ -3402,6 +3484,9 @@ static int t_show(struct seq_file *m, void *v)
 	if (iter->flags & FTRACE_ITER_PROBE)
 		return t_probe_show(m, iter);
 
+	if (iter->flags & FTRACE_ITER_MOD)
+		return t_mod_show(m, iter);
+
 	if (iter->flags & FTRACE_ITER_PRINTALL) {
 		if (iter->flags & FTRACE_ITER_NOTRACE)
 			seq_puts(m, "#### no functions disabled ####\n");
@@ -3528,17 +3613,20 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 
 	iter->ops = ops;
 	iter->flags = flag;
+	iter->tr = tr;
 
 	mutex_lock(&ops->func_hash->regex_lock);
 
 	if (flag & FTRACE_ITER_NOTRACE) {
 		hash = ops->func_hash->notrace_hash;
-		mod_head = tr ? &tr->mod_trace : NULL;
+		mod_head = tr ? &tr->mod_notrace : NULL;
 	} else {
 		hash = ops->func_hash->filter_hash;
-		mod_head = tr ? &tr->mod_notrace : NULL;
+		mod_head = tr ? &tr->mod_trace : NULL;
 	}
 
+	iter->mod_list = mod_head;
+
 	if (file->f_mode & FMODE_WRITE) {
 		const int size_bits = FTRACE_HASH_DEFAULT_BITS;
 
-- 
cgit v1.2.3


From 8c08f0d5c6fb10ff93ffb1cbf416f4f1c3a52a80 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 26 Jun 2017 11:47:31 -0400
Subject: ftrace: Have cached module filters be an active filter

When a module filter is added to set_ftrace_filter, if the module is not
loaded, it is cached. This should be considered an active filter, and
function tracing should be filtered by this. That is, if a cached module
filter is the only filter set, then no function tracing should be happening,
as all the functions available will be filtered out.

This makes sense, as the reason to add a cached module filter, is to trace
the module when you load it. There shouldn't be any other tracing happening
until then.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  2 ++
 kernel/trace/ftrace.c  | 20 +++++++++++++++-----
 kernel/trace/trace.h   |  7 ++++++-
 3 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9fb9a67dc9d4..5857390ac35a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -120,6 +120,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
  *            this ops will fail to register or set_filter_ip.
  * PID     - Is affected by set_ftrace_pid (allows filtering on those pids)
  * RCU     - Set when the ops can only be called when RCU is watching.
+ * TRACE_ARRAY - The ops->private points to a trace_array descriptor.
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
@@ -138,6 +139,7 @@ enum {
 	FTRACE_OPS_FL_IPMODIFY			= 1 << 13,
 	FTRACE_OPS_FL_PID			= 1 << 14,
 	FTRACE_OPS_FL_RCU			= 1 << 15,
+	FTRACE_OPS_FL_TRACE_ARRAY		= 1 << 16,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f1ccf8be9df7..914539e3e301 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1410,6 +1410,9 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 	if (!new_hash)
 		return NULL;
 
+	if (hash)
+		new_hash->flags = hash->flags;
+
 	/* Empty hash? */
 	if (ftrace_hash_empty(hash))
 		return new_hash;
@@ -1454,7 +1457,7 @@ __ftrace_hash_move(struct ftrace_hash *src)
 	/*
 	 * If the new source is empty, just return the empty_hash.
 	 */
-	if (!src->count)
+	if (ftrace_hash_empty(src))
 		return EMPTY_HASH;
 
 	/*
@@ -1471,6 +1474,8 @@ __ftrace_hash_move(struct ftrace_hash *src)
 	if (!new_hash)
 		return NULL;
 
+	new_hash->flags = src->flags;
+
 	size = 1 << src->size_bits;
 	for (i = 0; i < size; i++) {
 		hhd = &src->buckets[i];
@@ -1701,7 +1706,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
 	struct dyn_ftrace *rec;
 	bool update = false;
 	int count = 0;
-	int all = 0;
+	int all = false;
 
 	/* Only update if the ops has been registered */
 	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
@@ -1722,7 +1727,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
 		hash = ops->func_hash->filter_hash;
 		other_hash = ops->func_hash->notrace_hash;
 		if (ftrace_hash_empty(hash))
-			all = 1;
+			all = true;
 	} else {
 		inc = !inc;
 		hash = ops->func_hash->notrace_hash;
@@ -4028,6 +4033,9 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
 		free_ftrace_mod(ftrace_mod);
 	}
 
+	if (enable && list_empty(head))
+		new_hash->flags &= ~FTRACE_HASH_FL_MOD;
+
 	mutex_lock(&ftrace_lock);
 
 	ret = ftrace_hash_move_and_update_ops(ops, orig_hash,
@@ -5035,9 +5043,11 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 	if (file->f_mode & FMODE_WRITE) {
 		filter_hash = !!(iter->flags & FTRACE_ITER_FILTER);
 
-		if (filter_hash)
+		if (filter_hash) {
 			orig_hash = &iter->ops->func_hash->filter_hash;
-		else
+			if (!list_empty(&iter->tr->mod_trace))
+				iter->hash->flags |= FTRACE_HASH_FL_MOD;
+		} else
 			orig_hash = &iter->ops->func_hash->notrace_hash;
 
 		mutex_lock(&ftrace_lock);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d63550cdbdfa..13823951e42b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -773,10 +773,15 @@ struct ftrace_mod_load {
 	int			 enable;
 };
 
+enum {
+	FTRACE_HASH_FL_MOD	= (1 << 0),
+};
+
 struct ftrace_hash {
 	unsigned long		size_bits;
 	struct hlist_head	*buckets;
 	unsigned long		count;
+	unsigned long		flags;
 	struct rcu_head		rcu;
 };
 
@@ -785,7 +790,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip);
 
 static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
 {
-	return !hash || !hash->count;
+	return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD));
 }
 
 /* Standard output formatting function used for function return traces */
-- 
cgit v1.2.3


From f8475cef90082bf0902ddab106112de130d90395 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 23 Jun 2017 22:11:52 -0700
Subject: x86: use common aperfmperf_khz_on_cpu() to calculate KHz using
 APERF/MPERF

The goal of this change is to give users a uniform and meaningful
result when they read /sys/...cpufreq/scaling_cur_freq
on modern x86 hardware, as compared to what they get today.

Modern x86 processors include the hardware needed
to accurately calculate frequency over an interval --
APERF, MPERF, and the TSC.

Here we provide an x86 routine to make this calculation
on supported hardware, and use it in preference to any
driver driver-specific cpufreq_driver.get() routine.

MHz is computed like so:

MHz = base_MHz * delta_APERF / delta_MPERF

MHz is the average frequency of the busy processor
over a measurement interval.  The interval is
defined to be the time between successive invocations
of aperfmperf_khz_on_cpu(), which are expected to to
happen on-demand when users read sysfs attribute
cpufreq/scaling_cur_freq.

As with previous methods of calculating MHz,
idle time is excluded.

base_MHz above is from TSC calibration global "cpu_khz".

This x86 native method to calculate MHz returns a meaningful result
no matter if P-states are controlled by hardware or firmware
and/or if the Linux cpufreq sub-system is or is-not installed.

When this routine is invoked more frequently, the measurement
interval becomes shorter.  However, the code limits re-computation
to 10ms intervals so that average frequency remains meaningful.

Discerning users are encouraged to take advantage of
the turbostat(8) utility, which can gracefully handle
concurrent measurement intervals of arbitrary length.

Signed-off-by: Len Brown <len.brown@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/cpu/Makefile     |  1 +
 arch/x86/kernel/cpu/aperfmperf.c | 79 ++++++++++++++++++++++++++++++++++++++++
 drivers/cpufreq/cpufreq.c        | 12 +++++-
 include/linux/cpufreq.h          |  2 +
 4 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/aperfmperf.c

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 52000010c62e..cdf82492b770 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -21,6 +21,7 @@ obj-y			+= common.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
 obj-y			+= bugs.o
+obj-$(CONFIG_CPU_FREQ)	+= aperfmperf.o
 
 obj-$(CONFIG_PROC_FS)	+= proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
new file mode 100644
index 000000000000..d869c8671e36
--- /dev/null
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -0,0 +1,79 @@
+/*
+ * x86 APERF/MPERF KHz calculation for
+ * /sys/.../cpufreq/scaling_cur_freq
+ *
+ * Copyright (C) 2017 Intel Corp.
+ * Author: Len Brown <len.brown@intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+
+struct aperfmperf_sample {
+	unsigned int	khz;
+	unsigned long	jiffies;
+	u64	aperf;
+	u64	mperf;
+};
+
+static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+
+/*
+ * aperfmperf_snapshot_khz()
+ * On the current CPU, snapshot APERF, MPERF, and jiffies
+ * unless we already did it within 10ms
+ * calculate kHz, save snapshot
+ */
+static void aperfmperf_snapshot_khz(void *dummy)
+{
+	u64 aperf, aperf_delta;
+	u64 mperf, mperf_delta;
+	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
+
+	/* Don't bother re-computing within 10 ms */
+	if (time_before(jiffies, s->jiffies + HZ/100))
+		return;
+
+	rdmsrl(MSR_IA32_APERF, aperf);
+	rdmsrl(MSR_IA32_MPERF, mperf);
+
+	aperf_delta = aperf - s->aperf;
+	mperf_delta = mperf - s->mperf;
+
+	/*
+	 * There is no architectural guarantee that MPERF
+	 * increments faster than we can read it.
+	 */
+	if (mperf_delta == 0)
+		return;
+
+	/*
+	 * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then
+	 *	khz = (cpu_khz * aperf_delta) / mperf_delta
+	 */
+	if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta)
+		s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+	else	/* khz = aperf_delta / (mperf_delta / cpu_khz) */
+		s->khz = div64_u64(aperf_delta,
+			div64_u64(mperf_delta, cpu_khz));
+	s->jiffies = jiffies;
+	s->aperf = aperf;
+	s->mperf = mperf;
+}
+
+unsigned int arch_freq_get_on_cpu(int cpu)
+{
+	if (!cpu_khz)
+		return 0;
+
+	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+		return 0;
+
+	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+
+	return per_cpu(samples.khz, cpu);
+}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 26b643d57847..6e7424d12de9 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -632,11 +632,21 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
 show_one(scaling_min_freq, min);
 show_one(scaling_max_freq, max);
 
+__weak unsigned int arch_freq_get_on_cpu(int cpu)
+{
+	return 0;
+}
+
 static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf)
 {
 	ssize_t ret;
+	unsigned int freq;
 
-	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
+	freq = arch_freq_get_on_cpu(policy->cpu);
+	if (freq)
+		ret = sprintf(buf, "%u\n", freq);
+	else if (cpufreq_driver && cpufreq_driver->setpolicy &&
+			cpufreq_driver->get)
 		ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu));
 	else
 		ret = sprintf(buf, "%u\n", policy->cur);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a5ce0bbeadb5..905117bd5012 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -883,6 +883,8 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
 }
 #endif
 
+extern unsigned int arch_freq_get_on_cpu(int cpu);
+
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
-- 
cgit v1.2.3


From fc61ed51270e86440cf7cf84cbe1d86753592932 Mon Sep 17 00:00:00 2001
From: Okash Khawaja <okash.khawaja@gmail.com>
Date: Sun, 25 Jun 2017 19:40:00 +0100
Subject: tty: add function to convert device name to number

The function converts strings like ttyS0 and ttyUSB0 to dev_t like
(4, 64) and (188, 0). It does this by scanning tty_drivers list for
corresponding device name and index. If the driver is not registered,
this function returns -ENODEV. It also acquires tty_mutex.

Signed-off-by: Okash Khawaja <okash.khawaja@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/tty.h  |  3 +++
 2 files changed, 53 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 49abf04c90b2..974b13d24401 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -325,6 +325,56 @@ static struct tty_driver *get_tty_driver(dev_t device, int *index)
 	return NULL;
 }
 
+/**
+ *	tty_dev_name_to_number	-	return dev_t for device name
+ *	@name: user space name of device under /dev
+ *	@number: pointer to dev_t that this function will populate
+ *
+ *	This function converts device names like ttyS0 or ttyUSB1 into dev_t
+ *	like (4, 64) or (188, 1). If no corresponding driver is registered then
+ *	the function returns -ENODEV.
+ *
+ *	Locking: this acquires tty_mutex to protect the tty_drivers list from
+ *		being modified while we are traversing it, and makes sure to
+ *		release it before exiting.
+ */
+int tty_dev_name_to_number(const char *name, dev_t *number)
+{
+	struct tty_driver *p;
+	int ret;
+	int index, prefix_length = 0;
+	const char *str;
+
+	for (str = name; *str && !isdigit(*str); str++)
+		;
+
+	if (!*str)
+		return -EINVAL;
+
+	ret = kstrtoint(str, 10, &index);
+	if (ret)
+		return ret;
+
+	prefix_length = str - name;
+	mutex_lock(&tty_mutex);
+
+	list_for_each_entry(p, &tty_drivers, tty_drivers)
+		if (prefix_length == strlen(p->name) && strncmp(name,
+					p->name, prefix_length) == 0) {
+			if (index < p->num) {
+				*number = MKDEV(p->major, p->minor_start + index);
+				goto out;
+			}
+		}
+
+	/* if here then driver wasn't found */
+	ret = -ENODEV;
+out:
+	mutex_unlock(&tty_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tty_dev_name_to_number);
+
 #ifdef CONFIG_CONSOLE_POLL
 
 /**
diff --git a/include/linux/tty.h b/include/linux/tty.h
index b75b2d51ba2b..8156a9ee6fe6 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -402,6 +402,7 @@ extern int __init tty_init(void);
 extern const char *tty_name(const struct tty_struct *tty);
 extern struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode,
 		struct file *filp);
+extern int tty_dev_name_to_number(const char *name, dev_t *number);
 #else
 static inline void tty_kref_put(struct tty_struct *tty)
 { }
@@ -425,6 +426,8 @@ static inline const char *tty_name(const struct tty_struct *tty)
 static inline struct tty_struct *tty_open_by_driver(dev_t device,
 		struct inode *inode, struct file *filp)
 { return NULL; }
+static inline int tty_dev_name_to_number(const char *name, dev_t *number)
+{ return -ENOTSUPP; }
 #endif
 
 extern struct ktermios tty_std_termios;
-- 
cgit v1.2.3


From 2a0165a034ac024b60cca49c61e46f4afa2e4d98 Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 30 Mar 2017 17:09:00 +0300
Subject: net/mlx5: Cancel delayed recovery work when unloading the driver

Draining the health workqueue will ignore future health works including
the one that report hardware failure and thus we can't enter error state
Instead cancel the recovery flow and make sure only recovery flow won't
be scheduled.

Fixes: 5e44fca50470 ('net/mlx5: Only cancel recovery work when cleaning up device')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 15 ++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c   |  2 +-
 include/linux/mlx5/driver.h                      |  1 +
 3 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index f27f84ffbc85..8a8b5f0e497c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -67,6 +67,7 @@ enum {
 
 enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
+	MLX5_DROP_NEW_RECOVERY_WORK,
 };
 
 static u8 get_nic_state(struct mlx5_core_dev *dev)
@@ -193,7 +194,7 @@ static void health_care(struct work_struct *work)
 	mlx5_handle_bad_state(dev);
 
 	spin_lock(&health->wq_lock);
-	if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+	if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
 		schedule_delayed_work(&health->recover_work, recover_delay);
 	else
 		dev_err(&dev->pdev->dev,
@@ -313,6 +314,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 	init_timer(&health->timer);
 	health->sick = 0;
 	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+	clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
@@ -335,11 +337,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 
 	spin_lock(&health->wq_lock);
 	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	spin_unlock(&health->wq_lock);
 	cancel_delayed_work_sync(&health->recover_work);
 	cancel_work_sync(&health->work);
 }
 
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	spin_lock(&health->wq_lock);
+	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+	spin_unlock(&health->wq_lock);
+	cancel_delayed_work_sync(&dev->priv.health.recover_work);
+}
+
 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fd47b5134841..524c16f72e83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1228,7 +1228,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	int err = 0;
 
 	if (cleanup)
-		mlx5_drain_health_wq(dev);
+		mlx5_drain_health_recovery(dev);
 
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 93273d9ea4d1..ba260330ce5e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -925,6 +925,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-- 
cgit v1.2.3


From 52ec462eca9b87b8036209483efe1c6cf9c49d9a Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Sun, 26 Mar 2017 17:01:57 +0300
Subject: net/mlx5: Add reserved-gids support

Reserved GIDs are entries in the GID table in use by the mlx5_core
and its submodules (e.g. FPGA, SRIOV, E-Swtich, netdev).
The entries are reserved at the high indexes of the GID table.

A mlx5 submodule may reserve a certain amount of GIDs for its own use
during the load sequence by calling mlx5_core_reserve_gids, and must
also take care to un-reserve these GIDs when it closes.
Reservation is only allowed during the load sequence and before any
interfaces (e.g. mlx5_ib or mlx5_en) are up.

After reservation, a submodule may call mlx5_core_reserved_gid_alloc/
free to allocate entries from the reserved GIDs pool.

Reserve a GID table entry for every supported FPGA QP.

A later patch in the patchset will remove them from being reported to
IB core.
Another such patch will make use of these for FPGA QPs in Innova NIC.

Added lib/mlx5.h to serve as a library for mlx5 submodlues, and to
expose only public mlx5 API, more mlx5 library files will be added in
future submissions.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/fpga/core.c    |  31 ++++-
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h    |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c  | 154 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h |  43 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  11 +-
 include/linux/mlx5/driver.h                        |  17 +++
 7 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5ad093a21a6e..738867bab21f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,7 +4,7 @@ subdir-ccflags-y += -I$(src)
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o lag.o dev.o
+		fs_counters.o rl.o lag.o dev.o lib/gid.o
 
 mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index d88b332e9669..92d8b1b6e598 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/driver.h>
 
 #include "mlx5_core.h"
+#include "lib/mlx5.h"
 #include "fpga/core.h"
 
 static const char *const mlx5_fpga_error_strings[] = {
@@ -104,6 +105,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned long flags;
+	unsigned int max_num_qps;
 	int err;
 
 	if (!fdev)
@@ -123,6 +125,9 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 		       mlx5_fpga_image_name(fdev->last_oper_image),
 		       MLX5_CAP_FPGA(fdev->mdev, image_version));
 
+	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+	err = mlx5_core_reserve_gids(mdev, max_num_qps);
+
 out:
 	spin_lock_irqsave(&fdev->state_lock, flags);
 	fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
@@ -151,9 +156,33 @@ int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned int max_num_qps;
+	unsigned long flags;
+
+	if (!fdev)
+		return;
+
+	spin_lock_irqsave(&fdev->state_lock, flags);
+	if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+		spin_unlock_irqrestore(&fdev->state_lock, flags);
+		return;
+	}
+	fdev->state = MLX5_FPGA_STATUS_NONE;
+	spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+	mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
 void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
 {
-	kfree(mdev->fpga);
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	mlx5_fpga_device_stop(mdev);
+	kfree(fdev);
 	mdev->fpga = NULL;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index c55044d66778..557d83973ade 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -71,6 +71,7 @@ struct mlx5_fpga_device {
 int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev);
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
 void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
 
 #else
@@ -89,6 +90,10 @@ static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
 static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
 				   void *data)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 000000000000..4d0db481f6c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+	unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+	ida_init(&dev->roce.reserved_gids.ida);
+	dev->roce.reserved_gids.start = tblsz;
+	dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+	WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+	dev->roce.reserved_gids.start = 0;
+	dev->roce.reserved_gids.count = 0;
+	ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
+		return -EPERM;
+	}
+	if (dev->roce.reserved_gids.start < count) {
+		mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
+			       count);
+		return -ENOMEM;
+	}
+	if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+		mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count);
+		return -ENOMEM;
+	}
+
+	dev->roce.reserved_gids.start -= count;
+	dev->roce.reserved_gids.count += count;
+	mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+	return 0;
+}
+
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
+	WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+	     count, dev->roce.reserved_gids.count);
+
+	dev->roce.reserved_gids.start += count;
+	dev->roce.reserved_gids.count -= count;
+	mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+	int end = dev->roce.reserved_gids.start +
+		  dev->roce.reserved_gids.count;
+	int index = 0;
+
+	index = ida_simple_get(&dev->roce.reserved_gids.ida,
+			       dev->roce.reserved_gids.start, end,
+			       GFP_KERNEL);
+	if (index < 0)
+		return index;
+
+	mlx5_core_dbg(dev, "Allodating reserved GID %u\n", index);
+	*gid_index = index;
+	return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+	mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+	ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+	return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
+			   const u8 *mac, bool vlan, u16 vlan_id)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+	char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+					  source_l3_address);
+	void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+				      source_mac_47_32);
+	int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return -EINVAL;
+
+	if (gid) {
+		if (vlan) {
+			MLX5_SET_RA(in_addr, vlan_valid, 1);
+			MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+		}
+
+		ether_addr_copy(addr_mac, mac);
+		MLX5_SET_RA(in_addr, roce_version, roce_version);
+		MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+		memcpy(addr_l3_addr, gid, gidsz);
+	}
+
+	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 000000000000..7550b1cc8c6a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 9a5a475d9e00..55f9fccfc394 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -56,6 +56,7 @@
 #ifdef CONFIG_MLX5_CORE_EN
 #include "eswitch.h"
 #endif
+#include "lib/mlx5.h"
 #include "fpga/core.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -936,6 +937,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
 	mlx5_init_mkey_table(dev);
 
+	mlx5_init_reserved_gids(dev);
+
 	err = mlx5_init_rl_table(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -986,6 +989,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
 #endif
 	mlx5_cleanup_rl_table(dev);
+	mlx5_cleanup_reserved_gids(dev);
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
@@ -1160,7 +1164,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	err = mlx5_fpga_device_start(dev);
 	if (err) {
 		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
-		goto err_reg_dev;
+		goto err_fpga_start;
 	}
 
 	if (mlx5_device_registered(dev)) {
@@ -1181,6 +1185,9 @@ out:
 	return 0;
 
 err_reg_dev:
+	mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
 	mlx5_sriov_detach(dev);
 
 err_sriov:
@@ -1260,6 +1267,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (mlx5_device_registered(dev))
 		mlx5_detach_device(dev);
 
+	mlx5_fpga_device_stop(dev);
+
 	mlx5_sriov_detach(dev);
 #ifdef CONFIG_MLX5_CORE_EN
 	mlx5_eswitch_detach(dev->priv.eswitch);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 750701b3b863..08e99bd2cd77 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,6 +44,7 @@
 #include <linux/workqueue.h>
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
+#include <linux/idr.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
@@ -737,6 +738,14 @@ struct mlx5e_resources {
 	struct mlx5_sq_bfreg       bfreg;
 };
 
+#define MLX5_MAX_RESERVED_GIDS 8
+
+struct mlx5_rsvd_gids {
+	unsigned int start;
+	unsigned int count;
+	struct ida ida;
+};
+
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
 	/* sync pci state */
@@ -766,6 +775,9 @@ struct mlx5_core_dev {
 	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
+	struct {
+		struct mlx5_rsvd_gids	reserved_gids;
+	} roce;
 #ifdef CONFIG_MLX5_FPGA
 	struct mlx5_fpga_device *fpga;
 #endif
@@ -1045,6 +1057,11 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
 		     bool map_wc, bool fast_path);
 void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
+			   const u8 *mac, bool vlan, u16 vlan_id);
+
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
 	return ioread32be(&dev->iseg->initializing) >> 31;
-- 
cgit v1.2.3


From a6f7d2aff623bb7572d4bca1caf5820e0cd5a586 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Sun, 26 Mar 2017 17:23:42 +0300
Subject: net/mlx5: Add support for multiple RoCE enable

Previously, only mlx5_ib enabled RoCE on the port, but FPGA needs it as
well.
Add support for counting number of enables, so that FPGA and IB can work
in parallel and independently.
Program the HW to enable RoCE on the first enable call, and program to
disable RoCE on the last disable call.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 4 ++++
 include/linux/mlx5/driver.h                     | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 06019d00ab7b..5abfec1c3399 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -926,12 +926,16 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
 
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
 {
+	if (atomic_inc_return(&mdev->roce.roce_en) != 1)
+		return 0;
 	return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
 
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
 {
+	if (atomic_dec_return(&mdev->roce.roce_en) != 0)
+		return 0;
 	return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 08e99bd2cd77..32b0835d4491 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -777,6 +777,7 @@ struct mlx5_core_dev {
 	struct mlx5e_resources  mlx5e_res;
 	struct {
 		struct mlx5_rsvd_gids	reserved_gids;
+		atomic_t                roce_en;
 	} roce;
 #ifdef CONFIG_MLX5_FPGA
 	struct mlx5_fpga_device *fpga;
-- 
cgit v1.2.3


From 6062118d5cd2b90369278cdf831aeffb84ae3943 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Mon, 27 Mar 2017 14:52:09 +0300
Subject: net/mlx5: FPGA, Add FW commands for FPGA QPs

The FPGA QP is a high-bandwidth communication channel between the host
CPU and the FPGA device. It allows performing DMA operations between
host memory and the FPGA logic via the ConnectX chip.

Add ConnectX FW commands which create and manipulate FPGA QPs.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  10 ++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c |  98 ++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |  21 +++
 include/linux/mlx5/mlx5_ifc.h                      |   5 +
 include/linux/mlx5/mlx5_ifc_fpga.h                 | 199 +++++++++++++++++++++
 5 files changed, 333 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 4d5bd01f1ebb..f5a2c605749f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -307,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
 	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
 	case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_FPGA_DESTROY_QP:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
 	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_FPGA_CREATE_QP:
+	case MLX5_CMD_OP_FPGA_MODIFY_QP:
+	case MLX5_CMD_OP_FPGA_QUERY_QP:
+	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -585,6 +590,11 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
 	MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+	MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+	MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+	MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 99cba644b4fc..8308ccbad85a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -33,6 +33,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mlx5/cmd.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
 
 #include "mlx5_core.h"
 #include "fpga/cmd.h"
@@ -62,3 +63,100 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 	query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
 	return 0;
 }
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+			u32 *fpga_qpn)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+	int ret;
+
+	MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+	memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
+	       MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc),
+	       MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+	*fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+	return ret;
+}
+
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+			enum mlx5_fpga_qpc_field_select fields,
+			void *fpga_qpc)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+
+	MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+	MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+	MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+	memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+	       MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+		       u32 fpga_qpn, void *fpga_qpc)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+	int ret;
+
+	MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+	MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, in, fpga_qpc),
+	       MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+	return ret;
+}
+
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+
+	MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
+	MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+				bool clear, struct mlx5_fpga_qp_counters *data)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+	int ret;
+
+	MLX5_SET(fpga_query_qp_counters_in, in, opcode,
+		 MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+	MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
+	MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					  rx_ack_packets);
+	data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					   rx_send_packets);
+	data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					  tx_ack_packets);
+	data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					   tx_send_packets);
+	data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out,
+					 rx_total_drop);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index a74396a61bc3..b851580d846f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -53,7 +53,28 @@ struct mlx5_fpga_query {
 	enum mlx5_fpga_status status;
 };
 
+enum mlx5_fpga_qpc_field_select {
+	MLX5_FPGA_QPC_STATE = BIT(0),
+};
+
+struct mlx5_fpga_qp_counters {
+	u64 rx_ack_packets;
+	u64 rx_send_packets;
+	u64 tx_ack_packets;
+	u64 tx_send_packets;
+	u64 rx_total_drop;
+};
+
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
 
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+			u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+			enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+				bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
+
 #endif /* __MLX5_FPGA_H__ */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d6b99d5d0f24..a8b3fcaa33ff 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -232,6 +232,11 @@ enum {
 	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
+	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
+	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
+	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
+	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
 	MLX5_CMD_OP_MAX
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 0032d10ac6cf..30d4b697fab6 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -141,4 +141,203 @@ struct mlx5_ifc_fpga_error_event_bits {
 	u8         reserved_at_60[0x80];
 };
 
+enum mlx5_ifc_fpga_qp_state {
+	MLX5_FPGA_QPC_STATE_INIT    = 0x0,
+	MLX5_FPGA_QPC_STATE_ACTIVE  = 0x1,
+	MLX5_FPGA_QPC_STATE_ERROR   = 0x2,
+};
+
+enum mlx5_ifc_fpga_qp_type {
+	MLX5_FPGA_QPC_QP_TYPE_SHELL_QP    = 0x0,
+	MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP  = 0x1,
+};
+
+enum mlx5_ifc_fpga_qp_service_type {
+	MLX5_FPGA_QPC_ST_RC  = 0x0,
+};
+
+struct mlx5_ifc_fpga_qpc_bits {
+	u8         state[0x4];
+	u8         reserved_at_4[0x1b];
+	u8         qp_type[0x1];
+
+	u8         reserved_at_20[0x4];
+	u8         st[0x4];
+	u8         reserved_at_28[0x10];
+	u8         traffic_class[0x8];
+
+	u8         ether_type[0x10];
+	u8         prio[0x3];
+	u8         dei[0x1];
+	u8         vid[0xc];
+
+	u8         reserved_at_60[0x20];
+
+	u8         reserved_at_80[0x8];
+	u8         next_rcv_psn[0x18];
+
+	u8         reserved_at_a0[0x8];
+	u8         next_send_psn[0x18];
+
+	u8         reserved_at_c0[0x10];
+	u8         pkey[0x10];
+
+	u8         reserved_at_e0[0x8];
+	u8         remote_qpn[0x18];
+
+	u8         reserved_at_100[0x15];
+	u8         rnr_retry[0x3];
+	u8         reserved_at_118[0x5];
+	u8         retry_count[0x3];
+
+	u8         reserved_at_120[0x20];
+
+	u8         reserved_at_140[0x10];
+	u8         remote_mac_47_32[0x10];
+
+	u8         remote_mac_31_0[0x20];
+
+	u8         remote_ip[16][0x8];
+
+	u8         reserved_at_200[0x40];
+
+	u8         reserved_at_240[0x10];
+	u8         fpga_mac_47_32[0x10];
+
+	u8         fpga_mac_31_0[0x20];
+
+	u8         fpga_ip[16][0x8];
+};
+
+struct mlx5_ifc_fpga_create_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_create_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         field_select[0x20];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_fpga_query_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         clear[0x1];
+	u8         reserved_at_41[0x7];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	u8         rx_ack_packets[0x40];
+
+	u8         rx_send_packets[0x40];
+
+	u8         tx_ack_packets[0x40];
+
+	u8         tx_send_packets[0x40];
+
+	u8         rx_total_drop[0x40];
+
+	u8         reserved_at_1c0[0x1c0];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
 #endif /* MLX5_IFC_FPGA_H */
-- 
cgit v1.2.3


From c43051d72a8dc4a00d49db27292a76d26e8df7af Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Tue, 18 Apr 2017 12:54:27 +0300
Subject: net/mlx5: FPGA, Add SBU bypass and reset flows

The Innova FPGA includes shell hardware and Sandbox-Unit (SBU) hardware.
The shell hardware is handled by mlx5_core itself, while the SBU is
handled by a client driver.

Reset the SBU to a well-known initial state when initializing a new
device, and set the FPGA to bypass mode when uninitializing a device.
This allows the client driver to assume that its device has been
reset when a new device is detected.

During SBU reset, the FPGA is put into SBU-bypass mode. In this mode
packets do not pass through the SBU, so it cannot affect the network
data stream at all.

A factory-image does not have an SBU, so skip these flows.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c | 11 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |  1 +
 .../net/ethernet/mellanox/mlx5/core/fpga/core.c    | 40 ++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc_fpga.h                 |  9 +++++
 4 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 8308ccbad85a..a5fdb4cf0b9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -47,6 +47,17 @@ int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
 				    MLX5_REG_FPGA_CAP, 0, 0);
 }
 
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+	MLX5_SET(fpga_ctrl, in, operation, op);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				    MLX5_REG_FPGA_CTRL, 0, true);
+}
+
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 {
 	u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index b851580d846f..8943056163f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -67,6 +67,7 @@ struct mlx5_fpga_qp_counters {
 
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
 
 int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
 			u32 *fpga_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 7f859a3ad5d2..31e5a2627eb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -102,6 +102,29 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 	return 0;
 }
 
+int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+	int err;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+
+	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+		return err;
+	}
+	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+		return err;
+	}
+	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+		return err;
+	}
+	return 0;
+}
+
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -135,8 +158,17 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 	if (err)
 		goto err_rsvd_gid;
 
+	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+		err = mlx5_fpga_device_brb(fdev);
+		if (err)
+			goto err_conn_init;
+	}
+
 	goto out;
 
+err_conn_init:
+	mlx5_fpga_conn_device_cleanup(fdev);
+
 err_rsvd_gid:
 	mlx5_core_unreserve_gids(mdev, max_num_qps);
 out:
@@ -172,6 +204,7 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned int max_num_qps;
 	unsigned long flags;
+	int err;
 
 	if (!fdev)
 		return;
@@ -184,6 +217,13 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 	fdev->state = MLX5_FPGA_STATUS_NONE;
 	spin_unlock_irqrestore(&fdev->state_lock, flags);
 
+	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+		if (err)
+			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+				      err);
+	}
+
 	mlx5_fpga_conn_device_cleanup(fdev);
 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
 	mlx5_core_unreserve_gids(mdev, max_num_qps);
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 30d4b697fab6..0694077c9634 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -108,6 +108,15 @@ struct mlx5_ifc_fpga_cap_bits {
 	u8         reserved_at_500[0x300];
 };
 
+enum {
+	MLX5_FPGA_CTRL_OPERATION_LOAD                = 0x1,
+	MLX5_FPGA_CTRL_OPERATION_RESET               = 0x2,
+	MLX5_FPGA_CTRL_OPERATION_FLASH_SELECT        = 0x3,
+	MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON   = 0x4,
+	MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF  = 0x5,
+	MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX       = 0x6,
+};
+
 struct mlx5_ifc_fpga_ctrl_bits {
 	u8         reserved_at_0[0x8];
 	u8         operation[0x8];
-- 
cgit v1.2.3


From a9956d35d199beb406727a4496bc5d7f09c82976 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Tue, 18 Apr 2017 13:10:41 +0300
Subject: net/mlx5: FPGA, Add SBU infrastructure

Add interface to initialize and interact with Innova FPGA SBU
connections.
A client driver may use these functions to set up a high-speed DMA
connection with its SBU hardware logic, and send/receive messages
over this connection.

A later patch in this patchset will make use of these functions for
Innova IPSec offload in mlx5 Ethernet driver.

Add commands to retrieve Innova FPGA SBU capabilities, and to
read/write Innova FPGA configuration space registers and memory,
over internal I2C.

At high level, the FPGA configuration space is divided such:
 0x00000000 - 0x007fffff is reserved for the SBU
 0x00800000 - 0xffffffff is reserved for the Shell
0x400000000 - ...        is DDR memory

A later patchset will add support for accessing FPGA CrSpace and memory
over a high-speed connection. This is the reason for the ACCESS_TYPE
enumeration, which currently only supports I2C.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c |  65 ++++++++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c | 164 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h |  98 ++++++++++++
 include/linux/mlx5/device.h                        |   3 +
 include/linux/mlx5/driver.h                        |   1 +
 include/linux/mlx5/mlx5_ifc.h                      |   1 +
 include/linux/mlx5/mlx5_ifc_fpga.h                 |  13 ++
 9 files changed, 349 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5221b1235c47..676388fde239 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,7 +6,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
 		fs_counters.o rl.o lag.o dev.o lib/gid.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index a5fdb4cf0b9c..5cb855fd618f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -38,6 +38,39 @@
 #include "mlx5_core.h"
 #include "fpga/cmd.h"
 
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+				 MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+			 void *buf, bool write)
+{
+	u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+	u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+	int err;
+
+	if (size & 3)
+		return -EINVAL;
+	if (addr & 3)
+		return -EINVAL;
+	if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+		return -EINVAL;
+
+	MLX5_SET(fpga_access_reg, in, size, size);
+	MLX5_SET64(fpga_access_reg, in, address, addr);
+	if (write)
+		memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_FPGA_ACCESS_REG, 0, write);
+	if (err)
+		return err;
+
+	if (!write)
+		memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size);
+
+	return 0;
+}
+
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
 {
 	u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
@@ -58,6 +91,38 @@ int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
 				    MLX5_REG_FPGA_CTRL, 0, true);
 }
 
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+	unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+	u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+	unsigned int read;
+	int ret = 0;
+
+	if (cap_size > size) {
+		mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u",
+			       size, cap_size);
+		return -EINVAL;
+	}
+
+	while (cap_size > 0) {
+		read = min_t(unsigned int, cap_size,
+			     MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+		ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+		if (ret) {
+			mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d",
+				       read, addr, ret);
+			return ret;
+		}
+
+		cap_size -= read;
+		addr += read;
+		caps += read;
+	}
+
+	return ret;
+}
+
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 {
 	u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index 8943056163f3..94bdfd47c3f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -68,6 +68,9 @@ struct mlx5_fpga_qp_counters {
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
 int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+			 void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
 
 int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
 			u32 *fpga_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
new file mode 100644
index 000000000000..3c11d6e2160a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+			  struct mlx5_fpga_conn_attr *attr)
+{
+	return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+	mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+			       struct mlx5_fpga_dma_buf *buf)
+{
+	return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+				  u64 addr, u8 *buf)
+{
+	size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+	size_t bytes_done = 0;
+	u8 actual_size;
+	int err;
+
+	if (!fdev->mdev)
+		return -ENOTCONN;
+
+	while (bytes_done < size) {
+		actual_size = min(max_size, (size - bytes_done));
+
+		err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+					   addr + bytes_done,
+					   buf + bytes_done, false);
+		if (err) {
+			mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+				      err);
+			break;
+		}
+
+		bytes_done += actual_size;
+	}
+
+	return err;
+}
+
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+				   u64 addr, u8 *buf)
+{
+	size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+	size_t bytes_done = 0;
+	u8 actual_size;
+	int err;
+
+	if (!fdev->mdev)
+		return -ENOTCONN;
+
+	while (bytes_done < size) {
+		actual_size = min(max_size, (size - bytes_done));
+
+		err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+					   addr + bytes_done,
+					   buf + bytes_done, true);
+		if (err) {
+			mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n");
+			break;
+		}
+
+		bytes_done += actual_size;
+	}
+
+	return err;
+}
+
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+		       void *buf, enum mlx5_fpga_access_type access_type)
+{
+	int ret;
+
+	switch (access_type) {
+	case MLX5_FPGA_ACCESS_TYPE_I2C:
+		ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+		if (ret)
+			return ret;
+		break;
+	default:
+		mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+			       access_type);
+		return -EACCES;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+			void *buf, enum mlx5_fpga_access_type access_type)
+{
+	int ret;
+
+	switch (access_type) {
+	case MLX5_FPGA_ACCESS_TYPE_I2C:
+		ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+		if (ret)
+			return ret;
+		break;
+	default:
+		mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+			       access_type);
+		return -EACCES;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+	return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
index 331798d99a37..baa537e54a49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -42,6 +42,11 @@
  * This header defines the in-kernel API for Innova FPGA client drivers.
  */
 
+enum mlx5_fpga_access_type {
+	MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+	MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
 struct mlx5_fpga_conn;
 struct mlx5_fpga_device;
 
@@ -103,4 +108,97 @@ struct mlx5_fpga_conn_attr {
 	void *cb_arg;
 };
 
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+			  struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @fdev: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+			       struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+		       void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+			void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ *         -EINVAL if the buffer is not large enough to contain SBU caps
+ *         or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
 #endif /* MLX5_FPGA_SDK_H */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 556e1c31b5d0..f31a0b5377e1 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1103,6 +1103,9 @@ enum mlx5_mcam_feature_groups {
 #define MLX5_CAP_FPGA(mdev, cap) \
 	MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
 
+#define MLX5_CAP64_FPGA(mdev, cap) \
+	MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 32b0835d4491..2ab4ae3e3a1a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -111,6 +111,7 @@ enum {
 	MLX5_REG_DCBX_APP        = 0x4021,
 	MLX5_REG_FPGA_CAP	 = 0x4022,
 	MLX5_REG_FPGA_CTRL	 = 0x4023,
+	MLX5_REG_FPGA_ACCESS_REG = 0x4024,
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
 	MLX5_REG_PTYS		 = 0x5004,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a8b3fcaa33ff..c72f9735119d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8309,6 +8309,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
 	struct mlx5_ifc_mtpps_reg_bits mtpps_reg;
 	struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
+	struct mlx5_ifc_fpga_access_reg_bits fpga_access_reg;
 	struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits;
 	struct mlx5_ifc_fpga_cap_bits fpga_cap_bits;
 	struct mlx5_ifc_mcqi_reg_bits mcqi_reg;
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 0694077c9634..a3576654179e 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -150,6 +150,19 @@ struct mlx5_ifc_fpga_error_event_bits {
 	u8         reserved_at_60[0x80];
 };
 
+#define MLX5_FPGA_ACCESS_REG_SIZE_MAX 64
+
+struct mlx5_ifc_fpga_access_reg_bits {
+	u8         reserved_at_0[0x20];
+
+	u8         reserved_at_20[0x10];
+	u8         size[0x10];
+
+	u8         address[0x40];
+
+	u8         data[0][0x8];
+};
+
 enum mlx5_ifc_fpga_qp_state {
 	MLX5_FPGA_QPC_STATE_INIT    = 0x0,
 	MLX5_FPGA_QPC_STATE_ACTIVE  = 0x1,
-- 
cgit v1.2.3


From bebb23e6cb02d2fc752905e39d09ff6152852c6c Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Tue, 25 Apr 2017 22:42:31 +0300
Subject: net/mlx5: Accel, Add IPSec acceleration interface

Add routines for manipulating the hardware IPSec SA database (SADB).

In Innova IPSec, a Security Association (SA) is added or deleted
via a command message over the SBU connection.
The HW then sends a response message over the same connection.

Add implementation for Innova IPSec (FPGA-based) hardware.

These routines will be used by the IPSec offload support in a later patch
However they may also be used by others such as RDMA and RoCE IPSec.

mlx5/accel is a middle acceleration layer to allow mlx5e and other ULPs
to work directly with mlx5_core rather than Innova FPGA or other mlx5
acceleration providers.

In this patchset we add Innova IPSec support and mlx5/accel delegates
IPSec offloads to Innova routines.

In the future, when IPSec/TLS or any other acceleration gets integrated
into ConnectX chip, mlx5/accel layer will provide the integrated
acceleration, rather than the Innova one.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig    |   4 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   5 +-
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.c  |  78 +++++
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.h  | 138 ++++++++
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h    |   2 +
 .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.c   | 376 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.h   |  94 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   9 +
 include/linux/mlx5/mlx5_ifc_fpga.h                 |  67 ++++
 9 files changed, 772 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index cf1ef48bfd8d..d6c6cea8ebab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -11,9 +11,13 @@ config MLX5_CORE
 	  Core driver for low level functionality of the ConnectX-4 and
 	  Connect-IB cards by Mellanox Technologies.
 
+config MLX5_ACCEL
+	bool
+
 config MLX5_FPGA
         bool "Mellanox Technologies Innova support"
         depends on MLX5_CORE
+	select MLX5_ACCEL
         ---help---
           Build support for the Innova family of network cards by Mellanox
           Technologies. Innova network cards are comprised of a ConnectX chip
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 676388fde239..33557526f597 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,7 +6,10 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
 		fs_counters.o rl.o lag.o dev.o lib/gid.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
+		fpga/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
new file mode 100644
index 000000000000..53e69edaedde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/ipsec.h"
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				   struct mlx5_accel_ipsec_sa *cmd)
+{
+	if (!MLX5_IPSEC_DEV(mdev))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
+}
+
+int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
+{
+	return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
+}
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	return mlx5_fpga_ipsec_device_caps(mdev);
+}
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	return mlx5_fpga_ipsec_counters_count(mdev);
+}
+
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				   unsigned int count)
+{
+	return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
+}
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	return mlx5_fpga_ipsec_init(mdev);
+}
+
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+	mlx5_fpga_ipsec_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
new file mode 100644
index 000000000000..d6e20fea9554
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_IPSEC_H__
+#define __MLX5_ACCEL_IPSEC_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/mlx5/driver.h>
+
+enum {
+	MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
+	MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
+	MLX5_ACCEL_IPSEC_ESP = BIT(3),
+	MLX5_ACCEL_IPSEC_LSO = BIT(4),
+};
+
+#define MLX5_IPSEC_SADB_IP_AH       BIT(7)
+#define MLX5_IPSEC_SADB_IP_ESP      BIT(6)
+#define MLX5_IPSEC_SADB_SA_VALID    BIT(5)
+#define MLX5_IPSEC_SADB_SPI_EN      BIT(4)
+#define MLX5_IPSEC_SADB_DIR_SX      BIT(3)
+#define MLX5_IPSEC_SADB_IPV6        BIT(2)
+
+enum {
+	MLX5_IPSEC_CMD_ADD_SA = 0,
+	MLX5_IPSEC_CMD_DEL_SA = 1,
+};
+
+enum mlx5_accel_ipsec_enc_mode {
+	MLX5_IPSEC_SADB_MODE_NONE = 0,
+	MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
+	MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
+			      MLX5_ACCEL_IPSEC_DEVICE)
+
+struct mlx5_accel_ipsec_sa {
+	__be32 cmd;
+	u8 key_enc[32];
+	u8 key_auth[32];
+	__be32 sip[4];
+	__be32 dip[4];
+	union {
+		struct {
+			__be32 reserved;
+			u8 salt_iv[8];
+			__be32 salt;
+		} __packed gcm;
+		struct {
+			u8 salt[16];
+		} __packed cbc;
+	};
+	__be32 spi;
+	__be32 sw_sa_handle;
+	__be16 tfclen;
+	u8 enc_mode;
+	u8 sip_masklen;
+	u8 dip_masklen;
+	u8 flags;
+	u8 reserved[2];
+} __packed;
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
+ * @mdev: mlx5 device
+ * @cmd: command to execute
+ * May be called from atomic context. Returns context pointer, or error
+ * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
+ * context, to cleanup the context pointer
+ */
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				   struct mlx5_accel_ipsec_sa *cmd);
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
+ * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
+ * Sleeps (killable) until command execution is complete.
+ * Returns the command result, or -EINTR if killed
+ */
+int mlx5_accel_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				   unsigned int count);
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+#define MLX5_IPSEC_DEV(mdev) false
+
+static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif	/* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index f64fa1cdc195..82405ed84725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -51,6 +51,8 @@ struct mlx5_fpga_device {
 		struct mlx5_core_mkey mkey;
 		struct mlx5_uars_page *uar;
 	} conn_res;
+
+	struct mlx5_fpga_ipsec *ipsec;
 };
 
 #define mlx5_fpga_dbg(__adev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
new file mode 100644
index 000000000000..42970e2a05ff
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+
+#define SBU_QP_QUEUE_SIZE 8
+
+enum mlx5_ipsec_response_syndrome {
+	MLX5_IPSEC_RESPONSE_SUCCESS = 0,
+	MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+	MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
+	MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+enum mlx5_fpga_ipsec_sacmd_status {
+	MLX5_FPGA_IPSEC_SACMD_PENDING,
+	MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
+	MLX5_FPGA_IPSEC_SACMD_COMPLETE,
+};
+
+struct mlx5_ipsec_command_context {
+	struct mlx5_fpga_dma_buf buf;
+	struct mlx5_accel_ipsec_sa sa;
+	enum mlx5_fpga_ipsec_sacmd_status status;
+	int status_code;
+	struct completion complete;
+	struct mlx5_fpga_device *dev;
+	struct list_head list; /* Item in pending_cmds */
+};
+
+struct mlx5_ipsec_sadb_resp {
+	__be32 syndrome;
+	__be32 sw_sa_handle;
+	u8 reserved[24];
+} __packed;
+
+struct mlx5_fpga_ipsec {
+	struct list_head pending_cmds;
+	spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+	u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
+	struct mlx5_fpga_conn *conn;
+};
+
+static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+{
+	if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+		return false;
+
+	if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+	    MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+		return false;
+
+	if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+	    MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
+		return false;
+
+	return true;
+}
+
+static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
+					  struct mlx5_fpga_device *fdev,
+					  struct mlx5_fpga_dma_buf *buf,
+					  u8 status)
+{
+	struct mlx5_ipsec_command_context *context;
+
+	if (status) {
+		context = container_of(buf, struct mlx5_ipsec_command_context,
+				       buf);
+		mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
+			       status);
+		context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+		complete(&context->complete);
+	}
+}
+
+static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+{
+	switch (syndrome) {
+	case MLX5_IPSEC_RESPONSE_SUCCESS:
+		return 0;
+	case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+		return -EEXIST;
+	case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+		return -EINVAL;
+	case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+		return -EIO;
+	}
+	return -EIO;
+}
+
+static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
+{
+	struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
+	struct mlx5_ipsec_command_context *context;
+	enum mlx5_ipsec_response_syndrome syndrome;
+	struct mlx5_fpga_device *fdev = cb_arg;
+	unsigned long flags;
+
+	if (buf->sg[0].size < sizeof(*resp)) {
+		mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
+			       buf->sg[0].size, sizeof(*resp));
+		return;
+	}
+
+	mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
+		      ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+
+	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+	context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
+					   struct mlx5_ipsec_command_context,
+					   list);
+	if (context)
+		list_del(&context->list);
+	spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+	if (!context) {
+		mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
+		return;
+	}
+	mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
+
+	if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
+		mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+			      ntohl(context->sa.sw_sa_handle),
+			      ntohl(resp->sw_sa_handle));
+		return;
+	}
+
+	syndrome = ntohl(resp->syndrome);
+	context->status_code = syndrome_to_errno(syndrome);
+	context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+
+	if (context->status_code)
+		mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+			       syndrome);
+	complete(&context->complete);
+}
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				  struct mlx5_accel_ipsec_sa *cmd)
+{
+	struct mlx5_ipsec_command_context *context;
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned long flags;
+	int res = 0;
+
+	BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
+	if (!fdev || !fdev->ipsec)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&context->sa, cmd, sizeof(*cmd));
+	context->buf.complete = mlx5_fpga_ipsec_send_complete;
+	context->buf.sg[0].size = sizeof(context->sa);
+	context->buf.sg[0].data = &context->sa;
+	init_completion(&context->complete);
+	context->dev = fdev;
+	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+	list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+	spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+	context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
+
+	res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+	if (res) {
+		mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
+			       res);
+		spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+		list_del(&context->list);
+		spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+		kfree(context);
+		return ERR_PTR(res);
+	}
+	/* Context will be freed by wait func after completion */
+	return context;
+}
+
+int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+{
+	struct mlx5_ipsec_command_context *context = ctx;
+	int res;
+
+	res = wait_for_completion_killable(&context->complete);
+	if (res) {
+		mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
+		return -EINTR;
+	}
+
+	if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+		res = context->status_code;
+	else
+		res = -EIO;
+
+	kfree(context);
+	return res;
+}
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	u32 ret = 0;
+
+	if (mlx5_fpga_is_ipsec_device(mdev))
+		ret |= MLX5_ACCEL_IPSEC_DEVICE;
+	else
+		return ret;
+
+	if (!fdev->ipsec)
+		return ret;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
+		ret |= MLX5_ACCEL_IPSEC_ESP;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
+		ret |= MLX5_ACCEL_IPSEC_IPV6;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
+		ret |= MLX5_ACCEL_IPSEC_LSO;
+
+	return ret;
+}
+
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	if (!fdev || !fdev->ipsec)
+		return 0;
+
+	return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			number_of_ipsec_counters);
+}
+
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				  unsigned int counters_count)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned int i;
+	u32 *data;
+	u32 count;
+	u64 addr;
+	int ret;
+
+	if (!fdev || !fdev->ipsec)
+		return 0;
+
+	addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			     ipsec_counters_addr_low) +
+	       ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			     ipsec_counters_addr_high) << 32);
+
+	count = mlx5_fpga_ipsec_counters_count(mdev);
+
+	data = kzalloc(sizeof(u32) * count * 2, GFP_KERNEL);
+	if (!data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
+				 MLX5_FPGA_ACCESS_TYPE_DONTCARE);
+	if (ret < 0) {
+		mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
+			      ret);
+		goto out;
+	}
+	ret = 0;
+
+	if (count > counters_count)
+		count = counters_count;
+
+	/* Each counter is low word, then high. But each word is big-endian */
+	for (i = 0; i < count; i++)
+		counters[i] = (u64)ntohl(data[i * 2]) |
+			      ((u64)ntohl(data[i * 2 + 1]) << 32);
+
+out:
+	kfree(data);
+	return ret;
+}
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_conn_attr init_attr = {0};
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	struct mlx5_fpga_conn *conn;
+	int err;
+
+	if (!mlx5_fpga_is_ipsec_device(mdev))
+		return 0;
+
+	fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
+	if (!fdev->ipsec)
+		return -ENOMEM;
+
+	err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
+				     fdev->ipsec->caps);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
+			      err);
+		goto error;
+	}
+
+	INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
+	spin_lock_init(&fdev->ipsec->pending_cmds_lock);
+
+	init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+	init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+	init_attr.recv_cb = mlx5_fpga_ipsec_recv;
+	init_attr.cb_arg = fdev;
+	conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+	if (IS_ERR(conn)) {
+		err = PTR_ERR(conn);
+		mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
+			      err);
+		goto error;
+	}
+	fdev->ipsec->conn = conn;
+	return 0;
+
+error:
+	kfree(fdev->ipsec);
+	fdev->ipsec = NULL;
+	return err;
+}
+
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	if (!mlx5_fpga_is_ipsec_device(mdev))
+		return;
+
+	mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
+	kfree(fdev->ipsec);
+	fdev->ipsec = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
new file mode 100644
index 000000000000..26a3e4b56972
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_IPSEC_H__
+#define __MLX5_FPGA_IPSEC_H__
+
+#include "accel/ipsec.h"
+
+#ifdef CONFIG_MLX5_FPGA
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				  struct mlx5_accel_ipsec_sa *cmd);
+int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				  unsigned int counters_count);
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+						struct mlx5_accel_ipsec_sa *cmd)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline unsigned int
+mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
+						u64 *counters)
+{
+	return 0;
+}
+
+static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif /* CONFIG_MLX5_FPGA */
+
+#endif	/* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 684612778677..719f8e974482 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -58,6 +58,7 @@
 #endif
 #include "lib/mlx5.h"
 #include "fpga/core.h"
+#include "accel/ipsec.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -1169,6 +1170,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
 		goto err_fpga_start;
 	}
+	err = mlx5_accel_ipsec_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+		goto err_ipsec_start;
+	}
 
 	if (mlx5_device_registered(dev)) {
 		mlx5_attach_device(dev);
@@ -1188,6 +1194,8 @@ out:
 	return 0;
 
 err_reg_dev:
+	mlx5_accel_ipsec_cleanup(dev);
+err_ipsec_start:
 	mlx5_fpga_device_stop(dev);
 
 err_fpga_start:
@@ -1267,6 +1275,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (mlx5_device_registered(dev))
 		mlx5_detach_device(dev);
 
+	mlx5_accel_ipsec_cleanup(dev);
 	mlx5_fpga_device_stop(dev);
 
 	mlx5_sriov_detach(dev);
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index a3576654179e..255a88d08078 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,6 +32,14 @@
 #ifndef MLX5_IFC_FPGA_H
 #define MLX5_IFC_FPGA_H
 
+enum {
+	MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
+};
+
+enum {
+	MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC    = 0x2,
+};
+
 struct mlx5_ifc_fpga_shell_caps_bits {
 	u8         max_num_qps[0x10];
 	u8         reserved_at_10[0x8];
@@ -362,4 +370,63 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_ipsec_extended_cap_bits {
+	u8         encapsulation[0x20];
+
+	u8         reserved_0[0x15];
+	u8         ipv4_fragment[0x1];
+	u8         ipv6[0x1];
+	u8         esn[0x1];
+	u8         lso[0x1];
+	u8         transport_and_tunnel_mode[0x1];
+	u8         tunnel_mode[0x1];
+	u8         transport_mode[0x1];
+	u8         ah_esp[0x1];
+	u8         esp[0x1];
+	u8         ah[0x1];
+	u8         ipv4_options[0x1];
+
+	u8         auth_alg[0x20];
+
+	u8         enc_alg[0x20];
+
+	u8         sa_cap[0x20];
+
+	u8         reserved_1[0x10];
+	u8         number_of_ipsec_counters[0x10];
+
+	u8         ipsec_counters_addr_low[0x20];
+	u8         ipsec_counters_addr_high[0x20];
+};
+
+struct mlx5_ifc_ipsec_counters_bits {
+	u8         dec_in_packets[0x40];
+
+	u8         dec_out_packets[0x40];
+
+	u8         dec_bypass_packets[0x40];
+
+	u8         enc_in_packets[0x40];
+
+	u8         enc_out_packets[0x40];
+
+	u8         enc_bypass_packets[0x40];
+
+	u8         drop_dec_packets[0x40];
+
+	u8         failed_auth_dec_packets[0x40];
+
+	u8         drop_enc_packets[0x40];
+
+	u8         success_add_sa[0x40];
+
+	u8         fail_add_sa[0x40];
+
+	u8         success_delete_sa[0x40];
+
+	u8         fail_delete_sa[0x40];
+
+	u8         dropped_cmd[0x40];
+};
+
 #endif /* MLX5_IFC_FPGA_H */
-- 
cgit v1.2.3


From 547eede070eb981f1442e494f08f4567dcf1d1c7 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Tue, 18 Apr 2017 16:04:28 +0300
Subject: net/mlx5e: IPSec, Innova IPSec offload infrastructure

Add Innova IPSec ESP crypto offload configuration paths.
Detect Innova IPSec device and set the NETIF_F_HW_ESP flag.
Configure Security Associations using the API introduced in a previous
patch.

Add Software-parser hardware descriptor layout
Software-Parser (swp) is a hardware feature in ConnectX which allows the
host software to specify protocol header offsets in the TX path, thus
overriding the hardware parser.
This is useful for protocols that the ASIC may not be able to parse on
its own.

Note that due to inline metadata, XDP is not supported in Innova IPSec.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 MAINTAINERS                                        |  10 +
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig    |  12 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   3 +
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   | 415 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h   |  78 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  14 +
 include/linux/mlx5/mlx5_ifc.h                      |   8 +-
 include/linux/mlx5/qp.h                            |  14 +-
 9 files changed, 552 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 71a74555afdf..c324460d5042 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8327,6 +8327,16 @@ Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 F:	include/linux/mlx5/mlx5_ifc_fpga.h
 
+MELLANOX ETHERNET INNOVA IPSEC DRIVER
+M:	Ilan Tayari <ilant@mellanox.com>
+R:	Boris Pismenny <borisp@mellanox.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+F:	drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
+F:	drivers/net/ethernet/mellanox/mlx5/core/ipsec*
+
 MELLANOX ETHERNET SWITCH DRIVERS
 M:	Jiri Pirko <jiri@mellanox.com>
 M:	Ido Schimmel <idosch@mellanox.com>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index d6c6cea8ebab..5aee05992f27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -52,3 +52,15 @@ config MLX5_CORE_IPOIB
 	default n
 	---help---
 	  MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+	bool "IPSec XFRM cryptography-offload accelaration"
+	depends on MLX5_ACCEL
+	depends on MLX5_CORE_EN
+	depends on XFRM_OFFLOAD
+	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+	default n
+	---help---
+	  Build support for IPsec cryptography-offload accelaration in the NIC.
+	  Note: Support for hardware with this capability needs to be selected
+	  for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 33557526f597..7e81084a75ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -19,3 +19,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f93f44d1d1cf..535ffd78a34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -784,6 +784,9 @@ struct mlx5e_priv {
 
 	const struct mlx5e_profile *profile;
 	void                      *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+	struct mlx5e_ipsec        *ipsec;
+#endif
 };
 
 struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000000..06d9d6ad93ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "en_accel/ipsec.h"
+
+struct mlx5e_ipsec_sa_entry {
+	struct hlist_node hlist; /* Item in SADB_RX hashtable */
+	unsigned int handle; /* Handle in SADB_RX */
+	struct xfrm_state *x;
+	struct mlx5e_ipsec *ipsec;
+	void *context;
+};
+
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto out;
+
+	sa_entry->handle = ret;
+	hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+	ret = 0;
+
+out:
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+	return ret;
+}
+
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	hash_del_rcu(&sa_entry->hlist);
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+
+	/* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
+	synchronize_rcu();
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+{
+	unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
+
+	switch (key_len) {
+	case 16:
+		return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
+	case 32:
+		return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
+	default:
+		netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
+			    key_len, x->aead->alg_name);
+		return -1;
+	}
+}
+
+static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
+				    struct mlx5_accel_ipsec_sa *hw_sa)
+{
+	struct xfrm_state *x = sa_entry->x;
+	struct aead_geniv_ctx *geniv_ctx;
+	unsigned int crypto_data_len;
+	struct crypto_aead *aead;
+	unsigned int key_len;
+	int ivsize;
+
+	memset(hw_sa, 0, sizeof(*hw_sa));
+
+	if (op == MLX5_IPSEC_CMD_ADD_SA) {
+		crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+		key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+		aead = x->data;
+		geniv_ctx = crypto_aead_ctx(aead);
+		ivsize = crypto_aead_ivsize(aead);
+
+		memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
+		/* Duplicate 128 bit key twice according to HW layout */
+		if (key_len == 16)
+			memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
+		memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
+		hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
+	}
+
+	hw_sa->cmd = htonl(op);
+	hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
+	if (x->props.family == AF_INET) {
+		hw_sa->sip[3] = x->props.saddr.a4;
+		hw_sa->dip[3] = x->id.daddr.a4;
+		hw_sa->sip_masklen = 32;
+		hw_sa->dip_masklen = 32;
+	} else {
+		memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
+		memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
+		hw_sa->sip_masklen = 128;
+		hw_sa->dip_masklen = 128;
+		hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
+	}
+	hw_sa->spi = x->id.spi;
+	hw_sa->sw_sa_handle = htonl(sa_entry->handle);
+	switch (x->id.proto) {
+	case IPPROTO_ESP:
+		hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
+		break;
+	case IPPROTO_AH:
+		hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
+		break;
+	default:
+		break;
+	}
+	hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
+	if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
+		hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+}
+
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+	struct net_device *netdev = x->xso.dev;
+	struct mlx5e_priv *priv;
+
+	priv = netdev_priv(netdev);
+
+	if (x->props.aalgo != SADB_AALG_NONE) {
+		netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+		netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.calgo != SADB_X_CALG_NONE) {
+		netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.flags & XFRM_STATE_ESN) {
+		netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.family != AF_INET &&
+	    x->props.family != AF_INET6) {
+		netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_TRANSPORT &&
+	    x->props.mode != XFRM_MODE_TUNNEL) {
+		dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->id.proto != IPPROTO_ESP) {
+		netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->encap) {
+		netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+		return -EINVAL;
+	}
+	if (!x->aead) {
+		netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+		return -EINVAL;
+	}
+	if (x->aead->alg_icv_len != 128) {
+		netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+		return -EINVAL;
+	}
+	if ((x->aead->alg_key_len != 128 + 32) &&
+	    (x->aead->alg_key_len != 256 + 32)) {
+		netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EINVAL;
+	}
+	if (x->tfcpad) {
+		netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+		return -EINVAL;
+	}
+	if (!x->geniv) {
+		netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+		return -EINVAL;
+	}
+	if (strcmp(x->geniv, "seqiv")) {
+		netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+		return -EINVAL;
+	}
+	if (x->props.family == AF_INET6 &&
+	    !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+		netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
+	struct net_device *netdev = x->xso.dev;
+	struct mlx5_accel_ipsec_sa hw_sa;
+	struct mlx5e_priv *priv;
+	void *context;
+	int err;
+
+	priv = netdev_priv(netdev);
+
+	err = mlx5e_xfrm_validate_state(x);
+	if (err)
+		return err;
+
+	sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+	if (!sa_entry) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	sa_entry->x = x;
+	sa_entry->ipsec = priv->ipsec;
+
+	/* Add the SA to handle processed incoming packets before the add SA
+	 * completion was received
+	 */
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+		if (err) {
+			netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
+			goto err_entry;
+		}
+	}
+
+	mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
+	context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
+	if (IS_ERR(context)) {
+		err = PTR_ERR(context);
+		goto err_sadb_rx;
+	}
+
+	err = mlx5_accel_ipsec_sa_cmd_wait(context);
+	if (err)
+		goto err_sadb_rx;
+
+	x->xso.offload_handle = (unsigned long)sa_entry;
+	goto out;
+
+err_sadb_rx:
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		mlx5e_ipsec_sadb_rx_del(sa_entry);
+		mlx5e_ipsec_sadb_rx_free(sa_entry);
+	}
+err_entry:
+	kfree(sa_entry);
+out:
+	return err;
+}
+
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct mlx5_accel_ipsec_sa hw_sa;
+	void *context;
+
+	if (!x->xso.offload_handle)
+		return;
+
+	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+	WARN_ON(sa_entry->x != x);
+
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+		mlx5e_ipsec_sadb_rx_del(sa_entry);
+
+	mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
+	context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
+	if (IS_ERR(context))
+		return;
+
+	sa_entry->context = context;
+}
+
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry;
+	int res;
+
+	if (!x->xso.offload_handle)
+		return;
+
+	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+	WARN_ON(sa_entry->x != x);
+
+	res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
+	sa_entry->context = NULL;
+	if (res) {
+		/* Leftover object will leak */
+		return;
+	}
+
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+		mlx5e_ipsec_sadb_rx_free(sa_entry);
+
+	kfree(sa_entry);
+}
+
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ipsec *ipsec = NULL;
+
+	if (!MLX5_IPSEC_DEV(priv->mdev)) {
+		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+		return 0;
+	}
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		return -ENOMEM;
+
+	hash_init(ipsec->sadb_rx);
+	spin_lock_init(&ipsec->sadb_rx_lock);
+	ida_init(&ipsec->halloc);
+	ipsec->en_priv = priv;
+	ipsec->en_priv->ipsec = ipsec;
+	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+	return 0;
+}
+
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+	if (!ipsec)
+		return;
+
+	ida_destroy(&ipsec->halloc);
+	kfree(ipsec);
+	priv->ipsec = NULL;
+}
+
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
+	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
+	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
+};
+
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct net_device *netdev = priv->netdev;
+
+	if (!priv->ipsec)
+		return;
+
+	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+	    !MLX5_CAP_ETH(mdev, swp)) {
+		mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+		return;
+	}
+
+	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+	netdev->features |= NETIF_F_HW_ESP;
+	netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+		return;
+	}
+
+	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
new file mode 100644
index 000000000000..b9423a2873e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+
+struct mlx5e_priv;
+
+struct mlx5e_ipsec {
+	struct mlx5e_priv *en_priv;
+	DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+	spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
+	struct ida halloc;
+};
+
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+					      unsigned int handle);
+
+#else
+
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+	return 0;
+}
+
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+#endif
+
+#endif	/* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9f99f624004f..aa5a7aa59ff3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,7 @@
 #include "en.h"
 #include "en_tc.h"
 #include "en_rep.h"
+#include "en_accel/ipsec.h"
 #include "vxlan.h"
 
 struct mlx5e_rq_param {
@@ -3555,6 +3556,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 		goto unlock;
 	}
 
+	if ((netdev->features & NETIF_F_HW_ESP) && prog) {
+		netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+		err = -EINVAL;
+		goto unlock;
+	}
+
 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);
@@ -4046,6 +4053,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	if (MLX5_CAP_GEN(mdev, vport_group_manager))
 		netdev->switchdev_ops = &mlx5e_switchdev_ops;
 #endif
+
+	mlx5e_ipsec_build_netdev(priv);
 }
 
 static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
@@ -4074,14 +4083,19 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 			   void *ppriv)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	int err;
 
 	mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+	err = mlx5e_ipsec_init(priv);
+	if (err)
+		mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
 	mlx5e_build_nic_netdev(netdev);
 	mlx5e_vxlan_init(priv);
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+	mlx5e_ipsec_cleanup(priv);
 	mlx5e_vxlan_cleanup(priv);
 
 	if (priv->channels.params.xdp_prog)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c72f9735119d..87869c04849a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -605,7 +605,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_at_20[0x20];
+	u8         swp[0x1];
+	u8         swp_csum[0x1];
+	u8         swp_lso[0x1];
+	u8         reserved_at_23[0x1d];
 
 	u8         reserved_at_40[0x10];
 	u8         lro_min_mss_size[0x10];
@@ -2438,7 +2441,8 @@ struct mlx5_ifc_sqc_bits {
 	u8	   min_wqe_inline_mode[0x3];
 	u8         state[0x4];
 	u8         reg_umr[0x1];
-	u8         reserved_at_d[0x13];
+	u8         allow_swp[0x1];
+	u8         reserved_at_e[0x12];
 
 	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 1f637f4d1265..6f41270d80c0 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -225,10 +225,20 @@ enum {
 	MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
 };
 
+enum {
+	MLX5_ETH_WQE_SWP_INNER_L3_IPV6  = 1 << 0,
+	MLX5_ETH_WQE_SWP_INNER_L4_UDP   = 1 << 1,
+	MLX5_ETH_WQE_SWP_OUTER_L3_IPV6  = 1 << 4,
+	MLX5_ETH_WQE_SWP_OUTER_L4_UDP   = 1 << 5,
+};
+
 struct mlx5_wqe_eth_seg {
-	u8              rsvd0[4];
+	u8              swp_outer_l4_offset;
+	u8              swp_outer_l3_offset;
+	u8              swp_inner_l4_offset;
+	u8              swp_inner_l3_offset;
 	u8              cs_flags;
-	u8              rsvd1;
+	u8              swp_flags;
 	__be16          mss;
 	__be32          rsvd2;
 	union {
-- 
cgit v1.2.3


From 29d99b966d60029a11d08b9b004cd84b21ce0d67 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Tue, 14 Feb 2017 20:58:02 +0100
Subject: cros_ec: Don't signal wake event for non-wake host events

The subset of wake-enabled host events is defined by the EC, but the EC
may still send non-wake host events if we're in the process of
suspending. Get the mask of wake-enabled host events from the EC and
filter out non-wake events to prevent spurious aborted suspend
attempts.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Acked-for-MFD-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/mfd/cros_ec.c                   | 13 ++++--
 drivers/platform/chrome/cros_ec_lpc.c   |  3 +-
 drivers/platform/chrome/cros_ec_proto.c | 76 ++++++++++++++++++++++++++++++---
 include/linux/mfd/cros_ec.h             |  5 ++-
 4 files changed, 87 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index d4a407e466b5..fc2569f3e7c9 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -54,12 +54,19 @@ static const struct mfd_cell ec_pd_cell = {
 static irqreturn_t ec_irq_thread(int irq, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
+	bool wake_event = true;
 	int ret;
 
-	if (device_may_wakeup(ec_dev->dev))
+	ret = cros_ec_get_next_event(ec_dev, &wake_event);
+
+	/*
+	 * Signal only if wake host events or any interrupt if
+	 * cros_ec_get_next_event() returned an error (default value for
+	 * wake_event is true)
+	 */
+	if (wake_event && device_may_wakeup(ec_dev->dev))
 		pm_wakeup_event(ec_dev->dev, 0);
 
-	ret = cros_ec_get_next_event(ec_dev);
 	if (ret > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
 					     0, ec_dev);
@@ -221,7 +228,7 @@ EXPORT_SYMBOL(cros_ec_suspend);
 
 static void cros_ec_drain_events(struct cros_ec_device *ec_dev)
 {
-	while (cros_ec_get_next_event(ec_dev) > 0)
+	while (cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
 					     1, ec_dev);
 }
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index eeb187e558ce..2b6436d1b6a4 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -231,7 +231,8 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
 
-	if (ec_dev->mkbp_event_supported && cros_ec_get_next_event(ec_dev) > 0)
+	if (ec_dev->mkbp_event_supported &&
+	    cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier, 0,
 					     ec_dev);
 }
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index d6942a67a182..8dfa7fcb1248 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -150,6 +150,40 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_check_result);
 
+/*
+ * cros_ec_get_host_event_wake_mask
+ *
+ * Get the mask of host events that cause wake from suspend.
+ *
+ * @ec_dev: EC device to call
+ * @msg: message structure to use
+ * @mask: result when function returns >=0.
+ *
+ * LOCKING:
+ * the caller has ec_dev->lock mutex, or the caller knows there is
+ * no other command in progress.
+ */
+static int cros_ec_get_host_event_wake_mask(struct cros_ec_device *ec_dev,
+					    struct cros_ec_command *msg,
+					    uint32_t *mask)
+{
+	struct ec_response_host_event_mask *r;
+	int ret;
+
+	msg->command = EC_CMD_HOST_EVENT_GET_WAKE_MASK;
+	msg->version = 0;
+	msg->outsize = 0;
+	msg->insize = sizeof(*r);
+
+	ret = send_command(ec_dev, msg);
+	if (ret > 0) {
+		r = (struct ec_response_host_event_mask *)msg->data;
+		*mask = r->mask;
+	}
+
+	return ret;
+}
+
 static int cros_ec_host_command_proto_query(struct cros_ec_device *ec_dev,
 					    int devidx,
 					    struct cros_ec_command *msg)
@@ -387,6 +421,15 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev)
 	else
 		ec_dev->mkbp_event_supported = 1;
 
+	/*
+	 * Get host event wake mask, assume all events are wake events
+	 * if unavailable.
+	 */
+	ret = cros_ec_get_host_event_wake_mask(ec_dev, proto_msg,
+					       &ec_dev->host_event_wake_mask);
+	if (ret < 0)
+		ec_dev->host_event_wake_mask = U32_MAX;
+
 	ret = 0;
 
 exit:
@@ -504,12 +547,35 @@ static int get_keyboard_state_event(struct cros_ec_device *ec_dev)
 	return ec_dev->event_size;
 }
 
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev)
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event)
 {
-	if (ec_dev->mkbp_event_supported)
-		return get_next_event(ec_dev);
-	else
-		return get_keyboard_state_event(ec_dev);
+	u32 host_event;
+	int ret;
+
+	if (!ec_dev->mkbp_event_supported) {
+		ret = get_keyboard_state_event(ec_dev);
+		if (ret < 0)
+			return ret;
+
+		if (wake_event)
+			*wake_event = true;
+
+		return ret;
+	}
+
+	ret = get_next_event(ec_dev);
+	if (ret < 0)
+		return ret;
+
+	if (wake_event) {
+		host_event = cros_ec_get_host_event(ec_dev);
+
+		/* Consider non-host_event as wake event */
+		*wake_event = !host_event ||
+			      !!(host_event & ec_dev->host_event_wake_mask);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL(cros_ec_get_next_event);
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 3b16c9009749..4e887ba22635 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -149,6 +149,7 @@ struct cros_ec_device {
 
 	struct ec_response_get_next_event event_data;
 	int event_size;
+	u32 host_event_wake_mask;
 };
 
 /**
@@ -299,10 +300,12 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
  * cros_ec_get_next_event -  Fetch next event from the ChromeOS EC
  *
  * @ec_dev: Device to fetch event from
+ * @wake_event: Pointer to a bool set to true upon return if the event might be
+ *              treated as a wake event. Ignored if null.
  *
  * Returns: 0 on success, Linux error number on failure
  */
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev);
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
 
 /**
  * cros_ec_get_host_event - Return a mask of event set by the EC.
-- 
cgit v1.2.3


From d914ba37d7145acb9fd3bb23075c2d56e5a44eb6 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Mon, 26 Jun 2017 19:01:55 -0700
Subject: tracing: Add support for recording tgid of tasks

Inorder to support recording of tgid, the following changes are made:

* Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
  along with the task's comm at the same time. This has has the benefit of not
  setting trace_cmdline_save before all the information for a task is saved.
* Add a new API tracing_record_taskinfo_sched_switch to record task information
  for 2 tasks at a time (previous and next) and use it from sched_switch probe.
* Preserve the old API (tracing_record_cmdline) and create it as a wrapper
  around the new one so that existing callers aren't affected.
* Reuse the existing sched_switch and sched_wakeup probes to record tgid
  information and add a new option 'record-tgid' to enable recording of tgid

When record-tgid option isn't enabled to being with, we take care to make sure
that there's isn't memory or runtime overhead.

Link: http://lkml.kernel.org/r/20170627020155.5139-1-joelaf@google.com

Cc: kernel-team@android.com
Cc: Ingo Molnar <mingo@redhat.com>
Tested-by: Michael Sartain <mikesart@gmail.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h      |  13 ++++-
 kernel/trace/trace.c              | 105 ++++++++++++++++++++++++++++++++++----
 kernel/trace/trace.h              |   7 +++
 kernel/trace/trace_events.c       |  42 ++++++++++++++-
 kernel/trace/trace_sched_switch.c |  72 +++++++++++++++++++++-----
 5 files changed, 213 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a556805eff8a..f73cedfa2e0b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -151,7 +151,15 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
 				int type, unsigned long len,
 				unsigned long flags, int pc);
 
-void tracing_record_cmdline(struct task_struct *tsk);
+#define TRACE_RECORD_CMDLINE	BIT(0)
+#define TRACE_RECORD_TGID	BIT(1)
+
+void tracing_record_taskinfo(struct task_struct *task, int flags);
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+					  struct task_struct *next, int flags);
+
+void tracing_record_cmdline(struct task_struct *task);
+void tracing_record_tgid(struct task_struct *task);
 
 int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
 
@@ -290,6 +298,7 @@ struct trace_subsystem_dir;
 enum {
 	EVENT_FILE_FL_ENABLED_BIT,
 	EVENT_FILE_FL_RECORDED_CMD_BIT,
+	EVENT_FILE_FL_RECORDED_TGID_BIT,
 	EVENT_FILE_FL_FILTERED_BIT,
 	EVENT_FILE_FL_NO_SET_FILTER_BIT,
 	EVENT_FILE_FL_SOFT_MODE_BIT,
@@ -303,6 +312,7 @@ enum {
  * Event file flags:
  *  ENABLED	  - The event is enabled
  *  RECORDED_CMD  - The comms should be recorded at sched_switch
+ *  RECORDED_TGID - The tgids should be recorded at sched_switch
  *  FILTERED	  - The event has a filter attached
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
@@ -315,6 +325,7 @@ enum {
 enum {
 	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
 	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
+	EVENT_FILE_FL_RECORDED_TGID	= (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
 	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
 	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
 	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 14318ce92b13..ab9db750dd29 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  * tracing is active, only save the comm when a trace event
  * occurred.
  */
-static DEFINE_PER_CPU(bool, trace_cmdline_save);
+static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 
 /*
  * Kill all tracing for good (never come back).
@@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
 static __always_inline void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
-	__this_cpu_write(trace_cmdline_save, true);
+	__this_cpu_write(trace_taskinfo_save, true);
 
 	/* If this is the temp buffer, we need to commit fully */
 	if (this_cpu_read(trace_buffered_event) == event) {
@@ -1709,6 +1709,8 @@ void tracing_reset_all_online_cpus(void)
 	}
 }
 
+static int *tgid_map;
+
 #define SAVED_CMDLINES_DEFAULT 128
 #define NO_CMDLINE_MAP UINT_MAX
 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1722,7 +1724,7 @@ struct saved_cmdlines_buffer {
 static struct saved_cmdlines_buffer *savedcmd;
 
 /* temporary disable recording */
-static atomic_t trace_record_cmdline_disabled __read_mostly;
+static atomic_t trace_record_taskinfo_disabled __read_mostly;
 
 static inline char *get_saved_cmdlines(int idx)
 {
@@ -1990,16 +1992,87 @@ void trace_find_cmdline(int pid, char comm[])
 	preempt_enable();
 }
 
-void tracing_record_cmdline(struct task_struct *tsk)
+int trace_find_tgid(int pid)
+{
+	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
+		return 0;
+
+	return tgid_map[pid];
+}
+
+static int trace_save_tgid(struct task_struct *tsk)
 {
-	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
+	if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+		return 0;
+
+	tgid_map[tsk->pid] = tsk->tgid;
+	return 1;
+}
+
+static bool tracing_record_taskinfo_skip(int flags)
+{
+	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
+		return true;
+	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
+		return true;
+	if (!__this_cpu_read(trace_taskinfo_save))
+		return true;
+	return false;
+}
+
+/**
+ * tracing_record_taskinfo - record the task info of a task
+ *
+ * @task  - task to record
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ *        - TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo(struct task_struct *task, int flags)
+{
+	if (tracing_record_taskinfo_skip(flags))
+		return;
+	if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
+		return;
+	if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
 		return;
 
-	if (!__this_cpu_read(trace_cmdline_save))
+	__this_cpu_write(trace_taskinfo_save, false);
+}
+
+/**
+ * tracing_record_taskinfo_sched_switch - record task info for sched_switch
+ *
+ * @prev - previous task during sched_switch
+ * @next - next task during sched_switch
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ *          TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+					  struct task_struct *next, int flags)
+{
+	if (tracing_record_taskinfo_skip(flags))
 		return;
 
-	if (trace_save_cmdline(tsk))
-		__this_cpu_write(trace_cmdline_save, false);
+	if ((flags & TRACE_RECORD_CMDLINE) &&
+	    (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
+		return;
+
+	if ((flags & TRACE_RECORD_TGID) &&
+	    (!trace_save_tgid(prev) || !trace_save_tgid(next)))
+		return;
+
+	__this_cpu_write(trace_taskinfo_save, false);
+}
+
+/* Helpers to record a specific task information */
+void tracing_record_cmdline(struct task_struct *task)
+{
+	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
+}
+
+void tracing_record_tgid(struct task_struct *task)
+{
+	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
 }
 
 /*
@@ -3144,7 +3217,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 #endif
 
 	if (!iter->snapshot)
-		atomic_inc(&trace_record_cmdline_disabled);
+		atomic_inc(&trace_record_taskinfo_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -3189,7 +3262,7 @@ static void s_stop(struct seq_file *m, void *p)
 #endif
 
 	if (!iter->snapshot)
-		atomic_dec(&trace_record_cmdline_disabled);
+		atomic_dec(&trace_record_taskinfo_disabled);
 
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
@@ -4236,6 +4309,18 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 	if (mask == TRACE_ITER_RECORD_CMD)
 		trace_event_enable_cmd_record(enabled);
 
+	if (mask == TRACE_ITER_RECORD_TGID) {
+		if (!tgid_map)
+			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
+					   GFP_KERNEL);
+		if (!tgid_map) {
+			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
+			return -ENOMEM;
+		}
+
+		trace_event_enable_tgid_record(enabled);
+	}
+
 	if (mask == TRACE_ITER_EVENT_FORK)
 		trace_event_follow_fork(tr, enabled);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 13823951e42b..6ade1c55cc3a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -640,6 +640,9 @@ void set_graph_array(struct trace_array *tr);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
+void tracing_start_tgid_record(void);
+void tracing_stop_tgid_record(void);
+
 int register_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 
@@ -700,6 +703,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
 extern u64 ftrace_now(int cpu);
 
 extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
 extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -1124,6 +1128,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 		C(CONTEXT_INFO,		"context-info"),   /* Print pid/cpu/time */ \
 		C(LATENCY_FMT,		"latency-format"),	\
 		C(RECORD_CMD,		"record-cmd"),		\
+		C(RECORD_TGID,		"record-tgid"),		\
 		C(OVERWRITE,		"overwrite"),		\
 		C(STOP_ON_FREE,		"disable_on_free"),	\
 		C(IRQ_INFO,		"irq-info"),		\
@@ -1440,6 +1445,8 @@ struct ftrace_event_field *
 trace_find_event_field(struct trace_event_call *call, char *name);
 
 extern void trace_event_enable_cmd_record(bool enable);
+extern void trace_event_enable_tgid_record(bool enable);
+
 extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
 extern int event_trace_del_tracer(struct trace_array *tr);
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 83dfd0dbbbfe..36132f9280e6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
 	mutex_unlock(&event_mutex);
 }
 
+void trace_event_enable_tgid_record(bool enable)
+{
+	struct trace_event_file *file;
+	struct trace_array *tr;
+
+	mutex_lock(&event_mutex);
+	do_for_each_event_file(tr, file) {
+		if (!(file->flags & EVENT_FILE_FL_ENABLED))
+			continue;
+
+		if (enable) {
+			tracing_start_tgid_record();
+			set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+		} else {
+			tracing_stop_tgid_record();
+			clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
+				  &file->flags);
+		}
+	} while_for_each_event_file();
+	mutex_unlock(&event_mutex);
+}
+
 static int __ftrace_event_enable_disable(struct trace_event_file *file,
 					 int enable, int soft_disable)
 {
@@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 				tracing_stop_cmdline_record();
 				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 			}
+
+			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
+				tracing_stop_tgid_record();
+				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
+			}
+
 			call->class->reg(call, TRACE_REG_UNREGISTER, file);
 		}
 		/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
@@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 		}
 
 		if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
+			bool cmd = false, tgid = false;
 
 			/* Keep the event disabled, when going to SOFT_MODE. */
 			if (soft_disable)
 				set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 
 			if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
+				cmd = true;
 				tracing_start_cmdline_record();
 				set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 			}
+
+			if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+				tgid = true;
+				tracing_start_tgid_record();
+				set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+			}
+
 			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
 			if (ret) {
-				tracing_stop_cmdline_record();
+				if (cmd)
+					tracing_stop_cmdline_record();
+				if (tgid)
+					tracing_stop_tgid_record();
 				pr_info("event trace: Could not enable event "
 					"%s\n", trace_event_name(call));
 				break;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4c896a0101bd..b341c02730be 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -12,27 +12,38 @@
 
 #include "trace.h"
 
-static int			sched_ref;
+#define RECORD_CMDLINE	1
+#define RECORD_TGID	2
+
+static int		sched_cmdline_ref;
+static int		sched_tgid_ref;
 static DEFINE_MUTEX(sched_register_mutex);
 
 static void
 probe_sched_switch(void *ignore, bool preempt,
 		   struct task_struct *prev, struct task_struct *next)
 {
-	if (unlikely(!sched_ref))
-		return;
+	int flags;
+
+	flags = (RECORD_TGID * !!sched_tgid_ref) +
+		(RECORD_CMDLINE * !!sched_cmdline_ref);
 
-	tracing_record_cmdline(prev);
-	tracing_record_cmdline(next);
+	if (!flags)
+		return;
+	tracing_record_taskinfo_sched_switch(prev, next, flags);
 }
 
 static void
 probe_sched_wakeup(void *ignore, struct task_struct *wakee)
 {
-	if (unlikely(!sched_ref))
-		return;
+	int flags;
+
+	flags = (RECORD_TGID * !!sched_tgid_ref) +
+		(RECORD_CMDLINE * !!sched_cmdline_ref);
 
-	tracing_record_cmdline(current);
+	if (!flags)
+		return;
+	tracing_record_taskinfo(current, flags);
 }
 
 static int tracing_sched_register(void)
@@ -75,28 +86,61 @@ static void tracing_sched_unregister(void)
 	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
 }
 
-static void tracing_start_sched_switch(void)
+static void tracing_start_sched_switch(int ops)
 {
+	bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
 	mutex_lock(&sched_register_mutex);
-	if (!(sched_ref++))
+
+	switch (ops) {
+	case RECORD_CMDLINE:
+		sched_cmdline_ref++;
+		break;
+
+	case RECORD_TGID:
+		sched_tgid_ref++;
+		break;
+	}
+
+	if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
 		tracing_sched_register();
 	mutex_unlock(&sched_register_mutex);
 }
 
-static void tracing_stop_sched_switch(void)
+static void tracing_stop_sched_switch(int ops)
 {
 	mutex_lock(&sched_register_mutex);
-	if (!(--sched_ref))
+
+	switch (ops) {
+	case RECORD_CMDLINE:
+		sched_cmdline_ref--;
+		break;
+
+	case RECORD_TGID:
+		sched_tgid_ref--;
+		break;
+	}
+
+	if (!sched_cmdline_ref && !sched_tgid_ref)
 		tracing_sched_unregister();
 	mutex_unlock(&sched_register_mutex);
 }
 
 void tracing_start_cmdline_record(void)
 {
-	tracing_start_sched_switch();
+	tracing_start_sched_switch(RECORD_CMDLINE);
 }
 
 void tracing_stop_cmdline_record(void)
 {
-	tracing_stop_sched_switch();
+	tracing_stop_sched_switch(RECORD_CMDLINE);
+}
+
+void tracing_start_tgid_record(void)
+{
+	tracing_start_sched_switch(RECORD_TGID);
+}
+
+void tracing_stop_tgid_record(void)
+{
+	tracing_stop_sched_switch(RECORD_TGID);
 }
-- 
cgit v1.2.3


From c75b1d9421f80f4143e389d2d50ddfc8a28c8c35 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 Jun 2017 11:47:04 -0600
Subject: fs: add fcntl() interface for setting/getting write life time hints

Define a set of write life time hints:

RWH_WRITE_LIFE_NOT_SET	No hint information set
RWH_WRITE_LIFE_NONE	No hints about write life time
RWH_WRITE_LIFE_SHORT	Data written has a short life time
RWH_WRITE_LIFE_MEDIUM	Data written has a medium life time
RWH_WRITE_LIFE_LONG	Data written has a long life time
RWH_WRITE_LIFE_EXTREME	Data written has an extremely long life time

The intent is for these values to be relative to each other, no
absolute meaning should be attached to these flag names.

Add an fcntl interface for querying these flags, and also for
setting them as well:

F_GET_RW_HINT		Returns the read/write hint set on the
			underlying inode.

F_SET_RW_HINT		Set one of the above write hints on the
			underlying inode.

F_GET_FILE_RW_HINT	Returns the read/write hint set on the
			file descriptor.

F_SET_FILE_RW_HINT	Set one of the above write hints on the
			file descriptor.

The user passes in a 64-bit pointer to get/set these values, and
the interface returns 0/-1 on success/error.

Sample program testing/implementing basic setting/getting of write
hints is below.

Add support for storing the write life time hint in the inode flags
and in struct file as well, and pass them to the kiocb flags. If
both a file and its corresponding inode has a write hint, then we
use the one in the file, if available. The file hint can be used
for sync/direct IO, for buffered writeback only the inode hint
is available.

This is in preparation for utilizing these hints in the block layer,
to guide on-media data placement.

/*
 * writehint.c: get or set an inode write hint
 */
 #include <stdio.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <stdbool.h>
 #include <inttypes.h>

 #ifndef F_GET_RW_HINT
 #define F_LINUX_SPECIFIC_BASE	1024
 #define F_GET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 11)
 #define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
 #endif

static char *str[] = { "RWF_WRITE_LIFE_NOT_SET", "RWH_WRITE_LIFE_NONE",
			"RWH_WRITE_LIFE_SHORT", "RWH_WRITE_LIFE_MEDIUM",
			"RWH_WRITE_LIFE_LONG", "RWH_WRITE_LIFE_EXTREME" };

int main(int argc, char *argv[])
{
	uint64_t hint;
	int fd, ret;

	if (argc < 2) {
		fprintf(stderr, "%s: file <hint>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 2;
	}

	if (argc > 2) {
		hint = atoi(argv[2]);
		ret = fcntl(fd, F_SET_RW_HINT, &hint);
		if (ret < 0) {
			perror("fcntl: F_SET_RW_HINT");
			return 4;
		}
	}

	ret = fcntl(fd, F_GET_RW_HINT, &hint);
	if (ret < 0) {
		perror("fcntl: F_GET_RW_HINT");
		return 3;
	}

	printf("%s: hint %s\n", argv[1], str[hint]);
	close(fd);
	return 0;
}

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fcntl.c                 | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/inode.c                 |  1 +
 fs/open.c                  |  1 +
 include/linux/fs.h         | 47 ++++++++++++++++++++++++++---------
 include/uapi/linux/fcntl.h | 21 ++++++++++++++++
 5 files changed, 120 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..67bdc6e8ccad 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,62 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
 }
 #endif
 
+static bool rw_hint_valid(enum rw_hint hint)
+{
+	switch (hint) {
+	case RWF_WRITE_LIFE_NOT_SET:
+	case RWH_WRITE_LIFE_NONE:
+	case RWH_WRITE_LIFE_SHORT:
+	case RWH_WRITE_LIFE_MEDIUM:
+	case RWH_WRITE_LIFE_LONG:
+	case RWH_WRITE_LIFE_EXTREME:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	u64 *argp = (u64 __user *)arg;
+	enum rw_hint hint;
+
+	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		if (put_user(file_write_hint(file), argp))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		if (get_user(hint, argp))
+			return -EFAULT;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		spin_lock(&file->f_lock);
+		file->f_write_hint = hint;
+		spin_unlock(&file->f_lock);
+		return 0;
+	case F_GET_RW_HINT:
+		if (put_user(inode->i_write_hint, argp))
+			return -EFAULT;
+		return 0;
+	case F_SET_RW_HINT:
+		if (get_user(hint, argp))
+			return -EFAULT;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		inode_lock(inode);
+		inode->i_write_hint = hint;
+		inode_unlock(inode);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -337,6 +393,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GET_SEALS:
 		err = shmem_fcntl(filp, cmd, arg);
 		break;
+	case F_GET_RW_HINT:
+	case F_SET_RW_HINT:
+	case F_GET_FILE_RW_HINT:
+	case F_SET_FILE_RW_HINT:
+		err = fcntl_rw_hint(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..f0e5fc77e6a4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	i_gid_write(inode, 0);
 	atomic_set(&inode->i_writecount, 0);
 	inode->i_size = 0;
+	inode->i_write_hint = WRITE_LIFE_NOT_SET;
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
 	inode->i_generation = 0;
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..3fe0c4aa7d27 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
 	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
+	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4574121f4746..65adbddb3163 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -20,6 +20,7 @@
 #include <linux/rwsem.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
+#include <linux/fcntl.h>
 #include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
@@ -265,6 +266,18 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+/*
+ * Write life time hint values.
+ */
+enum rw_hint {
+	WRITE_LIFE_NOT_SET	= 0,
+	WRITE_LIFE_NONE		= RWH_WRITE_LIFE_NONE,
+	WRITE_LIFE_SHORT	= RWH_WRITE_LIFE_SHORT,
+	WRITE_LIFE_MEDIUM	= RWH_WRITE_LIFE_MEDIUM,
+	WRITE_LIFE_LONG		= RWH_WRITE_LIFE_LONG,
+	WRITE_LIFE_EXTREME	= RWH_WRITE_LIFE_EXTREME,
+};
+
 #define IOCB_EVENTFD		(1 << 0)
 #define IOCB_APPEND		(1 << 1)
 #define IOCB_DIRECT		(1 << 2)
@@ -280,6 +293,7 @@ struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
+	enum rw_hint		ki_hint;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -287,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
 	return kiocb->ki_complete == NULL;
 }
 
-static inline int iocb_flags(struct file *file);
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-	*kiocb = (struct kiocb) {
-		.ki_filp = filp,
-		.ki_flags = iocb_flags(filp),
-	};
-}
-
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -597,6 +601,7 @@ struct inode {
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	unsigned int		i_blkbits;
+	enum rw_hint		i_write_hint;
 	blkcnt_t		i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
@@ -851,6 +856,7 @@ struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
+	enum rw_hint		f_write_hint;
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1026,8 +1032,6 @@ struct file_lock_context {
 #define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
 #endif
 
-#include <linux/fcntl.h>
-
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 /*
@@ -1878,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 	return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
 }
 
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+		return file->f_write_hint;
+
+	return file_inode(file)->i_write_hint;
+}
+
+static inline int iocb_flags(struct file *file);
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+		.ki_flags = iocb_flags(filp),
+		.ki_hint = file_write_hint(filp),
+	};
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6eee71..ec69d55bcec7 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -42,6 +42,27 @@
 #define F_SEAL_WRITE	0x0008	/* prevent writes */
 /* (1U << 31) is reserved for signed error codes */
 
+/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NONE	1
+#define RWH_WRITE_LIFE_SHORT	2
+#define RWH_WRITE_LIFE_MEDIUM	3
+#define RWH_WRITE_LIFE_LONG	4
+#define RWH_WRITE_LIFE_EXTREME	5
+
 /*
  * Types of directory notifications that may be requested.
  */
-- 
cgit v1.2.3


From cb6934f8ea1a595902ca37e250e0917d4dd7b2a7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 Jun 2017 09:22:02 -0600
Subject: block: add support for write hints in a bio

No functional changes in this patch, we just use up some holes
in the bio and request structures to define a write hint that
we psas down the stack.

Ensure that we don't merge requests that have different life time
hints assigned to them, and that we inherit the write hint when
cloning a bio.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               |  2 ++
 block/blk-core.c          |  1 +
 block/blk-merge.c         | 14 ++++++++++++++
 include/linux/blk_types.h |  1 +
 include/linux/blkdev.h    |  2 ++
 5 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 89a51bd49ab7..9cf98b29588a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -596,6 +596,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_bdev = bio_src->bi_bdev;
 	bio_set_flag(bio, BIO_CLONED);
 	bio->bi_opf = bio_src->bi_opf;
+	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
@@ -679,6 +680,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
 		return NULL;
 	bio->bi_bdev		= bio_src->bi_bdev;
 	bio->bi_opf		= bio_src->bi_opf;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c18ea60cb1c..af393d5a9680 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1765,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
 		req->ioprio = ioc->ioprio;
 	else
 		req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+	req->write_hint = bio->bi_write_hint;
 	blk_rq_bio_prep(req->q, req, bio);
 }
 EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5df13041b851..99038830fb42 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -672,6 +672,13 @@ static struct request *attempt_merge(struct request_queue *q,
 	    !blk_write_same_mergeable(req->bio, next->bio))
 		return NULL;
 
+	/*
+	 * Don't allow merge of different write hints, or for a hint with
+	 * non-hint IO.
+	 */
+	if (req->write_hint != next->write_hint)
+		return NULL;
+
 	/*
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
@@ -791,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	    !blk_write_same_mergeable(rq->bio, bio))
 		return false;
 
+	/*
+	 * Don't allow merge of different write hints, or for a hint with
+	 * non-hint IO.
+	 */
+	if (rq->write_hint != bio->bi_write_hint)
+		return false;
+
 	return true;
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index e210da6d14b8..d2eb87c84d82 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -56,6 +56,7 @@ struct bio {
 						 */
 	unsigned short		bi_flags;	/* status, etc and bvec pool number */
 	unsigned short		bi_ioprio;
+	unsigned short		bi_write_hint;
 
 	struct bvec_iter	bi_iter;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bf2157141d53..0eebd3bcfd85 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -225,6 +225,8 @@ struct request {
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
+	unsigned short write_hint;
+
 	unsigned long deadline;
 	struct list_head timeout_list;
 
-- 
cgit v1.2.3


From f793dfd3f39a3dc50468b06498606b3a906f42f1 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 26 Jun 2017 08:15:27 -0600
Subject: blk-mq: expose write hints through debugfs

Useful to verify that things are working the way they should.
Reading the file will return number of kb written with each
write hint. Writing the file will reset the statistics. No care
is taken to ensure that we don't race on updates.

Drivers will write to q->write_hints[] if they handle a given
write hint.

Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 24 ++++++++++++++++++++++++
 include/linux/blkdev.h |  3 +++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 9edebbdce0bd..9ebc2945f991 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -135,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
 	}
 }
 
+static int queue_write_hint_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	int i;
+
+	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+		seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
+
+	return 0;
+}
+
+static ssize_t queue_write_hint_store(void *data, const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct request_queue *q = data;
+	int i;
+
+	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+		q->write_hints[i] = 0;
+
+	return count;
+}
+
 static int queue_poll_stat_show(void *data, struct seq_file *m)
 {
 	struct request_queue *q = data;
@@ -730,6 +753,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{"poll_stat", 0400, queue_poll_stat_show},
 	{"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
 	{"state", 0600, queue_state_show, queue_state_write},
+	{"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
 	{},
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0eebd3bcfd85..e1e289ab66b9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -596,6 +596,9 @@ struct request_queue {
 	void			*rq_alloc_data;
 
 	struct work_struct	release_work;
+
+#define BLK_MAX_WRITE_HINTS	5
+	u64			write_hints[BLK_MAX_WRITE_HINTS];
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
cgit v1.2.3


From f5d118406247acfc4fc481e441e01ea4d6318fdc Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 Jun 2017 12:03:06 -0600
Subject: nvme: add support for streams and directives

This adds support for Directives in NVMe, particular for the Streams
directive. Support for Directives is a new feature in NVMe 1.3. It
allows a user to pass in information about where to store the data, so
that it the device can do so most effiently. If an application is
managing and writing data with different life times, mixing differently
retentioned data onto the same locations on flash can cause write
amplification to grow. This, in turn, will reduce performance and life
time of the device.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 151 +++++++++++++++++++++++++++++++++++++++++++++--
 drivers/nvme/host/nvme.h |   4 ++
 include/linux/nvme.h     |  48 +++++++++++++++
 3 files changed, 199 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index aee37b73231d..5c50f53e32f3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -65,6 +65,10 @@ static bool force_apst;
 module_param(force_apst, bool, 0644);
 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
 
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
 struct workqueue_struct *nvme_wq;
 EXPORT_SYMBOL_GPL(nvme_wq);
 
@@ -297,6 +301,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+
+	c.directive.opcode = nvme_admin_directive_send;
+	c.directive.nsid = cpu_to_le32(0xffffffff);
+	c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+	c.directive.dtype = NVME_DIR_IDENTIFY;
+	c.directive.tdtype = NVME_DIR_STREAMS;
+	c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+	return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+	return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+				  struct streams_directive_params *s, u32 nsid)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	memset(s, 0, sizeof(*s));
+
+	c.directive.opcode = nvme_admin_directive_recv;
+	c.directive.nsid = cpu_to_le32(nsid);
+	c.directive.numd = sizeof(*s);
+	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+	c.directive.dtype = NVME_DIR_STREAMS;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+		return 0;
+	if (!streams)
+		return 0;
+
+	ret = nvme_enable_streams(ctrl);
+	if (ret)
+		return ret;
+
+	ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+	if (ret)
+		return ret;
+
+	ctrl->nssa = le16_to_cpu(s.nssa);
+	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+		dev_info(ctrl->device, "too few streams (%u) available\n",
+					ctrl->nssa);
+		nvme_disable_streams(ctrl);
+		return 0;
+	}
+
+	ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+	return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+				     struct request *req, u16 *control,
+				     u32 *dsmgmt)
+{
+	enum rw_hint streamid = req->write_hint;
+
+	if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+		streamid = 0;
+	else {
+		streamid--;
+		if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+			return;
+
+		*control |= NVME_RW_DTYPE_STREAMS;
+		*dsmgmt |= streamid << 16;
+	}
+
+	if (streamid < ARRAY_SIZE(req->q->write_hints))
+		req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
 static inline void nvme_setup_flush(struct nvme_ns *ns,
 		struct nvme_command *cmnd)
 {
@@ -348,6 +451,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		struct request *req, struct nvme_command *cmnd)
 {
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	u16 control = 0;
 	u32 dsmgmt = 0;
 
@@ -375,6 +479,9 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
 	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
+	if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+		nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
 	if (ns->ms) {
 		switch (ns->pi_type) {
 		case NVME_NS_DPS_PI_TYPE3:
@@ -1094,8 +1201,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	ns->queue->limits.discard_alignment = logical_block_size;
-	ns->queue->limits.discard_granularity = logical_block_size;
+	if (ctrl->nr_streams && ns->sws && ns->sgs) {
+		unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+		ns->queue->limits.discard_alignment = sz;
+		ns->queue->limits.discard_granularity = sz;
+	} else {
+		ns->queue->limits.discard_alignment = logical_block_size;
+		ns->queue->limits.discard_granularity = logical_block_size;
+	}
 	blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
 	blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1135,6 +1249,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
 	struct nvme_ns *ns = disk->private_data;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	u16 bs;
 
 	/*
@@ -1149,7 +1264,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 
 	blk_mq_freeze_queue(disk->queue);
 
-	if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+	if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
 		nvme_prep_integrity(disk, id, bs);
 	blk_queue_logical_block_size(ns->queue, bs);
 	if (ns->noiob)
@@ -1161,7 +1276,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+	if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 	blk_mq_unfreeze_queue(disk->queue);
 }
@@ -1766,6 +1881,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
 	nvme_configure_apst(ctrl);
+	nvme_configure_directives(ctrl);
 
 	ctrl->identified = true;
 
@@ -2158,6 +2274,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	return ret;
 }
 
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!ctrl->nr_streams)
+		return 0;
+
+	ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+	if (ret)
+		return ret;
+
+	ns->sws = le32_to_cpu(s.sws);
+	ns->sgs = le16_to_cpu(s.sgs);
+
+	if (ns->sws) {
+		unsigned int bs = 1 << ns->lba_shift;
+
+		blk_queue_io_min(ns->queue, bs * ns->sws);
+		if (ns->sgs)
+			blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+	}
+
+	return 0;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns;
@@ -2187,6 +2329,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	nvme_set_queue_limits(ctrl, ns->queue);
+	nvme_setup_streams_ns(ctrl, ns);
 
 	sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ec8c7363934d..f616835afc4c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -147,6 +147,8 @@ struct nvme_ctrl {
 	u16 oncs;
 	u16 vid;
 	u16 oacs;
+	u16 nssa;
+	u16 nr_streams;
 	atomic_t abort_limit;
 	u8 event_limit;
 	u8 vwc;
@@ -199,6 +201,8 @@ struct nvme_ns {
 	unsigned ns_id;
 	int lba_shift;
 	u16 ms;
+	u16 sgs;
+	u32 sws;
 	bool ext;
 	u8 pi_type;
 	unsigned long flags;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 291587a0743f..f516a975bb21 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -253,6 +253,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
 	NVME_CTRL_OACS_DBBUF_SUPP		= 1 << 7,
 };
 
@@ -303,6 +304,19 @@ enum {
 	NVME_ID_CNS_CTRL_LIST		= 0x13,
 };
 
+enum {
+	NVME_DIR_IDENTIFY		= 0x00,
+	NVME_DIR_STREAMS		= 0x01,
+	NVME_DIR_SND_ID_OP_ENABLE	= 0x01,
+	NVME_DIR_SND_ST_OP_REL_ID	= 0x01,
+	NVME_DIR_SND_ST_OP_REL_RSC	= 0x02,
+	NVME_DIR_RCV_ID_OP_PARAM	= 0x01,
+	NVME_DIR_RCV_ST_OP_PARAM	= 0x01,
+	NVME_DIR_RCV_ST_OP_STATUS	= 0x02,
+	NVME_DIR_RCV_ST_OP_RESOURCE	= 0x03,
+	NVME_DIR_ENDIR			= 0x01,
+};
+
 enum {
 	NVME_NS_FEAT_THIN	= 1 << 0,
 	NVME_NS_FLBAS_LBA_MASK	= 0xf,
@@ -560,6 +574,7 @@ enum {
 	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
 	NVME_RW_PRINFO_PRACT		= 1 << 13,
+	NVME_RW_DTYPE_STREAMS		= 1 << 4,
 };
 
 struct nvme_dsm_cmd {
@@ -634,6 +649,8 @@ enum nvme_admin_opcode {
 	nvme_admin_download_fw		= 0x11,
 	nvme_admin_ns_attach		= 0x15,
 	nvme_admin_keep_alive		= 0x18,
+	nvme_admin_directive_send	= 0x19,
+	nvme_admin_directive_recv	= 0x1a,
 	nvme_admin_dbbuf		= 0x7C,
 	nvme_admin_format_nvm		= 0x80,
 	nvme_admin_security_send	= 0x81,
@@ -797,6 +814,24 @@ struct nvme_get_log_page_command {
 	__u32			rsvd14[2];
 };
 
+struct nvme_directive_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__le32			numd;
+	__u8			doper;
+	__u8			dtype;
+	__le16			dspec;
+	__u8			endir;
+	__u8			tdtype;
+	__u16			rsvd15;
+
+	__u32			rsvd16[3];
+};
+
 /*
  * Fabrics subcommands.
  */
@@ -927,6 +962,18 @@ struct nvme_dbbuf {
 	__u32			rsvd12[6];
 };
 
+struct streams_directive_params {
+	__u16	msl;
+	__u16	nssa;
+	__u16	nsso;
+	__u8	rsvd[10];
+	__u32	sws;
+	__u16	sgs;
+	__u16	nsa;
+	__u16	nso;
+	__u8	rsvd2[6];
+};
+
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
@@ -947,6 +994,7 @@ struct nvme_command {
 		struct nvmf_property_set_command prop_set;
 		struct nvmf_property_get_command prop_get;
 		struct nvme_dbbuf dbbuf;
+		struct nvme_directive_cmd directive;
 	};
 };
 
-- 
cgit v1.2.3


From 3bce016a4c5975e4279bfb3cbd6d0332b856cc72 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 19 Jun 2017 09:26:21 +0200
Subject: block: move bounce declarations to block/blk.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h            | 13 +++++++++++++
 block/bounce.c         |  1 +
 include/linux/blkdev.h | 13 -------------
 3 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 798691a5e5e9..01ebb8185f6b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -336,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
 static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif
 
+#ifdef CONFIG_BOUNCE
+extern int init_emergency_isa_pool(void);
+extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
+#else
+static inline int init_emergency_isa_pool(void)
+{
+	return 0;
+}
+static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+{
+}
+#endif /* CONFIG_BOUNCE */
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 27c5cc0f1ed5..36ba44491703 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -22,6 +22,7 @@
 #include <asm/tlbflush.h>
 
 #include <trace/events/block.h>
+#include "blk.h"
 
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e1e289ab66b9..e7eef48c97c9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -884,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
 #define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
 #define BLK_MIN_SG_TIMEOUT	(7 * HZ)
 
-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
-{
-	return 0;
-}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
-{
-}
-#endif /* CONFIG_MMU */
-
 struct rq_map_data {
 	struct page **pages;
 	int page_order;
-- 
cgit v1.2.3


From 1c4bc3ab9a064d98cdf6de6b44f89d5c3757fa32 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 19 Jun 2017 09:26:22 +0200
Subject: block: remove the queue_bounce_pfn helper

Only used inside the bounce code, and opencoding it makes it more obvious
what is going on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bounce.c         | 6 +++---
 include/linux/blkdev.h | 5 -----
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/block/bounce.c b/block/bounce.c
index 36ba44491703..5793c2dc1a15 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -203,7 +203,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_PAGES)
 			sectors += from.bv_len >> 9;
-		if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
+		if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
 			bounce = true;
 	}
 	if (!bounce)
@@ -220,7 +220,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
 
-		if (page_to_pfn(page) <= queue_bounce_pfn(q))
+		if (page_to_pfn(page) <= q->limits.bounce_pfn)
 			continue;
 
 		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -272,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 * don't waste time iterating over bio segments
 	 */
 	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (queue_bounce_pfn(q) >= blk_max_pfn)
+		if (q->limits.bounce_pfn >= blk_max_pfn)
 			return;
 		pool = page_pool;
 	} else {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e7eef48c97c9..25f6a0cb27d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1385,11 +1385,6 @@ enum blk_default_limits {
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline unsigned long queue_bounce_pfn(struct request_queue *q)
-{
-	return q->limits.bounce_pfn;
-}
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
-- 
cgit v1.2.3


From f2b612578e163b49661ece2fe01dfafb0e78f545 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 29 May 2017 23:00:34 -0700
Subject: x86, libnvdimm, pmem: move arch_invalidate_pmem() to libnvdimm

Kill this globally defined wrapper and move to libnvdimm so that we can
ultimately remove include/linux/pmem.h and asm/pmem.h.

Cc: <x86@kernel.org>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h |  5 -----
 arch/x86/mm/pageattr.c      |  6 ++++++
 drivers/nvdimm/claim.c      |  3 ++-
 drivers/nvdimm/pmem.c       |  2 +-
 drivers/nvdimm/pmem.h       |  4 ++++
 include/linux/pmem.h        | 19 -------------------
 6 files changed, 13 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 4759a179aa52..b61a25a895a7 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -43,10 +43,5 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 				__func__, dst, src, rem))
 		BUG();
 }
-
-static inline void arch_invalidate_pmem(void *addr, size_t size)
-{
-	clflush_cache_range(addr, size);
-}
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c8520b2c62d2..757b0bcdf712 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -150,6 +150,12 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(clflush_cache_range);
 
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+	clflush_cache_range(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
 static void __cpa_flush_all(void *arg)
 {
 	unsigned long cache = (unsigned long)arg;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b8b9c8ca7862..d2e16c0401df 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -14,6 +14,7 @@
 #include <linux/sizes.h>
 #include <linux/pmem.h>
 #include "nd-core.h"
+#include "pmem.h"
 #include "pfn.h"
 #include "btt.h"
 #include "nd.h"
@@ -272,7 +273,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 				cleared /= 512;
 				badblocks_clear(&nsio->bb, sector, cleared);
 			}
-			invalidate_pmem(nsio->addr + offset, size);
+			arch_invalidate_pmem(nsio->addr + offset, size);
 		} else
 			rc = -EIO;
 	}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 3b87702d46bb..68737bc68a07 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -71,7 +71,7 @@ static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 		badblocks_clear(&pmem->bb, sector, cleared);
 	}
 
-	invalidate_pmem(pmem->virt_addr + offset, len);
+	arch_invalidate_pmem(pmem->virt_addr + offset, len);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index c4b3371c7f88..00005900c1b7 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -7,10 +7,14 @@
 
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 void arch_wb_cache_pmem(void *addr, size_t size);
+void arch_invalidate_pmem(void *addr, size_t size);
 #else
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 }
+static inline void arch_invalidate_pmem(void *addr, size_t size)
+{
+}
 #endif
 
 /* this definition is in it's own header for tools/testing/nvdimm to consume */
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 33ae761f010a..559c00848583 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -30,11 +30,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
 	BUG();
 }
-
-static inline void arch_invalidate_pmem(void *addr, size_t size)
-{
-	BUG();
-}
 #endif
 
 static inline bool arch_has_pmem_api(void)
@@ -61,18 +56,4 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
 	else
 		memcpy(dst, src, n);
 }
-
-/**
- * invalidate_pmem - flush a pmem range from the cache hierarchy
- * @addr:	virtual start address
- * @size:	bytes to invalidate (internally aligned to cache line size)
- *
- * For platforms that support clearing poison this flushes any poisoned
- * ranges out of the cache
- */
-static inline void invalidate_pmem(void *addr, size_t size)
-{
-	if (arch_has_pmem_api())
-		arch_invalidate_pmem(addr, size);
-}
 #endif /* __PMEM_H__ */
-- 
cgit v1.2.3


From ca6a4657e5420dec727256717e905ebc3c751352 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jan 2017 20:36:58 -0800
Subject: x86, libnvdimm, pmem: remove global pmem api

Now that all callers of the pmem api have been converted to dax helpers that
call back to the pmem driver, we can remove include/linux/pmem.h and
asm/pmem.h.

Cc: <x86@kernel.org>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 MAINTAINERS                     |  4 +--
 arch/x86/include/asm/pmem.h     | 47 --------------------------------
 drivers/acpi/nfit/core.c        |  3 +--
 drivers/nvdimm/claim.c          |  1 -
 drivers/nvdimm/dimm_devs.c      |  8 ++++++
 drivers/nvdimm/namespace_devs.c |  6 +----
 drivers/nvdimm/pmem.c           |  1 -
 drivers/nvdimm/pmem.h           |  2 ++
 drivers/nvdimm/region_devs.c    |  1 -
 fs/dax.c                        |  1 -
 include/linux/libnvdimm.h       |  1 +
 include/linux/pmem.h            | 59 -----------------------------------------
 12 files changed, 14 insertions(+), 120 deletions(-)
 delete mode 100644 arch/x86/include/asm/pmem.h
 delete mode 100644 include/linux/pmem.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 7a28acd7f525..1636ce420251 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7592,9 +7592,7 @@ M:	Ross Zwisler <ross.zwisler@linux.intel.com>
 L:	linux-nvdimm@lists.01.org
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
-F:	drivers/nvdimm/pmem.c
-F:	include/linux/pmem.h
-F:	arch/*/include/asm/pmem.h
+F:	drivers/nvdimm/pmem*
 
 LIGHTNVM PLATFORM SUPPORT
 M:	Matias Bjorling <mb@lightnvm.io>
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
deleted file mode 100644
index b61a25a895a7..000000000000
--- a/arch/x86/include/asm/pmem.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __ASM_X86_PMEM_H__
-#define __ASM_X86_PMEM_H__
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/cpufeature.h>
-#include <asm/special_insns.h>
-
-#ifdef CONFIG_ARCH_HAS_PMEM_API
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent pmem driver flush operation will drain posted write queues.
- */
-static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
-	int rem;
-
-	/*
-	 * We are copying between two kernel buffers, if
-	 * __copy_from_user_inatomic_nocache() returns an error (page
-	 * fault) we would have already reported a general protection fault
-	 * before the WARN+BUG.
-	 */
-	rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
-	if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
-				__func__, dst, src, rem))
-		BUG();
-}
-#endif /* CONFIG_ARCH_HAS_PMEM_API */
-#endif /* __ASM_X86_PMEM_H__ */
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index cbd5596e7562..ac2436538b7e 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -20,7 +20,6 @@
 #include <linux/list.h>
 #include <linux/acpi.h>
 #include <linux/sort.h>
-#include <linux/pmem.h>
 #include <linux/io.h>
 #include <linux/nd.h>
 #include <asm/cacheflush.h>
@@ -1956,7 +1955,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
 	mmio = &nfit_blk->mmio[BDW];
 	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
-                        nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
+                        nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
 	if (!mmio->addr.base) {
 		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
 				nvdimm_name(nvdimm));
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index d2e16c0401df..3beedf173902 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -12,7 +12,6 @@
  */
 #include <linux/device.h>
 #include <linux/sizes.h>
-#include <linux/pmem.h>
 #include "nd-core.h"
 #include "pmem.h"
 #include "pfn.h"
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 9852a3355509..6a1e7a3c0c17 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include "nd-core.h"
 #include "label.h"
+#include "pmem.h"
 #include "nd.h"
 
 static DEFINE_IDA(dimm_ida);
@@ -235,6 +236,13 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
 }
 EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
 
+unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr)
+{
+	/* pmem mapping properties are private to libnvdimm */
+	return ARCH_MEMREMAP_PMEM;
+}
+EXPORT_SYMBOL_GPL(nd_blk_memremap_flags);
+
 struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
 {
 	struct nvdimm *nvdimm = nd_mapping->nvdimm;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 2f9dfbd2dbec..4e9261ef8a95 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -14,10 +14,10 @@
 #include <linux/device.h>
 #include <linux/sort.h>
 #include <linux/slab.h>
-#include <linux/pmem.h>
 #include <linux/list.h>
 #include <linux/nd.h>
 #include "nd-core.h"
+#include "pmem.h"
 #include "nd.h"
 
 static void namespace_io_release(struct device *dev)
@@ -155,11 +155,7 @@ bool pmem_should_map_pages(struct device *dev)
 				IORES_DESC_NONE) == REGION_MIXED)
 		return false;
 
-#ifdef ARCH_MEMREMAP_PMEM
 	return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
-#else
-	return false;
-#endif
 }
 EXPORT_SYMBOL(pmem_should_map_pages);
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 68737bc68a07..06f6c27ec1e9 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -28,7 +28,6 @@
 #include <linux/blk-mq.h>
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
-#include <linux/pmem.h>
 #include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/nd.h>
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 00005900c1b7..fce248a1fc87 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -6,9 +6,11 @@
 #include <linux/fs.h>
 
 #ifdef CONFIG_ARCH_HAS_PMEM_API
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
 void arch_wb_cache_pmem(void *addr, size_t size);
 void arch_invalidate_pmem(void *addr, size_t size);
 #else
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 }
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 985b0e11bd73..3c06a6ea6958 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -15,7 +15,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/hash.h>
-#include <linux/pmem.h>
 #include <linux/sort.h>
 #include <linux/io.h>
 #include <linux/nd.h>
diff --git a/fs/dax.c b/fs/dax.c
index 554b8e7d921c..6d8699feae2e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -25,7 +25,6 @@
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/pagevec.h>
-#include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
 #include <linux/uio.h>
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 6c807017128d..b2f659bd661d 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -159,6 +159,7 @@ void *nd_region_provider_data(struct nd_region *nd_region);
 void *nd_blk_region_provider_data(struct nd_blk_region *ndbr);
 void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data);
 struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
+unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr);
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
deleted file mode 100644
index 559c00848583..000000000000
--- a/include/linux/pmem.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __PMEM_H__
-#define __PMEM_H__
-
-#include <linux/io.h>
-#include <linux/uio.h>
-
-#ifdef CONFIG_ARCH_HAS_PMEM_API
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
-#include <asm/pmem.h>
-#else
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
-/*
- * These are simply here to enable compilation, all call sites gate
- * calling these symbols with arch_has_pmem_api() and redirect to the
- * implementation in asm/pmem.h.
- */
-static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
-	BUG();
-}
-#endif
-
-static inline bool arch_has_pmem_api(void)
-{
-	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
-}
-
-/**
- * memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Perform a memory copy that results in the destination of the copy
- * being effectively evicted from, or never written to, the processor
- * cache hierarchy after the copy completes.  After memcpy_to_pmem()
- * data may still reside in cpu or platform buffers, so this operation
- * must be followed by a blkdev_issue_flush() on the pmem block device.
- */
-static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
-	if (arch_has_pmem_api())
-		arch_memcpy_to_pmem(dst, src, n);
-	else
-		memcpy(dst, src, n);
-}
-#endif /* __PMEM_H__ */
-- 
cgit v1.2.3


From 5d61e43b3975c0582003329d9de9d5e85abf5d33 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 27 Jun 2017 13:06:22 -0700
Subject: dax: remove default copy_from_iter fallback

Require all dax-drivers to register a ->copy_from_iter() operation so
that it is clear which dax_operations are optional and which must be
implemented for filesystem-dax to operate.

Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/sysdev/axonram.c | 8 ++++++++
 drivers/block/brd.c           | 8 ++++++++
 drivers/dax/super.c           | 2 --
 drivers/s390/block/dcssblk.c  | 8 ++++++++
 include/linux/dax.h           | 2 +-
 5 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index a7fe5fee744f..2799706106c6 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -45,6 +45,7 @@
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 #include <linux/pfn_t.h>
+#include <linux/uio.h>
 
 #include <asm/page.h>
 #include <asm/prom.h>
@@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa
 	return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
 }
 
+static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter(addr, bytes, i);
+}
+
 static const struct dax_operations axon_ram_dax_ops = {
 	.direct_access = axon_ram_dax_direct_access,
+	.copy_from_iter = axon_ram_copy_from_iter,
 };
 
 /**
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f2f66a..f2a7ac350f6a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -22,6 +22,7 @@
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 #include <linux/pfn_t.h>
 #include <linux/dax.h>
+#include <linux/uio.h>
 #endif
 
 #include <linux/uaccess.h>
@@ -354,8 +355,15 @@ static long brd_dax_direct_access(struct dax_device *dax_dev,
 	return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
 }
 
+static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter(addr, bytes, i);
+}
+
 static const struct dax_operations brd_dax_ops = {
 	.direct_access = brd_dax_direct_access,
+	.copy_from_iter = brd_dax_copy_from_iter,
 };
 #endif
 
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index b7729e4d351a..9e0160b950d7 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -179,8 +179,6 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 	if (!dax_alive(dax_dev))
 		return 0;
 
-	if (!dax_dev->ops->copy_from_iter)
-		return copy_from_iter(addr, bytes, i);
 	return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 EXPORT_SYMBOL_GPL(dax_copy_from_iter);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280af3e4..88fa7b3f7a9d 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/pfn_t.h>
+#include <linux/uio.h>
 #include <linux/dax.h>
 #include <asm/extmem.h>
 #include <asm/io.h>
@@ -43,8 +44,15 @@ static const struct block_device_operations dcssblk_devops = {
 	.release 	= dcssblk_release,
 };
 
+static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev,
+		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter(addr, bytes, i);
+}
+
 static const struct dax_operations dcssblk_dax_ops = {
 	.direct_access = dcssblk_dax_direct_access,
+	.copy_from_iter = dcssblk_dax_copy_from_iter,
 };
 
 struct dcssblk_dev_info {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 1f6b6072af64..73fca1bebaf3 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -16,7 +16,7 @@ struct dax_operations {
 	 */
 	long (*direct_access)(struct dax_device *, pgoff_t, long,
 			void **, pfn_t *);
-	/* copy_from_iter: dax-driver override for default copy_from_iter */
+	/* copy_from_iter: required operation for fs-dax direct-i/o */
 	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
 			struct iov_iter *);
 	/* flush: optional driver-specific cache management after writes */
-- 
cgit v1.2.3


From 8370c2dc4c7b91be7e1231130f0ae08b5aebecf4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 24 Jun 2017 01:56:13 +0200
Subject: PCI / PM: Drop pme_interrupt flag from struct pci_dev

The pme_interrupt flag in struct pci_dev is set when PMEs generated
by the device are going to be signaled via root port PME interrupts.

Ironically enough, that information is only used by the code setting
up device wakeup through ACPI which returns as soon as it sees the
pme_interrupt flag set while setting up "remote runtime wakeup".
That is questionable, however, because in theory there may be PCIe
devices using out-of-band PME signaling under root ports handled
by the native PME code or devices requiring wakeup power setup to be
carried out by AML.  For such devices, ACPI wakeup should be invoked
regardless of whether or not native PME signaling is used in general.

For this reason, drop the pme_interrupt flag and rework the code
using it which then allows the ACPI-based device wakeup handling
in PCI to be consolidated to use one code path for both "runtime
remote wakeup" and system wakeup (from sleep states).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/device_pm.c | 10 +++++--
 drivers/pci/pci-acpi.c   | 68 +++++++++++-------------------------------------
 drivers/pci/pcie/pme.c   | 14 +++++-----
 include/acpi/acpi_bus.h  |  5 ++++
 include/linux/pci.h      |  1 -
 5 files changed, 34 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d2e985a4bac2..28938b5a334e 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -496,6 +496,13 @@ bool acpi_bus_can_wakeup(acpi_handle handle)
 }
 EXPORT_SYMBOL(acpi_bus_can_wakeup);
 
+bool acpi_pm_device_can_wakeup(struct device *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	return adev ? acpi_device_can_wakeup(adev) : false;
+}
+
 /**
  * acpi_dev_pm_get_state - Get preferred power state of ACPI device.
  * @dev: Device whose preferred target power state to return.
@@ -737,8 +744,7 @@ int acpi_pm_set_device_wakeup(struct device *dev, bool enable)
 
 	error = acpi_device_wakeup(adev, acpi_target_system_state(), enable);
 	if (!error)
-		dev_dbg(dev, "Wakeup %s by ACPI\n",
-			enable ? "enabled" : "disabled");
+		dev_dbg(dev, "Wakeup %s by ACPI\n", enable ? "enabled" : "disabled");
 
 	return error;
 }
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index eb014b8ab01a..de255bc9736b 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -569,67 +569,29 @@ static pci_power_t acpi_pci_get_power_state(struct pci_dev *dev)
 	return state_conv[state];
 }
 
-static bool acpi_pci_can_wakeup(struct pci_dev *dev)
-{
-	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
-	return adev ? acpi_device_can_wakeup(adev) : false;
-}
-
-static void acpi_pci_propagate_wakeup_enable(struct pci_bus *bus, bool enable)
+static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable)
 {
 	while (bus->parent) {
-		if (!acpi_pm_set_device_wakeup(&bus->self->dev, enable))
-			return;
-		bus = bus->parent;
-	}
-
-	/* We have reached the root bus. */
-	if (bus->bridge)
-		acpi_pm_set_device_wakeup(bus->bridge, enable);
-}
+		if (acpi_pm_device_can_wakeup(&bus->self->dev))
+			return acpi_pm_set_device_wakeup(&bus->self->dev, enable);
 
-static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable)
-{
-	if (acpi_pci_can_wakeup(dev))
-		return acpi_pm_set_device_wakeup(&dev->dev, enable);
-
-	acpi_pci_propagate_wakeup_enable(dev->bus, enable);
-	return 0;
-}
-
-static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable)
-{
-	while (bus->parent) {
-		struct pci_dev *bridge = bus->self;
-
-		if (bridge->pme_interrupt)
-			return;
-		if (!acpi_pm_set_device_wakeup(&bridge->dev, enable))
-			return;
 		bus = bus->parent;
 	}
 
 	/* We have reached the root bus. */
-	if (bus->bridge)
-		acpi_pm_set_device_wakeup(bus->bridge, enable);
+	if (bus->bridge) {
+		if (acpi_pm_device_can_wakeup(bus->bridge))
+			return acpi_pm_set_device_wakeup(bus->bridge, enable);
+	}
+	return 0;
 }
 
-static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
+static int acpi_pci_wakeup(struct pci_dev *dev, bool enable)
 {
-	/*
-	 * Per PCI Express Base Specification Revision 2.0 section
-	 * 5.3.3.2 Link Wakeup, platform support is needed for D3cold
-	 * waking up to power on the main link even if there is PME
-	 * support for D3cold
-	 */
-	if (dev->pme_interrupt && !dev->runtime_d3cold)
-		return 0;
-
-	if (!acpi_pm_set_device_wakeup(&dev->dev, enable))
-		return 0;
+	if (acpi_pm_device_can_wakeup(&dev->dev))
+		return acpi_pm_set_device_wakeup(&dev->dev, enable);
 
-	acpi_pci_propagate_run_wake(dev->bus, enable);
-	return 0;
+	return acpi_pci_propagate_wakeup(dev->bus, enable);
 }
 
 static bool acpi_pci_need_resume(struct pci_dev *dev)
@@ -653,8 +615,8 @@ static const struct pci_platform_pm_ops acpi_pci_platform_pm = {
 	.set_state = acpi_pci_set_power_state,
 	.get_state = acpi_pci_get_power_state,
 	.choose_state = acpi_pci_choose_state,
-	.sleep_wake = acpi_pci_sleep_wake,
-	.run_wake = acpi_pci_run_wake,
+	.sleep_wake = acpi_pci_wakeup,
+	.run_wake = acpi_pci_wakeup,
 	.need_resume = acpi_pci_need_resume,
 };
 
@@ -778,7 +740,7 @@ static void pci_acpi_setup(struct device *dev)
 
 	device_set_wakeup_capable(dev, true);
 	device_set_run_wake(dev, true);
-	acpi_pci_sleep_wake(pci_dev, false);
+	acpi_pci_wakeup(pci_dev, false);
 }
 
 static void pci_acpi_cleanup(struct device *dev)
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 2dd1c68e6de8..1db1615838ac 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -294,31 +294,29 @@ static irqreturn_t pcie_pme_irq(int irq, void *context)
 }
 
 /**
- * pcie_pme_set_native - Set the PME interrupt flag for given device.
+ * pcie_pme_can_wakeup - Set the wakeup capability flag.
  * @dev: PCI device to handle.
  * @ign: Ignored.
  */
-static int pcie_pme_set_native(struct pci_dev *dev, void *ign)
+static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign)
 {
 	device_set_run_wake(&dev->dev, true);
-	dev->pme_interrupt = true;
 	return 0;
 }
 
 /**
- * pcie_pme_mark_devices - Set the PME interrupt flag for devices below a port.
+ * pcie_pme_mark_devices - Set the wakeup flag for devices below a port.
  * @port: PCIe root port or event collector to handle.
  *
  * For each device below given root port, including the port itself (or for each
  * root complex integrated endpoint if @port is a root complex event collector)
- * set the flag indicating that it can signal run-time wake-up events via PCIe
- * PME interrupts.
+ * set the flag indicating that it can signal run-time wake-up events.
  */
 static void pcie_pme_mark_devices(struct pci_dev *port)
 {
-	pcie_pme_set_native(port, NULL);
+	pcie_pme_can_wakeup(port, NULL);
 	if (port->subordinate)
-		pci_walk_bus(port->subordinate, pcie_pme_set_native, NULL);
+		pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL);
 }
 
 /**
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 6bf0f843f7d7..be6b6bb4ef9c 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -602,6 +602,7 @@ void acpi_pm_wakeup_event(struct device *dev);
 acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev,
 			void (*func)(struct acpi_device_wakeup_context *context));
 acpi_status acpi_remove_pm_notifier(struct acpi_device *adev);
+bool acpi_pm_device_can_wakeup(struct device *dev);
 int acpi_pm_device_sleep_state(struct device *, int *, int);
 int acpi_pm_set_device_wakeup(struct device *dev, bool enable);
 #else
@@ -618,6 +619,10 @@ static inline acpi_status acpi_remove_pm_notifier(struct acpi_device *adev)
 {
 	return AE_SUPPORT;
 }
+static inline bool acpi_pm_device_can_wakeup(struct device *dev)
+{
+	return false;
+}
 static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m)
 {
 	if (p)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8039f9f0ca05..d3d5bca82b43 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -307,7 +307,6 @@ struct pci_dev {
 	u8		pm_cap;		/* PM capability offset */
 	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
 					   can be generated */
-	unsigned int	pme_interrupt:1;
 	unsigned int	pme_poll:1;	/* Poll device's PME status bit */
 	unsigned int	d1_support:1;	/* Low power state D1 is supported */
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
-- 
cgit v1.2.3


From 0847684cfc5f0e9f009919bfdcb041d60e19b856 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 24 Jun 2017 01:57:35 +0200
Subject: PCI / PM: Simplify device wakeup settings code

After previous changes it is not necessary to distinguish between
device wakeup for run time and device wakeup from system sleep states
any more, so rework the PCI device wakeup settings code accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-acpi.c   |  3 +--
 drivers/pci/pci-driver.c |  2 +-
 drivers/pci/pci-mid.c    | 10 ++--------
 drivers/pci/pci.c        | 36 ++++++++++--------------------------
 drivers/pci/pci.h        |  9 ++-------
 include/linux/pci.h      |  9 +--------
 6 files changed, 17 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index de255bc9736b..138a3c55d80e 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -615,8 +615,7 @@ static const struct pci_platform_pm_ops acpi_pci_platform_pm = {
 	.set_state = acpi_pci_set_power_state,
 	.get_state = acpi_pci_get_power_state,
 	.choose_state = acpi_pci_choose_state,
-	.sleep_wake = acpi_pci_wakeup,
-	.run_wake = acpi_pci_wakeup,
+	.set_wakeup = acpi_pci_wakeup,
 	.need_resume = acpi_pci_need_resume,
 };
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 192e7b681b96..ffe7d54d9328 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1216,7 +1216,7 @@ static int pci_pm_runtime_resume(struct device *dev)
 
 	pci_restore_standard_config(pci_dev);
 	pci_fixup_device(pci_fixup_resume_early, pci_dev);
-	__pci_enable_wake(pci_dev, PCI_D0, true, false);
+	pci_enable_wake(pci_dev, PCI_D0, false);
 	pci_fixup_device(pci_fixup_resume, pci_dev);
 
 	rc = pm->runtime_resume(dev);
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
index 1c4af7227bca..a4ac940c7696 100644
--- a/drivers/pci/pci-mid.c
+++ b/drivers/pci/pci-mid.c
@@ -39,12 +39,7 @@ static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
 	return PCI_D3hot;
 }
 
-static int mid_pci_sleep_wake(struct pci_dev *dev, bool enable)
-{
-	return 0;
-}
-
-static int mid_pci_run_wake(struct pci_dev *dev, bool enable)
+static int mid_pci_wakeup(struct pci_dev *dev, bool enable)
 {
 	return 0;
 }
@@ -59,8 +54,7 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = {
 	.set_state	= mid_pci_set_power_state,
 	.get_state	= mid_pci_get_power_state,
 	.choose_state	= mid_pci_choose_state,
-	.sleep_wake	= mid_pci_sleep_wake,
-	.run_wake	= mid_pci_run_wake,
+	.set_wakeup	= mid_pci_wakeup,
 	.need_resume	= mid_pci_need_resume,
 };
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 5641035e58fa..d378262d30e3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -574,8 +574,7 @@ static const struct pci_platform_pm_ops *pci_platform_pm;
 int pci_set_platform_pm(const struct pci_platform_pm_ops *ops)
 {
 	if (!ops->is_manageable || !ops->set_state  || !ops->get_state ||
-	    !ops->choose_state  || !ops->sleep_wake || !ops->run_wake  ||
-	    !ops->need_resume)
+	    !ops->choose_state  || !ops->set_wakeup || !ops->need_resume)
 		return -EINVAL;
 	pci_platform_pm = ops;
 	return 0;
@@ -603,16 +602,10 @@ static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev)
 			pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR;
 }
 
-static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
+static inline int platform_pci_set_wakeup(struct pci_dev *dev, bool enable)
 {
 	return pci_platform_pm ?
-			pci_platform_pm->sleep_wake(dev, enable) : -ENODEV;
-}
-
-static inline int platform_pci_run_wake(struct pci_dev *dev, bool enable)
-{
-	return pci_platform_pm ?
-			pci_platform_pm->run_wake(dev, enable) : -ENODEV;
+			pci_platform_pm->set_wakeup(dev, enable) : -ENODEV;
 }
 
 static inline bool platform_pci_need_resume(struct pci_dev *dev)
@@ -1889,10 +1882,9 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
 EXPORT_SYMBOL(pci_pme_active);
 
 /**
- * __pci_enable_wake - enable PCI device as wakeup event source
+ * pci_enable_wake - enable PCI device as wakeup event source
  * @dev: PCI device affected
  * @state: PCI state from which device will issue wakeup events
- * @runtime: True if the events are to be generated at run time
  * @enable: True to enable event generation; false to disable
  *
  * This enables the device as a wakeup event source, or disables it.
@@ -1908,14 +1900,10 @@ EXPORT_SYMBOL(pci_pme_active);
  * Error code depending on the platform is returned if both the platform and
  * the native mechanism fail to enable the generation of wake-up events
  */
-int __pci_enable_wake(struct pci_dev *dev, pci_power_t state,
-		      bool runtime, bool enable)
+int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
 {
 	int ret = 0;
 
-	if (enable && !runtime && !device_may_wakeup(&dev->dev))
-		return -EINVAL;
-
 	/*
 	 * Don't do the same thing twice in a row for one device, but restore
 	 * PME Enable in case it has been updated by config space restoration.
@@ -1938,24 +1926,20 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t state,
 			pci_pme_active(dev, true);
 		else
 			ret = 1;
-		error = runtime ? platform_pci_run_wake(dev, true) :
-					platform_pci_sleep_wake(dev, true);
+		error = platform_pci_set_wakeup(dev, true);
 		if (ret)
 			ret = error;
 		if (!ret)
 			dev->wakeup_prepared = true;
 	} else {
-		if (runtime)
-			platform_pci_run_wake(dev, false);
-		else
-			platform_pci_sleep_wake(dev, false);
+		platform_pci_set_wakeup(dev, false);
 		pci_pme_active(dev, false);
 		dev->wakeup_prepared = false;
 	}
 
 	return ret;
 }
-EXPORT_SYMBOL(__pci_enable_wake);
+EXPORT_SYMBOL(pci_enable_wake);
 
 /**
  * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
@@ -2097,12 +2081,12 @@ int pci_finish_runtime_suspend(struct pci_dev *dev)
 
 	dev->runtime_d3cold = target_state == PCI_D3cold;
 
-	__pci_enable_wake(dev, target_state, true, pci_dev_run_wake(dev));
+	pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
 
 	error = pci_set_power_state(dev, target_state);
 
 	if (error) {
-		__pci_enable_wake(dev, target_state, true, false);
+		pci_enable_wake(dev, target_state, false);
 		dev->runtime_d3cold = false;
 	}
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f8113e5b9812..240b2c0fed4b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -47,11 +47,7 @@ int pci_probe_reset_function(struct pci_dev *dev);
  *                platform; to be used during system-wide transitions from a
  *                sleeping state to the working state and vice versa
  *
- * @sleep_wake: enables/disables the system wake up capability of given device
- *
- * @run_wake: enables/disables the platform to generate run-time wake-up events
- *		for given device (the device's wake-up capability has to be
- *		enabled by @sleep_wake for this feature to work)
+ * @set_wakeup: enables/disables wakeup capability for the device
  *
  * @need_resume: returns 'true' if the given device (which is currently
  *		suspended) needs to be resumed to be configured for system
@@ -65,8 +61,7 @@ struct pci_platform_pm_ops {
 	int (*set_state)(struct pci_dev *dev, pci_power_t state);
 	pci_power_t (*get_state)(struct pci_dev *dev);
 	pci_power_t (*choose_state)(struct pci_dev *dev);
-	int (*sleep_wake)(struct pci_dev *dev, bool enable);
-	int (*run_wake)(struct pci_dev *dev, bool enable);
+	int (*set_wakeup)(struct pci_dev *dev, bool enable);
 	bool (*need_resume)(struct pci_dev *dev);
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d3d5bca82b43..ff200c84240b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1097,8 +1097,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
-int __pci_enable_wake(struct pci_dev *dev, pci_power_t state,
-		      bool runtime, bool enable);
+int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable);
 int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
@@ -1108,12 +1107,6 @@ void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
 
-static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
-				  bool enable)
-{
-	return __pci_enable_wake(dev, state, false, enable);
-}
-
 /* PCI Virtual Channel */
 int pci_save_vc_state(struct pci_dev *dev);
 void pci_restore_vc_state(struct pci_dev *dev);
-- 
cgit v1.2.3


From de3ef1eb1cd0cc3a75f7a3661e10ed827f370ab8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 24 Jun 2017 01:58:53 +0200
Subject: PM / core: Drop run_wake flag from struct dev_pm_info

The run_wake flag in struct dev_pm_info is used to indicate whether
or not the device is capable of generating remote wakeup signals at
run time (or in the system working state), but the distinction
between runtime remote wakeup and system wakeup signaling has always
been rather artificial.  The only practical reason for it to exist
at the core level was that ACPI and PCI treated those two cases
differently, but that's not the case any more after recent changes.

For this reason, get rid of the run_wake flag and, when applicable,
use device_set_wakeup_capable() and device_can_wakeup() instead of
device_set_run_wake() and device_run_wake(), respectively.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 Documentation/power/runtime_pm.txt |  7 ++-----
 drivers/acpi/pci_root.c            |  5 ++---
 drivers/pci/pci-acpi.c             |  5 +----
 drivers/pci/pci.c                  |  6 +++---
 drivers/pci/pcie/pme.c             |  2 +-
 drivers/usb/dwc3/dwc3-pci.c        |  3 +--
 drivers/usb/host/uhci-pci.c        |  2 +-
 include/linux/pm.h                 |  1 -
 include/linux/pm_runtime.h         | 12 ------------
 9 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index ee69d7532172..0fde3dcf077a 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -105,9 +105,9 @@ knows what to do to handle the device).
 
 In particular, if the driver requires remote wakeup capability (i.e. hardware
 mechanism allowing the device to request a change of its power state, such as
-PCI PME) for proper functioning and device_run_wake() returns 'false' for the
+PCI PME) for proper functioning and device_can_wakeup() returns 'false' for the
 device, then ->runtime_suspend() should return -EBUSY.  On the other hand, if
-device_run_wake() returns 'true' for the device and the device is put into a
+device_can_wakeup() returns 'true' for the device and the device is put into a
 low-power state during the execution of the suspend callback, it is expected
 that remote wakeup will be enabled for the device.  Generally, remote wakeup
 should be enabled for all input devices put into low-power states at run time.
@@ -253,9 +253,6 @@ defined in include/linux/pm.h:
       being executed for that device and it is not practical to wait for the
       suspend to complete; means "start a resume as soon as you've suspended"
 
-  unsigned int run_wake;
-    - set if the device is capable of generating runtime wake-up events
-
   enum rpm_status runtime_status;
     - the runtime PM status of the device; this field's initial value is
       RPM_SUSPENDED, which means that each device is initially regarded by the
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 783acbe25520..0d34e622a8b5 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -608,8 +608,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
 		pcie_no_aspm();
 
 	pci_acpi_add_bus_pm_notifier(device);
-	if (device->wakeup.flags.valid)
-		device_set_run_wake(root->bus->bridge, true);
+	device_set_wakeup_capable(root->bus->bridge, device->wakeup.flags.valid);
 
 	if (hotadd) {
 		pcibios_resource_survey_bus(root->bus);
@@ -649,7 +648,7 @@ static void acpi_pci_root_remove(struct acpi_device *device)
 	pci_stop_root_bus(root->bus);
 
 	pci_ioapic_remove(root);
-	device_set_run_wake(root->bus->bridge, false);
+	device_set_wakeup_capable(root->bus->bridge, false);
 	pci_acpi_remove_bus_pm_notifier(device);
 
 	pci_remove_root_bus(root->bus);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 138a3c55d80e..e70c1c7ba1bf 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -738,7 +738,6 @@ static void pci_acpi_setup(struct device *dev)
 		return;
 
 	device_set_wakeup_capable(dev, true);
-	device_set_run_wake(dev, true);
 	acpi_pci_wakeup(pci_dev, false);
 }
 
@@ -750,10 +749,8 @@ static void pci_acpi_cleanup(struct device *dev)
 		return;
 
 	pci_acpi_remove_pm_notifier(adev);
-	if (adev->wakeup.flags.valid) {
+	if (adev->wakeup.flags.valid)
 		device_set_wakeup_capable(dev, false);
-		device_set_run_wake(dev, false);
-	}
 }
 
 static bool pci_acpi_bus_match(struct device *dev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d378262d30e3..0b5302a9fdae 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2105,7 +2105,7 @@ bool pci_dev_run_wake(struct pci_dev *dev)
 {
 	struct pci_bus *bus = dev->bus;
 
-	if (device_run_wake(&dev->dev))
+	if (device_can_wakeup(&dev->dev))
 		return true;
 
 	if (!dev->pme_support)
@@ -2118,7 +2118,7 @@ bool pci_dev_run_wake(struct pci_dev *dev)
 	while (bus->parent) {
 		struct pci_dev *bridge = bus->self;
 
-		if (device_run_wake(&bridge->dev))
+		if (device_can_wakeup(&bridge->dev))
 			return true;
 
 		bus = bus->parent;
@@ -2126,7 +2126,7 @@ bool pci_dev_run_wake(struct pci_dev *dev)
 
 	/* We have reached the root bus. */
 	if (bus->bridge)
-		return device_run_wake(bus->bridge);
+		return device_can_wakeup(bus->bridge);
 
 	return false;
 }
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 1db1615838ac..80e58d25006d 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -300,7 +300,7 @@ static irqreturn_t pcie_pme_irq(int irq, void *context)
  */
 static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign)
 {
-	device_set_run_wake(&dev->dev, true);
+	device_set_wakeup_capable(&dev->dev, true);
 	return 0;
 }
 
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index fe851544d7fb..7e995df7a797 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -230,7 +230,6 @@ static int dwc3_pci_probe(struct pci_dev *pci,
 	}
 
 	device_init_wakeup(dev, true);
-	device_set_run_wake(dev, true);
 	pci_set_drvdata(pci, dwc);
 	pm_runtime_put(dev);
 
@@ -310,7 +309,7 @@ static int dwc3_pci_runtime_suspend(struct device *dev)
 {
 	struct dwc3_pci		*dwc = dev_get_drvdata(dev);
 
-	if (device_run_wake(dev))
+	if (device_can_wakeup(dev))
 		return dwc3_pci_dsm(dwc, PCI_INTEL_BXT_STATE_D3);
 
 	return -EBUSY;
diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c
index 02260cfdedb1..49effdc0d857 100644
--- a/drivers/usb/host/uhci-pci.c
+++ b/drivers/usb/host/uhci-pci.c
@@ -131,7 +131,7 @@ static int uhci_pci_init(struct usb_hcd *hcd)
 
 	/* Intel controllers use non-PME wakeup signalling */
 	if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_INTEL)
-		device_set_run_wake(uhci_dev(uhci), 1);
+		device_set_wakeup_capable(uhci_dev(uhci), true);
 
 	/* Set up pointers to PCI-specific functions */
 	uhci->reset_hc = uhci_pci_reset_hc;
diff --git a/include/linux/pm.h b/include/linux/pm.h
index a0894bc52bb4..b8b4df09fd8f 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -584,7 +584,6 @@ struct dev_pm_info {
 	unsigned int		idle_notification:1;
 	unsigned int		request_pending:1;
 	unsigned int		deferred_resume:1;
-	unsigned int		run_wake:1;
 	unsigned int		runtime_auto:1;
 	bool			ignore_children:1;
 	unsigned int		no_callbacks:1;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index ca4823e675e2..2efb08a60e63 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -76,16 +76,6 @@ static inline void pm_runtime_put_noidle(struct device *dev)
 	atomic_add_unless(&dev->power.usage_count, -1, 0);
 }
 
-static inline bool device_run_wake(struct device *dev)
-{
-	return dev->power.run_wake;
-}
-
-static inline void device_set_run_wake(struct device *dev, bool enable)
-{
-	dev->power.run_wake = enable;
-}
-
 static inline bool pm_runtime_suspended(struct device *dev)
 {
 	return dev->power.runtime_status == RPM_SUSPENDED
@@ -163,8 +153,6 @@ static inline void pm_runtime_forbid(struct device *dev) {}
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
 static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
-static inline bool device_run_wake(struct device *dev) { return false; }
-static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
 static inline bool pm_runtime_active(struct device *dev) { return true; }
 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
-- 
cgit v1.2.3


From fd851a3cdc196bfc1d229b5f22369069af532bf8 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 29 May 2017 12:22:23 +1000
Subject: spin loop primitives for busy waiting

Current busy-wait loops are implemented by repeatedly calling cpu_relax()
to give an arch option for a low-latency option to improve power and/or
SMT resource contention.

This poses some difficulties for powerpc, which has SMT priority setting
instructions (priorities determine how ifetch cycles are apportioned).
powerpc's cpu_relax() is implemented by setting a low priority then
setting normal priority. This has several problems:

 - Changing thread priority can have some execution cost and potential
   impact to other threads in the core. It's inefficient to execute them
   every time around a busy-wait loop.

 - Depending on implementation details, a `low ; medium` sequence may
   not have much if any affect. Some software with similar pattern
   actually inserts a lot of nops between, in order to cause a few fetch
   cycles with the low priority.

 - The busy-wait loop runs with regular priority. This might only be a few
   fetch cycles, but if there are several threads running such loops, they
   could cause a noticable impact on a non-idle thread.

Implement spin_begin, spin_end primitives that can be used around busy
wait loops, which default to no-ops. And spin_cpu_relax which defaults to
cpu_relax.

This will allow architectures to hook the entry and exit of busy-wait
loops, and will allow powerpc to set low SMT priority at entry, and
normal priority at exit.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 include/linux/processor.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 include/linux/processor.h

(limited to 'include/linux')

diff --git a/include/linux/processor.h b/include/linux/processor.h
new file mode 100644
index 000000000000..da0c5e56ca02
--- /dev/null
+++ b/include/linux/processor.h
@@ -0,0 +1,70 @@
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should and avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive including nested uses
+ * of these primitives. It should not lock or take any other resource.
+ * Violations of these guidelies will not cause a bug, but may cause sub
+ * optimal performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detection of resource owner and decision to spin or sleep or guest-yield
+ * (e.g., spin lock holder vcpu preempted, or mutex owner not on CPU) can be
+ * tested within the loop body.
+ */
+#ifndef spin_begin
+#define spin_begin()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield() cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end()
+#endif
+
+/*
+ * spin_until_cond can be used to wait for a condition to become true. It
+ * may be expected that the first iteration will true in the common case
+ * (no spinning), so that callers should not require a first "likely" test
+ * for the uncontended case before using this primitive.
+ *
+ * Usage and implementation guidelines are the same as for the spin_begin
+ * primitives, above.
+ */
+#ifndef spin_until_cond
+#define spin_until_cond(cond)					\
+do {								\
+	if (unlikely(!(cond))) {				\
+		spin_begin();					\
+		do {						\
+			spin_cpu_relax();			\
+		} while (!(cond));				\
+		spin_end();					\
+	}							\
+} while (0)
+
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
-- 
cgit v1.2.3


From f51f288e237cbcfd3dbd1d4fa2d3dec00d7253e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 May 2017 10:58:49 +0200
Subject: dma-mapping: remove DMA_ERROR_CODE

And update the documentation - dma_mapping_error has been supported
everywhere for a long time.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/DMA-API-HOWTO.txt | 31 +++++--------------------------
 include/linux/dma-mapping.h     |  5 -----
 2 files changed, 5 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 979228bc9035..4ed388356898 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -550,32 +550,11 @@ and to unmap it:
 	dma_unmap_single(dev, dma_handle, size, direction);
 
 You should call dma_mapping_error() as dma_map_single() could fail and return
-error. Not all DMA implementations support the dma_mapping_error() interface.
-However, it is a good practice to call dma_mapping_error() interface, which
-will invoke the generic mapping error check interface. Doing so will ensure
-that the mapping code will work correctly on all DMA implementations without
-any dependency on the specifics of the underlying implementation. Using the
-returned address without checking for errors could result in failures ranging
-from panics to silent data corruption. A couple of examples of incorrect ways
-to check for errors that make assumptions about the underlying DMA
-implementation are as follows and these are applicable to dma_map_page() as
-well.
-
-Incorrect example 1:
-	dma_addr_t dma_handle;
-
-	dma_handle = dma_map_single(dev, addr, size, direction);
-	if ((dma_handle & 0xffff != 0) || (dma_handle >= 0x1000000)) {
-		goto map_error;
-	}
-
-Incorrect example 2:
-	dma_addr_t dma_handle;
-
-	dma_handle = dma_map_single(dev, addr, size, direction);
-	if (dma_handle == DMA_ERROR_CODE) {
-		goto map_error;
-	}
+error.  Doing so will ensure that the mapping code will work correctly on all
+DMA implementations without any dependency on the specifics of the underlying
+implementation. Using the returned address without checking for errors could
+result in failures ranging from panics to silent data corruption.  The same
+applies to dma_map_page() as well.
 
 You should call dma_unmap_single() when the DMA activity is finished, e.g.,
 from the interrupt which told you that the DMA transfer is done.
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4f3eecedca2d..a57875309bfd 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -546,12 +546,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 
 	if (get_dma_ops(dev)->mapping_error)
 		return get_dma_ops(dev)->mapping_error(dev, dma_addr);
-
-#ifdef DMA_ERROR_CODE
-	return dma_addr == DMA_ERROR_CODE;
-#else
 	return 0;
-#endif
 }
 
 #ifndef HAVE_ARCH_DMA_SUPPORTED
-- 
cgit v1.2.3


From 447d899b18169b2ee5e42b2fa8b32dbb40a30a24 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 May 2017 11:40:56 +0200
Subject: dma-mapping: remove HAVE_ARCH_DMA_SUPPORTED

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index a57875309bfd..3e5908656226 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -549,7 +549,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 	return 0;
 }
 
-#ifndef HAVE_ARCH_DMA_SUPPORTED
 static inline int dma_supported(struct device *dev, u64 mask)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -560,7 +559,6 @@ static inline int dma_supported(struct device *dev, u64 mask)
 		return 1;
 	return ops->dma_supported(dev, mask);
 }
-#endif
 
 #ifndef HAVE_ARCH_DMA_SET_MASK
 static inline int dma_set_mask(struct device *dev, u64 mask)
-- 
cgit v1.2.3


From 8cc9c26029d8ac3c627ecf8545b617fb78def5d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 Jun 2017 17:05:25 +0200
Subject: dma-mapping: remove the set_dma_mask method

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/kernel/dma.c   | 4 ----
 include/linux/dma-mapping.h | 6 ------
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 41c749586bd2..466c9f07b288 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -316,10 +316,6 @@ EXPORT_SYMBOL(dma_set_coherent_mask);
 
 int __dma_set_mask(struct device *dev, u64 dma_mask)
 {
-	const struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
-	if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
-		return dma_ops->set_dma_mask(dev, dma_mask);
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
 		return -EIO;
 	*dev->dma_mask = dma_mask;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 3e5908656226..527f2ed8c645 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -127,7 +127,6 @@ struct dma_map_ops {
 				   enum dma_data_direction dir);
 	int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
 	int (*dma_supported)(struct device *dev, u64 mask);
-	int (*set_dma_mask)(struct device *dev, u64 mask);
 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	u64 (*get_required_mask)(struct device *dev);
 #endif
@@ -563,11 +562,6 @@ static inline int dma_supported(struct device *dev, u64 mask)
 #ifndef HAVE_ARCH_DMA_SET_MASK
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
-	const struct dma_map_ops *ops = get_dma_ops(dev);
-
-	if (ops->set_dma_mask)
-		return ops->set_dma_mask(dev, mask);
-
 	if (!dev->dma_mask || !dma_supported(dev, mask))
 		return -EIO;
 	*dev->dma_mask = mask;
-- 
cgit v1.2.3


From 03b643866d889d6edc87cdcee2b3880b7879a441 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 12 Jun 2017 19:05:09 +0200
Subject: dma-mapping: remove dmam_free_noncoherent

This function was never used since it was added.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
---
 Documentation/driver-model/devres.txt |  1 -
 drivers/base/dma-mapping.c            | 20 --------------------
 include/linux/dma-mapping.h           |  2 --
 3 files changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index e72587fe477d..9070ff06b558 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -243,7 +243,6 @@ DMA
   dmam_alloc_noncoherent()
   dmam_declare_coherent_memory()
   dmam_free_coherent()
-  dmam_free_noncoherent()
   dmam_pool_create()
   dmam_pool_destroy()
 
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index f3deb6af42ad..5940c9dace36 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -148,26 +148,6 @@ void *dmam_alloc_noncoherent(struct device *dev, size_t size,
 }
 EXPORT_SYMBOL(dmam_alloc_noncoherent);
 
-/**
- * dmam_free_coherent - Managed dma_free_noncoherent()
- * @dev: Device to free noncoherent memory for
- * @size: Size of allocation
- * @vaddr: Virtual address of the memory to free
- * @dma_handle: DMA handle of the memory to free
- *
- * Managed dma_free_noncoherent().
- */
-void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr,
-			   dma_addr_t dma_handle)
-{
-	struct dma_devres match_data = { size, vaddr, dma_handle };
-
-	dma_free_noncoherent(dev, size, vaddr, dma_handle);
-	WARN_ON(!devres_destroy(dev, dmam_noncoherent_release, dmam_match,
-				&match_data));
-}
-EXPORT_SYMBOL(dmam_free_noncoherent);
-
 #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
 
 static void dmam_coherent_decl_release(struct device *dev, void *res)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 527f2ed8c645..4038dd34afa3 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -736,8 +736,6 @@ extern void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
 			       dma_addr_t dma_handle);
 extern void *dmam_alloc_noncoherent(struct device *dev, size_t size,
 				    dma_addr_t *dma_handle, gfp_t gfp);
-extern void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr,
-				  dma_addr_t dma_handle);
 #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
 extern int dmam_declare_coherent_memory(struct device *dev,
 					phys_addr_t phys_addr,
-- 
cgit v1.2.3


From 63d36c95500400642f656ba1970980746cf437f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 12 Jun 2017 19:15:04 +0200
Subject: dma-mapping: replace dmam_alloc_noncoherent with dmam_alloc_attrs

dmam_alloc_noncoherent is a trivial wrapper around dmam_alloc_attrs,
that hardcodes one particular flag.  Make the devres code more
flexible by allowing the callers to pass arbitrary flags.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
---
 Documentation/driver-model/devres.txt |  2 +-
 drivers/base/dma-mapping.c            | 36 ++++++++++++++++-------------------
 drivers/video/fbdev/au1200fb.c        |  5 +++--
 include/linux/dma-mapping.h           |  5 +++--
 4 files changed, 23 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 9070ff06b558..7e08c02b5393 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -240,7 +240,7 @@ CLOCK
 
 DMA
   dmam_alloc_coherent()
-  dmam_alloc_noncoherent()
+  dmam_alloc_attrs()
   dmam_declare_coherent_memory()
   dmam_free_coherent()
   dmam_pool_create()
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 5940c9dace36..10e7c022e8cf 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -22,20 +22,15 @@ struct dma_devres {
 	size_t		size;
 	void		*vaddr;
 	dma_addr_t	dma_handle;
+	unsigned long	attrs;
 };
 
-static void dmam_coherent_release(struct device *dev, void *res)
+static void dmam_release(struct device *dev, void *res)
 {
 	struct dma_devres *this = res;
 
-	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
-}
-
-static void dmam_noncoherent_release(struct device *dev, void *res)
-{
-	struct dma_devres *this = res;
-
-	dma_free_noncoherent(dev, this->size, this->vaddr, this->dma_handle);
+	dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle,
+			this->attrs);
 }
 
 static int dmam_match(struct device *dev, void *res, void *match_data)
@@ -69,7 +64,7 @@ void *dmam_alloc_coherent(struct device *dev, size_t size,
 	struct dma_devres *dr;
 	void *vaddr;
 
-	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
+	dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
 	if (!dr)
 		return NULL;
 
@@ -104,35 +99,35 @@ void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
 	struct dma_devres match_data = { size, vaddr, dma_handle };
 
 	dma_free_coherent(dev, size, vaddr, dma_handle);
-	WARN_ON(devres_destroy(dev, dmam_coherent_release, dmam_match,
-			       &match_data));
+	WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data));
 }
 EXPORT_SYMBOL(dmam_free_coherent);
 
 /**
- * dmam_alloc_non_coherent - Managed dma_alloc_noncoherent()
+ * dmam_alloc_attrs - Managed dma_alloc_attrs()
  * @dev: Device to allocate non_coherent memory for
  * @size: Size of allocation
  * @dma_handle: Out argument for allocated DMA handle
  * @gfp: Allocation flags
+ * @attrs: Flags in the DMA_ATTR_* namespace.
  *
- * Managed dma_alloc_noncoherent().  Memory allocated using this
- * function will be automatically released on driver detach.
+ * Managed dma_alloc_attrs().  Memory allocated using this function will be
+ * automatically released on driver detach.
  *
  * RETURNS:
  * Pointer to allocated memory on success, NULL on failure.
  */
-void *dmam_alloc_noncoherent(struct device *dev, size_t size,
-			     dma_addr_t *dma_handle, gfp_t gfp)
+void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	struct dma_devres *dr;
 	void *vaddr;
 
-	dr = devres_alloc(dmam_noncoherent_release, sizeof(*dr), gfp);
+	dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
 	if (!dr)
 		return NULL;
 
-	vaddr = dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+	vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs);
 	if (!vaddr) {
 		devres_free(dr);
 		return NULL;
@@ -141,12 +136,13 @@ void *dmam_alloc_noncoherent(struct device *dev, size_t size,
 	dr->vaddr = vaddr;
 	dr->dma_handle = *dma_handle;
 	dr->size = size;
+	dr->attrs = attrs;
 
 	devres_add(dev, dr);
 
 	return vaddr;
 }
-EXPORT_SYMBOL(dmam_alloc_noncoherent);
+EXPORT_SYMBOL(dmam_alloc_attrs);
 
 #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
 
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 6c2b2ca4a909..5f04b4096c42 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1694,9 +1694,10 @@ static int au1200fb_drv_probe(struct platform_device *dev)
 		/* Allocate the framebuffer to the maximum screen size */
 		fbdev->fb_len = (win->w[plane].xres * win->w[plane].yres * bpp) / 8;
 
-		fbdev->fb_mem = dmam_alloc_noncoherent(&dev->dev,
+		fbdev->fb_mem = dmam_alloc_attrs(&dev->dev,
 				PAGE_ALIGN(fbdev->fb_len),
-				&fbdev->fb_phys, GFP_KERNEL);
+				&fbdev->fb_phys, GFP_KERNEL,
+				DMA_ATTR_NON_CONSISTENT);
 		if (!fbdev->fb_mem) {
 			print_err("fail to allocate frambuffer (size: %dK))",
 				  fbdev->fb_len / 1024);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4038dd34afa3..843ab866e0f4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -734,8 +734,9 @@ extern void *dmam_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_handle, gfp_t gfp);
 extern void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
 			       dma_addr_t dma_handle);
-extern void *dmam_alloc_noncoherent(struct device *dev, size_t size,
-				    dma_addr_t *dma_handle, gfp_t gfp);
+extern void *dmam_alloc_attrs(struct device *dev, size_t size,
+			      dma_addr_t *dma_handle, gfp_t gfp,
+			      unsigned long attrs);
 #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
 extern int dmam_declare_coherent_memory(struct device *dev,
 					phys_addr_t phys_addr,
-- 
cgit v1.2.3


From 7aa1f42752f0d31a5bb6d0d5bac92fc8c2044ce2 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 18 Jun 2017 16:15:59 +0300
Subject: nvme: use a single NVME_AQ_DEPTH and relax it to 32

No need to differentiate fabrics from pci/loop, also lower
it to 32 as we don't really need 256 inflight admin commands.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fabrics.c     |  8 +-------
 drivers/nvme/host/fc.c          |  2 +-
 drivers/nvme/host/pci.c         |  1 -
 drivers/nvme/host/rdma.c        | 10 +++++-----
 drivers/nvme/target/discovery.c |  2 +-
 drivers/nvme/target/loop.c      |  4 +---
 drivers/nvme/target/rdma.c      |  2 +-
 include/linux/nvme.h            |  2 +-
 8 files changed, 11 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 7ca2d4d70aec..a59a243b81c6 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -392,13 +392,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;
-
-	/*
-	 * fabrics spec sets a minimum of depth 32 for admin queue,
-	 * so set the queue with this depth always until
-	 * justification otherwise.
-	 */
-	cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+	cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 
 	/*
 	 * Set keep-alive timeout in seconds granularity (ms * 1000)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5165007e86a6..5d5ecefd8dbe 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
  */
 #define NVME_FC_NR_AEN_COMMANDS	1
 #define NVME_FC_AQ_BLKMQ_DEPTH	\
-	(NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
 #define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
 
 enum nvme_fc_queue_flags {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2a9ee769ce9e..32a98e2740ad 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -36,7 +36,6 @@
 #include "nvme.h"
 
 #define NVME_Q_DEPTH		1024
-#define NVME_AQ_DEPTH		256
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 01dc723e6acf..bc0322bf7d27 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
  */
 #define NVME_RDMA_NR_AEN_COMMANDS      1
 #define NVME_RDMA_AQ_BLKMQ_DEPTH       \
-	(NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
 
 struct nvme_rdma_device {
 	struct ib_device       *dev;
@@ -719,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+	ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (ret)
 		goto requeue;
 
@@ -1291,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 	 * specified by the Fabrics standard.
 	 */
 	if (priv.qid == 0) {
-		priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
-		priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+		priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+		priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 	} else {
 		/*
 		 * current interpretation of the fabrics spec
@@ -1530,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 {
 	int error;
 
-	error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+	error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (error)
 		return error;
 
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index c7a90384dd75..8f3b57b4c97b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
 	e->portid = port->disc_addr.portid;
 	/* we support only dynamic controllers */
 	e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
-	e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+	e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
 	e->subtype = type;
 	memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
 	memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index f67606523724..86c09e2a1490 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
 #include "../host/nvme.h"
 #include "../host/fabrics.h"
 
-#define NVME_LOOP_AQ_DEPTH		256
-
 #define NVME_LOOP_MAX_SEGMENTS		256
 
 /*
@@ -31,7 +29,7 @@
  */
 #define NVME_LOOP_NR_AEN_COMMANDS	1
 #define NVME_LOOP_AQ_BLKMQ_DEPTH	\
-	(NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
 
 struct nvme_loop_iod {
 	struct nvme_request	nvme_req;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9e45cde63376..32aa10b521c8 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
-	if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+	if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
 		return NVME_RDMA_CM_INVALID_HSQSIZE;
 
 	/* XXX: Should we enforce some kind of max for IO queues? */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f516a975bb21..6b8ee9e628e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -87,7 +87,7 @@ enum {
 	NVMF_RDMA_CMS_RDMA_CM	= 1, /* Sockets based endpoint addressing */
 };
 
-#define NVMF_AQ_DEPTH		32
+#define NVME_AQ_DEPTH		32
 
 enum {
 	NVME_REG_CAP	= 0x0000,	/* Controller Capabilities */
-- 
cgit v1.2.3


From fd25d19f6b8da315332bb75936605fb45d3ea981 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 Jun 2017 13:00:26 -0700
Subject: locking/refcount: Create unchecked atomic_t implementation

Many subsystems will not use refcount_t unless there is a way to build the
kernel so that there is no regression in speed compared to atomic_t. This
adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
which has the validation but is slightly slower. When not enabled,
refcount_t uses the basic unchecked atomic_t routines, which results in
no code changes compared to just using atomic_t directly.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Windsor <dwindsor@gmail.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Hans Liljestrand <ishkamiel@gmail.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Jann Horn <jannh@google.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arozansk@redhat.com
Cc: axboe@kernel.dk
Cc: linux-arch <linux-arch@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170621200026.GA115679@beast
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/Kconfig             |  9 +++++++++
 include/linux/refcount.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 lib/refcount.c           |  3 +++
 3 files changed, 54 insertions(+)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 6c00e5b00f8b..f76b214cf7ad 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -867,4 +867,13 @@ config STRICT_MODULE_RWX
 config ARCH_WANT_RELAX_ORDER
 	bool
 
+config REFCOUNT_FULL
+	bool "Perform full reference count validation at the expense of speed"
+	help
+	  Enabling this switches the refcounting infrastructure from a fast
+	  unchecked atomic_t implementation to a fully state checked
+	  implementation, which can be (slightly) slower but provides protections
+	  against various use-after-free conditions that can be used in
+	  security flaw exploits.
+
 source "kernel/gcov/Kconfig"
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index b34aa649d204..bb71f2871dac 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -41,6 +41,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
+#ifdef CONFIG_REFCOUNT_FULL
 extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
 extern void refcount_add(unsigned int i, refcount_t *r);
 
@@ -52,6 +53,47 @@ extern void refcount_sub(unsigned int i, refcount_t *r);
 
 extern __must_check bool refcount_dec_and_test(refcount_t *r);
 extern void refcount_dec(refcount_t *r);
+#else
+static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	return atomic_add_unless(&r->refs, i, 0);
+}
+
+static inline void refcount_add(unsigned int i, refcount_t *r)
+{
+	atomic_add(i, &r->refs);
+}
+
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return atomic_add_unless(&r->refs, 1, 0);
+}
+
+static inline void refcount_inc(refcount_t *r)
+{
+	atomic_inc(&r->refs);
+}
+
+static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	return atomic_sub_and_test(i, &r->refs);
+}
+
+static inline void refcount_sub(unsigned int i, refcount_t *r)
+{
+	atomic_sub(i, &r->refs);
+}
+
+static inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	return atomic_dec_and_test(&r->refs);
+}
+
+static inline void refcount_dec(refcount_t *r)
+{
+	atomic_dec(&r->refs);
+}
+#endif /* CONFIG_REFCOUNT_FULL */
 
 extern __must_check bool refcount_dec_if_one(refcount_t *r);
 extern __must_check bool refcount_dec_not_one(refcount_t *r);
diff --git a/lib/refcount.c b/lib/refcount.c
index 9f906783987e..5d0582a9480c 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -37,6 +37,8 @@
 #include <linux/refcount.h>
 #include <linux/bug.h>
 
+#ifdef CONFIG_REFCOUNT_FULL
+
 /**
  * refcount_add_not_zero - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
@@ -225,6 +227,7 @@ void refcount_dec(refcount_t *r)
 	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
 EXPORT_SYMBOL(refcount_dec);
+#endif /* CONFIG_REFCOUNT_FULL */
 
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
-- 
cgit v1.2.3


From a80a32341fbabd4276165a9ce4fa4c80168c0bef Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:17:35 -0400
Subject: svcrdma: Remove svc_rdma_marshal.c

svc_rdma_marshal.c has one remaining exported function --
svc_rdma_xdr_decode_req -- and it has a single call site. Take
the same approach as the sendto path, and move this function
into the source file where it is called.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h         |   3 -
 net/sunrpc/xprtrdma/Makefile            |   4 +-
 net/sunrpc/xprtrdma/svc_rdma_marshal.c  | 168 --------------------------------
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 126 ++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 173 deletions(-)
 delete mode 100644 net/sunrpc/xprtrdma/svc_rdma_marshal.c

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f3787d800ba4..3ca991657889 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -185,9 +185,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 				    __be32 *rdma_resp,
 				    struct xdr_buf *rcvbuf);
 
-/* svc_rdma_marshal.c */
-extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
 extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index c1ae8142ab73..b8213ddce2f2 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
 	fmr_ops.o frwr_ops.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
-	svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
-	svc_rdma_rw.o module.o
+	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
+	module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
deleted file mode 100644
index bdcf7d85a3dc..000000000000
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2016 Oracle. All rights reserved.
- * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the BSD-type
- * license below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *      Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *
- *      Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following
- *      disclaimer in the documentation and/or other materials provided
- *      with the distribution.
- *
- *      Neither the name of the Network Appliance, Inc. nor the names of
- *      its contributors may be used to endorse or promote products
- *      derived from this software without specific prior written
- *      permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Tom Tucker <tom@opengridcomputing.com>
- */
-
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/debug.h>
-#include <asm/unaligned.h>
-#include <linux/sunrpc/rpc_rdma.h>
-#include <linux/sunrpc/svc_rdma.h>
-
-#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
-
-static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	while (*p++ != xdr_zero) {
-		next = p + rpcrdma_readchunk_maxsz - 1;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	while (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	if (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-/**
- * svc_rdma_xdr_decode_req - Parse incoming RPC-over-RDMA header
- * @rq_arg: Receive buffer
- *
- * On entry, xdr->head[0].iov_base points to first byte in the
- * RPC-over-RDMA header.
- *
- * On successful exit, head[0] points to first byte past the
- * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
- * The length of the RPC-over-RDMA header is returned.
- */
-int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
-{
-	__be32 *p, *end, *rdma_argp;
-	unsigned int hdr_len;
-
-	/* Verify that there's enough bytes for header + something */
-	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
-		goto out_short;
-
-	rdma_argp = rq_arg->head[0].iov_base;
-	if (*(rdma_argp + 1) != rpcrdma_version)
-		goto out_version;
-
-	switch (*(rdma_argp + 3)) {
-	case rdma_msg:
-	case rdma_nomsg:
-		break;
-
-	case rdma_done:
-		goto out_drop;
-
-	case rdma_error:
-		goto out_drop;
-
-	default:
-		goto out_proc;
-	}
-
-	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
-	p = xdr_check_read_list(rdma_argp + 4, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_write_list(p, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_reply_chunk(p, end);
-	if (!p)
-		goto out_inval;
-	if (p > end)
-		goto out_inval;
-
-	rq_arg->head[0].iov_base = p;
-	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
-	rq_arg->head[0].iov_len -= hdr_len;
-	return hdr_len;
-
-out_short:
-	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
-	return -EINVAL;
-
-out_version:
-	dprintk("svcrdma: bad xprt version: %u\n",
-		be32_to_cpup(rdma_argp + 1));
-	return -EPROTONOSUPPORT;
-
-out_drop:
-	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
-	return 0;
-
-out_proc:
-	dprintk("svcrdma: bad rdma procedure (%u)\n",
-		be32_to_cpup(rdma_argp + 3));
-	return -EINVAL;
-
-out_inval:
-	dprintk("svcrdma: failed to parse transport header\n");
-	return -EINVAL;
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 27a99bf5b1a6..55ad335bbef1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016, 2017 Oracle. All rights reserved.
  * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
@@ -40,6 +41,7 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
+#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/spinlock.h>
@@ -115,6 +117,130 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
+static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
+{
+	__be32 *next;
+
+	while (*p++ != xdr_zero) {
+		next = p + rpcrdma_readchunk_maxsz - 1;
+		if (next > end)
+			return NULL;
+		p = next;
+	}
+	return p;
+}
+
+static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
+{
+	__be32 *next;
+
+	while (*p++ != xdr_zero) {
+		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+		if (next > end)
+			return NULL;
+		p = next;
+	}
+	return p;
+}
+
+static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
+{
+	__be32 *next;
+
+	if (*p++ != xdr_zero) {
+		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+		if (next > end)
+			return NULL;
+		p = next;
+	}
+	return p;
+}
+
+/* On entry, xdr->head[0].iov_base points to first byte in the
+ * RPC-over-RDMA header.
+ *
+ * On successful exit, head[0] points to first byte past the
+ * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
+ * The length of the RPC-over-RDMA header is returned.
+ *
+ * Assumptions:
+ * - The transport header is entirely contained in the head iovec.
+ */
+static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
+{
+	__be32 *p, *end, *rdma_argp;
+	unsigned int hdr_len;
+	char *proc;
+
+	/* Verify that there's enough bytes for header + something */
+	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
+		goto out_short;
+
+	rdma_argp = rq_arg->head[0].iov_base;
+	if (*(rdma_argp + 1) != rpcrdma_version)
+		goto out_version;
+
+	switch (*(rdma_argp + 3)) {
+	case rdma_msg:
+		proc = "RDMA_MSG";
+		break;
+	case rdma_nomsg:
+		proc = "RDMA_NOMSG";
+		break;
+
+	case rdma_done:
+		goto out_drop;
+
+	case rdma_error:
+		goto out_drop;
+
+	default:
+		goto out_proc;
+	}
+
+	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
+	p = xdr_check_read_list(rdma_argp + 4, end);
+	if (!p)
+		goto out_inval;
+	p = xdr_check_write_list(p, end);
+	if (!p)
+		goto out_inval;
+	p = xdr_check_reply_chunk(p, end);
+	if (!p)
+		goto out_inval;
+	if (p > end)
+		goto out_inval;
+
+	rq_arg->head[0].iov_base = p;
+	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
+	rq_arg->head[0].iov_len -= hdr_len;
+	dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
+		proc, be32_to_cpup(rdma_argp), hdr_len);
+	return hdr_len;
+
+out_short:
+	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+	return -EINVAL;
+
+out_version:
+	dprintk("svcrdma: bad xprt version: %u\n",
+		be32_to_cpup(rdma_argp + 1));
+	return -EPROTONOSUPPORT;
+
+out_drop:
+	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+	return 0;
+
+out_proc:
+	dprintk("svcrdma: bad rdma procedure (%u)\n",
+		be32_to_cpup(rdma_argp + 3));
+	return -EINVAL;
+
+out_inval:
+	dprintk("svcrdma: failed to parse transport header\n");
+	return -EINVAL;
+}
+
 /* Issue an RDMA_READ using the local lkey to map the data sink */
 int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
 			struct svc_rqst *rqstp,
-- 
cgit v1.2.3


From 5136f6365ce3eace5a926e10f16ed2a233db5ba9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 27 Jun 2017 14:30:28 -0400
Subject: cgroup: implement "nsdelegate" mount option

Currently, cgroup only supports delegation to !root users and cgroup
namespaces don't get any special treatments.  This limits the
usefulness of cgroup namespaces as they by themselves can't be safe
delegation boundaries.  A process inside a cgroup can change the
resource control knobs of the parent in the namespace root and may
move processes in and out of the namespace if cgroups outside its
namespace are visible somehow.

This patch adds a new mount option "nsdelegate" which makes cgroup
namespaces delegation boundaries.  If set, cgroup behaves as if write
permission based delegation took place at namespace boundaries -
writes to the resource control knobs from the namespace root are
denied and migration crossing the namespace boundary aren't allowed
from inside the namespace.

This allows cgroup namespace to function as a delegation boundary by
itself.

v2: Silently ignore nsdelegate specified on !init mounts.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Aravind Anbudurai <aru7@fb.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Eric Biederman <ebiederm@xmission.com>
---
 Documentation/cgroup-v2.txt | 61 +++++++++++++++++++++----------
 include/linux/cgroup-defs.h |  9 +++++
 kernel/cgroup/cgroup.c      | 88 ++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 135 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 0260ed053efd..558c3a739baf 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -149,6 +149,16 @@ during boot, before manual intervention is possible. To make testing
 and experimenting easier, the kernel parameter cgroup_no_v1= allows
 disabling controllers in v1 and make them always available in v2.
 
+cgroup v2 currently supports the following mount options.
+
+  nsdelegate
+
+	Consider cgroup namespaces as delegation boundaries.  This
+	option is system wide and can only be set on mount or modified
+	through remount from the init namespace.  The mount option is
+	ignored on non-init namespace mounts.  Please refer to the
+	Delegation section for details.
+
 
 2-2. Organizing Processes
 
@@ -308,19 +318,27 @@ file.
 
 2-5-1. Model of Delegation
 
-A cgroup can be delegated to a less privileged user by granting write
-access of the directory and its "cgroup.procs" and
-"cgroup.subtree_control" files to the user.  Note that resource
-control interface files in a given directory control the distribution
-of the parent's resources and thus must not be delegated along with
-the directory.
-
-Once delegated, the user can build sub-hierarchy under the directory,
-organize processes as it sees fit and further distribute the resources
-it received from the parent.  The limits and other settings of all
-resource controllers are hierarchical and regardless of what happens
-in the delegated sub-hierarchy, nothing can escape the resource
-restrictions imposed by the parent.
+A cgroup can be delegated in two ways.  First, to a less privileged
+user by granting write access of the directory and its "cgroup.procs"
+and "cgroup.subtree_control" files to the user.  Second, if the
+"nsdelegate" mount option is set, automatically to a cgroup namespace
+on namespace creation.
+
+Because the resource control interface files in a given directory
+control the distribution of the parent's resources, the delegatee
+shouldn't be allowed to write to them.  For the first method, this is
+achieved by not granting access to these files.  For the second, the
+kernel rejects writes to all files other than "cgroup.procs" and
+"cgroup.subtree_control" on a namespace root from inside the
+namespace.
+
+The end results are equivalent for both delegation types.  Once
+delegated, the user can build sub-hierarchy under the directory,
+organize processes inside it as it sees fit and further distribute the
+resources it received from the parent.  The limits and other settings
+of all resource controllers are hierarchical and regardless of what
+happens in the delegated sub-hierarchy, nothing can escape the
+resource restrictions imposed by the parent.
 
 Currently, cgroup doesn't impose any restrictions on the number of
 cgroups in or nesting depth of a delegated sub-hierarchy; however,
@@ -330,10 +348,12 @@ this may be limited explicitly in the future.
 2-5-2. Delegation Containment
 
 A delegated sub-hierarchy is contained in the sense that processes
-can't be moved into or out of the sub-hierarchy by the delegatee.  For
-a process with a non-root euid to migrate a target process into a
-cgroup by writing its PID to the "cgroup.procs" file, the following
-conditions must be met.
+can't be moved into or out of the sub-hierarchy by the delegatee.
+
+For delegations to a less privileged user, this is achieved by
+requiring the following conditions for a process with a non-root euid
+to migrate a target process into a cgroup by writing its PID to the
+"cgroup.procs" file.
 
 - The writer must have write access to the "cgroup.procs" file.
 
@@ -360,6 +380,11 @@ destination cgroup C00 is above the points of delegation and U0 would
 not have write access to its "cgroup.procs" files and thus the write
 will be denied with -EACCES.
 
+For delegations to namespaces, containment is achieved by requiring
+that both the source and destination cgroups are reachable from the
+namespace of the process which is attempting the migration.  If either
+is not reachable, the migration is rejected with -ENOENT.
+
 
 2-6. Guidelines
 
@@ -1414,7 +1439,7 @@ D. Deprecated v1 Core Features
 
 - Multiple hierarchies including named ones are not supported.
 
-- All mount options and remounting are not supported.
+- All v1 mount options are not supported.
 
 - The "tasks" file is removed and "cgroup.procs" is not sorted.
 
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 3bc4196bf217..09f4c7df1478 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -67,12 +67,21 @@ enum {
 enum {
 	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
 	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
+
+	/*
+	 * Consider namespaces as delegation boundaries.  If this flag is
+	 * set, controller specific interface files in a namespace root
+	 * aren't writeable from inside the namespace.
+	 */
+	CGRP_ROOT_NS_DELEGATE	= (1 << 3),
 };
 
 /* cftype->flags */
 enum {
 	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
 	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
+	CFTYPE_NS_DELEGATABLE	= (1 << 2),	/* writeable beyond delegation boundaries */
+
 	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
 	CFTYPE_WORLD_WRITABLE	= (1 << 4),	/* (DON'T USE FOR NEW FILES) S_IWUGO */
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d48069ee84c2..620794a20a33 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1547,10 +1547,56 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 	return len;
 }
 
+static int parse_cgroup_root_flags(char *data, unsigned int *root_flags)
+{
+	char *token;
+
+	*root_flags = 0;
+
+	if (!data)
+		return 0;
+
+	while ((token = strsep(&data, ",")) != NULL) {
+		if (!strcmp(token, "nsdelegate")) {
+			*root_flags |= CGRP_ROOT_NS_DELEGATE;
+			continue;
+		}
+
+		pr_err("cgroup2: unknown option \"%s\"\n", token);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void apply_cgroup_root_flags(unsigned int root_flags)
+{
+	if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
+		if (root_flags & CGRP_ROOT_NS_DELEGATE)
+			cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
+		else
+			cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
+	}
+}
+
+static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
+{
+	if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
+		seq_puts(seq, ",nsdelegate");
+	return 0;
+}
+
 static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 {
-	pr_err("remount is not allowed\n");
-	return -EINVAL;
+	unsigned int root_flags;
+	int ret;
+
+	ret = parse_cgroup_root_flags(data, &root_flags);
+	if (ret)
+		return ret;
+
+	apply_cgroup_root_flags(root_flags);
+	return 0;
 }
 
 /*
@@ -1790,6 +1836,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 {
 	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
 	struct dentry *dentry;
+	int ret;
 
 	get_cgroup_ns(ns);
 
@@ -1807,16 +1854,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		cgroup_enable_task_cg_lists();
 
 	if (fs_type == &cgroup2_fs_type) {
-		if (data) {
-			pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
+		unsigned int root_flags;
+
+		ret = parse_cgroup_root_flags(data, &root_flags);
+		if (ret) {
 			put_cgroup_ns(ns);
-			return ERR_PTR(-EINVAL);
+			return ERR_PTR(ret);
 		}
+
 		cgrp_dfl_visible = true;
 		cgroup_get_live(&cgrp_dfl_root.cgrp);
 
 		dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
 					 CGROUP2_SUPER_MAGIC, ns);
+		if (!IS_ERR(dentry))
+			apply_cgroup_root_flags(root_flags);
 	} else {
 		dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
 				       CGROUP_SUPER_MAGIC, ns);
@@ -2364,6 +2416,8 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 					 struct kernfs_open_file *of)
 {
 	struct super_block *sb = of->file->f_path.dentry->d_sb;
+	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
+	struct cgroup *root_cgrp = ns->root_cset->dfl_cgrp;
 	struct cgroup *src_cgrp, *com_cgrp;
 	struct inode *inode;
 	int ret;
@@ -2407,6 +2461,15 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 	if (ret)
 		return ret;
 
+	/*
+	 * If namespaces are delegation boundaries, %current must be able
+	 * to see both source and destination cgroups from its namespace.
+	 */
+	if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
+	    (!cgroup_is_descendant(src_cgrp, root_cgrp) ||
+	     !cgroup_is_descendant(dst_cgrp, root_cgrp)))
+		return -ENOENT;
+
 	return 0;
 }
 
@@ -2971,11 +3034,23 @@ static void cgroup_file_release(struct kernfs_open_file *of)
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 				 size_t nbytes, loff_t off)
 {
+	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
 	struct cgroup *cgrp = of->kn->parent->priv;
 	struct cftype *cft = of->kn->priv;
 	struct cgroup_subsys_state *css;
 	int ret;
 
+	/*
+	 * If namespaces are delegation boundaries, disallow writes to
+	 * files in an non-init namespace root from inside the namespace
+	 * except for the files explicitly marked delegatable -
+	 * cgroup.procs and cgroup.subtree_control.
+	 */
+	if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
+	    !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
+	    ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
+		return -EPERM;
+
 	if (cft->write)
 		return cft->write(of, buf, nbytes, off);
 
@@ -3809,6 +3884,7 @@ static int cgroup_procs_show(struct seq_file *s, void *v)
 static struct cftype cgroup_base_files[] = {
 	{
 		.name = "cgroup.procs",
+		.flags = CFTYPE_NS_DELEGATABLE,
 		.file_offset = offsetof(struct cgroup, procs_file),
 		.release = cgroup_procs_release,
 		.seq_start = cgroup_procs_start,
@@ -3822,6 +3898,7 @@ static struct cftype cgroup_base_files[] = {
 	},
 	{
 		.name = "cgroup.subtree_control",
+		.flags = CFTYPE_NS_DELEGATABLE,
 		.seq_show = cgroup_subtree_control_show,
 		.write = cgroup_subtree_control_write,
 	},
@@ -4410,6 +4487,7 @@ int cgroup_rmdir(struct kernfs_node *kn)
 }
 
 static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
+	.show_options		= cgroup_show_options,
 	.remount_fs		= cgroup_remount,
 	.mkdir			= cgroup_mkdir,
 	.rmdir			= cgroup_rmdir,
-- 
cgit v1.2.3


From a71411dbf6c82ba2eb2519717c04ffb19bc4dda5 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Fri, 23 Jun 2017 14:35:09 +0200
Subject: regmap: irq: add chip option mask_writeonly

Some irq controllers have writeonly/multipurpose register layouts. In
those cases we read invalid data back. Here we add the option
mask_writeonly as masking option.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 40 +++++++++++++++++++++++++---------------
 include/linux/regmap.h           |  2 ++
 2 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index c4a1eadb093f..429ca8ed7e51 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -60,6 +60,16 @@ static void regmap_irq_lock(struct irq_data *data)
 	mutex_lock(&d->lock);
 }
 
+static int regmap_irq_update_bits(struct regmap_irq_chip_data *d,
+				  unsigned int reg, unsigned int mask,
+				  unsigned int val)
+{
+	if (d->chip->mask_writeonly)
+		return regmap_write_bits(d->map, reg, mask, val);
+	else
+		return regmap_update_bits(d->map, reg, mask, val);
+}
+
 static void regmap_irq_sync_unlock(struct irq_data *data)
 {
 	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
@@ -84,11 +94,11 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
 		reg = d->chip->mask_base +
 			(i * map->reg_stride * d->irq_reg_stride);
 		if (d->chip->mask_invert) {
-			ret = regmap_update_bits(d->map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 					 d->mask_buf_def[i], ~d->mask_buf[i]);
 		} else if (d->chip->unmask_base) {
 			/* set mask with mask_base register */
-			ret = regmap_update_bits(d->map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 					d->mask_buf_def[i], ~d->mask_buf[i]);
 			if (ret < 0)
 				dev_err(d->map->dev,
@@ -97,12 +107,12 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
 			unmask_offset = d->chip->unmask_base -
 							d->chip->mask_base;
 			/* clear mask with unmask_base register */
-			ret = regmap_update_bits(d->map,
+			ret = regmap_irq_update_bits(d,
 					reg + unmask_offset,
 					d->mask_buf_def[i],
 					d->mask_buf[i]);
 		} else {
-			ret = regmap_update_bits(d->map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 					 d->mask_buf_def[i], d->mask_buf[i]);
 		}
 		if (ret != 0)
@@ -113,11 +123,11 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
 			(i * map->reg_stride * d->irq_reg_stride);
 		if (d->wake_buf) {
 			if (d->chip->wake_invert)
-				ret = regmap_update_bits(d->map, reg,
+				ret = regmap_irq_update_bits(d, reg,
 							 d->mask_buf_def[i],
 							 ~d->wake_buf[i]);
 			else
-				ret = regmap_update_bits(d->map, reg,
+				ret = regmap_irq_update_bits(d, reg,
 							 d->mask_buf_def[i],
 							 d->wake_buf[i]);
 			if (ret != 0)
@@ -153,10 +163,10 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
 		reg = d->chip->type_base +
 			(i * map->reg_stride * d->type_reg_stride);
 		if (d->chip->type_invert)
-			ret = regmap_update_bits(d->map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 				d->type_buf_def[i], ~d->type_buf[i]);
 		else
-			ret = regmap_update_bits(d->map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 				d->type_buf_def[i], d->type_buf[i]);
 		if (ret != 0)
 			dev_err(d->map->dev, "Failed to sync type in %x\n",
@@ -519,17 +529,17 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 		reg = chip->mask_base +
 			(i * map->reg_stride * d->irq_reg_stride);
 		if (chip->mask_invert)
-			ret = regmap_update_bits(map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 					 d->mask_buf[i], ~d->mask_buf[i]);
 		else if (d->chip->unmask_base) {
 			unmask_offset = d->chip->unmask_base -
 					d->chip->mask_base;
-			ret = regmap_update_bits(d->map,
+			ret = regmap_irq_update_bits(d,
 					reg + unmask_offset,
 					d->mask_buf[i],
 					d->mask_buf[i]);
 		} else
-			ret = regmap_update_bits(map, reg,
+			ret = regmap_irq_update_bits(d, reg,
 					 d->mask_buf[i], d->mask_buf[i]);
 		if (ret != 0) {
 			dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
@@ -575,11 +585,11 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 				(i * map->reg_stride * d->irq_reg_stride);
 
 			if (chip->wake_invert)
-				ret = regmap_update_bits(map, reg,
+				ret = regmap_irq_update_bits(d, reg,
 							 d->mask_buf_def[i],
 							 0);
 			else
-				ret = regmap_update_bits(map, reg,
+				ret = regmap_irq_update_bits(d, reg,
 							 d->mask_buf_def[i],
 							 d->wake_buf[i]);
 			if (ret != 0) {
@@ -603,10 +613,10 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 			reg = chip->type_base +
 				(i * map->reg_stride * d->type_reg_stride);
 			if (chip->type_invert)
-				ret = regmap_update_bits(map, reg,
+				ret = regmap_irq_update_bits(d, reg,
 					d->type_buf_def[i], 0xFF);
 			else
-				ret = regmap_update_bits(map, reg,
+				ret = regmap_irq_update_bits(d, reg,
 					d->type_buf_def[i], 0x0);
 			if (ret != 0) {
 				dev_err(map->dev,
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index e88649225a60..400172b59f24 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -884,6 +884,7 @@ struct regmap_irq {
  *
  * @status_base: Base status register address.
  * @mask_base:   Base mask register address.
+ * @mask_writeonly: Base mask register is write only.
  * @unmask_base:  Base unmask register address. for chips who have
  *                separate mask and unmask registers
  * @ack_base:    Base ack address. If zero then the chip is clear on read.
@@ -927,6 +928,7 @@ struct regmap_irq_chip {
 	unsigned int wake_base;
 	unsigned int type_base;
 	unsigned int irq_reg_stride;
+	bool mask_writeonly:1;
 	bool init_ack_masked:1;
 	bool mask_invert:1;
 	bool use_ack:1;
-- 
cgit v1.2.3


From 5d6dee80a1e94cc284d03e06d930e60e8d3ecf7d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 28 Jun 2017 13:50:05 -0600
Subject: vfio: New external user group/file match

At the point where the kvm-vfio pseudo device wants to release its
vfio group reference, we can't always acquire a new reference to make
that happen.  The group can be in a state where we wouldn't allow a
new reference to be added.  This new helper function allows a caller
to match a file to a group to facilitate this.  Given a file and
group, report if they match.  Thus the caller needs to already have a
group reference to match to the file.  This allows the deletion of a
group without acquiring a new reference.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/vfio/vfio.c  |  9 +++++++++
 include/linux/vfio.h |  2 ++
 virt/kvm/vfio.c      | 27 +++++++++++++++++++--------
 3 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 54dd2fbf83d9..7597a377eb4e 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1776,6 +1776,15 @@ void vfio_group_put_external_user(struct vfio_group *group)
 }
 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
 
+bool vfio_external_group_match_file(struct vfio_group *test_group,
+				    struct file *filep)
+{
+	struct vfio_group *group = filep->private_data;
+
+	return (filep->f_op == &vfio_group_fops) && (group == test_group);
+}
+EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
+
 int vfio_external_user_iommu_id(struct vfio_group *group)
 {
 	return iommu_group_id(group->iommu_group);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index edf9b2cad277..9b34d0af5d27 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver(
  */
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
+extern bool vfio_external_group_match_file(struct vfio_group *group,
+					   struct file *filep);
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 6e002d0f3191..d99850c462a1 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -51,6 +51,22 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
 	return vfio_group;
 }
 
+static bool kvm_vfio_external_group_match_file(struct vfio_group *group,
+					       struct file *filep)
+{
+	bool ret, (*fn)(struct vfio_group *, struct file *);
+
+	fn = symbol_get(vfio_external_group_match_file);
+	if (!fn)
+		return false;
+
+	ret = fn(group, filep);
+
+	symbol_put(vfio_external_group_match_file);
+
+	return ret;
+}
+
 static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 {
 	void (*fn)(struct vfio_group *);
@@ -231,18 +247,13 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 		if (!f.file)
 			return -EBADF;
 
-		vfio_group = kvm_vfio_group_get_external_user(f.file);
-		fdput(f);
-
-		if (IS_ERR(vfio_group))
-			return PTR_ERR(vfio_group);
-
 		ret = -ENOENT;
 
 		mutex_lock(&kv->lock);
 
 		list_for_each_entry(kvg, &kv->group_list, node) {
-			if (kvg->vfio_group != vfio_group)
+			if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
+								f.file))
 				continue;
 
 			list_del(&kvg->node);
@@ -260,7 +271,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 
 		mutex_unlock(&kv->lock);
 
-		kvm_vfio_group_put_external_user(vfio_group);
+		fdput(f);
 
 		kvm_vfio_update_coherency(dev);
 
-- 
cgit v1.2.3


From dff79b91b8f3279cbe60727368adff1f3a5ab16e Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 28 Jun 2017 15:13:52 -0500
Subject: PCI: Add pci_free_host_bridge() interface

Commit a52d1443bba1 ("PCI: Export host bridge registration interface")
exported the pci_alloc_host_bridge() interface so that PCI host controllers
drivers can make use of it.

Introduce pci_alloc_host_bridge() kernel counterpart to free the host
bridge data structures, pci_free_host_bridge(), export it and update kernel
functions releasing host bridge objects allocated memory to make use of it.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 drivers/pci/probe.c | 12 +++++++++---
 include/linux/pci.h |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 586d83d8be4d..cbf0d0c1b009 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -517,9 +517,7 @@ static void pci_release_host_bridge_dev(struct device *dev)
 	if (bridge->release_fn)
 		bridge->release_fn(bridge);
 
-	pci_free_resource_list(&bridge->windows);
-
-	kfree(bridge);
+	pci_free_host_bridge(bridge);
 }
 
 struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
@@ -537,6 +535,14 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
 }
 EXPORT_SYMBOL(pci_alloc_host_bridge);
 
+void pci_free_host_bridge(struct pci_host_bridge *bridge)
+{
+	pci_free_resource_list(&bridge->windows);
+
+	kfree(bridge);
+}
+EXPORT_SYMBOL(pci_free_host_bridge);
+
 static const unsigned char pcix_bus_speed[] = {
 	PCI_SPEED_UNKNOWN,		/* 0 */
 	PCI_SPEED_66MHz_PCIX,		/* 1 */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..9095b38c2fa3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -458,6 +458,7 @@ static inline struct pci_host_bridge *pci_host_bridge_from_priv(void *priv)
 }
 
 struct pci_host_bridge *pci_alloc_host_bridge(size_t priv);
+void pci_free_host_bridge(struct pci_host_bridge *bridge);
 int pci_register_host_bridge(struct pci_host_bridge *bridge);
 struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
 
-- 
cgit v1.2.3


From 5c3f18cce08364ef68163228c0b42725d64cd353 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 28 Jun 2017 15:13:53 -0500
Subject: PCI: Add devm_pci_alloc_host_bridge() interface

Struct pci_host_bridge can be allocated by PCI host bridge drivers which
usually allocate and map memory through devm managed interfaces.

Add a devm version for the pci_alloc_host_bridge() interface to simplify
PCI host controller driver porting and simplify the driver failure paths.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/pci/probe.c                   | 24 ++++++++++++++++++++++--
 include/linux/pci.h                   |  2 ++
 3 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index e72587fe477d..ffbc8913cc76 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -342,6 +342,7 @@ PER-CPU MEM
   devm_free_percpu()
 
 PCI
+  devm_pci_alloc_host_bridge()  : managed PCI host bridge allocation
   devm_pci_remap_cfgspace()	: ioremap PCI configuration space
   devm_pci_remap_cfg_resource()	: ioremap PCI configuration space resource
   pcim_enable_device()		: after success, all PCI ops become managed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cbf0d0c1b009..e3f69655ca87 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -510,14 +510,18 @@ static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
 	return b;
 }
 
-static void pci_release_host_bridge_dev(struct device *dev)
+static void devm_pci_release_host_bridge_dev(struct device *dev)
 {
 	struct pci_host_bridge *bridge = to_pci_host_bridge(dev);
 
 	if (bridge->release_fn)
 		bridge->release_fn(bridge);
+}
 
-	pci_free_host_bridge(bridge);
+static void pci_release_host_bridge_dev(struct device *dev)
+{
+	devm_pci_release_host_bridge_dev(dev);
+	pci_free_host_bridge(to_pci_host_bridge(dev));
 }
 
 struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
@@ -535,6 +539,22 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
 }
 EXPORT_SYMBOL(pci_alloc_host_bridge);
 
+struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
+						   size_t priv)
+{
+	struct pci_host_bridge *bridge;
+
+	bridge = devm_kzalloc(dev, sizeof(*bridge) + priv, GFP_KERNEL);
+	if (!bridge)
+		return NULL;
+
+	INIT_LIST_HEAD(&bridge->windows);
+	bridge->dev.release = devm_pci_release_host_bridge_dev;
+
+	return bridge;
+}
+EXPORT_SYMBOL(devm_pci_alloc_host_bridge);
+
 void pci_free_host_bridge(struct pci_host_bridge *bridge)
 {
 	pci_free_resource_list(&bridge->windows);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9095b38c2fa3..d39a66dc67aa 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -458,6 +458,8 @@ static inline struct pci_host_bridge *pci_host_bridge_from_priv(void *priv)
 }
 
 struct pci_host_bridge *pci_alloc_host_bridge(size_t priv);
+struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
+						   size_t priv);
 void pci_free_host_bridge(struct pci_host_bridge *bridge);
 int pci_register_host_bridge(struct pci_host_bridge *bridge);
 struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
-- 
cgit v1.2.3


From 1228c4b6c19a76a2691cfb1403ad1eebf5852b76 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 28 Jun 2017 15:13:55 -0500
Subject: PCI: Add pci_scan_root_bus_bridge() interface

The current pci_scan_root_bus() interface is made up of two main code
paths:

  - pci_create_root_bus()
  - pci_scan_child_bus()

pci_create_root_bus() is a wrapper function that allows to create a struct
pci_host_bridge structure, initialize it with the passed parameters and
register it with the kernel.

As the struct pci_host_bridge require additional struct members,
pci_create_root_bus() parameters list has grown in time, making it unwieldy
to add further parameters to it in case the struct pci_host_bridge gains
more members fields to augment its functionality.

Since PCI core code provides functions to allocate struct pci_host_bridge,
instead of forcing the pci_create_root_bus() interface to add new
parameters to cater for new struct pci_host_bridge functionality, it is
more suitable to add an interface in PCI core code to scan a PCI bus
straight from a struct pci_host_bridge created and customized by each
specific PCI host controller driver.

Add a pci_scan_root_bus_bridge() function to allow PCI host controller
drivers to create and initialize struct pci_host_bridge and scan the
resulting bus.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 drivers/pci/probe.c | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e3f69655ca87..690f0b377a0f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2426,6 +2426,45 @@ void pci_bus_release_busn_res(struct pci_bus *b)
 			res, ret ? "can not be" : "is");
 }
 
+int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge)
+{
+	struct resource_entry *window;
+	bool found = false;
+	struct pci_bus *b;
+	int max, bus, ret;
+
+	if (!bridge)
+		return -EINVAL;
+
+	resource_list_for_each_entry(window, &bridge->windows)
+		if (window->res->flags & IORESOURCE_BUS) {
+			found = true;
+			break;
+		}
+
+	ret = pci_register_host_bridge(bridge);
+	if (ret < 0)
+		return ret;
+
+	b = bridge->bus;
+	bus = bridge->busnr;
+
+	if (!found) {
+		dev_info(&b->dev,
+		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
+			bus);
+		pci_bus_insert_busn_res(b, bus, 255);
+	}
+
+	max = pci_scan_child_bus(b);
+
+	if (!found)
+		pci_bus_update_busn_res_end(b, max);
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_scan_root_bus_bridge);
+
 struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
 		struct pci_ops *ops, void *sysdata,
 		struct list_head *resources, struct msi_controller *msi)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d39a66dc67aa..fe1eafd637c3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -857,6 +857,7 @@ struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
 struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 					     struct pci_ops *ops, void *sysdata,
 					     struct list_head *resources);
+int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
 				int busnr);
 void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
-- 
cgit v1.2.3


From cea9bc0be624fb0dc488cb10df40be1323b6b758 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 28 Jun 2017 15:13:55 -0500
Subject: PCI: Make pci_register_host_bridge() PCI core internal

With the introduction of pci_scan_root_bus_bridge() there is no need to
export pci_register_host_bridge() to other kernel subsystems other than the
PCI compilation unit that needs it.

Make pci_register_host_bridge() static to its compilation unit and convert
the existing drivers usage over to pci_scan_root_bus_bridge().

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 drivers/pci/host/pci-ftpci100.c | 5 ++---
 drivers/pci/host/pci-tegra.c    | 4 +---
 drivers/pci/probe.c             | 3 +--
 include/linux/pci.h             | 1 -
 4 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/host/pci-ftpci100.c b/drivers/pci/host/pci-ftpci100.c
index ce5700eec1da..2ee2ffb0a50b 100644
--- a/drivers/pci/host/pci-ftpci100.c
+++ b/drivers/pci/host/pci-ftpci100.c
@@ -527,14 +527,13 @@ static int faraday_pci_probe(struct platform_device *pdev)
 		return ret;
 
 	list_splice_init(&res, &host->windows);
-	ret = pci_register_host_bridge(host);
+	ret = pci_scan_root_bus_bridge(host);
 	if (ret) {
-		dev_err(dev, "failed to register host: %d\n", ret);
+		dev_err(dev, "failed to scan host: %d\n", ret);
 		return ret;
 	}
 	p->bus = host->bus;
 
-	pci_scan_child_bus(p->bus);
 	pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci);
 	pci_bus_assign_resources(p->bus);
 	pci_bus_add_devices(p->bus);
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 84c98a2bffeb..0383418457be 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -2285,14 +2285,12 @@ static int tegra_pcie_probe(struct platform_device *pdev)
 	host->dev.parent = &pdev->dev;
 	host->ops = &tegra_pcie_ops;
 
-	err = pci_register_host_bridge(host);
+	err = pci_scan_root_bus_bridge(host);
 	if (err < 0) {
 		dev_err(dev, "failed to register host: %d\n", err);
 		goto disable_msi;
 	}
 
-	pci_scan_child_bus(host->bus);
-
 	pci_fixup_irqs(pci_common_swizzle, tegra_pcie_map_irq);
 	pci_bus_size_bridges(host->bus);
 	pci_bus_assign_resources(host->bus);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 690f0b377a0f..5c457c17cf5c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -746,7 +746,7 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
 	dev_set_msi_domain(&bus->dev, d);
 }
 
-int pci_register_host_bridge(struct pci_host_bridge *bridge)
+static int pci_register_host_bridge(struct pci_host_bridge *bridge)
 {
 	struct device *parent = bridge->dev.parent;
 	struct resource_entry *window, *n;
@@ -861,7 +861,6 @@ free:
 	kfree(bus);
 	return err;
 }
-EXPORT_SYMBOL(pci_register_host_bridge);
 
 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 					   struct pci_dev *bridge, int busnr)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index fe1eafd637c3..b56dc13f47c2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -461,7 +461,6 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv);
 struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
 						   size_t priv);
 void pci_free_host_bridge(struct pci_host_bridge *bridge);
-int pci_register_host_bridge(struct pci_host_bridge *bridge);
 struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
 
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
-- 
cgit v1.2.3


From 4b855ad37194f7bdbb200ce7a1c7051fecb56a08 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 26 Jun 2017 12:20:57 +0200
Subject: blk-mq: Create hctx for each present CPU

Currently we only create hctx for online CPUs, which can lead to a lot
of churn due to frequent soft offline / online operations.  Instead
allocate one for each present CPU to avoid this and dramatically simplify
the code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>
Cc: linux-block@vger.kernel.org
Cc: linux-nvme@lists.infradead.org
Link: http://lkml.kernel.org/r/20170626102058.10200-3-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 block/blk-mq.c             | 120 +++++----------------------------------------
 block/blk-mq.h             |   5 --
 include/linux/cpuhotplug.h |   1 -
 3 files changed, 11 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96850b1..dd390e27824d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -37,9 +37,6 @@
 #include "blk-wbt.h"
 #include "blk-mq-sched.h"
 
-static DEFINE_MUTEX(all_q_mutex);
-static LIST_HEAD(all_q_list);
-
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
@@ -1975,8 +1972,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
 
-		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpu_online(i))
+		/* If the cpu isn't present, the cpu is mapped to first hctx */
+		if (!cpu_present(i))
 			continue;
 
 		hctx = blk_mq_map_queue(q, i);
@@ -2019,8 +2016,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 	}
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q,
-			       const struct cpumask *online_mask)
+static void blk_mq_map_swqueue(struct request_queue *q)
 {
 	unsigned int i, hctx_idx;
 	struct blk_mq_hw_ctx *hctx;
@@ -2038,13 +2034,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 
 	/*
-	 * Map software to hardware queues
+	 * Map software to hardware queues.
+	 *
+	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
-	for_each_possible_cpu(i) {
-		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpumask_test_cpu(i, online_mask))
-			continue;
-
+	for_each_present_cpu(i) {
 		hctx_idx = q->mq_map[i];
 		/* unmapped hw queue can be remapped after CPU topo changed */
 		if (!set->tags[hctx_idx] &&
@@ -2330,16 +2324,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		blk_queue_softirq_done(q, set->ops->complete);
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
-
-	get_online_cpus();
-	mutex_lock(&all_q_mutex);
-
-	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
-	blk_mq_map_swqueue(q, cpu_online_mask);
-
-	mutex_unlock(&all_q_mutex);
-	put_online_cpus();
+	blk_mq_map_swqueue(q);
 
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
@@ -2365,18 +2351,12 @@ void blk_mq_free_queue(struct request_queue *q)
 {
 	struct blk_mq_tag_set	*set = q->tag_set;
 
-	mutex_lock(&all_q_mutex);
-	list_del_init(&q->all_q_node);
-	mutex_unlock(&all_q_mutex);
-
 	blk_mq_del_queue_tag_set(q);
-
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q,
-				const struct cpumask *online_mask)
+static void blk_mq_queue_reinit(struct request_queue *q)
 {
 	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
@@ -2389,76 +2369,12 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 	 * involves free and re-allocate memory, worthy doing?)
 	 */
 
-	blk_mq_map_swqueue(q, online_mask);
+	blk_mq_map_swqueue(q);
 
 	blk_mq_sysfs_register(q);
 	blk_mq_debugfs_register_hctxs(q);
 }
 
-/*
- * New online cpumask which is going to be set in this hotplug event.
- * Declare this cpumasks as global as cpu-hotplug operation is invoked
- * one-by-one and dynamically allocating this could result in a failure.
- */
-static struct cpumask cpuhp_online_new;
-
-static void blk_mq_queue_reinit_work(void)
-{
-	struct request_queue *q;
-
-	mutex_lock(&all_q_mutex);
-	/*
-	 * We need to freeze and reinit all existing queues.  Freezing
-	 * involves synchronous wait for an RCU grace period and doing it
-	 * one by one may take a long time.  Start freezing all queues in
-	 * one swoop and then wait for the completions so that freezing can
-	 * take place in parallel.
-	 */
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_freeze_queue_start(q);
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_freeze_queue_wait(q);
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q, &cpuhp_online_new);
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_unfreeze_queue(q);
-
-	mutex_unlock(&all_q_mutex);
-}
-
-static int blk_mq_queue_reinit_dead(unsigned int cpu)
-{
-	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
-	blk_mq_queue_reinit_work();
-	return 0;
-}
-
-/*
- * Before hotadded cpu starts handling requests, new mappings must be
- * established.  Otherwise, these requests in hw queue might never be
- * dispatched.
- *
- * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
- * for CPU0, and ctx1 for CPU1).
- *
- * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
- * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
- *
- * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
- * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
- * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
- * ignored.
- */
-static int blk_mq_queue_reinit_prepare(unsigned int cpu)
-{
-	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
-	cpumask_set_cpu(cpu, &cpuhp_online_new);
-	blk_mq_queue_reinit_work();
-	return 0;
-}
-
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
@@ -2669,7 +2585,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_realloc_hw_ctxs(set, q);
-		blk_mq_queue_reinit(q, cpu_online_mask);
+		blk_mq_queue_reinit(q);
 	}
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
@@ -2885,24 +2801,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 }
 EXPORT_SYMBOL_GPL(blk_mq_poll);
 
-void blk_mq_disable_hotplug(void)
-{
-	mutex_lock(&all_q_mutex);
-}
-
-void blk_mq_enable_hotplug(void)
-{
-	mutex_unlock(&all_q_mutex);
-}
-
 static int __init blk_mq_init(void)
 {
 	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
 				blk_mq_hctx_notify_dead);
-
-	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
-				  blk_mq_queue_reinit_prepare,
-				  blk_mq_queue_reinit_dead);
 	return 0;
 }
 subsys_initcall(blk_mq_init);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48e3551..558df56544d2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -56,11 +56,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 				bool at_head);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
-/*
- * CPU hotplug helpers
- */
-void blk_mq_enable_hotplug(void);
-void blk_mq_disable_hotplug(void);
 
 /*
  * CPU -> queue mappings
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c15f22c54535..7f815d915977 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -58,7 +58,6 @@ enum cpuhp_state {
 	CPUHP_XEN_EVTCHN_PREPARE,
 	CPUHP_ARM_SHMOBILE_SCU_PREPARE,
 	CPUHP_SH_SH3X_PREPARE,
-	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
 	CPUHP_NET_IUCV_PREPARE,
-- 
cgit v1.2.3


From 9ae3b3f52c62ddd5eb12c57f195f4f38121faa01 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 28 Jun 2017 15:30:13 -0600
Subject: block: provide bio_uninit() free freeing integrity/task associations

Wen reports significant memory leaks with DIF and O_DIRECT:

"With nvme devive + T10 enabled, On a system it has 256GB and started
logging /proc/meminfo & /proc/slabinfo for every minute and in an hour
it increased by 15968128 kB or ~15+GB.. Approximately 256 MB / minute
leaking.

/proc/meminfo | grep SUnreclaim...

SUnreclaim:      6752128 kB
SUnreclaim:      6874880 kB
SUnreclaim:      7238080 kB
....
SUnreclaim:     22307264 kB
SUnreclaim:     22485888 kB
SUnreclaim:     22720256 kB

When testcases with T10 enabled call into __blkdev_direct_IO_simple,
code doesn't free memory allocated by bio_integrity_alloc. The patch
fixes the issue. HTX has been run with +60 hours without failure."

Since __blkdev_direct_IO_simple() allocates the bio on the stack, it
doesn't go through the regular bio free. This means that any ancillary
data allocated with the bio through the stack is not freed. Hence, we
can leak the integrity data associated with the bio, if the device is
using DIF/DIX.

Fix this by providing a bio_uninit() and export it, so that we can use
it to free this data. Note that this is a minimal fix for this issue.
Any current user of bio's that are allocated outside of
bio_alloc_bioset() suffers from this issue, most notably some drivers.
We will fix those in a more comprehensive patch for 4.13. This also
means that the commit marked as being fixed by this isn't the real
culprit, it's just the most obvious one out there.

Fixes: 542ff7bf18c6 ("block: new direct I/O implementation")
Reported-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 12 +++++++++---
 fs/block_dev.c      |  5 ++++-
 include/linux/bio.h |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 888e7801c638..26b0810fb8ea 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -240,20 +240,21 @@ fallback:
 	return bvl;
 }
 
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio);
 }
+EXPORT_SYMBOL(bio_uninit);
 
 static void bio_free(struct bio *bio)
 {
 	struct bio_set *bs = bio->bi_pool;
 	void *p;
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	if (bs) {
 		bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
 	}
 }
 
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
 void bio_init(struct bio *bio, struct bio_vec *table,
 	      unsigned short max_vecs)
 {
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
 {
 	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	memset(bio, 0, BIO_RESET_BYTES);
 	bio->bi_flags = flags;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599dddd36..0a7404ef9335 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -263,7 +263,10 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 		kfree(vecs);
 
 	if (unlikely(bio.bi_error))
-		return bio.bi_error;
+		ret = bio.bi_error;
+
+	bio_uninit(&bio);
+
 	return ret;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0e99cf..a7e29fa0981f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -426,6 +426,7 @@ extern void bio_advance(struct bio *, unsigned);
 
 extern void bio_init(struct bio *bio, struct bio_vec *table,
 		     unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
-- 
cgit v1.2.3


From 0607512d0a8d7fac86667466b884095e04b10a59 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Jun 2017 17:35:41 +0200
Subject: ras: mark stub functions as 'inline'

With CONFIG_RAS disabled, we get two harmless warnings about
unused functions:

include/linux/ras.h:37:13: error: 'log_arm_hw_error' defined but not used [-Werror=unused-function]
 static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
include/linux/ras.h:33:13: error: 'log_non_standard_event' defined but not used [-Werror=unused-function]
 static void log_non_standard_event(const guid_t *sec_type,

Clearly these are meant to be 'inline', like the other stubs
in the same header.

Fixes: 297b64c74385 ("ras: acpi / apei: generate trace event for unrecognized CPER section")
Fixes: e9279e83ad1f ("trace, ras: add ARM processor error trace event")
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/ras.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ras.h b/include/linux/ras.h
index 7d61863ff265..be5338a35d57 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -30,11 +30,13 @@ void log_non_standard_event(const guid_t *sec_type,
 			    const u8 sev, const u8 *err, const u32 len);
 void log_arm_hw_error(struct cper_sec_proc_arm *err);
 #else
-static void log_non_standard_event(const guid_t *sec_type,
-				   const guid_t *fru_id, const char *fru_text,
-				   const u8 sev, const u8 *err,
-				   const u32 len) { return; }
-static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
+static inline void
+log_non_standard_event(const guid_t *sec_type,
+		       const guid_t *fru_id, const char *fru_text,
+		       const u8 sev, const u8 *err, const u32 len)
+{ return; }
+static inline void
+log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
 #endif
 
 #endif /* __RAS_H__ */
-- 
cgit v1.2.3


From 425562429d4f3b134c7390249c23a3f647aad199 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Thu, 22 Jun 2017 12:00:58 +0200
Subject: pinctrl: generic: Add output-enable property

Add output-enable generic pin configuration property.
This properties allows enabling/disabling pin's output capabilities
without actually driving any value on the line.

Acked-by: Rob Herring <robh@kernel.org>
[Added inline elaborations on buffer enabling/disabling]
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../devicetree/bindings/pinctrl/pinctrl-bindings.txt      | 10 ++++++++--
 drivers/pinctrl/pinconf-generic.c                         |  3 +++
 include/linux/pinctrl/pinconf-generic.h                   | 15 +++++++++++----
 3 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index f01d154090da..2d4ef9f0fded 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -251,14 +251,20 @@ drive-push-pull		- drive actively high and low
 drive-open-drain	- drive with open drain
 drive-open-source	- drive with open source
 drive-strength		- sink or source at most X mA
-input-enable		- enable input on pin (no effect on output)
-input-disable		- disable input on pin (no effect on output)
+input-enable		- enable input on pin (no effect on output, such as
+			  enabling an input buffer)
+input-disable		- disable input on pin (no effect on output, such as
+			  disabling an input buffer)
 input-schmitt-enable	- enable schmitt-trigger mode
 input-schmitt-disable	- disable schmitt-trigger mode
 input-debounce		- debounce mode with debound time X
 power-source		- select between different power supplies
 low-power-enable	- enable low power mode
 low-power-disable	- disable low power mode
+output-disable		- disable output on a pin (such as disable an output
+			  buffer)
+output-enable		- enable output on a pin without actively driving it
+			  (such as enabling an output buffer)
 output-low		- set the pin to output mode with low level
 output-high		- set the pin to output mode with high level
 slew-rate		- set the slew rate
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 720a19fd38d2..fc0c230aa11f 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -44,6 +44,7 @@ static const struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL, false),
 	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL, false),
 	PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode", true),
+	PCONFDUMP(PIN_CONFIG_OUTPUT_ENABLE, "output enabled", NULL, false),
 	PCONFDUMP(PIN_CONFIG_OUTPUT, "pin output", "level", true),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector", true),
 	PCONFDUMP(PIN_CONFIG_SLEW_RATE, "slew rate", NULL, true),
@@ -172,6 +173,8 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 },
 	{ "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 },
 	{ "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 },
+	{ "output-disable", PIN_CONFIG_OUTPUT_ENABLE, 0 },
+	{ "output-enable", PIN_CONFIG_OUTPUT_ENABLE, 1 },
 	{ "output-high", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-low", PIN_CONFIG_OUTPUT, 0, },
 	{ "power-source", PIN_CONFIG_POWER_SOURCE, 0 },
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 7620eb127cff..231d3075815a 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -73,10 +73,16 @@
  *	operation, if several modes of operation are supported these can be
  *	passed in the argument on a custom form, else just use argument 1
  *	to indicate low power mode, argument 0 turns low power mode off.
- * @PIN_CONFIG_OUTPUT: this will configure the pin as an output. Use argument
- *	1 to indicate high level, argument 0 to indicate low level. (Please
- *	see Documentation/pinctrl.txt, section "GPIO mode pitfalls" for a
- *	discussion around this parameter.)
+ * @PIN_CONFIG_OUTPUT_ENABLE: this will enable the pin's output mode
+ * 	without driving a value there. For most platforms this reduces to
+ * 	enable the output buffers and then let the pin controller current
+ * 	configuration (eg. the currently selected mux function) drive values on
+ * 	the line. Use argument 1 to enable output mode, argument 0 to disable
+ * 	it.
+ * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a
+ * 	value on the line. Use argument 1 to indicate high level, argument 0 to
+ * 	indicate low level. (Please see Documentation/pinctrl.txt, section
+ * 	"GPIO mode pitfalls" for a discussion around this parameter.)
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
  *	supplies, the argument to this parameter (on a custom format) tells
  *	the driver which alternative power source to use.
@@ -105,6 +111,7 @@ enum pin_config_param {
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_SCHMITT_ENABLE,
 	PIN_CONFIG_LOW_POWER_MODE,
+	PIN_CONFIG_OUTPUT_ENABLE,
 	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_POWER_SOURCE,
 	PIN_CONFIG_SLEW_RATE,
-- 
cgit v1.2.3


From 6e0c90d691cd5d90569f5918ab03eb76c81f9c6e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 26 Jun 2017 21:28:41 -0700
Subject: libnvdimm, pmem, dax: export a cache control attribute

The dax_flush() operation can be turned into a nop on platforms where
firmware arranges for cpu caches to be flushed on a power-fail event.
The ACPI 6.2 specification defines a mechanism for the platform to
indicate this capability so the kernel can select the proper default.
However, for other platforms, the administrator must toggle this setting
manually.

Given this flush setting is a dax-specific mechanism we advertise it
through a 'dax' attribute group hanging off a host device. For example,
a 'pmem0' block-device gets a 'dax' sysfs-subdirectory with a
'write_cache' attribute to control response to dax cache flush requests.
This is similar to the 'queue/write_cache' attribute that appears under
block devices.

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c   | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/pmem.c | 10 +++++++
 include/linux/dax.h   |  3 ++
 3 files changed, 92 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 8bf71195921b..4827251782a1 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -119,6 +119,8 @@ EXPORT_SYMBOL_GPL(__bdev_dax_supported);
 enum dax_device_flags {
 	/* !alive + rcu grace period == no new operations / mappings */
 	DAXDEV_ALIVE,
+	/* gate whether dax_flush() calls the low level flush routine */
+	DAXDEV_WRITE_CACHE,
 };
 
 /**
@@ -139,6 +141,71 @@ struct dax_device {
 	const struct dax_operations *ops;
 };
 
+static ssize_t write_cache_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+	ssize_t rc;
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return -ENXIO;
+
+	rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
+				&dax_dev->flags));
+	put_dax(dax_dev);
+	return rc;
+}
+
+static ssize_t write_cache_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	bool write_cache;
+	int rc = strtobool(buf, &write_cache);
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return -ENXIO;
+
+	if (rc)
+		len = rc;
+	else if (write_cache)
+		set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+	else
+		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+
+	put_dax(dax_dev);
+	return len;
+}
+static DEVICE_ATTR_RW(write_cache);
+
+static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, typeof(*dev), kobj);
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return 0;
+
+	if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush)
+		return 0;
+	return a->mode;
+}
+
+static struct attribute *dax_attributes[] = {
+	&dev_attr_write_cache.attr,
+	NULL,
+};
+
+struct attribute_group dax_attribute_group = {
+	.name = "dax",
+	.attrs = dax_attributes,
+	.is_visible = dax_visible,
+};
+EXPORT_SYMBOL_GPL(dax_attribute_group);
+
 /**
  * dax_direct_access() - translate a device pgoff to an absolute pfn
  * @dax_dev: a dax_device instance representing the logical memory range
@@ -194,11 +261,23 @@ void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 	if (!dax_alive(dax_dev))
 		return;
 
+	if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))
+		return;
+
 	if (dax_dev->ops->flush)
 		dax_dev->ops->flush(dax_dev, pgoff, addr, size);
 }
 EXPORT_SYMBOL_GPL(dax_flush);
 
+void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+	if (wc)
+		set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+	else
+		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(dax_write_cache);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 06f6c27ec1e9..7339d184070e 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -253,6 +253,11 @@ static const struct dax_operations pmem_dax_ops = {
 	.flush = pmem_dax_flush,
 };
 
+static const struct attribute_group *pmem_attribute_groups[] = {
+	&dax_attribute_group,
+	NULL,
+};
+
 static void pmem_release_queue(void *q)
 {
 	blk_cleanup_queue(q);
@@ -287,6 +292,7 @@ static int pmem_attach_disk(struct device *dev,
 	struct pmem_device *pmem;
 	struct resource pfn_res;
 	struct request_queue *q;
+	struct device *gendev;
 	struct gendisk *disk;
 	void *addr;
 
@@ -384,8 +390,12 @@ static int pmem_attach_disk(struct device *dev,
 		put_disk(disk);
 		return -ENOMEM;
 	}
+	dax_write_cache(dax_dev, true);
 	pmem->dax_dev = dax_dev;
 
+	gendev = disk_to_dev(disk);
+	gendev->groups = pmem_attribute_groups;
+
 	device_add_disk(dev, disk);
 	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 73fca1bebaf3..8f39db7439c3 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -23,6 +23,8 @@ struct dax_operations {
 	void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
 };
 
+extern struct attribute_group dax_attribute_group;
+
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
 void put_dax(struct dax_device *dax_dev);
@@ -84,6 +86,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t size);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
 
 /*
  * We use lowest available bit in exceptional entry for locking, one bit for
-- 
cgit v1.2.3


From 0b277961f4484fb3f142caaa1dd1748cb0b2cbee Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 9 Jun 2017 09:46:50 -0700
Subject: libnvdimm, pmem: disable dax flushing when pmem is fronting a
 volatile region

The pmem driver attaches to both persistent and volatile memory ranges
advertised by the ACPI NFIT. When the region is volatile it is redundant
to spend cycles flushing caches at fsync(). Check if the hosting region
is volatile and do not set dax_write_cache() if it is.

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pmem.c        | 13 ++++++++-----
 drivers/nvdimm/region_devs.c |  6 ++++++
 include/linux/libnvdimm.h    |  1 +
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7339d184070e..e7a40f77f729 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -284,10 +284,10 @@ static int pmem_attach_disk(struct device *dev,
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct vmem_altmap __altmap, *altmap = NULL;
+	int nid = dev_to_node(dev), fua, wbc;
 	struct resource *res = &nsio->res;
 	struct nd_pfn *nd_pfn = NULL;
 	struct dax_device *dax_dev;
-	int nid = dev_to_node(dev);
 	struct nd_pfn_sb *pfn_sb;
 	struct pmem_device *pmem;
 	struct resource pfn_res;
@@ -314,9 +314,12 @@ static int pmem_attach_disk(struct device *dev,
 	dev_set_drvdata(dev, pmem);
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
-	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE)
-			|| nvdimm_has_flush(nd_region) < 0)
+	fua = nvdimm_has_flush(nd_region);
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) {
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
+		fua = 0;
+	}
+	wbc = nvdimm_has_cache(nd_region);
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
 				dev_name(&ndns->dev))) {
@@ -360,7 +363,7 @@ static int pmem_attach_disk(struct device *dev,
 		return PTR_ERR(addr);
 	pmem->virt_addr = addr;
 
-	blk_queue_write_cache(q, true, true);
+	blk_queue_write_cache(q, wbc, fua);
 	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -390,7 +393,7 @@ static int pmem_attach_disk(struct device *dev,
 		put_disk(disk);
 		return -ENOMEM;
 	}
-	dax_write_cache(dax_dev, true);
+	dax_write_cache(dax_dev, wbc);
 	pmem->dax_dev = dax_dev;
 
 	gendev = disk_to_dev(disk);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 53a64a16aba4..0c3b089b280a 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1060,6 +1060,12 @@ int nvdimm_has_flush(struct nd_region *nd_region)
 }
 EXPORT_SYMBOL_GPL(nvdimm_has_flush);
 
+int nvdimm_has_cache(struct nd_region *nd_region)
+{
+	return is_nd_pmem(&nd_region->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_cache);
+
 void __exit nd_region_devs_exit(void)
 {
 	ida_destroy(&region_ida);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index b2f659bd661d..a8ee1d0afd70 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -165,4 +165,5 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
 void nvdimm_flush(struct nd_region *nd_region);
 int nvdimm_has_flush(struct nd_region *nd_region);
+int nvdimm_has_cache(struct nd_region *nd_region);
 #endif /* __LIBNVDIMM_H__ */
-- 
cgit v1.2.3


From 14dc6f04f49dc12614d7e90928b495b8d73cd471 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 27 Jun 2017 23:08:34 -0700
Subject: bpf: Add syscall lookup support for fd array and htab

This patch allows userspace to do BPF_MAP_LOOKUP_ELEM on
BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_ARRAY_OF_MAPS and
BPF_MAP_TYPE_HASH_OF_MAPS.

The lookup returns a prog-id or map-id to the userspace.
The userspace can then use the BPF_PROG_GET_FD_BY_ID
or BPF_MAP_GET_FD_BY_ID to get a fd.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h     |  3 +++
 kernel/bpf/arraymap.c   | 27 +++++++++++++++++++++++++++
 kernel/bpf/hashtab.c    | 21 +++++++++++++++++++++
 kernel/bpf/map_in_map.c |  5 +++++
 kernel/bpf/map_in_map.h |  1 +
 kernel/bpf/syscall.c    | 16 +++++++++++++---
 6 files changed, 70 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index deca4e7f2845..5175729270d7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
 	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 };
 
 struct bpf_map {
@@ -288,9 +289,11 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 void bpf_fd_array_map_clear(struct bpf_map *map);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ecb43542246e..d771a3872500 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -334,6 +334,26 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
 	return NULL;
 }
 
+/* only called from syscall */
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+	void **elem, *ptr;
+	int ret =  0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	elem = array_map_lookup_elem(map, key);
+	if (elem && (ptr = READ_ONCE(*elem)))
+		*value = map->ops->map_fd_sys_lookup_elem(ptr);
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 /* only called from syscall */
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags)
@@ -400,6 +420,11 @@ static void prog_fd_array_put_ptr(void *ptr)
 	bpf_prog_put(ptr);
 }
 
+static u32 prog_fd_array_sys_lookup_elem(void *ptr)
+{
+	return ((struct bpf_prog *)ptr)->aux->id;
+}
+
 /* decrement refcnt of all bpf_progs that are stored in this map */
 void bpf_fd_array_map_clear(struct bpf_map *map)
 {
@@ -418,6 +443,7 @@ const struct bpf_map_ops prog_array_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = prog_fd_array_get_ptr,
 	.map_fd_put_ptr = prog_fd_array_put_ptr,
+	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
 };
 
 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -585,4 +611,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = bpf_map_fd_get_ptr,
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 };
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 004334ea13ba..4fb463172aa8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1243,6 +1243,26 @@ static void fd_htab_map_free(struct bpf_map *map)
 	htab_map_free(map);
 }
 
+/* only called from syscall */
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+	void **ptr;
+	int ret = 0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	ptr = htab_map_lookup_elem(map, key);
+	if (ptr)
+		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 /* only called from syscall */
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags)
@@ -1305,4 +1325,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_fd_get_ptr = bpf_map_fd_get_ptr,
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 };
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 59bcdf821ae4..1da574612bea 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -95,3 +95,8 @@ void bpf_map_fd_put_ptr(void *ptr)
 	 */
 	bpf_map_put(ptr);
 }
+
+u32 bpf_map_fd_sys_lookup_elem(void *ptr)
+{
+	return ((struct bpf_map *)ptr)->id;
+}
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index 177fadb689dc..6183db9ec08c 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -19,5 +19,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
 			 int ufd);
 void bpf_map_fd_put_ptr(void *ptr);
+u32 bpf_map_fd_sys_lookup_elem(void *ptr);
 
 #endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8942c820d620..4409ccca8831 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -24,6 +24,13 @@
 #include <linux/kernel.h>
 #include <linux/idr.h>
 
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
+			   (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+			   (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+			   (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+
 DEFINE_PER_CPU(int, bpf_prog_active);
 static DEFINE_IDR(prog_idr);
 static DEFINE_SPINLOCK(prog_idr_lock);
@@ -411,6 +418,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
+	else if (IS_FD_MAP(map))
+		value_size = sizeof(u32);
 	else
 		value_size = map->value_size;
 
@@ -426,9 +435,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_percpu_array_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		err = bpf_stackmap_copy(map, key, value);
-	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
-		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-		err = -ENOTSUPP;
+	} else if (IS_FD_ARRAY(map)) {
+		err = bpf_fd_array_map_lookup_elem(map, key, value);
+	} else if (IS_FD_HASH(map)) {
+		err = bpf_fd_htab_map_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
-- 
cgit v1.2.3


From 14e494542636b7a685c5bf27e695e3bb9ec3fe7d Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Wed, 28 Jun 2017 14:25:00 -0600
Subject: libnvdimm, btt: BTT updates for UEFI 2.7 format

The UEFI 2.7 specification defines an updated BTT metadata format,
bumping the revision to 2.0. Add support for the new format, while
retaining compatibility for the old 1.1 format.

Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Linda Knippers <linda.knippers@hpe.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/btt.c            | 28 +++++++++++++------
 drivers/nvdimm/btt.h            |  2 ++
 drivers/nvdimm/btt_devs.c       | 46 +++++++++++++++++++++++++++----
 drivers/nvdimm/claim.c          |  1 +
 drivers/nvdimm/label.c          |  6 ++++
 drivers/nvdimm/label.h          |  1 +
 drivers/nvdimm/namespace_devs.c | 61 +++++++++++++++++++++++++++++++++++++++--
 drivers/nvdimm/nd.h             |  3 ++
 include/linux/nd.h              |  1 +
 9 files changed, 134 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b8fd9b..7ca11df80ae8 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -37,8 +37,8 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
-	/* arena offsets are 4K from the base of the device */
-	offset += SZ_4K;
+	/* arena offsets may be shifted from the base of the device */
+	offset += arena->nd_btt->initial_offset;
 	return nvdimm_read_bytes(ndns, offset, buf, n, flags);
 }
 
@@ -48,8 +48,8 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
-	/* arena offsets are 4K from the base of the device */
-	offset += SZ_4K;
+	/* arena offsets may be shifted from the base of the device */
+	offset += arena->nd_btt->initial_offset;
 	return nvdimm_write_bytes(ndns, offset, buf, n, flags);
 }
 
@@ -576,8 +576,8 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
 	arena->internal_lbasize = roundup(arena->external_lbasize,
 					INT_LBASIZE_ALIGNMENT);
 	arena->nfree = BTT_DEFAULT_NFREE;
-	arena->version_major = 1;
-	arena->version_minor = 1;
+	arena->version_major = btt->nd_btt->version_major;
+	arena->version_minor = btt->nd_btt->version_minor;
 
 	if (available % BTT_PG_SIZE)
 		available -= (available % BTT_PG_SIZE);
@@ -1425,6 +1425,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
 {
 	struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
 	struct nd_region *nd_region;
+	struct btt_sb *btt_sb;
 	struct btt *btt;
 	size_t rawsize;
 
@@ -1433,10 +1434,21 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
 		return -ENODEV;
 	}
 
-	rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
+	btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
+
+	/*
+	 * If this returns < 0, that is ok as it just means there wasn't
+	 * an existing BTT, and we're creating a new one. We still need to
+	 * call this as we need the version dependent fields in nd_btt to be
+	 * set correctly based on the holder class
+	 */
+	nd_btt_version(nd_btt, ndns, btt_sb);
+
+	rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset;
 	if (rawsize < ARENA_MIN_SIZE) {
 		dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
-				dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K);
+				dev_name(&ndns->dev),
+				ARENA_MIN_SIZE + nd_btt->initial_offset);
 		return -ENXIO;
 	}
 	nd_region = to_nd_region(nd_btt->dev.parent);
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index b2f8651e5395..888e862907a0 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -184,5 +184,7 @@ struct btt {
 };
 
 bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
+int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
+		struct btt_sb *btt_sb);
 
 #endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 31d875a91569..3e359d282f8e 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -260,20 +260,55 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 }
 EXPORT_SYMBOL(nd_btt_arena_is_valid);
 
+int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
+		struct btt_sb *btt_sb)
+{
+	if (ndns->claim_class == NVDIMM_CCLASS_BTT2) {
+		/* Probe/setup for BTT v2.0 */
+		nd_btt->initial_offset = 0;
+		nd_btt->version_major = 2;
+		nd_btt->version_minor = 0;
+		if (nvdimm_read_bytes(ndns, 0, btt_sb, sizeof(*btt_sb), 0))
+			return -ENXIO;
+		if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
+			return -ENODEV;
+		if ((le16_to_cpu(btt_sb->version_major) != 2) ||
+				(le16_to_cpu(btt_sb->version_minor) != 0))
+			return -ENODEV;
+	} else {
+		/*
+		 * Probe/setup for BTT v1.1 (NVDIMM_CCLASS_NONE or
+		 * NVDIMM_CCLASS_BTT)
+		 */
+		nd_btt->initial_offset = SZ_4K;
+		nd_btt->version_major = 1;
+		nd_btt->version_minor = 1;
+		if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
+			return -ENXIO;
+		if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
+			return -ENODEV;
+		if ((le16_to_cpu(btt_sb->version_major) != 1) ||
+				(le16_to_cpu(btt_sb->version_minor) != 1))
+			return -ENODEV;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(nd_btt_version);
+
 static int __nd_btt_probe(struct nd_btt *nd_btt,
 		struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
 {
+	int rc;
+
 	if (!btt_sb || !ndns || !nd_btt)
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
-		return -ENXIO;
-
 	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
 		return -ENXIO;
 
-	if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
-		return -ENODEV;
+	rc = nd_btt_version(nd_btt, ndns, btt_sb);
+	if (rc < 0)
+		return rc;
 
 	nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
 	nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
@@ -298,6 +333,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 	switch (ndns->claim_class) {
 	case NVDIMM_CCLASS_NONE:
 	case NVDIMM_CCLASS_BTT:
+	case NVDIMM_CCLASS_BTT2:
 		break;
 	default:
 		return -ENODEV;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index de9b1cce242e..8d23f68737d9 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -189,6 +189,7 @@ ssize_t nd_namespace_store(struct device *dev,
 	case NVDIMM_CCLASS_NONE:
 		break;
 	case NVDIMM_CCLASS_BTT:
+	case NVDIMM_CCLASS_BTT2:
 		if (!is_nd_btt(dev)) {
 			len = -EBUSY;
 			goto out_attach;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 235f2089fab2..922b68718a1a 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -21,6 +21,7 @@
 #include "nd.h"
 
 static guid_t nvdimm_btt_guid;
+static guid_t nvdimm_btt2_guid;
 static guid_t nvdimm_pfn_guid;
 static guid_t nvdimm_dax_guid;
 
@@ -578,6 +579,8 @@ enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
 {
 	if (guid_equal(guid, &nvdimm_btt_guid))
 		return NVDIMM_CCLASS_BTT;
+	else if (guid_equal(guid, &nvdimm_btt2_guid))
+		return NVDIMM_CCLASS_BTT2;
 	else if (guid_equal(guid, &nvdimm_pfn_guid))
 		return NVDIMM_CCLASS_PFN;
 	else if (guid_equal(guid, &nvdimm_dax_guid))
@@ -593,6 +596,8 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
 {
 	if (claim_class == NVDIMM_CCLASS_BTT)
 		return &nvdimm_btt_guid;
+	else if (claim_class == NVDIMM_CCLASS_BTT2)
+		return &nvdimm_btt2_guid;
 	else if (claim_class == NVDIMM_CCLASS_PFN)
 		return &nvdimm_pfn_guid;
 	else if (claim_class == NVDIMM_CCLASS_DAX)
@@ -1158,6 +1163,7 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region,
 int __init nd_label_init(void)
 {
 	WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid));
+	WARN_ON(guid_parse(NVDIMM_BTT2_GUID, &nvdimm_btt2_guid));
 	WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid));
 	WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid));
 
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 7c8e2cc9e73e..1ebf4d3d01ba 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -113,6 +113,7 @@ struct nd_namespace_label {
 };
 
 #define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a"
+#define NVDIMM_BTT2_GUID "18633bfc-1735-4217-8ac9-17239282d3f8"
 #define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225"
 #define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088"
 
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index f05d9b0672bf..c96e31330213 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1411,6 +1411,58 @@ static ssize_t dpa_extents_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(dpa_extents);
 
+static int btt_claim_class(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	int i, loop_bitmask = 0;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+		struct nd_namespace_index *nsindex;
+
+		nsindex = to_namespace_index(ndd, ndd->ns_current);
+		if (nsindex == NULL)
+			loop_bitmask |= 1;
+		else {
+			/* check whether existing labels are v1.1 or v1.2 */
+			if (__le16_to_cpu(nsindex->major) == 1
+					&& __le16_to_cpu(nsindex->minor) == 1)
+				loop_bitmask |= 2;
+			else
+				loop_bitmask |= 4;
+		}
+	}
+	/*
+	 * If nsindex is null loop_bitmask's bit 0 will be set, and if an index
+	 * block is found, a v1.1 label for any mapping will set bit 1, and a
+	 * v1.2 label will set bit 2.
+	 *
+	 * At the end of the loop, at most one of the three bits must be set.
+	 * If multiple bits were set, it means the different mappings disagree
+	 * about their labels, and this must be cleaned up first.
+	 *
+	 * If all the label index blocks are found to agree, nsindex of NULL
+	 * implies labels haven't been initialized yet, and when they will,
+	 * they will be of the 1.2 format, so we can assume BTT2.0
+	 *
+	 * If 1.1 labels are found, we enforce BTT1.1, and if 1.2 labels are
+	 * found, we enforce BTT2.0
+	 *
+	 * If the loop was never entered, default to BTT1.1 (legacy namespaces)
+	 */
+	switch (loop_bitmask) {
+	case 0:
+	case 2:
+		return NVDIMM_CCLASS_BTT;
+	case 1:
+	case 4:
+		return NVDIMM_CCLASS_BTT2;
+	default:
+		return -ENXIO;
+	}
+}
+
 static ssize_t holder_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -1433,7 +1485,7 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf)
 		return -EBUSY;
 
 	if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0)
-		ndns->claim_class = NVDIMM_CCLASS_BTT;
+		ndns->claim_class = btt_claim_class(dev);
 	else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0)
 		ndns->claim_class = NVDIMM_CCLASS_PFN;
 	else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0)
@@ -1443,6 +1495,10 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf)
 	else
 		return -EINVAL;
 
+	/* btt_claim_class() could've returned an error */
+	if (ndns->claim_class < 0)
+		return ndns->claim_class;
+
 	return 0;
 }
 
@@ -1474,7 +1530,8 @@ static ssize_t holder_class_show(struct device *dev,
 	device_lock(dev);
 	if (ndns->claim_class == NVDIMM_CCLASS_NONE)
 		rc = sprintf(buf, "\n");
-	else if (ndns->claim_class == NVDIMM_CCLASS_BTT)
+	else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
+			(ndns->claim_class == NVDIMM_CCLASS_BTT2))
 		rc = sprintf(buf, "btt\n");
 	else if (ndns->claim_class == NVDIMM_CCLASS_PFN)
 		rc = sprintf(buf, "pfn\n");
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e802c877d783..e1b5715bd91f 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -195,6 +195,9 @@ struct nd_btt {
 	u64 size;
 	u8 *uuid;
 	int id;
+	int initial_offset;
+	u16 version_major;
+	u16 version_minor;
 };
 
 enum nd_pfn_mode {
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 96069c543890..5dc6b695437d 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -24,6 +24,7 @@ enum nvdimm_event {
 enum nvdimm_claim_class {
 	NVDIMM_CCLASS_NONE,
 	NVDIMM_CCLASS_BTT,
+	NVDIMM_CCLASS_BTT2,
 	NVDIMM_CCLASS_PFN,
 	NVDIMM_CCLASS_DAX,
 	NVDIMM_CCLASS_UNKNOWN,
-- 
cgit v1.2.3


From 18e9710ee59ce3bd2a2512ddcd3f7ceebe8b8d17 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 27 May 2017 11:16:51 +0300
Subject: fs: implement vfs_iter_read using do_iter_read

De-dupliate some code and allow for passing the flags argument to
vfs_iter_read.  Additional it properly updates atime now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/block/loop.c              |  4 ++--
 drivers/target/target_core_file.c |  2 +-
 fs/coda/file.c                    |  2 +-
 fs/read_write.c                   | 29 +++++++++--------------------
 include/linux/fs.h                |  3 ++-
 5 files changed, 15 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ebbd0c3fe0ed..b64649bec64e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -342,7 +342,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
 
 	rq_for_each_segment(bvec, rq, iter) {
 		iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
-		len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
+		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0)
 			return len;
 
@@ -383,7 +383,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
 		b.bv_len = bvec.bv_len;
 
 		iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
-		len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
+		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0) {
 			ret = len;
 			goto out_free_page;
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 73b8f93a5fef..2befc0d7fdb1 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -275,7 +275,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 	if (is_write)
 		ret = vfs_iter_write(fd, &iter, &pos);
 	else
-		ret = vfs_iter_read(fd, &iter, &pos);
+		ret = vfs_iter_read(fd, &iter, &pos, 0);
 
 	if (is_write) {
 		if (ret < 0 || ret != data_length) {
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9d956cd6d46f..f1102822bcfd 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -34,7 +34,7 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 
-	return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
+	return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0);
 }
 
 static ssize_t
diff --git a/fs/read_write.c b/fs/read_write.c
index 64b61a032a56..4dab30b62f0e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -356,26 +356,6 @@ out_putf:
 }
 #endif
 
-ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
-{
-	struct kiocb kiocb;
-	ssize_t ret;
-
-	if (!file->f_op->read_iter)
-		return -EINVAL;
-
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = *ppos;
-
-	iter->type |= READ;
-	ret = call_read_iter(file, &kiocb, iter);
-	BUG_ON(ret == -EIOCBQUEUED);
-	if (ret > 0)
-		*ppos = kiocb.ki_pos;
-	return ret;
-}
-EXPORT_SYMBOL(vfs_iter_read);
-
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
 {
 	struct kiocb kiocb;
@@ -944,6 +924,15 @@ out:
 	return ret;
 }
 
+ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+		int flags)
+{
+	if (!file->f_op->read_iter)
+		return -EINVAL;
+	return do_iter_read(file, iter, ppos, flags);
+}
+EXPORT_SYMBOL(vfs_iter_read);
+
 static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
 		loff_t *pos, int flags)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..3f6a4f4efb32 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2789,7 +2789,8 @@ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 
-ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos);
+ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+		int flags);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);
 
 /* fs/block_dev.c */
-- 
cgit v1.2.3


From abbb65899aecfc97bda64b6816d1e501754cfe1f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 27 May 2017 11:16:52 +0300
Subject: fs: implement vfs_iter_write using do_iter_write

De-dupliate some code and allow for passing the flags argument to
vfs_iter_write.  Additionally it now properly updates timestamps.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/block/loop.c              |  2 +-
 drivers/target/target_core_file.c |  4 ++--
 fs/coda/file.c                    |  2 +-
 fs/read_write.c                   | 29 +++++++++--------------------
 fs/splice.c                       |  2 +-
 include/linux/fs.h                |  3 ++-
 6 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b64649bec64e..2a0d997efda4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -266,7 +266,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
 	iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);
 
 	file_start_write(file);
-	bw = vfs_iter_write(file, &i, ppos);
+	bw = vfs_iter_write(file, &i, ppos, 0);
 	file_end_write(file);
 
 	if (likely(bw ==  bvec->bv_len))
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 2befc0d7fdb1..e921948415c7 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -273,7 +273,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 
 	iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
 	if (is_write)
-		ret = vfs_iter_write(fd, &iter, &pos);
+		ret = vfs_iter_write(fd, &iter, &pos, 0);
 	else
 		ret = vfs_iter_read(fd, &iter, &pos, 0);
 
@@ -409,7 +409,7 @@ fd_execute_write_same(struct se_cmd *cmd)
 	}
 
 	iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
-	ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos);
+	ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
 
 	kfree(bvec);
 	if (ret < 0 || ret != len) {
diff --git a/fs/coda/file.c b/fs/coda/file.c
index f1102822bcfd..363402fcb3ed 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -51,7 +51,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 	host_file = cfi->cfi_container;
 	file_start_write(host_file);
 	inode_lock(coda_inode);
-	ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
+	ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
 	coda_inode->i_size = file_inode(host_file)->i_size;
 	coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
 	coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
diff --git a/fs/read_write.c b/fs/read_write.c
index 4dab30b62f0e..acfd3dc63d57 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -356,26 +356,6 @@ out_putf:
 }
 #endif
 
-ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
-{
-	struct kiocb kiocb;
-	ssize_t ret;
-
-	if (!file->f_op->write_iter)
-		return -EINVAL;
-
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = *ppos;
-
-	iter->type |= WRITE;
-	ret = call_write_iter(file, &kiocb, iter);
-	BUG_ON(ret == -EIOCBQUEUED);
-	if (ret > 0)
-		*ppos = kiocb.ki_pos;
-	return ret;
-}
-EXPORT_SYMBOL(vfs_iter_write);
-
 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
 {
 	struct inode *inode;
@@ -962,6 +942,15 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
 	return ret;
 }
 
+ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
+		int flags)
+{
+	if (!file->f_op->write_iter)
+		return -EINVAL;
+	return do_iter_write(file, iter, ppos, flags);
+}
+EXPORT_SYMBOL(vfs_iter_write);
+
 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		  unsigned long vlen, loff_t *pos, int flags)
 {
diff --git a/fs/splice.c b/fs/splice.c
index 540c4a44756c..ae41201d0325 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -762,7 +762,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 		iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
 			      sd.total_len - left);
-		ret = vfs_iter_write(out, &from, &sd.pos);
+		ret = vfs_iter_write(out, &from, &sd.pos, 0);
 		if (ret <= 0)
 			break;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f6a4f4efb32..c67f1f8ee789 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2791,7 +2791,8 @@ extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		int flags);
-ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);
+ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
+		int flags);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
-- 
cgit v1.2.3


From 9c5f6908de03a4f52ba7364b11fcd6116225480c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 29 Jun 2017 21:39:54 -0400
Subject: copy_{from,to}_user(): move kasan checks and might_fault()
 out-of-line

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uaccess.h | 16 ++++++++--------
 lib/usercopy.c          | 10 ++++++++--
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 201418d5e15c..e57328896a16 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -109,8 +109,11 @@ static inline unsigned long
 _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long res = n;
-	if (likely(access_ok(VERIFY_READ, from, n)))
+	might_fault();
+	if (likely(access_ok(VERIFY_READ, from, n))) {
+		kasan_check_write(to, n);
 		res = raw_copy_from_user(to, from, n);
+	}
 	if (unlikely(res))
 		memset(to + (n - res), 0, res);
 	return res;
@@ -124,8 +127,11 @@ _copy_from_user(void *, const void __user *, unsigned long);
 static inline unsigned long
 _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (access_ok(VERIFY_WRITE, to, n))
+	might_fault();
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		kasan_check_read(from, n);
 		n = raw_copy_to_user(to, from, n);
+	}
 	return n;
 }
 #else
@@ -146,9 +152,6 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	int sz = __compiletime_object_size(to);
 
-	might_fault();
-	kasan_check_write(to, n);
-
 	if (likely(sz < 0 || sz >= n)) {
 		check_object_size(to, n, false);
 		n = _copy_from_user(to, from, n);
@@ -165,9 +168,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	int sz = __compiletime_object_size(from);
 
-	kasan_check_read(from, n);
-	might_fault();
-
 	if (likely(sz < 0 || sz >= n)) {
 		check_object_size(from, n, true);
 		n = _copy_to_user(to, from, n);
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 1b6010a3beb8..f5d9f08ee032 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -6,8 +6,11 @@
 unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long res = n;
-	if (likely(access_ok(VERIFY_READ, from, n)))
+	might_fault();
+	if (likely(access_ok(VERIFY_READ, from, n))) {
+		kasan_check_write(to, n);
 		res = raw_copy_from_user(to, from, n);
+	}
 	if (unlikely(res))
 		memset(to + (n - res), 0, res);
 	return res;
@@ -18,8 +21,11 @@ EXPORT_SYMBOL(_copy_from_user);
 #ifndef INLINE_COPY_TO_USER
 unsigned long _copy_to_user(void *to, const void __user *from, unsigned long n)
 {
-	if (likely(access_ok(VERIFY_WRITE, to, n)))
+	might_fault();
+	if (likely(access_ok(VERIFY_WRITE, to, n))) {
+		kasan_check_read(from, n);
 		n = raw_copy_to_user(to, from, n);
+	}
 	return n;
 }
 EXPORT_SYMBOL(_copy_to_user);
-- 
cgit v1.2.3


From b0377fedb6528087ed319b0d054d6ed82240372c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 29 Jun 2017 21:42:43 -0400
Subject: copy_{to,from}_user(): consolidate object size checks

... and move them into thread_info.h, next to check_object_size()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/thread_info.h | 27 +++++++++++++++++++++++++++
 include/linux/uaccess.h     | 28 ++--------------------------
 2 files changed, 29 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index d7d3ea637dd0..250a27614328 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -113,6 +113,33 @@ static inline void check_object_size(const void *ptr, unsigned long n,
 { }
 #endif /* CONFIG_HARDENED_USERCOPY */
 
+extern void __compiletime_error("copy source size is too small")
+__bad_copy_from(void);
+extern void __compiletime_error("copy destination size is too small")
+__bad_copy_to(void);
+
+static inline void copy_overflow(int size, unsigned long count)
+{
+	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+
+static __always_inline bool
+check_copy_size(const void *addr, size_t bytes, bool is_source)
+{
+	int sz = __compiletime_object_size(addr);
+	if (unlikely(sz >= 0 && sz < bytes)) {
+		if (!__builtin_constant_p(bytes))
+			copy_overflow(sz, bytes);
+		else if (is_source)
+			__bad_copy_from();
+		else
+			__bad_copy_to();
+		return false;
+	}
+	check_object_size(addr, bytes, is_source);
+	return true;
+}
+
 #ifndef arch_setup_new_exec
 static inline void arch_setup_new_exec(void) { }
 #endif
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index e57328896a16..80b587085e79 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -139,43 +139,19 @@ extern unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 #endif
 
-extern void __compiletime_error("usercopy buffer size is too small")
-__bad_copy_user(void);
-
-static inline void copy_user_overflow(int size, unsigned long count)
-{
-	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
-}
-
 static __always_inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	int sz = __compiletime_object_size(to);
-
-	if (likely(sz < 0 || sz >= n)) {
-		check_object_size(to, n, false);
+	if (likely(check_copy_size(to, n, false)))
 		n = _copy_from_user(to, from, n);
-	} else if (!__builtin_constant_p(n))
-		copy_user_overflow(sz, n);
-	else
-		__bad_copy_user();
-
 	return n;
 }
 
 static __always_inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	int sz = __compiletime_object_size(from);
-
-	if (likely(sz < 0 || sz >= n)) {
-		check_object_size(from, n, true);
+	if (likely(check_copy_size(from, n, true)))
 		n = _copy_to_user(to, from, n);
-	} else if (!__builtin_constant_p(n))
-		copy_user_overflow(sz, n);
-	else
-		__bad_copy_user();
-
 	return n;
 }
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3


From aa28de275a248879f9828cb9f7ee7e119c72ff96 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 29 Jun 2017 21:45:10 -0400
Subject: iov_iter/hardening: move object size checks to inlined part

There we actually have useful information about object sizes.
Note: this patch has them done for all iov_iter flavours.
Right now we do them twice in iovec case, but that'll change
very shortly.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 lib/iov_iter.c      | 22 ++++++++++----------
 2 files changed, 64 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index f2d36a3d3005..243e2362fe1a 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -10,6 +10,7 @@
 #define __LINUX_UIO_H
 
 #include <linux/kernel.h>
+#include <linux/thread_info.h>
 #include <uapi/linux/uio.h>
 
 struct page;
@@ -91,11 +92,58 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
-size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
-size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
-bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
-size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
-bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
+
+size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
+size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
+bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
+size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
+bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
+
+static __always_inline __must_check
+size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(!check_copy_size(addr, bytes, true)))
+		return bytes;
+	else
+		return _copy_to_iter(addr, bytes, i);
+}
+
+static __always_inline __must_check
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(!check_copy_size(addr, bytes, false)))
+		return bytes;
+	else
+		return _copy_from_iter(addr, bytes, i);
+}
+
+static __always_inline __must_check
+bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(!check_copy_size(addr, bytes, false)))
+		return false;
+	else
+		return _copy_from_iter_full(addr, bytes, i);
+}
+
+static __always_inline __must_check
+size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(!check_copy_size(addr, bytes, false)))
+		return bytes;
+	else
+		return _copy_from_iter_nocache(addr, bytes, i);
+}
+
+static __always_inline __must_check
+bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(!check_copy_size(addr, bytes, false)))
+		return false;
+	else
+		return _copy_from_iter_full_nocache(addr, bytes, i);
+}
+
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f835964c9485..bc4a63ebe91a 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -535,7 +535,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 	return bytes;
 }
 
-size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
 	if (unlikely(i->type & ITER_PIPE))
@@ -550,9 +550,9 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(copy_to_iter);
+EXPORT_SYMBOL(_copy_to_iter);
 
-size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(i->type & ITER_PIPE)) {
@@ -569,9 +569,9 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(copy_from_iter);
+EXPORT_SYMBOL(_copy_from_iter);
 
-bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(i->type & ITER_PIPE)) {
@@ -594,9 +594,9 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 	iov_iter_advance(i, bytes);
 	return true;
 }
-EXPORT_SYMBOL(copy_from_iter_full);
+EXPORT_SYMBOL(_copy_from_iter_full);
 
-size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(i->type & ITER_PIPE)) {
@@ -613,9 +613,9 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(copy_from_iter_nocache);
+EXPORT_SYMBOL(_copy_from_iter_nocache);
 
-bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(i->type & ITER_PIPE)) {
@@ -637,7 +637,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 	iov_iter_advance(i, bytes);
 	return true;
 }
-EXPORT_SYMBOL(copy_from_iter_full_nocache);
+EXPORT_SYMBOL(_copy_from_iter_full_nocache);
 
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
@@ -663,7 +663,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 	}
 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
 		void *kaddr = kmap_atomic(page);
-		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
+		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
 	} else
-- 
cgit v1.2.3


From 3cb57df37bf3c87c7bbd2bd6f94d9d48c1c8e2ae Mon Sep 17 00:00:00 2001
From: Peter Oh <peter.oh@bowerswilkins.com>
Date: Tue, 27 Jun 2017 15:07:29 -0700
Subject: ieee80211: update public action codes

Update Public Action field values as updated in IEEE Std 802.11-2016,
so that modules/drivers can refer it.

Signed-off-by: Peter Oh <peter.oh@bowerswilkins.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/ieee80211.h | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 34e1bcd2d7ff..55a604ad459f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2113,10 +2113,43 @@ enum ieee80211_key_len {
 
 #define PMK_MAX_LEN			48
 
-/* Public action codes */
+/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
 enum ieee80211_pub_actioncode {
+	WLAN_PUB_ACTION_20_40_BSS_COEX = 0,
+	WLAN_PUB_ACTION_DSE_ENABLEMENT = 1,
+	WLAN_PUB_ACTION_DSE_DEENABLEMENT = 2,
+	WLAN_PUB_ACTION_DSE_REG_LOC_ANN = 3,
 	WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4,
+	WLAN_PUB_ACTION_DSE_MSMT_REQ = 5,
+	WLAN_PUB_ACTION_DSE_MSMT_RESP = 6,
+	WLAN_PUB_ACTION_MSMT_PILOT = 7,
+	WLAN_PUB_ACTION_DSE_PC = 8,
+	WLAN_PUB_ACTION_VENDOR_SPECIFIC = 9,
+	WLAN_PUB_ACTION_GAS_INITIAL_REQ = 10,
+	WLAN_PUB_ACTION_GAS_INITIAL_RESP = 11,
+	WLAN_PUB_ACTION_GAS_COMEBACK_REQ = 12,
+	WLAN_PUB_ACTION_GAS_COMEBACK_RESP = 13,
 	WLAN_PUB_ACTION_TDLS_DISCOVER_RES = 14,
+	WLAN_PUB_ACTION_LOC_TRACK_NOTI = 15,
+	WLAN_PUB_ACTION_QAB_REQUEST_FRAME = 16,
+	WLAN_PUB_ACTION_QAB_RESPONSE_FRAME = 17,
+	WLAN_PUB_ACTION_QMF_POLICY = 18,
+	WLAN_PUB_ACTION_QMF_POLICY_CHANGE = 19,
+	WLAN_PUB_ACTION_QLOAD_REQUEST = 20,
+	WLAN_PUB_ACTION_QLOAD_REPORT = 21,
+	WLAN_PUB_ACTION_HCCA_TXOP_ADVERT = 22,
+	WLAN_PUB_ACTION_HCCA_TXOP_RESPONSE = 23,
+	WLAN_PUB_ACTION_PUBLIC_KEY = 24,
+	WLAN_PUB_ACTION_CHANNEL_AVAIL_QUERY = 25,
+	WLAN_PUB_ACTION_CHANNEL_SCHEDULE_MGMT = 26,
+	WLAN_PUB_ACTION_CONTACT_VERI_SIGNAL = 27,
+	WLAN_PUB_ACTION_GDD_ENABLEMENT_REQ = 28,
+	WLAN_PUB_ACTION_GDD_ENABLEMENT_RESP = 29,
+	WLAN_PUB_ACTION_NETWORK_CHANNEL_CONTROL = 30,
+	WLAN_PUB_ACTION_WHITE_SPACE_MAP_ANN = 31,
+	WLAN_PUB_ACTION_FTM_REQUEST = 32,
+	WLAN_PUB_ACTION_FTM = 33,
+	WLAN_PUB_ACTION_FILS_DISCOVERY = 34,
 };
 
 /* TDLS action codes */
-- 
cgit v1.2.3


From c0edd7c9acd0eaee149ab6cb4441cc71a1af87f0 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 24 Jun 2017 11:45:06 -0700
Subject: nanosleep: Use get_timespec64() and put_timespec64()

Usage of these apis and their compat versions makes
the syscalls: clock_nanosleep and nanosleep and
their compat implementations simpler.

This is a preparatory patch to isolate data conversions to
struct timespec64 at userspace boundaries. This helps contain
the changes needed to transition to new y2038 safe types.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/hrtimer.h        |  2 +-
 kernel/time/alarmtimer.c       |  4 ++--
 kernel/time/hrtimer.c          | 30 +++++++++++++-----------------
 kernel/time/posix-cpu-timers.c |  8 ++------
 kernel/time/posix-timers.c     | 20 ++++++++------------
 5 files changed, 26 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 255edd5e7a74..012c37fdb688 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -453,7 +453,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer,
 
 /* Precise sleep: */
 
-extern int nanosleep_copyout(struct restart_block *, struct timespec *);
+extern int nanosleep_copyout(struct restart_block *, struct timespec64 *);
 extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index c991cf212c6d..0b8ff7d257ea 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -712,14 +712,14 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 		alarmtimer_freezerset(absexp, type);
 	restart = &current->restart_block;
 	if (restart->nanosleep.type != TT_NONE) {
-		struct timespec rmt;
+		struct timespec64 rmt;
 		ktime_t rem;
 
 		rem = ktime_sub(absexp, alarm_bases[type].gettime());
 
 		if (rem <= 0)
 			return 0;
-		rmt = ktime_to_timespec(rem);
+		rmt = ktime_to_timespec64(rem);
 
 		return nanosleep_copyout(restart, &rmt);
 	}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 81da124f1115..88f75f92ef36 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1440,17 +1440,17 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
-int nanosleep_copyout(struct restart_block *restart, struct timespec *ts)
+int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
 {
 	switch(restart->nanosleep.type) {
 #ifdef CONFIG_COMPAT
 	case TT_COMPAT:
-		if (compat_put_timespec(ts, restart->nanosleep.compat_rmtp))
+		if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
 			return -EFAULT;
 		break;
 #endif
 	case TT_NATIVE:
-		if (copy_to_user(restart->nanosleep.rmtp, ts, sizeof(struct timespec)))
+		if (put_timespec64(ts, restart->nanosleep.rmtp))
 			return -EFAULT;
 		break;
 	default:
@@ -1485,11 +1485,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 	restart = &current->restart_block;
 	if (restart->nanosleep.type != TT_NONE) {
 		ktime_t rem = hrtimer_expires_remaining(&t->timer);
-		struct timespec rmt;
+		struct timespec64 rmt;
 
 		if (rem <= 0)
 			return 0;
-		rmt = ktime_to_timespec(rem);
+		rmt = ktime_to_timespec64(rem);
 
 		return nanosleep_copyout(restart, &rmt);
 	}
@@ -1546,19 +1546,17 @@ out:
 SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 		struct timespec __user *, rmtp)
 {
-	struct timespec64 tu64;
-	struct timespec tu;
+	struct timespec64 tu;
 
-	if (copy_from_user(&tu, rqtp, sizeof(tu)))
+	if (get_timespec64(&tu, rqtp))
 		return -EFAULT;
 
-	tu64 = timespec_to_timespec64(tu);
-	if (!timespec64_valid(&tu64))
+	if (!timespec64_valid(&tu))
 		return -EINVAL;
 
 	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
 	current->restart_block.nanosleep.rmtp = rmtp;
-	return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 
 #ifdef CONFIG_COMPAT
@@ -1566,19 +1564,17 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
 		       struct compat_timespec __user *, rmtp)
 {
-	struct timespec64 tu64;
-	struct timespec tu;
+	struct timespec64 tu;
 
-	if (compat_get_timespec(&tu, rqtp))
+	if (compat_get_timespec64(&tu, rqtp))
 		return -EFAULT;
 
-	tu64 = timespec_to_timespec64(tu);
-	if (!timespec64_valid(&tu64))
+	if (!timespec64_valid(&tu))
 		return -EINVAL;
 
 	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
 	current->restart_block.nanosleep.compat_rmtp = rmtp;
-	return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 #endif
 
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 9df618ee64cf..7323da5950cc 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1314,12 +1314,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		 */
 		restart = &current->restart_block;
 		restart->nanosleep.expires = expires;
-		if (restart->nanosleep.type != TT_NONE) {
-			struct timespec ts;
-
-			ts = timespec64_to_timespec(it.it_value);
-			error = nanosleep_copyout(restart, &ts);
-		}
+		if (restart->nanosleep.type != TT_NONE)
+			error = nanosleep_copyout(restart, &it.it_value);
 	}
 
 	return error;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 39322ae5dd87..4b0fc3b0a1c4 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1213,26 +1213,24 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 		struct timespec __user *, rmtp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
-	struct timespec64 t64;
-	struct timespec t;
+	struct timespec64 t;
 
 	if (!kc)
 		return -EINVAL;
 	if (!kc->nsleep)
 		return -ENANOSLEEP_NOTSUP;
 
-	if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+	if (get_timespec64(&t, rqtp))
 		return -EFAULT;
 
-	t64 = timespec_to_timespec64(t);
-	if (!timespec64_valid(&t64))
+	if (!timespec64_valid(&t))
 		return -EINVAL;
 	if (flags & TIMER_ABSTIME)
 		rmtp = NULL;
 	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
 	current->restart_block.nanosleep.rmtp = rmtp;
 
-	return kc->nsleep(which_clock, flags, &t64);
+	return kc->nsleep(which_clock, flags, &t);
 }
 
 #ifdef CONFIG_COMPAT
@@ -1241,26 +1239,24 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 		       struct compat_timespec __user *, rmtp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
-	struct timespec64 t64;
-	struct timespec t;
+	struct timespec64 t;
 
 	if (!kc)
 		return -EINVAL;
 	if (!kc->nsleep)
 		return -ENANOSLEEP_NOTSUP;
 
-	if (compat_get_timespec(&t, rqtp))
+	if (compat_get_timespec64(&t, rqtp))
 		return -EFAULT;
 
-	t64 = timespec_to_timespec64(t);
-	if (!timespec64_valid(&t64))
+	if (!timespec64_valid(&t))
 		return -EINVAL;
 	if (flags & TIMER_ABSTIME)
 		rmtp = NULL;
 	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
 	current->restart_block.nanosleep.compat_rmtp = rmtp;
 
-	return kc->nsleep(which_clock, flags, &t64);
+	return kc->nsleep(which_clock, flags, &t);
 }
 #endif
 
-- 
cgit v1.2.3


From 3859a271a003aba01e45b85c9d8b355eb7bf25f9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 28 Oct 2016 01:22:25 -0700
Subject: randstruct: Mark various structs for randomization

This marks many critical kernel structures for randomization. These are
structures that have been targeted in the past in security exploits, or
contain functions pointers, pointers to function pointer tables, lists,
workqueues, ref-counters, credentials, permissions, or are otherwise
sensitive. This initial list was extracted from Brad Spengler/PaX Team's
code in the last public patch of grsecurity/PaX based on my understanding
of the code. Changes or omissions from the original code are mine and
don't reflect the original grsecurity/PaX code.

Left out of this list is task_struct, which requires special handling
and will be covered in a subsequent patch.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/include/asm/processor.h |  2 +-
 fs/mount.h                       |  4 ++--
 fs/namei.c                       |  2 +-
 fs/proc/internal.h               |  6 +++---
 include/linux/binfmts.h          |  4 ++--
 include/linux/cdev.h             |  2 +-
 include/linux/cred.h             |  4 ++--
 include/linux/dcache.h           |  2 +-
 include/linux/fs.h               | 17 +++++++++--------
 include/linux/fs_struct.h        |  2 +-
 include/linux/ipc.h              |  2 +-
 include/linux/ipc_namespace.h    |  2 +-
 include/linux/key-type.h         |  4 ++--
 include/linux/kmod.h             |  2 +-
 include/linux/kobject.h          |  2 +-
 include/linux/lsm_hooks.h        |  4 ++--
 include/linux/mm_types.h         |  4 ++--
 include/linux/module.h           |  4 ++--
 include/linux/mount.h            |  2 +-
 include/linux/msg.h              |  2 +-
 include/linux/path.h             |  2 +-
 include/linux/pid_namespace.h    |  2 +-
 include/linux/proc_ns.h          |  2 +-
 include/linux/sched.h            |  2 +-
 include/linux/sched/signal.h     |  2 +-
 include/linux/sem.h              |  2 +-
 include/linux/shm.h              |  2 +-
 include/linux/sysctl.h           |  2 +-
 include/linux/tty.h              |  2 +-
 include/linux/tty_driver.h       |  4 ++--
 include/linux/user_namespace.h   |  2 +-
 include/linux/utsname.h          |  2 +-
 include/net/af_unix.h            |  2 +-
 include/net/neighbour.h          |  2 +-
 include/net/net_namespace.h      |  2 +-
 include/net/sock.h               |  2 +-
 kernel/futex.c                   |  4 ++--
 security/keys/internal.h         |  2 +-
 38 files changed, 57 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..e2335edb9fc5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -129,7 +129,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
-};
+} __randomize_layout;
 
 struct cpuid_regs {
 	u32 eax, ebx, ecx, edx;
diff --git a/fs/mount.h b/fs/mount.h
index bf1fda6eed8f..e406b286fba1 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -16,7 +16,7 @@ struct mnt_namespace {
 	u64 event;
 	unsigned int		mounts; /* # of mounts in the namespace */
 	unsigned int		pending_mounts;
-};
+} __randomize_layout;
 
 struct mnt_pcp {
 	int mnt_count;
@@ -68,7 +68,7 @@ struct mount {
 	struct hlist_head mnt_pins;
 	struct fs_pin mnt_umount;
 	struct dentry *mnt_ex_mountpoint;
-};
+} __randomize_layout;
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
 
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112e..1764620ac383 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -524,7 +524,7 @@ struct nameidata {
 	struct inode	*link_inode;
 	unsigned	root_seq;
 	int		dfd;
-};
+} __randomize_layout;
 
 static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
 {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c5ae09b6c726..07b16318223f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -51,7 +51,7 @@ struct proc_dir_entry {
 	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
 	u8 namelen;
 	char name[];
-};
+} __randomize_layout;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
@@ -70,7 +70,7 @@ struct proc_inode {
 	struct list_head sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
-};
+} __randomize_layout;
 
 /*
  * General functions
@@ -279,7 +279,7 @@ struct proc_maps_private {
 #ifdef CONFIG_NUMA
 	struct mempolicy *task_mempolicy;
 #endif
-};
+} __randomize_layout;
 
 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 05488da3aee9..3ae9013eeaaa 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -46,7 +46,7 @@ struct linux_binprm {
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
-};
+} __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT)
@@ -81,7 +81,7 @@ struct linux_binfmt {
 	int (*load_shlib)(struct file *);
 	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
-};
+} __randomize_layout;
 
 extern void __register_binfmt(struct linux_binfmt *fmt, int insert);
 
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index 408bc09ce497..cb28eb21e3ca 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -17,7 +17,7 @@ struct cdev {
 	struct list_head list;
 	dev_t dev;
 	unsigned int count;
-};
+} __randomize_layout;
 
 void cdev_init(struct cdev *, const struct file_operations *);
 
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b03e7d049a64..82c8a9e1aabb 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -31,7 +31,7 @@ struct group_info {
 	atomic_t	usage;
 	int		ngroups;
 	kgid_t		gid[0];
-};
+} __randomize_layout;
 
 /**
  * get_group_info - Get a reference to a group info structure
@@ -145,7 +145,7 @@ struct cred {
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	struct rcu_head	rcu;		/* RCU deletion hook */
-};
+} __randomize_layout;
 
 extern void __put_cred(struct cred *);
 extern void exit_creds(struct task_struct *);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d2e38dc6172c..7eb262e13d3c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -113,7 +113,7 @@ struct dentry {
 		struct hlist_bl_node d_in_lookup_hash;	/* only for in-lookup ones */
 	 	struct rcu_head d_rcu;
 	} d_u;
-};
+} __randomize_layout;
 
 /*
  * dentry->d_lock spinlock nesting subclasses:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..8f28143486c4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -275,7 +275,7 @@ struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
-};
+} __randomize_layout;
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
 {
@@ -392,7 +392,7 @@ struct address_space {
 	gfp_t			gfp_mask;	/* implicit gfp mask for allocations */
 	struct list_head	private_list;	/* ditto */
 	void			*private_data;	/* ditto */
-} __attribute__((aligned(sizeof(long))));
+} __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
 	 * must be enforced here for CRIS, to let the least significant bit
@@ -435,7 +435,7 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-};
+} __randomize_layout;
 
 /*
  * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
@@ -653,7 +653,7 @@ struct inode {
 #endif
 
 	void			*i_private; /* fs or device private pointer */
-};
+} __randomize_layout;
 
 static inline unsigned int i_blocksize(const struct inode *node)
 {
@@ -868,7 +868,8 @@ struct file {
 	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
-} __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
+} __randomize_layout
+  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
 struct file_handle {
 	__u32 handle_bytes;
@@ -1005,7 +1006,7 @@ struct file_lock {
 			int state;		/* state of grant or error if -ve */
 		} afs;
 	} fl_u;
-};
+} __randomize_layout;
 
 struct file_lock_context {
 	spinlock_t		flc_lock;
@@ -1404,7 +1405,7 @@ struct super_block {
 
 	spinlock_t		s_inode_wblist_lock;
 	struct list_head	s_inodes_wb;	/* writeback inodes */
-};
+} __randomize_layout;
 
 /* Helper functions so that in most cases filesystems will
  * not need to deal directly with kuid_t and kgid_t and can
@@ -1690,7 +1691,7 @@ struct file_operations {
 			u64);
 	ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
 			u64);
-};
+} __randomize_layout;
 
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 0efc3e62843a..7a026240cbb1 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -12,7 +12,7 @@ struct fs_struct {
 	int umask;
 	int in_exec;
 	struct path root, pwd;
-};
+} __randomize_layout;
 
 extern struct kmem_cache *fs_cachep;
 
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 71fd92d81b26..ea0eb0b5f98c 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -20,6 +20,6 @@ struct kern_ipc_perm {
 	umode_t		mode;
 	unsigned long	seq;
 	void		*security;
-} ____cacheline_aligned_in_smp;
+} ____cacheline_aligned_in_smp __randomize_layout;
 
 #endif /* _LINUX_IPC_H */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 848e5796400e..65327ee0936b 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -61,7 +61,7 @@ struct ipc_namespace {
 	struct ucounts *ucounts;
 
 	struct ns_common ns;
-};
+} __randomize_layout;
 
 extern struct ipc_namespace init_ipc_ns;
 extern spinlock_t mq_lock;
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 8496cf64575c..9520fc3c3b9a 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -45,7 +45,7 @@ struct key_preparsed_payload {
 	size_t		datalen;	/* Raw datalen */
 	size_t		quotalen;	/* Quota length for proposed payload */
 	time_t		expiry;		/* Expiry time of key */
-};
+} __randomize_layout;
 
 typedef int (*request_key_actor_t)(struct key_construction *key,
 				   const char *op, void *aux);
@@ -158,7 +158,7 @@ struct key_type {
 	/* internal fields */
 	struct list_head	link;		/* link in types list */
 	struct lock_class_key	lock_class;	/* key->sem lock class */
-};
+} __randomize_layout;
 
 extern struct key_type key_type_keyring;
 
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index c4e441e00db5..655082c88fd9 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -64,7 +64,7 @@ struct subprocess_info {
 	int (*init)(struct subprocess_info *info, struct cred *new);
 	void (*cleanup)(struct subprocess_info *info);
 	void *data;
-};
+} __randomize_layout;
 
 extern int
 call_usermodehelper(const char *path, char **argv, char **envp, int wait);
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index ca85cb80e99a..084513350317 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -172,7 +172,7 @@ struct kset {
 	spinlock_t list_lock;
 	struct kobject kobj;
 	const struct kset_uevent_ops *uevent_ops;
-};
+} __randomize_layout;
 
 extern void kset_init(struct kset *kset);
 extern int __must_check kset_register(struct kset *kset);
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 080f34e66017..565163fc9ad4 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1876,7 +1876,7 @@ struct security_hook_heads {
 	struct list_head audit_rule_match;
 	struct list_head audit_rule_free;
 #endif /* CONFIG_AUDIT */
-};
+} __randomize_layout;
 
 /*
  * Security module hook list structure.
@@ -1887,7 +1887,7 @@ struct security_hook_list {
 	struct list_head		*head;
 	union security_list_options	hook;
 	char				*lsm;
-};
+} __randomize_layout;
 
 /*
  * Initializing a security_hook_list structure takes
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 45cdb27791a3..ff151814a02d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,7 +342,7 @@ struct vm_area_struct {
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
-};
+} __randomize_layout;
 
 struct core_thread {
 	struct task_struct *task;
@@ -500,7 +500,7 @@ struct mm_struct {
 	atomic_long_t hugetlb_usage;
 #endif
 	struct work_struct async_put_work;
-};
+} __randomize_layout;
 
 extern struct mm_struct init_mm;
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 21f56393602f..d93111d7def6 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -45,7 +45,7 @@ struct module_kobject {
 	struct kobject *drivers_dir;
 	struct module_param_attrs *mp;
 	struct completion *kobj_completion;
-};
+} __randomize_layout;
 
 struct module_attribute {
 	struct attribute attr;
@@ -475,7 +475,7 @@ struct module {
 	ctor_fn_t *ctors;
 	unsigned int num_ctors;
 #endif
-} ____cacheline_aligned;
+} ____cacheline_aligned __randomize_layout;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 8e0352af06b7..1ce85e6fd95f 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -67,7 +67,7 @@ struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
 	int mnt_flags;
-};
+} __randomize_layout;
 
 struct file; /* forward dec */
 struct path;
diff --git a/include/linux/msg.h b/include/linux/msg.h
index f3f302f9c197..a001305f5a79 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -29,7 +29,7 @@ struct msg_queue {
 	struct list_head q_messages;
 	struct list_head q_receivers;
 	struct list_head q_senders;
-};
+} __randomize_layout;
 
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
diff --git a/include/linux/path.h b/include/linux/path.h
index d1372186f431..cde895cc4af4 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -7,7 +7,7 @@ struct vfsmount;
 struct path {
 	struct vfsmount *mnt;
 	struct dentry *dentry;
-};
+} __randomize_layout;
 
 extern void path_get(const struct path *);
 extern void path_put(const struct path *);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c2a989dee876..b09136f88cf4 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -52,7 +52,7 @@ struct pid_namespace {
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
 	struct ns_common ns;
-};
+} __randomize_layout;
 
 extern struct pid_namespace init_pid_ns;
 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 58ab28d81fc2..06844b54dfc1 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -21,7 +21,7 @@ struct proc_ns_operations {
 	int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
 	struct user_namespace *(*owner)(struct ns_common *ns);
 	struct ns_common *(*get_parent)(struct ns_common *ns);
-};
+} __randomize_layout;
 
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b69fc650201..f833254fce00 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -408,7 +408,7 @@ struct sched_rt_entity {
 	/* rq "owned" by this entity/group: */
 	struct rt_rq			*my_q;
 #endif
-};
+} __randomize_layout;
 
 struct sched_dl_entity {
 	struct rb_node			rb_node;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c06d63b3a583..2a0dd40b15db 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -222,7 +222,7 @@ struct signal_struct {
 	struct mutex cred_guard_mutex;	/* guard against foreign influences on
 					 * credential calculations
 					 * (notably. ptrace) */
-};
+} __randomize_layout;
 
 /*
  * Bits in flags field of signal_struct.
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9edec926e9d9..23bcbdfad4a6 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
 	unsigned int		use_global_lock;/* >0: global lock required */
-};
+} __randomize_layout;
 
 #ifdef CONFIG_SYSVIPC
 
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 04e881829625..0fb7061ec54c 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -22,7 +22,7 @@ struct shmid_kernel /* private to the kernel */
 	/* The task created the shm object.  NULL if the task is dead. */
 	struct task_struct	*shm_creator;
 	struct list_head	shm_clist;	/* list by creator */
-};
+} __randomize_layout;
 
 /* shm_mode upper byte flags */
 #define	SHM_DEST	01000	/* segment will be destroyed on last detach */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def0..9ddeef2c03e2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -117,7 +117,7 @@ struct ctl_table
 	struct ctl_table_poll *poll;
 	void *extra1;
 	void *extra2;
-};
+} __randomize_layout;
 
 struct ctl_node {
 	struct rb_node node;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d07cd2105a6c..73f8d0977bb0 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -333,7 +333,7 @@ struct tty_struct {
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
 	struct tty_port *port;
-};
+} __randomize_layout;
 
 /* Each of a tty's open files has private_data pointing to tty_file_private */
 struct tty_file_private {
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index b742b5e47cc2..00b2213f6a35 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -291,7 +291,7 @@ struct tty_operations {
 	void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
 #endif
 	const struct file_operations *proc_fops;
-};
+} __randomize_layout;
 
 struct tty_driver {
 	int	magic;		/* magic number for this structure */
@@ -325,7 +325,7 @@ struct tty_driver {
 
 	const struct tty_operations *ops;
 	struct list_head tty_drivers;
-};
+} __randomize_layout;
 
 extern struct list_head tty_drivers;
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 32354b4b4b2b..b3575ce29148 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -66,7 +66,7 @@ struct user_namespace {
 #endif
 	struct ucounts		*ucounts;
 	int ucount_max[UCOUNT_COUNTS];
-};
+} __randomize_layout;
 
 struct ucounts {
 	struct hlist_node node;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 60f0bb83b313..da826ed059cf 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -26,7 +26,7 @@ struct uts_namespace {
 	struct user_namespace *user_ns;
 	struct ucounts *ucounts;
 	struct ns_common ns;
-};
+} __randomize_layout;
 extern struct uts_namespace init_uts_ns;
 
 #ifdef CONFIG_UTS_NS
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60eccb59a6..64e2a1e24a2c 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -36,7 +36,7 @@ struct unix_skb_parms {
 	u32			secid;		/* Security ID		*/
 #endif
 	u32			consumed;
-};
+} __randomize_layout;
 
 #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
 
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e4dd3a214034..a62959d2b3f7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -155,7 +155,7 @@ struct neighbour {
 	struct rcu_head		rcu;
 	struct net_device	*dev;
 	u8			primary_key[0];
-};
+} __randomize_layout;
 
 struct neigh_ops {
 	int			family;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fe80bb48ab1f..a224196d16ac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -147,7 +147,7 @@ struct net {
 #endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
-};
+} __randomize_layout;
 
 #include <linux/seq_file_net.h>
 
diff --git a/include/net/sock.h b/include/net/sock.h
index f33e3d134e0b..d349297db9e9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1113,7 +1113,7 @@ struct proto {
 	atomic_t		socks;
 #endif
 	int			(*diag_destroy)(struct sock *sk, int err);
-};
+} __randomize_layout;
 
 int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a6cf6b..5616511abf39 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -212,7 +212,7 @@ struct futex_pi_state {
 	atomic_t refcount;
 
 	union futex_key key;
-};
+} __randomize_layout;
 
 /**
  * struct futex_q - The hashed futex queue entry, one per waiting task
@@ -246,7 +246,7 @@ struct futex_q {
 	struct rt_mutex_waiter *rt_waiter;
 	union futex_key *requeue_pi_key;
 	u32 bitset;
-};
+} __randomize_layout;
 
 static const struct futex_q futex_q_init = {
 	/* list gets initialized in queue_me()*/
diff --git a/security/keys/internal.h b/security/keys/internal.h
index c0f8682eba69..6494954e9980 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -197,7 +197,7 @@ struct request_key_auth {
 	void			*callout_info;
 	size_t			callout_len;
 	pid_t			pid;
-};
+} __randomize_layout;
 
 extern struct key_type key_type_request_key_auth;
 extern struct key *request_key_auth_new(struct key *target,
-- 
cgit v1.2.3


From 29e48ce87f1eaaa4b1fe3d9af90c586ac2d1fb74 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 5 Apr 2017 22:43:33 -0700
Subject: task_struct: Allow randomized layout

This marks most of the layout of task_struct as randomizable, but leaves
thread_info and scheduler state untouched at the start, and thread_struct
untouched at the end.

Other parts of the kernel use unnamed structures, but the 0-day builder
using gcc-4.4 blows up on static initializers. Officially, it's documented
as only working on gcc 4.6 and later, which further confuses me:
	https://gcc.gnu.org/wiki/C11Status
The structure layout randomization already requires gcc 4.7, but instead
of depending on the plugin being enabled, just check the gcc versions
for wider build testing. At Linus's suggestion, the marking is hidden
in a macro to reduce how ugly it looks. Additionally, indenting is left
unchanged since it would make things harder to read.

Randomization of task_struct is modified from Brad Spengler/PaX Team's
code in the last public patch of grsecurity/PaX based on my understanding
of the code. Changes or omissions from the original code are mine and
don't reflect the original grsecurity/PaX code.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler-gcc.h | 13 ++++++++++++-
 include/linux/compiler.h     |  5 +++++
 include/linux/sched.h        | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7deaae3dc87d..c4a66c036692 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -231,6 +231,7 @@
 #endif /* GCC_VERSION >= 40500 */
 
 #if GCC_VERSION >= 40600
+
 /*
  * When used with Link Time Optimization, gcc can optimize away C functions or
  * variables which are referenced only from assembly code.  __visible tells the
@@ -238,7 +239,17 @@
  * this.
  */
 #define __visible	__attribute__((externally_visible))
-#endif
+
+/*
+ * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only
+ * possible since GCC 4.6. To provide as much build testing coverage
+ * as possible, this is used for all GCC 4.6+ builds, and not just on
+ * RANDSTRUCT_PLUGIN builds.
+ */
+#define randomized_struct_fields_start	struct {
+#define randomized_struct_fields_end	} __randomize_layout;
+
+#endif /* GCC_VERSION >= 40600 */
 
 
 #if GCC_VERSION >= 40900 && !defined(__CHECKER__)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 55ee9ee814f8..0b4ac3e8c63e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -456,6 +456,11 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __no_randomize_layout
 #endif
 
+#ifndef randomized_struct_fields_start
+# define randomized_struct_fields_start
+# define randomized_struct_fields_end
+#endif
+
 /*
  * Tell gcc if a function is cold. The compiler will assume any path
  * directly leading to the call is unlikely.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f833254fce00..e2ad3531e7fe 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -490,6 +490,13 @@ struct task_struct {
 #endif
 	/* -1 unrunnable, 0 runnable, >0 stopped: */
 	volatile long			state;
+
+	/*
+	 * This begins the randomizable portion of task_struct. Only
+	 * scheduling-critical items should be added above here.
+	 */
+	randomized_struct_fields_start
+
 	void				*stack;
 	atomic_t			usage;
 	/* Per task flags (PF_*), defined further below: */
@@ -1051,6 +1058,13 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+
+	/*
+	 * New fields for task_struct should be added above here, so that
+	 * they are included in the randomized portion of task_struct.
+	 */
+	randomized_struct_fields_end
+
 	/* CPU-specific state of this task: */
 	struct thread_struct		thread;
 
-- 
cgit v1.2.3


From dbd1877754338dc98818d200cd25cf398d8b5d9e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Jun 2017 21:25:48 -0700
Subject: hashtable: remove repeated phrase from a comment

"in a rcu enabled hashtable" is repeated twice in a comment.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hashtable.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 661e5c2a8e2a..082dc1bd0801 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -167,7 +167,6 @@ static inline void hash_del_rcu(struct hlist_node *node)
 /**
  * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
  * same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
-- 
cgit v1.2.3


From 53869cebce4bc53f71a080e7830600d4ae1ab712 Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:07:57 +0300
Subject: net: convert nf_bridge_info.use from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h               | 6 +++---
 include/net/netfilter/br_netfilter.h | 2 +-
 net/bridge/br_netfilter_hooks.c      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a17e235639ae..005793e01bd2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -252,7 +252,7 @@ struct nf_conntrack {
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 struct nf_bridge_info {
-	atomic_t		use;
+	refcount_t		use;
 	enum {
 		BRNF_PROTO_UNCHANGED,
 		BRNF_PROTO_8021Q,
@@ -3589,13 +3589,13 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
-	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
+	if (nf_bridge && refcount_dec_and_test(&nf_bridge->use))
 		kfree(nf_bridge);
 }
 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
 {
 	if (nf_bridge)
-		atomic_inc(&nf_bridge->use);
+		refcount_inc(&nf_bridge->use);
 }
 #endif /* CONFIG_BRIDGE_NETFILTER */
 static inline void nf_reset(struct sk_buff *skb)
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 0b0c35c37125..925524ede6c8 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 	skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
 
 	if (likely(skb->nf_bridge))
-		atomic_set(&(skb->nf_bridge->use), 1);
+		refcount_set(&(skb->nf_bridge->use), 1);
 
 	return skb->nf_bridge;
 }
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 067cf0313449..2261e5194c82 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -149,12 +149,12 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 
-	if (atomic_read(&nf_bridge->use) > 1) {
+	if (refcount_read(&nf_bridge->use) > 1) {
 		struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
 
 		if (tmp) {
 			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
-			atomic_set(&tmp->use, 1);
+			refcount_set(&tmp->use, 1);
 		}
 		nf_bridge_put(nf_bridge);
 		nf_bridge = tmp;
-- 
cgit v1.2.3


From 633547973ffc32fd2c815639d4675e1531f0896f Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:07:58 +0300
Subject: net: convert sk_buff.users from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/nes/nes_cm.c |  4 ++--
 drivers/isdn/mISDN/socket.c        |  2 +-
 drivers/net/rionet.c               |  2 +-
 drivers/s390/net/ctcm_main.c       | 26 +++++++++++++-------------
 drivers/s390/net/netiucv.c         | 10 +++++-----
 drivers/s390/net/qeth_core_main.c  |  4 ++--
 include/linux/skbuff.h             | 10 +++++-----
 net/core/datagram.c                |  4 ++--
 net/core/dev.c                     | 10 +++++-----
 net/core/netpoll.c                 |  4 ++--
 net/core/pktgen.c                  | 16 ++++++++--------
 net/core/rtnetlink.c               |  2 +-
 net/core/skbuff.c                  |  8 ++++----
 net/dccp/ipv6.c                    |  2 +-
 net/ipv6/syncookies.c              |  2 +-
 net/ipv6/tcp_ipv6.c                |  2 +-
 net/key/af_key.c                   |  4 ++--
 net/netlink/af_netlink.c           |  6 +++---
 net/rxrpc/skbuff.c                 | 12 ++++++------
 net/sctp/outqueue.c                |  2 +-
 net/sctp/socket.c                  |  2 +-
 21 files changed, 67 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 30b256a2c54e..de4025deaa4a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -742,7 +742,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
 
 	if (type == NES_TIMER_TYPE_SEND) {
 		new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
-		atomic_inc(&new_send->skb->users);
+		refcount_inc(&new_send->skb->users);
 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
 		cm_node->send_entry = new_send;
 		add_ref_cm_node(cm_node);
@@ -924,7 +924,7 @@ static void nes_cm_timer_tick(unsigned long pass)
 						  flags);
 				break;
 			}
-			atomic_inc(&send_entry->skb->users);
+			refcount_inc(&send_entry->skb->users);
 			cm_packets_retrans++;
 			nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
 				  "for node %p, jiffies = %lu, time to send = "
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 99e5f9751e8b..c5603d1a07d6 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -155,7 +155,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	copied = skb->len + MISDN_HEADER_LEN;
 	if (len < copied) {
 		if (flags & MSG_PEEK)
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 		else
 			skb_queue_head(&sk->sk_receive_queue, skb);
 		return -ENOSPC;
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 300bb1479b3a..e9f101c9bae2 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -201,7 +201,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 				rionet_queue_tx_msg(skb, ndev,
 					nets[rnet->mport->id].active[i]);
 				if (count)
-					atomic_inc(&skb->users);
+					refcount_inc(&skb->users);
 				count++;
 			}
 	} else if (RIONET_MAC_MATCH(eth->h_dest)) {
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 99121352c57b..e8782a8619f7 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -483,7 +483,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			spin_unlock_irqrestore(&ch->collect_lock, saveflags);
 			return -EBUSY;
 		} else {
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 			header.length = l;
 			header.type = be16_to_cpu(skb->protocol);
 			header.unused = 0;
@@ -500,7 +500,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 	 * Protect skb against beeing free'd by upper
 	 * layers.
 	 */
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	ch->prof.txlen += skb->len;
 	header.length = skb->len + LL_HEADER_LENGTH;
 	header.type = be16_to_cpu(skb->protocol);
@@ -517,14 +517,14 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 	if (hi) {
 		nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
 		if (!nskb) {
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			skb_pull(skb, LL_HEADER_LENGTH + 2);
 			ctcm_clear_busy(ch->netdev);
 			return -ENOMEM;
 		} else {
 			skb_put_data(nskb, skb->data, skb->len);
-			atomic_inc(&nskb->users);
-			atomic_dec(&skb->users);
+			refcount_inc(&nskb->users);
+			refcount_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			skb = nskb;
 		}
@@ -542,7 +542,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			 * Remove our header. It gets added
 			 * again on retransmit.
 			 */
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			skb_pull(skb, LL_HEADER_LENGTH + 2);
 			ctcm_clear_busy(ch->netdev);
 			return -ENOMEM;
@@ -553,7 +553,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 		ch->ccw[1].count = skb->len;
 		skb_copy_from_linear_data(skb,
 				skb_put(ch->trans_skb, skb->len), skb->len);
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 		ccw_idx = 0;
 	} else {
@@ -679,7 +679,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 
 	if ((fsm_getstate(ch->fsm) != CTC_STATE_TXIDLE) || grp->in_sweep) {
 		spin_lock_irqsave(&ch->collect_lock, saveflags);
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		p_header = kmalloc(PDU_HEADER_LENGTH, gfp_type());
 
 		if (!p_header) {
@@ -716,7 +716,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 	 * Protect skb against beeing free'd by upper
 	 * layers.
 	 */
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 
 	/*
 	 * IDAL support in CTCM is broken, so we have to
@@ -729,8 +729,8 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			goto nomem_exit;
 		} else {
 			skb_put_data(nskb, skb->data, skb->len);
-			atomic_inc(&nskb->users);
-			atomic_dec(&skb->users);
+			refcount_inc(&nskb->users);
+			refcount_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			skb = nskb;
 		}
@@ -810,7 +810,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 		ch->trans_skb->len = 0;
 		ch->ccw[1].count = skb->len;
 		skb_put_data(ch->trans_skb, skb->data, skb->len);
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 		ccw_idx = 0;
 		CTCM_PR_DBGDATA("%s(%s): trans_skb len: %04x\n"
@@ -855,7 +855,7 @@ nomem_exit:
 			"%s(%s): MEMORY allocation ERROR\n",
 			CTCM_FUNTAIL, ch->id);
 	rc = -ENOMEM;
-	atomic_dec(&skb->users);
+	refcount_dec(&skb->users);
 	dev_kfree_skb_any(skb);
 	fsm_event(priv->mpcg->fsm, MPCG_EVENT_INOP, dev);
 done:
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 7db427c0a6a4..1579695f4e64 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -743,7 +743,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 	conn->prof.tx_pending--;
 	if (single_flag) {
 		if ((skb = skb_dequeue(&conn->commit_queue))) {
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			if (privptr) {
 				privptr->stats.tx_packets++;
 				privptr->stats.tx_bytes +=
@@ -766,7 +766,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 		txbytes += skb->len;
 		txpackets++;
 		stat_maxcq++;
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 	}
 	if (conn->collect_len > conn->prof.maxmulti)
@@ -958,7 +958,7 @@ static void netiucv_purge_skb_queue(struct sk_buff_head *q)
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(q))) {
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 	}
 }
@@ -1176,7 +1176,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 			IUCV_DBF_TEXT(data, 2,
 				      "EBUSY from netiucv_transmit_skb\n");
 		} else {
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 			skb_queue_tail(&conn->collect_queue, skb);
 			conn->collect_len += l;
 			rc = 0;
@@ -1245,7 +1245,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 		} else {
 			if (copied)
 				dev_kfree_skb(skb);
-			atomic_inc(&nskb->users);
+			refcount_inc(&nskb->users);
 			skb_queue_tail(&conn->commit_queue, nskb);
 		}
 	}
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3b657d5b7e49..aec06e10b969 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1242,7 +1242,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
 				iucv->sk_txnotify(skb, TX_NOTIFY_GENERALERROR);
 			}
 		}
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 		skb = skb_dequeue(&buf->skb_list);
 	}
@@ -3975,7 +3975,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
 	int flush_cnt = 0, hdr_len, large_send = 0;
 
 	buffer = buf->buffer;
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	skb_queue_tail(&buf->skb_list, skb);
 
 	/*check first on TSO ....*/
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 005793e01bd2..90cbd86152da 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -761,7 +761,7 @@ struct sk_buff {
 	unsigned char		*head,
 				*data;
 	unsigned int		truesize;
-	atomic_t		users;
+	refcount_t		users;
 };
 
 #ifdef __KERNEL__
@@ -872,9 +872,9 @@ static inline bool skb_unref(struct sk_buff *skb)
 {
 	if (unlikely(!skb))
 		return false;
-	if (likely(atomic_read(&skb->users) == 1))
+	if (likely(refcount_read(&skb->users) == 1))
 		smp_rmb();
-	else if (likely(!atomic_dec_and_test(&skb->users)))
+	else if (likely(!refcount_dec_and_test(&skb->users)))
 		return false;
 
 	return true;
@@ -1283,7 +1283,7 @@ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
  */
 static inline struct sk_buff *skb_get(struct sk_buff *skb)
 {
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	return skb;
 }
 
@@ -1384,7 +1384,7 @@ static inline void __skb_header_release(struct sk_buff *skb)
  */
 static inline int skb_shared(const struct sk_buff *skb)
 {
-	return atomic_read(&skb->users) != 1;
+	return refcount_read(&skb->users) != 1;
 }
 
 /**
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e5311a7c70da..95d43543ac91 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 				}
 			}
 			*peeked = 1;
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 		} else {
 			__skb_unlink(skb, queue);
 			if (destructor)
@@ -358,7 +358,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
 		spin_lock_bh(&sk_queue->lock);
 		if (skb == skb_peek(sk_queue)) {
 			__skb_unlink(skb, sk_queue);
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			if (destructor)
 				destructor(sk, skb);
 			err = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 88927f1a3e4f..b9994898d11b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1862,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb,
 {
 	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 		return -ENOMEM;
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
@@ -2484,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
 	if (unlikely(!skb))
 		return;
 
-	if (likely(atomic_read(&skb->users) == 1)) {
+	if (likely(refcount_read(&skb->users) == 1)) {
 		smp_rmb();
-		atomic_set(&skb->users, 0);
-	} else if (likely(!atomic_dec_and_test(&skb->users))) {
+		refcount_set(&skb->users, 0);
+	} else if (likely(!refcount_dec_and_test(&skb->users))) {
 		return;
 	}
 	get_kfree_skb_cb(skb)->reason = reason;
@@ -3955,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 
 			clist = clist->next;
 
-			WARN_ON(atomic_read(&skb->users));
+			WARN_ON(refcount_read(&skb->users));
 			if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
 				trace_consume_skb(skb);
 			else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 37c1e34ddd85..a835155c85f9 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -277,7 +277,7 @@ static void zap_completion_queue(void)
 			struct sk_buff *skb = clist;
 			clist = clist->next;
 			if (!skb_irq_freeable(skb)) {
-				atomic_inc(&skb->users);
+				refcount_inc(&skb->users);
 				dev_kfree_skb_any(skb); /* put this one back */
 			} else {
 				__kfree_skb(skb);
@@ -309,7 +309,7 @@ repeat:
 		return NULL;
 	}
 
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 	skb_reserve(skb, reserve);
 	return skb;
 }
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2dd42c5b0366..6e1e10ff433a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3363,7 +3363,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 {
 	ktime_t idle_start = ktime_get();
 
-	while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+	while (refcount_read(&(pkt_dev->skb->users)) != 1) {
 		if (signal_pending(current))
 			break;
 
@@ -3420,7 +3420,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
 		skb = pkt_dev->skb;
 		skb->protocol = eth_type_trans(skb, skb->dev);
-		atomic_add(burst, &skb->users);
+		refcount_add(burst, &skb->users);
 		local_bh_disable();
 		do {
 			ret = netif_receive_skb(skb);
@@ -3428,11 +3428,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 				pkt_dev->errors++;
 			pkt_dev->sofar++;
 			pkt_dev->seq_num++;
-			if (atomic_read(&skb->users) != burst) {
+			if (refcount_read(&skb->users) != burst) {
 				/* skb was queued by rps/rfs or taps,
 				 * so cannot reuse this skb
 				 */
-				atomic_sub(burst - 1, &skb->users);
+				WARN_ON(refcount_sub_and_test(burst - 1, &skb->users));
 				/* get out of the loop and wait
 				 * until skb is consumed
 				 */
@@ -3446,7 +3446,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		goto out; /* Skips xmit_mode M_START_XMIT */
 	} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
 		local_bh_disable();
-		atomic_inc(&pkt_dev->skb->users);
+		refcount_inc(&pkt_dev->skb->users);
 
 		ret = dev_queue_xmit(pkt_dev->skb);
 		switch (ret) {
@@ -3487,7 +3487,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		pkt_dev->last_ok = 0;
 		goto unlock;
 	}
-	atomic_add(burst, &pkt_dev->skb->users);
+	refcount_add(burst, &pkt_dev->skb->users);
 
 xmit_more:
 	ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
@@ -3513,11 +3513,11 @@ xmit_more:
 		/* fallthru */
 	case NETDEV_TX_BUSY:
 		/* Retry it next time */
-		atomic_dec(&(pkt_dev->skb->users));
+		refcount_dec(&(pkt_dev->skb->users));
 		pkt_dev->last_ok = 0;
 	}
 	if (unlikely(burst))
-		atomic_sub(burst, &pkt_dev->skb->users);
+		WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users));
 unlock:
 	HARD_TX_UNLOCK(odev, txq);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ed51de525a88..d1ba90980be1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -649,7 +649,7 @@ int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int g
 
 	NETLINK_CB(skb).dst_group = group;
 	if (echo)
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
 	if (echo)
 		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f75897a33fa4..45dc6620dd74 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->head = NULL;
 	skb->truesize = sizeof(struct sk_buff);
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 
 	skb->mac_header = (typeof(skb->mac_header))~0U;
 out:
@@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
 	skb->pfmemalloc = pfmemalloc;
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
@@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = SKB_TRUESIZE(size);
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
@@ -915,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head_frag);
 	C(data);
 	C(truesize);
-	atomic_set(&n->users, 1);
+	refcount_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4fccc0c37fbd..c376af5bfdfb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -353,7 +353,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
 	ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2f7e99af67db..7b75b0620730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -194,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f85cbfc183d6..f1a4881d9835 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -734,7 +734,7 @@ static void tcp_v6_init_req(struct request_sock *req,
 	     np->rxopt.bits.rxinfo ||
 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
 	     np->rxopt.bits.rxohlim || np->repflow)) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 376fdcf7a6b9..287964a570e9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -203,11 +203,11 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
 
 	sock_hold(sk);
 	if (*skb2 == NULL) {
-		if (atomic_read(&skb->users) != 1) {
+		if (refcount_read(&skb->users) != 1) {
 			*skb2 = skb_clone(skb, allocation);
 		} else {
 			*skb2 = skb;
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 		}
 	}
 	if (*skb2 != NULL) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a88745e4b7df..05030ad1a36c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1848,7 +1848,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	if (dst_group) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
 	}
 	err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
@@ -2226,7 +2226,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct netlink_sock *nlk;
 	int ret;
 
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 
 	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
 	if (sk == NULL) {
@@ -2431,7 +2431,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
 		int exclude_portid = 0;
 
 		if (report) {
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 			exclude_portid = portid;
 		}
 
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 67b02c45271b..b8985d01876a 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -27,7 +27,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(select_skb_count(op));
-	trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 }
 
 /*
@@ -38,7 +38,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 	const void *here = __builtin_return_address(0);
 	if (skb) {
 		int n = atomic_read(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 	}
 }
 
@@ -49,7 +49,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(select_skb_count(op));
-	trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 	skb_get(skb);
 }
 
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
 		n = atomic_dec_return(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
 }
@@ -78,7 +78,7 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
 		n = atomic_dec_return(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
 }
@@ -93,7 +93,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
 	while ((skb = skb_dequeue((list))) != NULL) {
 		int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
 		trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
-				atomic_read(&skb->users), n, here);
+				refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 20299df163b9..e8762702a313 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1102,7 +1102,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 				 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
 				 "illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
 				 chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
-				 atomic_read(&chunk->skb->users) : -1);
+				 refcount_read(&chunk->skb->users) : -1);
 
 			/* Add the chunk to the packet.  */
 			status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7b6e20eb9451..b497ee8ae279 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7563,7 +7563,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 		if (flags & MSG_PEEK) {
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb)
-				atomic_inc(&skb->users);
+				refcount_inc(&skb->users);
 		} else {
 			skb = __skb_dequeue(&sk->sk_receive_queue);
 		}
-- 
cgit v1.2.3


From 2638595afccf6554bfe55268ff9b2d3ac3dff2e6 Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:07:59 +0300
Subject: net: convert sk_buff_fclones.fclone_ref from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  4 ++--
 net/core/skbuff.c      | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 90cbd86152da..d0b9f3846eab 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -915,7 +915,7 @@ struct sk_buff_fclones {
 
 	struct sk_buff	skb2;
 
-	atomic_t	fclone_ref;
+	refcount_t	fclone_ref;
 };
 
 /**
@@ -935,7 +935,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
 	fclones = container_of(skb, struct sk_buff_fclones, skb1);
 
 	return skb->fclone == SKB_FCLONE_ORIG &&
-	       atomic_read(&fclones->fclone_ref) > 1 &&
+	       refcount_read(&fclones->fclone_ref) > 1 &&
 	       fclones->skb2.sk == sk;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 45dc6620dd74..659dfc0494c5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(&fclones->fclone_ref, 1);
+		refcount_set(&fclones->fclone_ref, 1);
 
 		fclones->skb2.fclone = SKB_FCLONE_CLONE;
 	}
@@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 		 * This test would have no chance to be true for the clone,
 		 * while here, branch prediction will be good.
 		 */
-		if (atomic_read(&fclones->fclone_ref) == 1)
+		if (refcount_read(&fclones->fclone_ref) == 1)
 			goto fastpath;
 		break;
 
@@ -637,7 +637,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 		break;
 	}
-	if (!atomic_dec_and_test(&fclones->fclone_ref))
+	if (!refcount_dec_and_test(&fclones->fclone_ref))
 		return;
 fastpath:
 	kmem_cache_free(skbuff_fclone_cache, fclones);
@@ -1027,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		return NULL;
 
 	if (skb->fclone == SKB_FCLONE_ORIG &&
-	    atomic_read(&fclones->fclone_ref) == 1) {
+	    refcount_read(&fclones->fclone_ref) == 1) {
 		n = &fclones->skb2;
-		atomic_set(&fclones->fclone_ref, 2);
+		refcount_set(&fclones->fclone_ref, 2);
 	} else {
 		if (skb_pfmemalloc(skb))
 			gfp_mask |= __GFP_MEMALLOC;
-- 
cgit v1.2.3


From 14afee4b6092fde451ee17604e5f5c89da33e71e Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:08:00 +0300
Subject: net: convert sock.sk_wmem_alloc from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/fore200e.c   | 12 +-----------
 drivers/atm/he.c         |  2 +-
 drivers/atm/idt77252.c   |  4 ++--
 include/linux/atmdev.h   |  2 +-
 include/net/sock.h       |  8 ++++----
 net/atm/br2684.c         |  2 +-
 net/atm/clip.c           |  2 +-
 net/atm/common.c         | 10 +++++-----
 net/atm/lec.c            |  4 ++--
 net/atm/mpc.c            |  4 ++--
 net/atm/pppoatm.c        |  2 +-
 net/atm/raw.c            |  2 +-
 net/atm/signaling.c      |  2 +-
 net/caif/caif_socket.c   |  2 +-
 net/core/datagram.c      |  2 +-
 net/core/skbuff.c        |  2 +-
 net/core/sock.c          | 26 +++++++++++++-------------
 net/ipv4/af_inet.c       |  2 +-
 net/ipv4/esp4.c          |  2 +-
 net/ipv4/ip_output.c     |  6 +++---
 net/ipv4/tcp.c           |  4 ++--
 net/ipv4/tcp_offload.c   |  2 +-
 net/ipv4/tcp_output.c    | 15 +++++++--------
 net/ipv6/esp6.c          |  2 +-
 net/ipv6/ip6_output.c    |  4 ++--
 net/kcm/kcmproc.c        |  2 +-
 net/key/af_key.c         |  2 +-
 net/netlink/af_netlink.c |  2 +-
 net/packet/af_packet.c   |  4 ++--
 net/phonet/socket.c      |  2 +-
 net/rds/tcp_send.c       |  2 +-
 net/rxrpc/af_rxrpc.c     |  4 ++--
 net/sched/sch_atm.c      |  2 +-
 net/sctp/output.c        |  2 +-
 net/sctp/proc.c          |  2 +-
 net/sctp/socket.c        |  4 ++--
 net/unix/af_unix.c       |  6 +++---
 37 files changed, 74 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 7584ae1ded85..f0433adcd8fc 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e)
 		else {
 		    dev_kfree_skb_any(entry->skb);
 		}
-#if 1
-		/* race fixed by the above incarnation mechanism, but... */
-		if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) {
-		    atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0);
-		}
-#endif
+
 		/* check error condition */
 		if (*entry->status & STATUS_ERROR)
 		    atomic_inc(&vcc->stats->tx_err);
@@ -1130,13 +1125,9 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp
 	return -ENOMEM;
     }
 
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
     vcc->push(vcc, skb);
     atomic_inc(&vcc->stats->rx);
 
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
     return 0;
 }
 
@@ -1572,7 +1563,6 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
     unsigned long           flags;
 
     ASSERT(vcc);
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
     ASSERT(fore200e);
     ASSERT(fore200e_vcc);
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 461da2bce8ef..37ee21c5a5ca 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -2395,7 +2395,7 @@ he_close(struct atm_vcc *vcc)
 		 * TBRQ, the host issues the close command to the adapter.
 		 */
 
-		while (((tx_inuse = atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
+		while (((tx_inuse = refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
 		       (retry < MAX_RETRY)) {
 			msleep(sleep);
 			if (sleep < 250)
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 4e64de380bda..60bacba03d17 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -724,7 +724,7 @@ push_on_scq(struct idt77252_dev *card, struct vc_map *vc, struct sk_buff *skb)
 		struct sock *sk = sk_atm(vcc);
 
 		vc->estimator->cells += (skb->len + 47) / 48;
-		if (atomic_read(&sk->sk_wmem_alloc) >
+		if (refcount_read(&sk->sk_wmem_alloc) >
 		    (sk->sk_sndbuf >> 1)) {
 			u32 cps = vc->estimator->maxcps;
 
@@ -2009,7 +2009,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags)
 		atomic_inc(&vcc->stats->tx_err);
 		return -ENOMEM;
 	}
-	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 
 	skb_put_data(skb, cell, 52);
 
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index c1da539f5e28..4d97a89da066 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize)
 
 static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size)
 {
-	return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) <
+	return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) <
 	       sk_atm(vcc)->sk_sndbuf;
 }
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 00d09140e354..5284e50fc81a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -390,7 +390,7 @@ struct sock {
 
 	/* ===== cache line for TX ===== */
 	int			sk_wmem_queued;
-	atomic_t		sk_wmem_alloc;
+	refcount_t		sk_wmem_alloc;
 	unsigned long		sk_tsq_flags;
 	struct sk_buff		*sk_send_head;
 	struct sk_buff_head	sk_write_queue;
@@ -1911,7 +1911,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
  */
 static inline int sk_wmem_alloc_get(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) - 1;
+	return refcount_read(&sk->sk_wmem_alloc) - 1;
 }
 
 /**
@@ -2055,7 +2055,7 @@ static inline unsigned long sock_wspace(struct sock *sk)
 	int amt = 0;
 
 	if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-		amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
 		if (amt < 0)
 			amt = 0;
 	}
@@ -2136,7 +2136,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
  */
 static inline bool sock_writeable(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+	return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
 }
 
 static inline gfp_t gfp_any(void)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fca84e111c89..4e111196f902 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 47c36f449df0..f271a7bcf5b2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 		memcpy(here, llc_oui, sizeof(llc_oui));
 		((__be16 *) here)[3] = skb->protocol;
 	}
-	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
 	entry->vccs->last_use = jiffies;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
diff --git a/net/atm/common.c b/net/atm/common.c
index f06422f4108d..8a4f99114cd2 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk)
 		printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n",
 		       __func__, atomic_read(&sk->sk_rmem_alloc));
 
-	if (atomic_read(&sk->sk_wmem_alloc))
+	if (refcount_read(&sk->sk_wmem_alloc))
 		printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n",
-		       __func__, atomic_read(&sk->sk_wmem_alloc));
+		       __func__, refcount_read(&sk->sk_wmem_alloc));
 }
 
 static void vcc_def_wakeup(struct sock *sk)
@@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk)
 	struct atm_vcc *vcc = atm_sk(sk);
 
 	return (vcc->qos.txtp.max_sdu +
-		atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
+		refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
 }
 
 static void vcc_write_space(struct sock *sk)
@@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i
 	memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc));
 	memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc));
 	vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
-	atomic_set(&sk->sk_wmem_alloc, 1);
+	refcount_set(&sk->sk_wmem_alloc, 1);
 	atomic_set(&sk->sk_rmem_alloc, 0);
 	vcc->push = NULL;
 	vcc->pop = NULL;
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
 		goto out;
 	}
 	pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	skb->dev = NULL; /* for paths shared with net_device interfaces */
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09cfe87f0a44..75545717fa46 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	ATM_SKB(skb)->vcc = vcc;
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
 
-	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 	if (vcc->send(vcc, skb) < 0) {
 		dev->stats.tx_dropped++;
 		return;
@@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	int i;
 	char *tmp;		/* FIXME */
 
-	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 	mesg = (struct atmlec_msg *)skb->data;
 	tmp = skb->data;
 	tmp += sizeof(struct atmlec_msg);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index a190800572bd..680a4b9095a1 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
 					sizeof(struct llc_snap_hdr));
 	}
 
-	atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
 	entry->shortcut->send(entry->shortcut, skb);
 	entry->packets_fwded++;
@@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
 	struct k_message *mesg = (struct k_message *)skb->data;
-	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 
 	if (mpc == NULL) {
 		pr_info("no mpc found\n");
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index c4e09846d1de..21d9d341a619 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
 		return 1;
 	}
 
-	atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
 		 skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2e17e97a7a8b..821c0797553d 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	pr_debug("(%d) %d -= %d\n",
 		 vcc->vci, sk_wmem_alloc_get(sk), skb->truesize);
-	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
 	dev_kfree_skb_any(skb);
 	sk->sk_write_space(sk);
 }
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index f640a99e14b8..983c3a21a133 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	struct sock *sk;
 
 	msg = (struct atmsvc_msg *) skb->data;
-	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 	vcc = *(struct atm_vcc **) &msg->vcc;
 	pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc);
 	sk = sk_atm(vcc);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 7506b853a84d..632d5a416d97 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = {
 static void caif_sock_destructor(struct sock *sk)
 {
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	caif_assert(!atomic_read(&sk->sk_wmem_alloc));
+	caif_assert(!refcount_read(&sk->sk_wmem_alloc));
 	caif_assert(sk_unhashed(sk));
 	caif_assert(!sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 95d43543ac91..454ec8923333 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -614,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 		skb->data_len += copied;
 		skb->len += copied;
 		skb->truesize += truesize;
-		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (copied) {
 			int size = min_t(int, copied, PAGE_SIZE - start);
 			skb_fill_page_desc(skb, frag++, pages[n], start, size);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 659dfc0494c5..c267713cd383 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3024,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 		get_page(pfrag->page);
 
 		skb->truesize += copy;
-		atomic_add(copy, &sk->sk_wmem_alloc);
+		refcount_add(copy, &sk->sk_wmem_alloc);
 		skb->len += copy;
 		skb->data_len += copy;
 		offset += copy;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f4b090241c1..0866d59489cb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1528,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		if (likely(sk->sk_net_refcnt))
 			get_net(net);
 		sock_net_set(sk, net);
-		atomic_set(&sk->sk_wmem_alloc, 1);
+		refcount_set(&sk->sk_wmem_alloc, 1);
 
 		mem_cgroup_sk_alloc(sk);
 		cgroup_sk_alloc(&sk->sk_cgrp_data);
@@ -1552,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head)
 		sk->sk_destruct(sk);
 
 	filter = rcu_dereference_check(sk->sk_filter,
-				       atomic_read(&sk->sk_wmem_alloc) == 0);
+				       refcount_read(&sk->sk_wmem_alloc) == 0);
 	if (filter) {
 		sk_filter_uncharge(sk, filter);
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
@@ -1602,7 +1602,7 @@ void sk_free(struct sock *sk)
 	 * some packets are still in some tx queue.
 	 * If not null, sock_wfree() will call __sk_free(sk) later
 	 */
-	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 EXPORT_SYMBOL(sk_free);
@@ -1659,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		/*
 		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
 		 */
-		atomic_set(&newsk->sk_wmem_alloc, 1);
+		refcount_set(&newsk->sk_wmem_alloc, 1);
 		atomic_set(&newsk->sk_omem_alloc, 0);
 		sk_init_common(newsk);
 
@@ -1787,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb)
 		 * Keep a reference on sk_wmem_alloc, this will be released
 		 * after sk_write_space() call
 		 */
-		atomic_sub(len - 1, &sk->sk_wmem_alloc);
+		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
 		sk->sk_write_space(sk);
 		len = 1;
 	}
@@ -1795,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb)
 	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
 	 * could not do because of in-flight packets
 	 */
-	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 EXPORT_SYMBOL(sock_wfree);
@@ -1807,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
-	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 
@@ -1829,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 	 * is enough to guarantee sk_free() wont free this sock until
 	 * all in-flight packets are completed
 	 */
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
@@ -1852,7 +1852,7 @@ void skb_orphan_partial(struct sk_buff *skb)
 		struct sock *sk = skb->sk;
 
 		if (atomic_inc_not_zero(&sk->sk_refcnt)) {
-			atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
 			skb->destructor = sock_efree;
 		}
 	} else {
@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino);
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
 		struct sk_buff *skb = alloc_skb(size, priority);
 		if (skb) {
 			skb_set_owner_w(skb, sk);
@@ -1987,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			break;
@@ -2310,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 		if (sk->sk_type == SOCK_STREAM) {
 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
 				return 1;
-		} else if (atomic_read(&sk->sk_wmem_alloc) <
+		} else if (refcount_read(&sk->sk_wmem_alloc) <
 			   prot->sysctl_wmem[0])
 				return 1;
 	}
@@ -2577,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk)
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 58925b6597de..76c2077c3f5b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(sk->sk_wmem_queued);
 	WARN_ON(sk->sk_forward_alloc);
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1f18b4650253..0cbee0a666ff 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 			skb->data_len += tailen;
 			skb->truesize += tailen;
 			if (sk)
-				atomic_add(tailen, &sk->sk_wmem_alloc);
+				refcount_add(tailen, &sk->sk_wmem_alloc);
 
 			goto out;
 		}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 532b36e9ce2a..2e61e2af251a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1037,7 +1037,7 @@ alloc_new_skb:
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
-				if (atomic_read(&sk->sk_wmem_alloc) <=
+				if (refcount_read(&sk->sk_wmem_alloc) <=
 				    2 * sk->sk_sndbuf)
 					skb = sock_wmalloc(sk,
 							   alloclen + hh_len + 15, 1,
@@ -1145,7 +1145,7 @@ alloc_new_skb:
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
-			atomic_add(copy, &sk->sk_wmem_alloc);
+			refcount_add(copy, &sk->sk_wmem_alloc);
 		}
 		offset += copy;
 		length -= copy;
@@ -1369,7 +1369,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 		skb->len += len;
 		skb->data_len += len;
 		skb->truesize += len;
-		atomic_add(len, &sk->sk_wmem_alloc);
+		refcount_add(len, &sk->sk_wmem_alloc);
 		offset += len;
 		size -= len;
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4793fb78d93b..fae45e402742 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 	return skb->len < size_goal &&
 	       sysctl_tcp_autocorking &&
 	       skb != tcp_write_queue_head(sk) &&
-	       atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
 
 static void tcp_push(struct sock *sk, int flags, int mss_now,
@@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED.
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
+		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
 			return;
 	}
 
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index bc68da38ea86..11f69bbf9307 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 		swap(gso_skb->sk, skb->sk);
 		swap(gso_skb->destructor, skb->destructor);
 		sum_truesize += skb->truesize;
-		atomic_add(sum_truesize - gso_skb->truesize,
+		refcount_add(sum_truesize - gso_skb->truesize,
 			   &skb->sk->sk_wmem_alloc);
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a9c395b6235..1d79137f3795 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -861,12 +861,11 @@ void tcp_wfree(struct sk_buff *skb)
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned long flags, nval, oval;
-	int wmem;
 
 	/* Keep one reference on sk_wmem_alloc.
 	 * Will be released by sk_free() from here or tcp_tasklet_func()
 	 */
-	wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
 
 	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
 	 * Wait until our queues (qdisc + devices) are drained.
@@ -875,7 +874,7 @@ void tcp_wfree(struct sk_buff *skb)
 	 * - chance for incoming ACK (processed by another cpu maybe)
 	 *   to migrate this flow (skb->ooo_okay will be eventually set)
 	 */
-	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+	if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
 		goto out;
 
 	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
@@ -925,7 +924,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 		if (nval != oval)
 			continue;
 
-		if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+		if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
 			break;
 		/* queue this socket to tasklet queue */
 		tsq = this_cpu_ptr(&tsq_tasklet);
@@ -1045,7 +1044,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	skb->sk = sk;
 	skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
 	skb_set_hash_from_sk(skb, sk);
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
 
@@ -2176,7 +2175,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 	limit <<= factor;
 
-	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
 		/* Always send the 1st or 2nd skb in write queue.
 		 * No need to wait for TX completion to call us back,
 		 * after softirq/tasklet schedule.
@@ -2192,7 +2191,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 		 * test again the condition.
 		 */
 		smp_mb__after_atomic();
-		if (atomic_read(&sk->sk_wmem_alloc) > limit)
+		if (refcount_read(&sk->sk_wmem_alloc) > limit)
 			return true;
 	}
 	return false;
@@ -2812,7 +2811,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	/* Do not sent more than we queued. 1/4 is reserved for possible
 	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 */
-	if (atomic_read(&sk->sk_wmem_alloc) >
+	if (refcount_read(&sk->sk_wmem_alloc) >
 	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
 		  sk->sk_sndbuf))
 		return -EAGAIN;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 71faffdd55d9..9ed35473dcb5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 			skb->data_len += tailen;
 			skb->truesize += tailen;
 			if (sk)
-				atomic_add(tailen, &sk->sk_wmem_alloc);
+				refcount_add(tailen, &sk->sk_wmem_alloc);
 
 			goto out;
 		}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f2657d98e82..1422d6c08377 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1472,7 +1472,7 @@ alloc_new_skb:
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
-				if (atomic_read(&sk->sk_wmem_alloc) <=
+				if (refcount_read(&sk->sk_wmem_alloc) <=
 				    2 * sk->sk_sndbuf)
 					skb = sock_wmalloc(sk,
 							   alloclen + hh_len, 1,
@@ -1581,7 +1581,7 @@ alloc_new_skb:
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
-			atomic_add(copy, &sk->sk_wmem_alloc);
+			refcount_add(copy, &sk->sk_wmem_alloc);
 		}
 		offset += copy;
 		length -= copy;
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index bf75c9231cca..c343ac60bf50 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 		   psock->sk->sk_receive_queue.qlen,
 		   atomic_read(&psock->sk->sk_rmem_alloc),
 		   psock->sk->sk_write_queue.qlen,
-		   atomic_read(&psock->sk->sk_wmem_alloc));
+		   refcount_read(&psock->sk->sk_wmem_alloc));
 
 	if (psock->done)
 		seq_puts(seq, "Done ");
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 287964a570e9..e466579c18fa 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 
 	atomic_dec(&net_pfkey->socks_nr);
 }
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 05030ad1a36c..8ced52e91181 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(nlk_sk(sk)->groups);
 }
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9349a495caf..90fd38d5c458 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1317,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk)
 	skb_queue_purge(&sk->sk_error_queue);
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		pr_err("Attempt to release alive packet socket: %p\n", sk);
@@ -2523,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->data_len = to_write;
 	skb->len += to_write;
 	skb->truesize += to_write;
-	atomic_add(to_write, &po->sk.sk_wmem_alloc);
+	refcount_add(to_write, &po->sk.sk_wmem_alloc);
 
 	while (likely(to_write)) {
 		nr_frags = skb_shinfo(skb)->nr_frags;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 64634e3ec2fc..29c7f754c70d 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 		return POLLHUP;
 
 	if (sk->sk_state == TCP_ESTABLISHED &&
-		atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
+		refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
 		atomic_read(&pn->tx_credits))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 52d11d7725c8..0d8616aa5bad 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
 		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 
 out:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 58ae0db52ea1..f1299f54627a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -53,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *);
  */
 static inline int rxrpc_writable(struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+	return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
 }
 
 /*
@@ -730,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
 
 	rxrpc_purge_queue(&sk->sk_receive_queue);
 
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(!sk_unhashed(sk));
 	WARN_ON(sk->sk_socket);
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index de162592eee0..572fe2584e48 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -498,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data)
 			ATM_SKB(skb)->vcc = flow->vcc;
 			memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
 			       flow->hdr_len);
-			atomic_add(skb->truesize,
+			refcount_add(skb->truesize,
 				   &sk_atm(flow->vcc)->sk_wmem_alloc);
 			/* atm.atm_options are already set by atm_tc_enqueue */
 			flow->vcc->send(flow->vcc, skb);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 89cee1482d35..9bf9d84a96b7 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
 	 * therefore only reserve a single byte to keep socket around until
 	 * the packet has been transmitted.
 	 */
-	atomic_inc(&sk->sk_wmem_alloc);
+	refcount_inc(&sk->sk_wmem_alloc);
 }
 
 static int sctp_packet_pack(struct sctp_packet *packet,
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 8e34db56bc1d..26b4be6b4172 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -363,7 +363,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 		assoc->stream.outcnt, assoc->max_retrans,
 		assoc->init_retries, assoc->shutdown_retries,
 		assoc->rtx_data_chunks,
-		atomic_read(&sk->sk_wmem_alloc),
+		refcount_read(&sk->sk_wmem_alloc),
 		sk->sk_wmem_queued,
 		sk->sk_sndbuf,
 		sk->sk_rcvbuf);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b497ee8ae279..15401d09efc4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+	refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 	sk->sk_wmem_queued += chunk->skb->truesize;
 	sk_mem_charge(sk, chunk->skb->truesize);
 }
@@ -7684,7 +7684,7 @@ static void sctp_wfree(struct sk_buff *skb)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc));
 
 	/*
 	 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1a0c961f4ffe..7c2e21ebbedc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 static int unix_writable(const struct sock *sk)
 {
 	return sk->sk_state != TCP_LISTEN &&
-	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 }
 
 static void unix_write_space(struct sock *sk)
@@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk)
 
 	skb_queue_purge(&sk->sk_receive_queue);
 
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(!sk_unhashed(sk));
 	WARN_ON(sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
@@ -2033,7 +2033,7 @@ alloc_skb:
 	skb->len += size;
 	skb->data_len += size;
 	skb->truesize += size;
-	atomic_add(size, &sk->sk_wmem_alloc);
+	refcount_add(size, &sk->sk_wmem_alloc);
 
 	if (newskb) {
 		err = unix_scm_to_skb(&scm, skb, false);
-- 
cgit v1.2.3


From 8851ab526791530d00bbbd0952512d68684a44b8 Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:08:02 +0300
Subject: net: convert ip_mc_list.refcnt from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h |  3 ++-
 net/ipv4/igmp.c      | 10 +++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 12f6fba6d21a..97caf1821de8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/timer.h>
 #include <linux/in.h>
+#include <linux/refcount.h>
 #include <uapi/linux/igmp.h>
 
 static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
@@ -84,7 +85,7 @@ struct ip_mc_list {
 	struct ip_mc_list __rcu *next_hash;
 	struct timer_list	timer;
 	int			users;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	spinlock_t		lock;
 	char			tm_running;
 	char			reporter;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c4032302d7cd..28f14afd0dd3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -173,7 +173,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 static void ip_ma_put(struct ip_mc_list *im)
 {
-	if (atomic_dec_and_test(&im->refcnt)) {
+	if (refcount_dec_and_test(&im->refcnt)) {
 		in_dev_put(im->interface);
 		kfree_rcu(im, rcu);
 	}
@@ -199,7 +199,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
 {
 	spin_lock_bh(&im->lock);
 	if (del_timer(&im->timer))
-		atomic_dec(&im->refcnt);
+		refcount_dec(&im->refcnt);
 	im->tm_running = 0;
 	im->reporter = 0;
 	im->unsolicit_count = 0;
@@ -213,7 +213,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 
 	im->tm_running = 1;
 	if (!mod_timer(&im->timer, jiffies+tv+2))
-		atomic_inc(&im->refcnt);
+		refcount_inc(&im->refcnt);
 }
 
 static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -249,7 +249,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
 			spin_unlock_bh(&im->lock);
 			return;
 		}
-		atomic_dec(&im->refcnt);
+		refcount_dec(&im->refcnt);
 	}
 	igmp_start_timer(im, max_delay);
 	spin_unlock_bh(&im->lock);
@@ -1374,7 +1374,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	/* initial mode is (EX, empty) */
 	im->sfmode = MCAST_EXCLUDE;
 	im->sfcount[MCAST_EXCLUDE] = 1;
-	atomic_set(&im->refcnt, 1);
+	refcount_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
 	setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
-- 
cgit v1.2.3


From 7658b36f1b3122c298213eed344f622e836b281b Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:08:03 +0300
Subject: net: convert in_device.refcnt from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 11 ++++++-----
 net/ipv4/devinet.c         |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index e7c04c4e4bcd..fb3f809e34e4 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -11,6 +11,7 @@
 #include <linux/timer.h>
 #include <linux/sysctl.h>
 #include <linux/rtnetlink.h>
+#include <linux/refcount.h>
 
 struct ipv4_devconf {
 	void	*sysctl;
@@ -22,7 +23,7 @@ struct ipv4_devconf {
 
 struct in_device {
 	struct net_device	*dev;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	int			dead;
 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/
 
@@ -219,7 +220,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev)
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev)
-		atomic_inc(&in_dev->refcnt);
+		refcount_inc(&in_dev->refcnt);
 	rcu_read_unlock();
 	return in_dev;
 }
@@ -240,12 +241,12 @@ void in_dev_finish_destroy(struct in_device *idev);
 
 static inline void in_dev_put(struct in_device *idev)
 {
-	if (atomic_dec_and_test(&idev->refcnt))
+	if (refcount_dec_and_test(&idev->refcnt))
 		in_dev_finish_destroy(idev);
 }
 
-#define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
-#define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
+#define __in_dev_put(idev)  refcount_dec(&(idev)->refcnt)
+#define in_dev_hold(idev)   refcount_inc(&(idev)->refcnt)
 
 #endif /* __KERNEL__ */
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a7dd088d5fc9..38d9af9b917c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -252,7 +252,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	/* Reference in_dev->dev */
 	dev_hold(dev);
 	/* Account for reference dev->ip_ptr (below) */
-	in_dev_hold(in_dev);
+	refcount_set(&in_dev->refcnt, 1);
 
 	err = devinet_sysctl_register(in_dev);
 	if (err) {
-- 
cgit v1.2.3


From 433cea4d9bbb83cc848b80c51bb849a2ceb49379 Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Fri, 30 Jun 2017 13:08:04 +0300
Subject: net: convert netpoll_info.refcnt from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 3 ++-
 net/core/netpoll.c      | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 1828900c9411..27c0aaa22cb0 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
+#include <linux/refcount.h>
 
 union inet_addr {
 	__u32		all[4];
@@ -34,7 +35,7 @@ struct netpoll {
 };
 
 struct netpoll_info {
-	atomic_t refcnt;
+	refcount_t refcnt;
 
 	struct semaphore dev_lock;
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a835155c85f9..d3408a693166 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -632,7 +632,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
-		atomic_set(&npinfo->refcnt, 1);
+		refcount_set(&npinfo->refcnt, 1);
 
 		ops = np->dev->netdev_ops;
 		if (ops->ndo_netpoll_setup) {
@@ -642,7 +642,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 		}
 	} else {
 		npinfo = rtnl_dereference(ndev->npinfo);
-		atomic_inc(&npinfo->refcnt);
+		refcount_inc(&npinfo->refcnt);
 	}
 
 	npinfo->netpoll = np;
@@ -821,7 +821,7 @@ void __netpoll_cleanup(struct netpoll *np)
 
 	synchronize_srcu(&netpoll_srcu);
 
-	if (atomic_dec_and_test(&npinfo->refcnt)) {
+	if (refcount_dec_and_test(&npinfo->refcnt)) {
 		const struct net_device_ops *ops;
 
 		ops = np->dev->netdev_ops;
-- 
cgit v1.2.3


From 7db5bb33add5afe6c64e00516b0c928bfc937466 Mon Sep 17 00:00:00 2001
From: Jerry Hoemann <jerry.hoemann@hpe.com>
Date: Fri, 30 Jun 2017 20:53:24 -0700
Subject: libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru.

Add a bus level dsm_mask to nvdimm_bus_descriptor to allow the passthru
calling mechanism to specify a different mask from the cmd_mask.

Populate bus_dsm_mask and use it to filter dsm calls that user can
make through the pass thru interface.

Signed-off-by: Jerry Hoemann <jerry.hoemann@hpe.com>
[djbw: use command number constants instead of a magic mask value]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c  | 27 +++++++++++++++++++++++++++
 include/linux/libnvdimm.h |  1 +
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index b16e4ef7a5ca..b7d7cc4fd2d0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -253,6 +253,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_name = nvdimm_bus_cmd_name(cmd);
 		cmd_mask = nd_desc->cmd_mask;
 		dsm_mask = cmd_mask;
+		if (cmd == ND_CMD_CALL)
+			dsm_mask = nd_desc->bus_dsm_mask;
 		desc = nd_cmd_bus_desc(cmd);
 		guid = to_nfit_uuid(NFIT_DEV_BUS);
 		handle = adev->handle;
@@ -1608,11 +1610,23 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			acpi_desc);
 }
 
+/*
+ * These constants are private because there are no kernel consumers of
+ * these commands.
+ */
+enum nfit_aux_cmds {
+        NFIT_CMD_TRANSLATE_SPA = 5,
+        NFIT_CMD_ARS_INJECT_SET = 7,
+        NFIT_CMD_ARS_INJECT_CLEAR = 8,
+        NFIT_CMD_ARS_INJECT_GET = 9,
+};
+
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
 	const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
 	struct acpi_device *adev;
+	unsigned long dsm_mask;
 	int i;
 
 	nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
@@ -1624,6 +1638,19 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nd_desc->cmd_mask);
 	set_bit(ND_CMD_CALL, &nd_desc->cmd_mask);
+
+	dsm_mask =
+		(1 << ND_CMD_ARS_CAP) |
+		(1 << ND_CMD_ARS_START) |
+		(1 << ND_CMD_ARS_STATUS) |
+		(1 << ND_CMD_CLEAR_ERROR) |
+		(1 << NFIT_CMD_TRANSLATE_SPA) |
+		(1 << NFIT_CMD_ARS_INJECT_SET) |
+		(1 << NFIT_CMD_ARS_INJECT_CLEAR) |
+		(1 << NFIT_CMD_ARS_INJECT_GET);
+	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
+		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
+			set_bit(i, &nd_desc->bus_dsm_mask);
 }
 
 static ssize_t range_index_show(struct device *dev,
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 4b9f178c82e6..6aee1a6e4e63 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -55,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
+	unsigned long bus_dsm_mask;
 	unsigned long cmd_mask;
 	struct module *module;
 	char *provider_name;
-- 
cgit v1.2.3


From ae146d9b76589d636d11c5e4382bbba2fe8bdb9b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:12 +0800
Subject: sctp: remove the typedef sctp_sctphdr_t

This patch is to remove the typedef sctp_sctphdr_t, and replace
with struct sctphdr in the places where it's using this typedef.

It is also to fix some indents and use sizeof(variable) instead
of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h                    |  4 ++--
 net/netfilter/ipset/ip_set_getport.c    |  4 ++--
 net/netfilter/ipvs/ip_vs_core.c         |  6 +++---
 net/netfilter/ipvs/ip_vs_proto_sctp.c   | 15 +++++++--------
 net/netfilter/nf_conntrack_proto_sctp.c |  2 +-
 net/netfilter/nf_nat_proto_sctp.c       |  2 +-
 net/netfilter/xt_sctp.c                 | 16 ++++++++--------
 7 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 7a4804c4a593..85540ec4b561 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -57,12 +57,12 @@
 #include <uapi/linux/sctp.h>
 
 /* Section 3.1.  SCTP Common Header Format */
-typedef struct sctphdr {
+struct sctphdr {
 	__be16 source;
 	__be16 dest;
 	__be32 vtag;
 	__le32 checksum;
-} sctp_sctphdr_t;
+};
 
 static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 {
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 42c3e3ba1b94..3f09cdb42562 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -38,8 +38,8 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		break;
 	}
 	case IPPROTO_SCTP: {
-		sctp_sctphdr_t _sh;
-		const sctp_sctphdr_t *sh;
+		struct sctphdr _sh;
+		const struct sctphdr *sh;
 
 		sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
 		if (!sh)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index ad99c1ceea6f..6f39af9fd6df 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1038,8 +1038,8 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
 static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
 {
 	sctp_chunkhdr_t *sch, schunk;
-	sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
-			sizeof(schunk), &schunk);
+	sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr),
+				 sizeof(schunk), &schunk);
 	if (sch == NULL)
 		return 0;
 	if (sch->type == SCTP_CID_ABORT)
@@ -1072,7 +1072,7 @@ static inline bool is_new_conn(const struct sk_buff *skb,
 	case IPPROTO_SCTP: {
 		sctp_chunkhdr_t *sch, schunk;
 
-		sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
+		sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr),
 					 sizeof(schunk), &schunk);
 		if (sch == NULL)
 			return false;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 56f8e4b204ff..6b38cadab822 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -16,15 +16,14 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 {
 	struct ip_vs_service *svc;
 	sctp_chunkhdr_t _schunkh, *sch;
-	sctp_sctphdr_t *sh, _sctph;
+	struct sctphdr *sh, _sctph;
 	__be16 _ports[2], *ports = NULL;
 
 	if (likely(!ip_vs_iph_icmp(iph))) {
 		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
 		if (sh) {
-			sch = skb_header_pointer(
-				skb, iph->len + sizeof(sctp_sctphdr_t),
-				sizeof(_schunkh), &_schunkh);
+			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
+						 sizeof(_schunkh), &_schunkh);
 			if (sch && (sch->type == SCTP_CID_INIT ||
 				    sysctl_sloppy_sctp(ipvs)))
 				ports = &sh->source;
@@ -77,7 +76,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 	return 1;
 }
 
-static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
+static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
 			  unsigned int sctphoff)
 {
 	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
@@ -88,7 +87,7 @@ static int
 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
-	sctp_sctphdr_t *sctph;
+	struct sctphdr *sctph;
 	unsigned int sctphoff = iph->len;
 	bool payload_csum = false;
 
@@ -135,7 +134,7 @@ static int
 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
-	sctp_sctphdr_t *sctph;
+	struct sctphdr *sctph;
 	unsigned int sctphoff = iph->len;
 	bool payload_csum = false;
 
@@ -389,7 +388,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	ihl = ip_hdrlen(skb);
 #endif
 
-	cofs = ihl + sizeof(sctp_sctphdr_t);
+	cofs = ihl + sizeof(struct sctphdr);
 	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
 	if (sch == NULL)
 		return;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1c5b14a6cab3..db87af41c342 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -190,7 +190,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 }
 
 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
-for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
+for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0;	\
 	(offset) < (skb)->len &&					\
 	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch)));	\
 	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 804e8a0ab36e..c57ee3240b1d 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -32,7 +32,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
 {
-	sctp_sctphdr_t *hdr;
+	struct sctphdr *hdr;
 	int hdrsize = 8;
 
 	/* This could be an inner header returned in imcp packet; in such
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 4dedb96d1a06..0f20ea4f511e 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -118,8 +118,8 @@ static bool
 sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
-	const sctp_sctphdr_t *sh;
-	sctp_sctphdr_t _sh;
+	const struct sctphdr *sh;
+	struct sctphdr _sh;
 
 	if (par->fragoff != 0) {
 		pr_debug("Dropping non-first fragment.. FIXME\n");
@@ -136,13 +136,13 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	return  SCCHECK(ntohs(sh->source) >= info->spts[0]
 			&& ntohs(sh->source) <= info->spts[1],
-			XT_SCTP_SRC_PORTS, info->flags, info->invflags)
-		&& SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+			XT_SCTP_SRC_PORTS, info->flags, info->invflags) &&
+		SCCHECK(ntohs(sh->dest) >= info->dpts[0]
 			&& ntohs(sh->dest) <= info->dpts[1],
-			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
-		&& SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
-					info, &par->hotdrop),
-			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+			XT_SCTP_DEST_PORTS, info->flags, info->invflags) &&
+		SCCHECK(match_packet(skb, par->thoff + sizeof(_sh),
+				     info, &par->hotdrop),
+			XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
 static int sctp_mt_check(const struct xt_mtchk_param *par)
-- 
cgit v1.2.3


From 922dbc5be2186659d2c453a53f2ae569e55b6101 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:13 +0800
Subject: sctp: remove the typedef sctp_chunkhdr_t

This patch is to remove the typedef sctp_chunkhdr_t, and replace
with struct sctp_chunkhdr in the places where it's using this
typedef.

It is also to fix some indents and use sizeof(variable) instead
of sizeof(type)., especially in sctp_new.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h                    | 34 ++++++++---------
 include/net/sctp/sctp.h                 |  2 +-
 net/netfilter/ipvs/ip_vs_core.c         |  4 +-
 net/netfilter/ipvs/ip_vs_proto_sctp.c   |  6 +--
 net/netfilter/nf_conntrack_proto_sctp.c | 29 +++++++-------
 net/netfilter/xt_sctp.c                 |  4 +-
 net/sctp/input.c                        | 20 +++++-----
 net/sctp/inqueue.c                      | 15 ++++----
 net/sctp/sm_make_chunk.c                | 17 +++++----
 net/sctp/sm_sideeffect.c                |  5 ++-
 net/sctp/sm_statefuns.c                 | 67 +++++++++++++++++----------------
 net/sctp/ulpevent.c                     |  2 +-
 12 files changed, 102 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 85540ec4b561..9ad5b9e8df78 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -70,11 +70,11 @@ static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 }
 
 /* Section 3.2.  Chunk Field Descriptions. */
-typedef struct sctp_chunkhdr {
+struct sctp_chunkhdr {
 	__u8 type;
 	__u8 flags;
 	__be16 length;
-} sctp_chunkhdr_t;
+};
 
 
 /* Section 3.2.  Chunk Type Values.
@@ -236,8 +236,8 @@ typedef struct sctp_datahdr {
 } sctp_datahdr_t;
 
 typedef struct sctp_data_chunk {
-        sctp_chunkhdr_t chunk_hdr;
-        sctp_datahdr_t  data_hdr;
+	struct sctp_chunkhdr chunk_hdr;
+	sctp_datahdr_t  data_hdr;
 } sctp_data_chunk_t;
 
 /* DATA Chuck Specific Flags */
@@ -267,7 +267,7 @@ typedef struct sctp_inithdr {
 } sctp_inithdr_t;
 
 typedef struct sctp_init_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_inithdr_t init_hdr;
 } sctp_init_chunk_t;
 
@@ -386,7 +386,7 @@ typedef struct sctp_sackhdr {
 } sctp_sackhdr_t;
 
 typedef struct sctp_sack_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_sackhdr_t sack_hdr;
 } sctp_sack_chunk_t;
 
@@ -403,7 +403,7 @@ typedef struct sctp_heartbeathdr {
 } sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_heartbeathdr_t hb_hdr;
 } sctp_heartbeat_chunk_t;
 
@@ -413,7 +413,7 @@ typedef struct sctp_heartbeat_chunk {
  * chunk descriptor.
  */
 typedef struct sctp_abort_chunk {
-        sctp_chunkhdr_t uh;
+	struct sctp_chunkhdr uh;
 } sctp_abort_chunk_t;
 
 
@@ -425,8 +425,8 @@ typedef struct sctp_shutdownhdr {
 } sctp_shutdownhdr_t;
 
 struct sctp_shutdown_chunk_t {
-        sctp_chunkhdr_t    chunk_hdr;
-        sctp_shutdownhdr_t shutdown_hdr;
+	struct sctp_chunkhdr chunk_hdr;
+	sctp_shutdownhdr_t shutdown_hdr;
 };
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
@@ -438,8 +438,8 @@ typedef struct sctp_errhdr {
 } sctp_errhdr_t;
 
 typedef struct sctp_operr_chunk {
-        sctp_chunkhdr_t chunk_hdr;
-	sctp_errhdr_t   err_hdr;
+	struct sctp_chunkhdr chunk_hdr;
+	sctp_errhdr_t err_hdr;
 } sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
@@ -528,7 +528,7 @@ typedef struct sctp_ecnehdr {
 } sctp_ecnehdr_t;
 
 typedef struct sctp_ecne_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_ecnehdr_t ence_hdr;
 } sctp_ecne_chunk_t;
 
@@ -540,7 +540,7 @@ typedef struct sctp_cwrhdr {
 } sctp_cwrhdr_t;
 
 typedef struct sctp_cwr_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_cwrhdr_t cwr_hdr;
 } sctp_cwr_chunk_t;
 
@@ -649,7 +649,7 @@ typedef struct sctp_addiphdr {
 } sctp_addiphdr_t;
 
 typedef struct sctp_addip_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_addiphdr_t addip_hdr;
 } sctp_addip_chunk_t;
 
@@ -709,7 +709,7 @@ typedef struct sctp_authhdr {
 } sctp_authhdr_t;
 
 typedef struct sctp_auth_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_authhdr_t auth_hdr;
 } sctp_auth_chunk_t;
 
@@ -719,7 +719,7 @@ struct sctp_infox {
 };
 
 struct sctp_reconf_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	__u8 params[0];
 };
 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 069582ee5d7f..d756bd095683 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 
 #define _sctp_walk_errors(err, chunk_hdr, end)\
 for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
-	    sizeof(sctp_chunkhdr_t));\
+	    sizeof(struct sctp_chunkhdr));\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
      err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 6f39af9fd6df..e31956b58aba 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1037,7 +1037,7 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
  */
 static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
 {
-	sctp_chunkhdr_t *sch, schunk;
+	struct sctp_chunkhdr *sch, schunk;
 	sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr),
 				 sizeof(schunk), &schunk);
 	if (sch == NULL)
@@ -1070,7 +1070,7 @@ static inline bool is_new_conn(const struct sk_buff *skb,
 		return th->syn;
 	}
 	case IPPROTO_SCTP: {
-		sctp_chunkhdr_t *sch, schunk;
+		struct sctp_chunkhdr *sch, schunk;
 
 		sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr),
 					 sizeof(schunk), &schunk);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 6b38cadab822..3ffad4adaddf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -15,7 +15,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		   struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_service *svc;
-	sctp_chunkhdr_t _schunkh, *sch;
+	struct sctp_chunkhdr _schunkh, *sch;
 	struct sctphdr *sh, _sctph;
 	__be16 _ports[2], *ports = NULL;
 
@@ -377,7 +377,7 @@ static inline void
 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 		int direction, const struct sk_buff *skb)
 {
-	sctp_chunkhdr_t _sctpch, *sch;
+	struct sctp_chunkhdr _sctpch, *sch;
 	unsigned char chunk_type;
 	int event, next_state;
 	int ihl, cofs;
@@ -409,7 +409,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	    (sch->type == SCTP_CID_COOKIE_ACK)) {
 		int clen = ntohs(sch->length);
 
-		if (clen >= sizeof(sctp_chunkhdr_t)) {
+		if (clen >= sizeof(_sctpch)) {
 			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
 						 sizeof(_sctpch), &_sctpch);
 			if (sch && sch->type == SCTP_CID_ABORT)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index db87af41c342..b841a8aeee7c 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -202,7 +202,7 @@ static int do_basic_checks(struct nf_conn *ct,
 			   unsigned long *map)
 {
 	u_int32_t offset, count;
-	sctp_chunkhdr_t _sch, *sch;
+	struct sctp_chunkhdr _sch, *sch;
 	int flag;
 
 	flag = 0;
@@ -397,7 +397,7 @@ static int sctp_packet(struct nf_conn *ct,
 		    sch->type == SCTP_CID_INIT_ACK) {
 			sctp_inithdr_t _inithdr, *ih;
 
-			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+			ih = skb_header_pointer(skb, offset + sizeof(_sch),
 						sizeof(_inithdr), &_inithdr);
 			if (ih == NULL)
 				goto out_unlock;
@@ -471,23 +471,20 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 		/* Copy the vtag into the state info */
 		if (sch->type == SCTP_CID_INIT) {
-			if (sh->vtag == 0) {
-				sctp_inithdr_t _inithdr, *ih;
+			sctp_inithdr_t _inithdr, *ih;
+			/* Sec 8.5.1 (A) */
+			if (sh->vtag)
+				return false;
 
-				ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-							sizeof(_inithdr), &_inithdr);
-				if (ih == NULL)
-					return false;
+			ih = skb_header_pointer(skb, offset + sizeof(_sch),
+						sizeof(_inithdr), &_inithdr);
+			if (!ih)
+				return false;
 
-				pr_debug("Setting vtag %x for new conn\n",
-					 ih->init_tag);
+			pr_debug("Setting vtag %x for new conn\n",
+				 ih->init_tag);
 
-				ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
-								ih->init_tag;
-			} else {
-				/* Sec 8.5.1 (A) */
-				return false;
-			}
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
 		} else if (sch->type == SCTP_CID_HEARTBEAT) {
 			pr_debug("Setting vtag %x for secondary conntrack\n",
 				 sh->vtag);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 0f20ea4f511e..2d2fa1d53ea6 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -42,8 +42,8 @@ match_packet(const struct sk_buff *skb,
 	     bool *hotdrop)
 {
 	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
-	const sctp_chunkhdr_t *sch;
-	sctp_chunkhdr_t _sch;
+	const struct sctp_chunkhdr *sch;
+	struct sctp_chunkhdr _sch;
 	int chunk_match_type = info->chunk_match_type;
 	const struct xt_sctp_flag_info *flag_info = info->flag_info;
 	int flag_count = info->flag_count;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ba9ad32fc447..a9994c4afc18 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -663,19 +663,19 @@ out_unlock:
  */
 static int sctp_rcv_ootb(struct sk_buff *skb)
 {
-	sctp_chunkhdr_t *ch, _ch;
+	struct sctp_chunkhdr *ch, _ch;
 	int ch_end, offset = 0;
 
 	/* Scan through all the chunks in the packet.  */
 	do {
 		/* Make sure we have at least the header there */
-		if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
+		if (offset + sizeof(_ch) > skb->len)
 			break;
 
 		ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
 
 		/* Break out if chunk length is less then minimal. */
-		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+		if (ntohs(ch->length) < sizeof(_ch))
 			break;
 
 		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
@@ -1106,7 +1106,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
  */
 static struct sctp_association *__sctp_rcv_asconf_lookup(
 					struct net *net,
-					sctp_chunkhdr_t *ch,
+					struct sctp_chunkhdr *ch,
 					const union sctp_addr *laddr,
 					__be16 peer_port,
 					struct sctp_transport **transportp)
@@ -1144,7 +1144,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 				      struct sctp_transport **transportp)
 {
 	struct sctp_association *asoc = NULL;
-	sctp_chunkhdr_t *ch;
+	struct sctp_chunkhdr *ch;
 	int have_auth = 0;
 	unsigned int chunk_num = 1;
 	__u8 *ch_end;
@@ -1152,10 +1152,10 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 	/* Walk through the chunks looking for AUTH or ASCONF chunks
 	 * to help us find the association.
 	 */
-	ch = (sctp_chunkhdr_t *) skb->data;
+	ch = (struct sctp_chunkhdr *)skb->data;
 	do {
 		/* Break out if chunk length is less then minimal. */
-		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+		if (ntohs(ch->length) < sizeof(*ch))
 			break;
 
 		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
@@ -1192,7 +1192,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 		if (asoc)
 			break;
 
-		ch = (sctp_chunkhdr_t *) ch_end;
+		ch = (struct sctp_chunkhdr *)ch_end;
 		chunk_num++;
 	} while (ch_end < skb_tail_pointer(skb));
 
@@ -1210,7 +1210,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
-	sctp_chunkhdr_t *ch;
+	struct sctp_chunkhdr *ch;
 
 	/* We do not allow GSO frames here as we need to linearize and
 	 * then cannot guarantee frame boundaries. This shouldn't be an
@@ -1220,7 +1220,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
 	if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
 		return NULL;
 
-	ch = (sctp_chunkhdr_t *) skb->data;
+	ch = (struct sctp_chunkhdr *)skb->data;
 
 	/* The code below will attempt to walk the chunk and extract
 	 * parameter information.  Before we do that, we need to verify
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index f731de3e8428..48392552ee7c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -99,7 +99,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
 struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
 {
 	struct sctp_chunk *chunk;
-	sctp_chunkhdr_t *ch = NULL;
+	struct sctp_chunkhdr *ch = NULL;
 
 	chunk = queue->in_progress;
 	/* If there is no more chunks in this packet, say so */
@@ -108,7 +108,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
 	    chunk->pdiscard)
 		    return NULL;
 
-	ch = (sctp_chunkhdr_t *)chunk->chunk_end;
+	ch = (struct sctp_chunkhdr *)chunk->chunk_end;
 
 	return ch;
 }
@@ -122,7 +122,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
 struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 {
 	struct sctp_chunk *chunk;
-	sctp_chunkhdr_t *ch = NULL;
+	struct sctp_chunkhdr *ch = NULL;
 
 	/* The assumption is that we are safe to process the chunks
 	 * at this time.
@@ -151,7 +151,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 			chunk = queue->in_progress = NULL;
 		} else {
 			/* Nothing to do. Next chunk in the packet, please. */
-			ch = (sctp_chunkhdr_t *) chunk->chunk_end;
+			ch = (struct sctp_chunkhdr *)chunk->chunk_end;
 			/* Force chunk->skb->data to chunk->chunk_end.  */
 			skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
 			/* We are guaranteed to pull a SCTP header. */
@@ -195,7 +195,7 @@ next_chunk:
 
 new_skb:
 		/* This is the first chunk in the packet.  */
-		ch = (sctp_chunkhdr_t *) chunk->skb->data;
+		ch = (struct sctp_chunkhdr *)chunk->skb->data;
 		chunk->singleton = 1;
 		chunk->data_accepted = 0;
 		chunk->pdiscard = 0;
@@ -214,11 +214,10 @@ new_skb:
 
 	chunk->chunk_hdr = ch;
 	chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
-	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+	skb_pull(chunk->skb, sizeof(*ch));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
-	if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
-	    skb_tail_pointer(chunk->skb)) {
+	if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
 		/* This is not a singleton */
 		chunk->singleton = 0;
 	} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4b1967997c16..7d4c5a870f0e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1379,20 +1379,20 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 					    gfp_t gfp)
 {
 	struct sctp_chunk *retval;
-	sctp_chunkhdr_t *chunk_hdr;
+	struct sctp_chunkhdr *chunk_hdr;
 	struct sk_buff *skb;
 	struct sock *sk;
 
 	/* No need to allocate LL here, as this is only a chunk. */
-	skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+	skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp);
 	if (!skb)
 		goto nodata;
 
 	/* Make room for the chunk header.  */
-	chunk_hdr = skb_put(skb, sizeof(sctp_chunkhdr_t));
+	chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr));
 	chunk_hdr->type	  = type;
 	chunk_hdr->flags  = flags;
-	chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
+	chunk_hdr->length = htons(sizeof(*chunk_hdr));
 
 	sk = asoc ? asoc->base.sk : NULL;
 	retval = sctp_chunkify(skb, asoc, sk, gfp);
@@ -1402,7 +1402,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 	}
 
 	retval->chunk_hdr = chunk_hdr;
-	retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr);
+	retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr);
 
 	/* Determine if the chunk needs to be authenticated */
 	if (sctp_auth_send_cid(type, asoc))
@@ -1710,7 +1710,7 @@ struct sctp_association *sctp_unpack_cookie(
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
 	 */
-	headersize = sizeof(sctp_chunkhdr_t) +
+	headersize = sizeof(struct sctp_chunkhdr) +
 		     (sizeof(struct sctp_signed_cookie) -
 		      sizeof(struct sctp_cookie));
 	bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
@@ -3218,7 +3218,8 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 	int	chunk_len;
 	__u32	serial;
 
-	chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+	chunk_len = ntohs(asconf->chunk_hdr->length) -
+		    sizeof(struct sctp_chunkhdr);
 	hdr = (sctp_addiphdr_t *)asconf->skb->data;
 	serial = ntohl(hdr->serial);
 
@@ -3364,7 +3365,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 		err_code = SCTP_ERROR_REQ_REFUSED;
 
 	asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) -
-			     sizeof(sctp_chunkhdr_t);
+			 sizeof(struct sctp_chunkhdr);
 
 	/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
 	 * the first asconf_ack parameter.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index dfe1fcb520ba..b255339f22a3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -955,9 +955,10 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
 		switch (err_hdr->cause) {
 		case SCTP_ERROR_UNKNOWN_CHUNK:
 		{
-			sctp_chunkhdr_t *unk_chunk_hdr;
+			struct sctp_chunkhdr *unk_chunk_hdr;
 
-			unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable;
+			unk_chunk_hdr = (struct sctp_chunkhdr *)
+							err_hdr->variable;
 			switch (unk_chunk_hdr->type) {
 			/* ADDIP 4.1 A9) If the peer responds to an ASCONF with
 			 * an ERROR chunk reporting that it did not recognized
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8feff96a5bef..2b7c07f19b08 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -235,7 +235,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 		return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -368,9 +368,9 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
-					sizeof(sctp_chunkhdr_t),
+					sizeof(struct sctp_chunkhdr),
 					ntohs(err_chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t));
+					sizeof(struct sctp_chunkhdr));
 
 			sctp_chunk_free(err_chunk);
 
@@ -417,7 +417,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	len = 0;
 	if (err_chunk)
 		len = ntohs(err_chunk->chunk_hdr->length) -
-			sizeof(sctp_chunkhdr_t);
+		      sizeof(struct sctp_chunkhdr);
 
 	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
 	if (!repl)
@@ -437,7 +437,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 		 */
 		unk_param = (sctp_unrecognized_param_t *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
-			    sizeof(sctp_chunkhdr_t));
+			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
 		 * parameter type.
 		 */
@@ -540,9 +540,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
-					sizeof(sctp_chunkhdr_t),
+					sizeof(struct sctp_chunkhdr),
 					ntohs(err_chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t));
+					sizeof(struct sctp_chunkhdr));
 
 			sctp_chunk_free(err_chunk);
 
@@ -673,7 +673,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	 * chunk header.  More detailed verification is done
 	 * in sctp_unpack_cookie().
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the endpoint is not listening or if the number of associations
@@ -691,7 +691,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	chunk->subh.cookie_hdr =
 		(struct sctp_signed_cookie *)chunk->skb->data;
 	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-					 sizeof(sctp_chunkhdr_t)))
+					 sizeof(struct sctp_chunkhdr)))
 		goto nomem;
 
 	/* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
@@ -770,9 +770,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 		auth.skb = chunk->auth_chunk;
 		auth.asoc = chunk->asoc;
 		auth.sctp_hdr = chunk->sctp_hdr;
-		auth.chunk_hdr = skb_push(chunk->auth_chunk,
-					  sizeof(sctp_chunkhdr_t));
-		skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
+		auth.chunk_hdr = (struct sctp_chunkhdr *)
+					skb_push(chunk->auth_chunk,
+						 sizeof(struct sctp_chunkhdr));
+		skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
 		auth.transport = chunk->transport;
 
 		ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
@@ -886,7 +887,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
 	/* Verify that the chunk length for the COOKIE-ACK is OK.
 	 * If we don't do this, any bundled chunks may be junked.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -1099,7 +1100,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 	 */
 	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
 	param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr;
-	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
 
 	if (ntohs(param_hdr->length) > paylen)
 		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
@@ -1164,7 +1165,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
 					    sizeof(sctp_sender_hb_info_t)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
@@ -1469,9 +1470,9 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
-					sizeof(sctp_chunkhdr_t),
+					sizeof(struct sctp_chunkhdr),
 					ntohs(err_chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t));
+					sizeof(struct sctp_chunkhdr));
 
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
@@ -1535,7 +1536,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	len = 0;
 	if (err_chunk) {
 		len = ntohs(err_chunk->chunk_hdr->length) -
-			sizeof(sctp_chunkhdr_t);
+		      sizeof(struct sctp_chunkhdr);
 	}
 
 	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
@@ -1556,7 +1557,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		 */
 		unk_param = (sctp_unrecognized_param_t *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
-			    sizeof(sctp_chunkhdr_t));
+			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
 		 * parameter type.
 		 */
@@ -2044,7 +2045,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 	 * enough for the chunk header.  Cookie length verification is
 	 * done later.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -2053,7 +2054,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 	 */
 	chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
 	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t)))
+					sizeof(struct sctp_chunkhdr)))
 		goto nomem;
 
 	/* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
@@ -2806,7 +2807,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
 	struct sctp_chunk *reply;
 
 	/* Make sure that the chunk has a valid length */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3358,7 +3359,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* 10.2 H) SHUTDOWN COMPLETE notification
@@ -3435,7 +3436,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
-	sctp_chunkhdr_t *ch;
+	struct sctp_chunkhdr *ch;
 	sctp_errhdr_t *err;
 	__u8 *ch_end;
 	int ootb_shut_ack = 0;
@@ -3443,10 +3444,10 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 
 	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
-	ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
+	ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
 	do {
 		/* Report violation if the chunk is less then minimal */
-		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+		if (ntohs(ch->length) < sizeof(*ch))
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3487,7 +3488,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 			}
 		}
 
-		ch = (sctp_chunkhdr_t *) ch_end;
+		ch = (struct sctp_chunkhdr *)ch_end;
 	} while (ch_end < skb_tail_pointer(skb));
 
 	if (ootb_shut_ack)
@@ -3560,7 +3561,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 	/* If the chunk length is invalid, we don't want to process
 	 * the reset of the packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* We need to discard the rest of the packet to prevent
@@ -3591,7 +3592,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 	struct sctp_chunk *chunk = arg;
 
 	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -4256,7 +4257,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 {
 	struct sctp_chunk *unk_chunk = arg;
 	struct sctp_chunk *err_chunk;
-	sctp_chunkhdr_t *hdr;
+	struct sctp_chunkhdr *hdr;
 
 	pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk);
 
@@ -4267,7 +4268,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 	 * Since we don't know the chunk type, we use a general
 	 * chunkhdr structure to make a comparison.
 	 */
-	if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(unk_chunk, sizeof(*hdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -4340,7 +4341,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 	 * Since we don't know the chunk type, we use a general
 	 * chunkhdr structure to make a comparison.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -4405,7 +4406,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
 	struct sctp_chunk *chunk = arg;
 
 	/* Make sure that the chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 17854fb0e512..5f86c5062a98 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -158,7 +158,7 @@ struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
 		/* Trim the buffer to the right length.  */
 		skb_trim(skb, sizeof(struct sctp_assoc_change) +
 			 ntohs(chunk->chunk_hdr->length) -
-			 sizeof(sctp_chunkhdr_t));
+			 sizeof(struct sctp_chunkhdr));
 	} else {
 		event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
 				  MSG_NOTIFICATION, gfp);
-- 
cgit v1.2.3


From 6d85e68f4cde48f8c2fac6d9c00ca6988cf6e327 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:14 +0800
Subject: sctp: remove the typedef sctp_cid_t

This patch is to remove the typedef sctp_cid_t, and replace
with struct sctp_cid in the places where it's using this
typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h         | 4 ++--
 include/net/sctp/auth.h      | 6 ++++--
 include/net/sctp/constants.h | 4 ++--
 include/net/sctp/structs.h   | 2 +-
 net/sctp/auth.c              | 6 +++---
 net/sctp/sm_make_chunk.c     | 4 ++--
 net/sctp/sm_statetable.c     | 4 ++--
 7 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 9ad5b9e8df78..6d7b8846c607 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -82,7 +82,7 @@ struct sctp_chunkhdr {
  * Value field. It takes a value from 0 to 254. The value of 255 is
  * reserved for future use as an extension field.
  */
-typedef enum {
+enum sctp_cid {
 	SCTP_CID_DATA			= 0,
         SCTP_CID_INIT			= 1,
         SCTP_CID_INIT_ACK		= 2,
@@ -109,7 +109,7 @@ typedef enum {
 	SCTP_CID_ASCONF			= 0xC1,
 	SCTP_CID_ASCONF_ACK		= 0x80,
 	SCTP_CID_RECONF			= 0x82,
-} sctp_cid_t; /* enum */
+}; /* enum */
 
 
 /* Section 3.2
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index 9b9fb122b31f..171244bd856f 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -97,8 +97,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
 				     struct sctp_hmac_algo_param *hmacs);
 int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
 				    __be16 hmac_id);
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
+int sctp_auth_send_cid(enum sctp_cid chunk,
+		       const struct sctp_association *asoc);
+int sctp_auth_recv_cid(enum sctp_cid chunk,
+		       const struct sctp_association *asoc);
 void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
 			    struct sk_buff *skb,
 			    struct sctp_auth_chunk *auth, gfp_t gfp);
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index b07a745ab69f..02e867bc4b43 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -130,7 +130,7 @@ typedef enum {
  */
 
 typedef union {
-	sctp_cid_t chunk;
+	enum sctp_cid chunk;
 	sctp_event_timeout_t timeout;
 	sctp_event_other_t other;
 	sctp_event_primitive_t primitive;
@@ -141,7 +141,7 @@ static inline sctp_subtype_t	\
 SCTP_ST_## _name (_type _arg)		\
 { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
 
-SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		sctp_cid_t,		chunk)
+SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
 SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	sctp_event_timeout_t,	timeout)
 SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		sctp_event_other_t,	other)
 SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e26763bfabd6..9e9605ecb5c9 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1297,7 +1297,7 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr,
 
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
 		     const struct sctp_association *asoc,
-		     sctp_cid_t, sctp_init_chunk_t *peer_init,
+		     enum sctp_cid cid, sctp_init_chunk_t *peer_init,
 		     struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
 int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer,
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index f99d4855d3de..7171dd3d6e40 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -632,7 +632,7 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
 
 
 /* Check to see if the given chunk is supposed to be authenticated */
-static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
+static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param)
 {
 	unsigned short len;
 	int found = 0;
@@ -668,7 +668,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
 }
 
 /* Check if peer requested that this chunk is authenticated */
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
 {
 	if (!asoc)
 		return 0;
@@ -680,7 +680,7 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
 }
 
 /* Check if we requested that peer authenticate this chunk. */
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
 {
 	if (!asoc)
 		return 0;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 7d4c5a870f0e..78c3f214d608 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2123,7 +2123,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					union sctp_params param,
-					sctp_cid_t cid,
+					enum sctp_cid cid,
 					struct sctp_chunk *chunk,
 					struct sctp_chunk **err_chunk)
 {
@@ -2240,7 +2240,7 @@ fallthrough:
 
 /* Verify the INIT packet before we process it.  */
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
-		     const struct sctp_association *asoc, sctp_cid_t cid,
+		     const struct sctp_association *asoc, enum sctp_cid cid,
 		     sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
 		     struct sctp_chunk **errp)
 {
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 419b18ebb056..3e958c1c4b95 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -53,7 +53,7 @@ static const sctp_sm_table_entry_t
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
 static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    sctp_cid_t cid,
+							    enum sctp_cid cid,
 							    sctp_state_t state);
 
 
@@ -968,7 +968,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 };
 
 static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    sctp_cid_t cid,
+							    enum sctp_cid cid,
 							    sctp_state_t state)
 {
 	if (state > SCTP_STATE_MAX)
-- 
cgit v1.2.3


From ec431c2cd55c4122e729b7dc45956653a038614b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:15 +0800
Subject: sctp: remove the typedef sctp_cid_action_t

Remove this typedef, there is even no places using it.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 6d7b8846c607..ffdccb4da7e5 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -117,12 +117,12 @@ enum sctp_cid {
  *  the action that must be taken if the processing endpoint does not
  *  recognize the Chunk Type.
  */
-typedef enum {
+enum {
 	SCTP_CID_ACTION_DISCARD     = 0x00,
 	SCTP_CID_ACTION_DISCARD_ERR = 0x40,
 	SCTP_CID_ACTION_SKIP        = 0x80,
 	SCTP_CID_ACTION_SKIP_ERR    = 0xc0,
-} sctp_cid_action_t;
+};
 
 enum { SCTP_CID_ACTION_MASK = 0xc0, };
 
-- 
cgit v1.2.3


From 3c9187049214127d3401926b033d05eb75d69c39 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:16 +0800
Subject: sctp: remove the typedef sctp_paramhdr_t

This patch is to remove the typedef sctp_paramhdr_t, and replace
with struct sctp_paramhdr in the places where it's using this
typedef.

It is also to fix some indents and  use sizeof(variable) instead
of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       | 44 +++++++++++++++++++++----------------------
 include/net/sctp/sctp.h    |  2 +-
 include/net/sctp/structs.h |  5 +++--
 net/sctp/associola.c       |  6 +++---
 net/sctp/auth.c            | 22 +++++++++++++---------
 net/sctp/endpointola.c     |  7 ++++---
 net/sctp/sm_make_chunk.c   | 47 +++++++++++++++++++++++-----------------------
 net/sctp/sm_statefuns.c    |  6 +++---
 net/sctp/socket.c          |  7 ++++---
 net/sctp/stream.c          |  4 ++--
 10 files changed, 79 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index ffdccb4da7e5..142bb6aa88eb 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -162,10 +162,10 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
  * Section 3.2.1 Optional/Variable-length Parmaeter Format.
  */
 
-typedef struct sctp_paramhdr {
+struct sctp_paramhdr {
 	__be16 type;
 	__be16 length;
-} sctp_paramhdr_t;
+};
 
 typedef enum {
 
@@ -274,37 +274,37 @@ typedef struct sctp_init_chunk {
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
 typedef struct sctp_ipv4addr_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	struct in_addr  addr;
 } sctp_ipv4addr_param_t;
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
 typedef struct sctp_ipv6addr_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	struct in6_addr addr;
 } sctp_ipv6addr_param_t;
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
 typedef struct sctp_cookie_preserve_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__be32          lifespan_increment;
 } sctp_cookie_preserve_param_t;
 
 /* Section 3.3.2.1 Host Name Address (11) */
 typedef struct sctp_hostname_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	uint8_t hostname[0];
 } sctp_hostname_param_t;
 
 /* Section 3.3.2.1 Supported Address Types (12) */
 typedef struct sctp_supported_addrs_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__be16 types[0];
 } sctp_supported_addrs_param_t;
 
 /* Appendix A. ECN Capable (32768) */
 typedef struct sctp_ecn_capable_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 } sctp_ecn_capable_param_t;
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
@@ -321,19 +321,19 @@ typedef struct sctp_supported_ext_param {
 
 /* AUTH Section 3.1 Random */
 typedef struct sctp_random_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u8 random_val[0];
 } sctp_random_param_t;
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
 } sctp_chunks_param_t;
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__be16 hmac_ids[0];
 } sctp_hmac_algo_param_t;
 
@@ -345,14 +345,14 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t;
 
 /* Section 3.3.3.1 State Cookie (7) */
 typedef struct sctp_cookie_param {
-	sctp_paramhdr_t p;
+	struct sctp_paramhdr p;
 	__u8 body[0];
 } sctp_cookie_param_t;
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
 typedef struct sctp_unrecognized_param {
-	sctp_paramhdr_t param_hdr;
-	sctp_paramhdr_t unrecognized;
+	struct sctp_paramhdr param_hdr;
+	struct sctp_paramhdr unrecognized;
 } sctp_unrecognized_param_t;
 
 
@@ -399,7 +399,7 @@ typedef struct sctp_sack_chunk {
  */
 
 typedef struct sctp_heartbeathdr {
-	sctp_paramhdr_t info;
+	struct sctp_paramhdr info;
 } sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
@@ -639,7 +639,7 @@ struct sctp_fwdtsn_chunk {
  *	report status of ASCONF processing.
  */
 typedef struct sctp_addip_param {
-	sctp_paramhdr_t	param_hdr;
+	struct sctp_paramhdr	param_hdr;
 	__be32		crr_id;
 } sctp_addip_param_t;
 
@@ -724,7 +724,7 @@ struct sctp_reconf_chunk {
 };
 
 struct sctp_strreset_outreq {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 	__u32 response_seq;
 	__u32 send_reset_at_tsn;
@@ -732,18 +732,18 @@ struct sctp_strreset_outreq {
 };
 
 struct sctp_strreset_inreq {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 	__u16 list_of_streams[0];
 };
 
 struct sctp_strreset_tsnreq {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 };
 
 struct sctp_strreset_addstrm {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 	__u16 number_of_streams;
 	__u16 reserved;
@@ -760,13 +760,13 @@ enum {
 };
 
 struct sctp_strreset_resp {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 response_seq;
 	__u32 result;
 };
 
 struct sctp_strreset_resptsn {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 response_seq;
 	__u32 result;
 	__u32 senders_next_tsn;
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d756bd095683..a9519a06a23b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 #define _sctp_walk_params(pos, chunk, end, member)\
 for (pos.v = chunk->member;\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
-     ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+     ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
      pos.v += SCTP_PAD4(ntohs(pos.p->length)))
 
 #define sctp_walk_errors(err, chunk_hdr)\
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 9e9605ecb5c9..2393d2ee95c1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -310,9 +310,10 @@ struct sctp_cookie {
 
 	__u32 adaptation_ind;
 
-	__u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH];
+	__u8 auth_random[sizeof(struct sctp_paramhdr) +
+			 SCTP_AUTH_RANDOM_LENGTH];
 	__u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2];
-	__u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS];
+	__u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS];
 
 	/* This is a shim for my peer's INIT packet, followed by
 	 * a copy of the raw address list of the association.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 757be416f778..fa4f530ab7e1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
-	sctp_paramhdr_t *p;
+	struct sctp_paramhdr *p;
 	int i;
 
 	/* Retrieve the SCTP per socket area.  */
@@ -284,9 +284,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 			ntohs(ep->auth_chunk_list->param_hdr.length));
 
 	/* Get the AUTH random number for this association */
-	p = (sctp_paramhdr_t *)asoc->c.auth_random;
+	p = (struct sctp_paramhdr *)asoc->c.auth_random;
 	p->type = SCTP_PARAM_RANDOM;
-	p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH);
+	p->length = htons(sizeof(*p) + SCTP_AUTH_RANDOM_LENGTH);
 	get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH);
 
 	return asoc;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 7171dd3d6e40..8ffa5985cd6e 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -538,7 +538,8 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
 	if (!hmacs)
 		return NULL;
 
-	n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+	n_elt = (ntohs(hmacs->param_hdr.length) -
+		 sizeof(struct sctp_paramhdr)) >> 1;
 	for (i = 0; i < n_elt; i++) {
 		id = ntohs(hmacs->hmac_ids[i]);
 
@@ -589,7 +590,8 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
 		return 0;
 
 	hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs;
-	n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+	n_elt = (ntohs(hmacs->param_hdr.length) -
+		 sizeof(struct sctp_paramhdr)) >> 1;
 
 	return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id);
 }
@@ -612,8 +614,8 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
 	if (asoc->default_hmac_id)
 		return;
 
-	n_params = (ntohs(hmacs->param_hdr.length)
-				- sizeof(sctp_paramhdr_t)) >> 1;
+	n_params = (ntohs(hmacs->param_hdr.length) -
+		    sizeof(struct sctp_paramhdr)) >> 1;
 	ep = asoc->ep;
 	for (i = 0; i < n_params; i++) {
 		id = ntohs(hmacs->hmac_ids[i]);
@@ -641,7 +643,7 @@ static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param)
 	if (!param || param->param_hdr.length == 0)
 		return 0;
 
-	len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr);
 
 	/* SCTP-AUTH, Section 3.2
 	 *    The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH
@@ -775,7 +777,7 @@ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id)
 
 	/* Check if we can add this chunk to the array */
 	param_len = ntohs(p->param_hdr.length);
-	nchunks = param_len - sizeof(sctp_paramhdr_t);
+	nchunks = param_len - sizeof(struct sctp_paramhdr);
 	if (nchunks == SCTP_NUM_CHUNK_TYPES)
 		return -EINVAL;
 
@@ -812,9 +814,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 		return -EINVAL;
 
 	for (i = 0; i < hmacs->shmac_num_idents; i++)
-		ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
-	ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
-				hmacs->shmac_num_idents * sizeof(__u16));
+		ep->auth_hmacs_list->hmac_ids[i] =
+				htons(hmacs->shmac_idents[i]);
+	ep->auth_hmacs_list->param_hdr.length =
+			htons(sizeof(struct sctp_paramhdr) +
+			hmacs->shmac_num_idents * sizeof(__u16));
 	return 0;
 }
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 7772ca40ddaf..efbc31877804 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -90,12 +90,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		 */
 		auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
 		auth_hmacs->param_hdr.length =
-					htons(sizeof(sctp_paramhdr_t) + 2);
+					htons(sizeof(struct sctp_paramhdr) + 2);
 		auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
 
 		/* Initialize the CHUNKS parameter */
 		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
-		auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t));
+		auth_chunks->param_hdr.length =
+					htons(sizeof(struct sctp_paramhdr));
 
 		/* If the Add-IP functionality is enabled, we must
 		 * authenticate, ASCONF and ASCONF-ACK chunks
@@ -104,7 +105,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
 			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
 			auth_chunks->param_hdr.length =
-					htons(sizeof(sctp_paramhdr_t) + 2);
+					htons(sizeof(struct sctp_paramhdr) + 2);
 		}
 	}
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 78c3f214d608..9f9d40c9a32a 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -229,7 +229,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
 	__u8 extensions[3];
-	sctp_paramhdr_t *auth_chunks = NULL,
+	struct sctp_paramhdr *auth_chunks = NULL,
 			*auth_hmacs = NULL;
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -286,14 +286,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 		chunksize += sizeof(asoc->c.auth_random);
 
 		/* Add HMACS parameter length if any were defined */
-		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+		auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
 			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		/* Add CHUNKS parameter length */
-		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+		auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
 			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
@@ -397,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
 	__u8 extensions[3];
-	sctp_paramhdr_t *auth_chunks = NULL,
+	struct sctp_paramhdr *auth_chunks = NULL,
 			*auth_hmacs = NULL,
 			*auth_random = NULL;
 
@@ -448,16 +448,16 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 		chunksize += sizeof(aiparam);
 
 	if (asoc->peer.auth_capable) {
-		auth_random = (sctp_paramhdr_t *)asoc->c.auth_random;
+		auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
 		chunksize += ntohs(auth_random->length);
 
-		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+		auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
 			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
-		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+		auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
 			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
@@ -1085,18 +1085,18 @@ struct sctp_chunk *sctp_make_abort_violation(
 	struct sctp_chunk  *retval;
 	struct sctp_paramhdr phdr;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen
-					+ sizeof(sctp_paramhdr_t));
+	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
+					      sizeof(phdr));
 	if (!retval)
 		goto end;
 
-	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen
-					+ sizeof(sctp_paramhdr_t));
+	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen +
+							    sizeof(phdr));
 
 	phdr.type = htons(chunk->chunk_hdr->type);
 	phdr.length = chunk->chunk_hdr->length;
 	sctp_addto_chunk(retval, paylen, payload);
-	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr);
+	sctp_addto_param(retval, sizeof(phdr), &phdr);
 
 end:
 	return retval;
@@ -1110,16 +1110,16 @@ struct sctp_chunk *sctp_make_violation_paramlen(
 	struct sctp_chunk *retval;
 	static const char error[] = "The following parameter had invalid length:";
 	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
-				sizeof(sctp_paramhdr_t);
+			     sizeof(*param);
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
 		goto nodata;
 
 	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
-			sizeof(error) + sizeof(sctp_paramhdr_t));
+			sizeof(error) + sizeof(*param));
 	sctp_addto_chunk(retval, sizeof(error), error);
-	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+	sctp_addto_param(retval, sizeof(*param), param);
 
 nodata:
 	return retval;
@@ -1614,7 +1614,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
 	 */
-	headersize = sizeof(sctp_paramhdr_t) +
+	headersize = sizeof(struct sctp_paramhdr) +
 		     (sizeof(struct sctp_signed_cookie) -
 		      sizeof(struct sctp_cookie));
 	bodysize = sizeof(struct sctp_cookie)
@@ -1975,7 +1975,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 
 static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 {
-	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 	int have_auth = 0;
 	int have_asconf = 0;
 	int i;
@@ -2010,7 +2010,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 				    union sctp_params param)
 {
 	struct net *net = sock_net(asoc->base.sk);
-	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2180,7 +2180,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		 * cause 'Protocol Violation'.
 		 */
 		if (SCTP_AUTH_RANDOM_LENGTH !=
-			ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) {
+			ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
 			sctp_process_inv_paramlength(asoc, param.p,
 							chunk, err_chunk);
 			retval = SCTP_IERROR_ABORT;
@@ -2208,7 +2208,8 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 			goto fallthrough;
 
 		hmacs = (struct sctp_hmac_algo_param *)param.p;
-		n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1;
+		n_elt = (ntohs(param.p->length) -
+			 sizeof(struct sctp_paramhdr)) >> 1;
 
 		/* SCTP-AUTH: Section 6.1
 		 * The HMAC algorithm based on SHA-1 MUST be supported and
@@ -2565,7 +2566,7 @@ do_addr_param:
 			asoc->peer.ipv4_address = 1;
 
 		/* Cycle through address types; avoid divide by 0. */
-		sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+		sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 		if (sat)
 			sat /= sizeof(__u16);
 
@@ -2592,7 +2593,7 @@ do_addr_param:
 
 	case SCTP_PARAM_STATE_COOKIE:
 		asoc->peer.cookie_len =
-			ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+			ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 		asoc->peer.cookie = param.cookie->body;
 		break;
 
@@ -3176,7 +3177,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 				return false;
 			length = ntohs(param.addip->param_hdr.length);
 			if (length < sizeof(sctp_addip_param_t) +
-				     sizeof(sctp_paramhdr_t))
+				     sizeof(**errp))
 				return false;
 			break;
 		case SCTP_PARAM_SUCCESS_REPORT:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 2b7c07f19b08..0a01c6858b0d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1081,7 +1081,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
-	sctp_paramhdr_t *param_hdr;
+	struct sctp_paramhdr *param_hdr;
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 	size_t paylen = 0;
@@ -1098,8 +1098,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 	 * respond with a HEARTBEAT ACK that contains the Heartbeat
 	 * Information field copied from the received HEARTBEAT chunk.
 	 */
-	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
-	param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr;
+	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+	param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
 	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
 
 	if (ntohs(param_hdr->length) > paylen)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 15401d09efc4..0af103f85c79 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6033,7 +6033,8 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
 		return -EACCES;
 
 	hmacs = ep->auth_hmacs_list;
-	data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	data_len = ntohs(hmacs->param_hdr.length) -
+		   sizeof(struct sctp_paramhdr);
 
 	if (len < sizeof(struct sctp_hmacalgo) + data_len)
 		return -EINVAL;
@@ -6117,7 +6118,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 		goto num;
 
 	/* See if the user provided enough room for all the data */
-	num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
 	if (len < num_chunks)
 		return -EINVAL;
 
@@ -6165,7 +6166,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	if (!ch)
 		goto num;
 
-	num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
 	if (len < sizeof(struct sctp_authchunks) + num_chunks)
 		return -EINVAL;
 
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 82e6d40052a8..63ea15503714 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -304,7 +304,7 @@ out:
 	return retval;
 }
 
-static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
+static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
 			struct sctp_association *asoc, __u32 resp_seq,
 			__be16 type)
 {
@@ -749,7 +749,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 	struct sctp_strreset_resp *resp = param.v;
 	struct sctp_transport *t;
 	__u16 i, nums, flags = 0;
-	sctp_paramhdr_t *req;
+	struct sctp_paramhdr *req;
 	__u32 result;
 
 	req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
-- 
cgit v1.2.3


From 34b4e29b383559e3848eea30af66e94aa72af88c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:17 +0800
Subject: sctp: remove the typedef sctp_param_t

This patch is to remove the typedef sctp_param_t, and replace with
struct sctp_paramhdr in the places where it's using this typedef.

It is also to remove the useless declaration sctp_addip_addr_config
and fix the lack of params for some other functions' declaration.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h     |  4 ++--
 include/net/sctp/sm.h    | 14 ++++++--------
 net/sctp/sm_make_chunk.c |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 142bb6aa88eb..5eecc0f14650 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -167,7 +167,7 @@ struct sctp_paramhdr {
 	__be16 length;
 };
 
-typedef enum {
+enum sctp_param {
 
 	/* RFC 2960 Section 3.3.5 */
 	SCTP_PARAM_HEARTBEAT_INFO		= cpu_to_be16(1),
@@ -207,7 +207,7 @@ typedef enum {
 	SCTP_PARAM_RESET_RESPONSE		= cpu_to_be16(0x0010),
 	SCTP_PARAM_RESET_ADD_OUT_STREAMS	= cpu_to_be16(0x0011),
 	SCTP_PARAM_RESET_ADD_IN_STREAMS		= cpu_to_be16(0x0012),
-} sctp_param_t; /* enum */
+}; /* enum */
 
 
 /* RFC 2960 Section 3.2.1
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 47113f2c4b0a..245eb22230a2 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer);
 void sctp_generate_reconf_event(unsigned long peer);
 void sctp_generate_proto_unreach_event(unsigned long peer);
 
-void sctp_ootb_pkt_free(struct sctp_packet *);
+void sctp_ootb_pkt_free(struct sctp_packet *packet);
 
-struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *,
-				       const struct sctp_association *,
-				       struct sctp_chunk *,
+struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       struct sctp_chunk *chunk,
 				       gfp_t gfp, int *err,
 				       struct sctp_chunk **err_chk_p);
-int sctp_addip_addr_config(struct sctp_association *, sctp_param_t,
-			   struct sockaddr_storage*, int);
 
 /* 3rd level prototypes */
-__u32 sctp_generate_tag(const struct sctp_endpoint *);
-__u32 sctp_generate_tsn(const struct sctp_endpoint *);
+__u32 sctp_generate_tag(const struct sctp_endpoint *ep);
+__u32 sctp_generate_tsn(const struct sctp_endpoint *ep);
 
 /* Extern declarations for major data structures.  */
 extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES];
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 9f9d40c9a32a..3ed2108d1989 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1882,7 +1882,7 @@ struct __sctp_missing {
  * Report a missing mandatory parameter.
  */
 static int sctp_process_missing_param(const struct sctp_association *asoc,
-				      sctp_param_t paramtype,
+				      enum sctp_param paramtype,
 				      struct sctp_chunk *chunk,
 				      struct sctp_chunk **errp)
 {
-- 
cgit v1.2.3


From 0664ed4378907c936fbee811efe95650d32baf34 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:18 +0800
Subject: sctp: remove the typedef sctp_param_action_t

Remove this typedef, there is even no places using it.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 5eecc0f14650..d5c0ddadb68b 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -216,12 +216,12 @@ enum sctp_param {
  *  not recognize the Parameter Type.
  *
  */
-typedef enum {
+enum {
 	SCTP_PARAM_ACTION_DISCARD     = cpu_to_be16(0x0000),
 	SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000),
 	SCTP_PARAM_ACTION_SKIP        = cpu_to_be16(0x8000),
 	SCTP_PARAM_ACTION_SKIP_ERR    = cpu_to_be16(0xc000),
-} sctp_param_action_t;
+};
 
 enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), };
 
-- 
cgit v1.2.3


From 3583df1a3d7328b42cf116db3fb56b0368fab12b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:19 +0800
Subject: sctp: remove the typedef sctp_datahdr_t

This patch is to remove the typedef sctp_datahdr_t, and replace with
struct sctp_datahdr in the places where it's using this typedef.

It is also to use izeof(variable) instead of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h    |  6 +++---
 net/sctp/sm_statefuns.c | 13 ++++++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index d5c0ddadb68b..55d84c143122 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -227,17 +227,17 @@ enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), };
 
 /* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */
 
-typedef struct sctp_datahdr {
+struct sctp_datahdr {
 	__be32 tsn;
 	__be16 stream;
 	__be16 ssn;
 	__be32 ppid;
 	__u8  payload[0];
-} sctp_datahdr_t;
+};
 
 typedef struct sctp_data_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_datahdr_t  data_hdr;
+	struct sctp_datahdr data_hdr;
 } sctp_data_chunk_t;
 
 /* DATA Chuck Specific Flags */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 0a01c6858b0d..1ba9a9b04466 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3010,7 +3010,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 		return SCTP_DISPOSITION_ABORT;
 	case SCTP_IERROR_PROTO_VIOLATION:
 		return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
-			(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+					       (u8 *)chunk->subh.data_hdr,
+					       sizeof(struct sctp_datahdr));
 	default:
 		BUG();
 	}
@@ -3124,7 +3125,8 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 		return SCTP_DISPOSITION_ABORT;
 	case SCTP_IERROR_PROTO_VIOLATION:
 		return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
-			(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+					       (u8 *)chunk->subh.data_hdr,
+					       sizeof(struct sctp_datahdr));
 	default:
 		BUG();
 	}
@@ -6197,7 +6199,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
 			 sctp_cmd_seq_t *commands)
 {
-	sctp_datahdr_t *data_hdr;
+	struct sctp_datahdr *data_hdr;
 	struct sctp_chunk *err;
 	size_t datalen;
 	sctp_verb_t deliver;
@@ -6210,8 +6212,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	u16 sid;
 	u8 ordered = 0;
 
-	data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
+	data_hdr = (struct sctp_datahdr *)chunk->skb->data;
+	chunk->subh.data_hdr = data_hdr;
+	skb_pull(chunk->skb, sizeof(*data_hdr));
 
 	tsn = ntohl(data_hdr->tsn);
 	pr_debug("%s: TSN 0x%x\n", __func__, tsn);
-- 
cgit v1.2.3


From 9f8d31471548d9b74609335f9a3c75c7b664c8b4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:20 +0800
Subject: sctp: remove the typedef sctp_data_chunk_t

This patch is to remove the typedef sctp_data_chunk_t, and replace
with struct sctp_data_chunk in the places where it's using this
typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h         | 4 ++--
 include/net/sctp/constants.h | 2 +-
 include/net/sctp/sm.h        | 2 +-
 net/sctp/output.c            | 4 ++--
 net/sctp/sm_statefuns.c      | 6 +++---
 net/sctp/ulpqueue.c          | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 55d84c143122..91c888f21b24 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -235,10 +235,10 @@ struct sctp_datahdr {
 	__u8  payload[0];
 };
 
-typedef struct sctp_data_chunk {
+struct sctp_data_chunk {
 	struct sctp_chunkhdr chunk_hdr;
 	struct sctp_datahdr data_hdr;
-} sctp_data_chunk_t;
+};
 
 /* DATA Chuck Specific Flags */
 enum {
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 02e867bc4b43..9b18044c551e 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
 /* Calculate the actual data size in a data chunk */
 #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
 		       		- (unsigned long)(c->chunk_hdr)\
-				- sizeof(sctp_data_chunk_t)))
+				- sizeof(struct sctp_data_chunk)))
 
 /* Internal error codes */
 typedef enum {
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 245eb22230a2..860f378333b5 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
 	__u16 size;
 
 	size = ntohs(chunk->chunk_hdr->length);
-	size -= sizeof(sctp_data_chunk_t);
+	size -= sizeof(struct sctp_data_chunk);
 
 	return size;
 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9bf9d84a96b7..9d8504985744 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -723,8 +723,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
 	/* Check whether this chunk and all the rest of pending data will fit
 	 * or delay in hopes of bundling a full sized packet.
 	 */
-	if (chunk->skb->len + q->out_qlen >
-		transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
+	if (chunk->skb->len + q->out_qlen > transport->pathmtu -
+		packet->overhead - sizeof(struct sctp_data_chunk) - 4)
 		/* Enough data queued to fill a packet */
 		return SCTP_XMIT_OK;
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 1ba9a9b04466..212fe7614d08 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2990,7 +2990,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3109,7 +3109,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -6262,7 +6262,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * Actually, allow a little bit of overflow (up to a MTU).
 	 */
 	datalen = ntohs(chunk->chunk_hdr->length);
-	datalen -= sizeof(sctp_data_chunk_t);
+	datalen -= sizeof(struct sctp_data_chunk);
 
 	deliver = SCTP_CMD_CHUNK_ULP;
 
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 25f7e4140566..0225d62a869f 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1090,7 +1090,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 
 	if (chunk) {
 		needed = ntohs(chunk->chunk_hdr->length);
-		needed -= sizeof(sctp_data_chunk_t);
+		needed -= sizeof(struct sctp_data_chunk);
 	} else
 		needed = SCTP_DEFAULT_MAXWINDOW;
 
-- 
cgit v1.2.3


From 4ae70c0845faba3096aa2be4b2ebfcc3ac590a67 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:21 +0800
Subject: sctp: remove the typedef sctp_inithdr_t

This patch is to remove the typedef sctp_inithdr_t, and replace
with struct sctp_inithdr in the places where it's using this
typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h                    |  6 +++---
 net/netfilter/nf_conntrack_proto_sctp.c |  4 ++--
 net/sctp/sm_make_chunk.c                |  4 ++--
 net/sctp/sm_statefuns.c                 | 12 ++++++------
 4 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 91c888f21b24..56241953e57e 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -257,18 +257,18 @@ enum { SCTP_DATA_FRAG_MASK = 0x03, };
  *  This chunk is used to initiate a SCTP association between two
  *  endpoints.
  */
-typedef struct sctp_inithdr {
+struct sctp_inithdr {
 	__be32 init_tag;
 	__be32 a_rwnd;
 	__be16 num_outbound_streams;
 	__be16 num_inbound_streams;
 	__be32 initial_tsn;
 	__u8  params[0];
-} sctp_inithdr_t;
+};
 
 typedef struct sctp_init_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_inithdr_t init_hdr;
+	struct sctp_inithdr init_hdr;
 } sctp_init_chunk_t;
 
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b841a8aeee7c..31c6c8ee9d5d 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -395,7 +395,7 @@ static int sctp_packet(struct nf_conn *ct,
 		/* If it is an INIT or an INIT ACK note down the vtag */
 		if (sch->type == SCTP_CID_INIT ||
 		    sch->type == SCTP_CID_INIT_ACK) {
-			sctp_inithdr_t _inithdr, *ih;
+			struct sctp_inithdr _inithdr, *ih;
 
 			ih = skb_header_pointer(skb, offset + sizeof(_sch),
 						sizeof(_inithdr), &_inithdr);
@@ -471,7 +471,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 		/* Copy the vtag into the state info */
 		if (sch->type == SCTP_CID_INIT) {
-			sctp_inithdr_t _inithdr, *ih;
+			struct sctp_inithdr _inithdr, *ih;
 			/* Sec 8.5.1 (A) */
 			if (sh->vtag)
 				return false;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 3ed2108d1989..8b9ca107fd0c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -217,7 +217,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 {
 	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_endpoint *ep = asoc->ep;
-	sctp_inithdr_t init;
+	struct sctp_inithdr init;
 	union sctp_params addrs;
 	size_t chunksize;
 	struct sctp_chunk *retval = NULL;
@@ -385,7 +385,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 				 const struct sctp_chunk *chunk,
 				 gfp_t gfp, int unkparam_len)
 {
-	sctp_inithdr_t initack;
+	struct sctp_inithdr initack;
 	struct sctp_chunk *retval;
 	union sctp_params addrs;
 	struct sctp_sock *sp;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 212fe7614d08..71b6e3f66b65 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -389,10 +389,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	}
 
 	/* Grab the INIT header.  */
-	chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data;
+	chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
 
 	/* Tag the variable length parameters.  */
-	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
 	new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC);
 	if (!new_asoc)
@@ -522,7 +522,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
-	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+	chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
@@ -576,7 +576,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 	/* Tag the variable length parameters.  Note that we never
 	 * convert the parameters in an INIT chunk.
 	 */
-	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
 	initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr;
 
@@ -1454,10 +1454,10 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
-	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+	chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
 
 	/* Tag the variable length parameters.  */
-	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
-- 
cgit v1.2.3


From 01a992bea523d9568cf56a02003c15c9dc40eb20 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 30 Jun 2017 11:52:22 +0800
Subject: sctp: remove the typedef sctp_init_chunk_t

This patch is to remove the typedef sctp_init_chunk_t, and replace
with struct sctp_init_chunk in the places where it's using this
typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       |  6 +++---
 include/net/sctp/command.h |  4 ++--
 include/net/sctp/structs.h |  4 ++--
 net/sctp/input.c           |  4 ++--
 net/sctp/sm_make_chunk.c   |  6 +++---
 net/sctp/sm_sideeffect.c   |  2 +-
 net/sctp/sm_statefuns.c    | 28 ++++++++++++++--------------
 7 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 56241953e57e..99e866487e2f 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -266,10 +266,10 @@ struct sctp_inithdr {
 	__u8  params[0];
 };
 
-typedef struct sctp_init_chunk {
+struct sctp_init_chunk {
 	struct sctp_chunkhdr chunk_hdr;
 	struct sctp_inithdr init_hdr;
-} sctp_init_chunk_t;
+};
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
@@ -341,7 +341,7 @@ typedef struct sctp_hmac_algo_param {
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
  *   association.
  */
-typedef sctp_init_chunk_t sctp_initack_chunk_t;
+typedef struct sctp_init_chunk sctp_initack_chunk_t;
 
 /* Section 3.3.3.1 State Cookie (7) */
 typedef struct sctp_cookie_param {
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index d4a20d00461c..d4679e7a5ed5 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -132,7 +132,7 @@ typedef union {
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
 	struct sctp_bind_addr *bp;
-	sctp_init_chunk_t *init;
+	struct sctp_init_chunk *init;
 	struct sctp_ulpevent *ulpevent;
 	struct sctp_packet *packet;
 	sctp_sackhdr_t *sackh;
@@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK,	struct sctp_chunk *, chunk)
 SCTP_ARG_CONSTRUCTOR(ASOC,	struct sctp_association *, asoc)
 SCTP_ARG_CONSTRUCTOR(TRANSPORT,	struct sctp_transport *, transport)
 SCTP_ARG_CONSTRUCTOR(BA,	struct sctp_bind_addr *, bp)
-SCTP_ARG_CONSTRUCTOR(PEER_INIT,	sctp_init_chunk_t *, init)
+SCTP_ARG_CONSTRUCTOR(PEER_INIT,	struct sctp_init_chunk *, init)
 SCTP_ARG_CONSTRUCTOR(ULPEVENT,  struct sctp_ulpevent *, ulpevent)
 SCTP_ARG_CONSTRUCTOR(PACKET,	struct sctp_packet *, packet)
 SCTP_ARG_CONSTRUCTOR(SACKH,	sctp_sackhdr_t *, sackh)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2393d2ee95c1..07c11fefa8c4 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1298,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr,
 
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
 		     const struct sctp_association *asoc,
-		     enum sctp_cid cid, sctp_init_chunk_t *peer_init,
+		     enum sctp_cid cid, struct sctp_init_chunk *peer_init,
 		     struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
 int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer,
-		      sctp_init_chunk_t *init, gfp_t gfp);
+		      struct sctp_init_chunk *init, gfp_t gfp);
 __u32 sctp_generate_tag(const struct sctp_endpoint *);
 __u32 sctp_generate_tsn(const struct sctp_endpoint *);
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a9994c4afc18..41eb2ec10460 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1051,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
 	union sctp_addr *paddr = &addr;
 	struct sctphdr *sh = sctp_hdr(skb);
 	union sctp_params params;
-	sctp_init_chunk_t *init;
+	struct sctp_init_chunk *init;
 	struct sctp_af *af;
 
 	/*
@@ -1070,7 +1070,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
 	/* Find the start of the TLVs and the end of the chunk.  This is
 	 * the region we search for address parameters.
 	 */
-	init = (sctp_init_chunk_t *)skb->data;
+	init = (struct sctp_init_chunk *)skb->data;
 
 	/* Walk the parameters looking for embedded addresses. */
 	sctp_walk_params(params, init, init_hdr.params) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8b9ca107fd0c..3af4dd024ec0 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2242,8 +2242,8 @@ fallthrough:
 /* Verify the INIT packet before we process it.  */
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
 		     const struct sctp_association *asoc, enum sctp_cid cid,
-		     sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
-		     struct sctp_chunk **errp)
+		     struct sctp_init_chunk *peer_init,
+		     struct sctp_chunk *chunk, struct sctp_chunk **errp)
 {
 	union sctp_params param;
 	bool has_cookie = false;
@@ -2307,7 +2307,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
  */
 int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer_addr,
-		      sctp_init_chunk_t *peer_init, gfp_t gfp)
+		      struct sctp_init_chunk *peer_init, gfp_t gfp)
 {
 	struct net *net = sock_net(asoc->base.sk);
 	union sctp_params param;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b255339f22a3..d6e5e9e0fd6d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -647,7 +647,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
 				 struct sctp_association *asoc,
 				 struct sctp_chunk *chunk,
-				 sctp_init_chunk_t *peer_init,
+				 struct sctp_init_chunk *peer_init,
 				 gfp_t gfp)
 {
 	int error;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 71b6e3f66b65..b2a74c3823ee 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -345,7 +345,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	 * error, but since we don't have an association, we'll
 	 * just discard the packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the INIT is coming toward a closing socket, we'll send back
@@ -360,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes if there is any.
@@ -405,7 +405,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 
 	/* The call, sctp_process_init(), can fail on memory allocation.  */
 	if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
-			       (sctp_init_chunk_t *)chunk->chunk_hdr,
+			       (struct sctp_init_chunk *)chunk->chunk_hdr,
 			       GFP_ATOMIC))
 		goto nomem_init;
 
@@ -503,7 +503,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 				       sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_init_chunk_t *initchunk;
+	struct sctp_init_chunk *initchunk;
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
 
@@ -527,7 +527,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 
 		sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
@@ -578,7 +578,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 	 */
 	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
-	initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr;
+	initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr;
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
 			SCTP_PEER_INIT(initchunk));
@@ -653,7 +653,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_association *new_asoc;
-	sctp_init_chunk_t *peer_init;
+	struct sctp_init_chunk *peer_init;
 	struct sctp_chunk *repl;
 	struct sctp_ulpevent *ev, *ai_ev = NULL;
 	int error = 0;
@@ -1450,7 +1450,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	 * In this case, we generate a protocol violation since we have
 	 * an association established.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
@@ -1462,7 +1462,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes if there is any.
@@ -1509,7 +1509,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	 * place (local tie-tag and per tie-tag) within the state cookie.
 	 */
 	if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
-			       (sctp_init_chunk_t *)chunk->chunk_hdr,
+			       (struct sctp_init_chunk *)chunk->chunk_hdr,
 			       GFP_ATOMIC))
 		goto nomem;
 
@@ -1730,7 +1730,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 					sctp_cmd_seq_t *commands,
 					struct sctp_association *new_asoc)
 {
-	sctp_init_chunk_t *peer_init;
+	struct sctp_init_chunk *peer_init;
 	struct sctp_ulpevent *ev;
 	struct sctp_chunk *repl;
 	struct sctp_chunk *err;
@@ -1845,7 +1845,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
 					sctp_cmd_seq_t *commands,
 					struct sctp_association *new_asoc)
 {
-	sctp_init_chunk_t *peer_init;
+	struct sctp_init_chunk *peer_init;
 	struct sctp_chunk *repl;
 
 	/* new_asoc is a brand-new association, so these are not yet
@@ -6124,9 +6124,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 		switch (chunk->chunk_hdr->type) {
 		case SCTP_CID_INIT:
 		{
-			sctp_init_chunk_t *init;
+			struct sctp_init_chunk *init;
 
-			init = (sctp_init_chunk_t *)chunk->chunk_hdr;
+			init = (struct sctp_init_chunk *)chunk->chunk_hdr;
 			vtag = ntohl(init->init_hdr.init_tag);
 			break;
 		}
-- 
cgit v1.2.3


From 40304b2a1567fecc321f640ee4239556dd0f3ee0 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 30 Jun 2017 20:02:40 -0700
Subject: bpf: BPF support for sock_ops

Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding
struct that allows BPF programs of this type to access some of the
socket's fields (such as IP addresses, ports, etc.). It uses the
existing bpf cgroups infrastructure so the programs can be attached per
cgroup with full inheritance support. The program will be called at
appropriate times to set relevant connections parameters such as buffer
sizes, SYN and SYN-ACK RTOs, etc., based on connection information such
as IP addresses, port numbers, etc.

Alghough there are already 3 mechanisms to set parameters (sysctls,
route metrics and setsockopts), this new mechanism provides some
distinct advantages. Unlike sysctls, it can set parameters per
connection. In contrast to route metrics, it can also use port numbers
and information provided by a user level program. In addition, it could
set parameters probabilistically for evaluation purposes (i.e. do
something different on 10% of the flows and compare results with the
other 90% of the flows). Also, in cases where IPv6 addresses contain
geographic information, the rules to make changes based on the distance
(or RTT) between the hosts are much easier than route metric rules and
can be global. Finally, unlike setsockopt, it oes not require
application changes and it can be updated easily at any time.

Although the bpf cgroup framework already contains a sock related
program type (BPF_PROG_TYPE_CGROUP_SOCK), I created the new type
(BPF_PROG_TYPE_SOCK_OPS) beccause the existing type expects to be called
only once during the connections's lifetime. In contrast, the new
program type will be called multiple times from different places in the
network stack code.  For example, before sending SYN and SYN-ACKs to set
an appropriate timeout, when the connection is established to set
congestion control, etc. As a result it has "op" field to specify the
type of operation requested.

The purpose of this new program type is to simplify setting connection
parameters, such as buffer sizes, TCP's SYN RTO, etc. For example, it is
easy to use facebook's internal IPv6 addresses to determine if both hosts
of a connection are in the same datacenter. Therefore, it is easy to
write a BPF program to choose a small SYN RTO value when both hosts are
in the same datacenter.

This patch only contains the framework to support the new BPF program
type, following patches add the functionality to set various connection
parameters.

This patch defines a new BPF program type: BPF_PROG_TYPE_SOCKET_OPS
and a new bpf syscall command to load a new program of this type:
BPF_PROG_LOAD_SOCKET_OPS.

Two new corresponding structs (one for the kernel one for the user/BPF
program):

/* kernel version */
struct bpf_sock_ops_kern {
        struct sock *sk;
        __u32  op;
        union {
                __u32 reply;
                __u32 replylong[4];
        };
};

/* user version
 * Some fields are in network byte order reflecting the sock struct
 * Use the bpf_ntohl helper macro in samples/bpf/bpf_endian.h to
 * convert them to host byte order.
 */
struct bpf_sock_ops {
        __u32 op;
        union {
                __u32 reply;
                __u32 replylong[4];
        };
        __u32 family;
        __u32 remote_ip4;     /* In network byte order */
        __u32 local_ip4;      /* In network byte order */
        __u32 remote_ip6[4];  /* In network byte order */
        __u32 local_ip6[4];   /* In network byte order */
        __u32 remote_port;    /* In network byte order */
        __u32 local_port;     /* In host byte horder */
};

Currently there are two types of ops. The first type expects the BPF
program to return a value which is then used by the caller (or a
negative value to indicate the operation is not supported). The second
type expects state changes to be done by the BPF program, for example
through a setsockopt BPF helper function, and they ignore the return
value.

The reply fields of the bpf_sockt_ops struct are there in case a bpf
program needs to return a value larger than an integer.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h |  18 +++++
 include/linux/bpf_types.h  |   1 +
 include/linux/filter.h     |   9 +++
 include/net/tcp.h          |  36 ++++++++++
 include/uapi/linux/bpf.h   |  30 ++++++++
 kernel/bpf/cgroup.c        |  37 ++++++++++
 kernel/bpf/syscall.c       |   5 ++
 net/core/filter.c          | 168 +++++++++++++++++++++++++++++++++++++++++++++
 samples/bpf/bpf_load.c     |  13 +++-
 9 files changed, 314 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c970a25d2a49..360c082e885c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -7,6 +7,7 @@
 struct sock;
 struct cgroup;
 struct sk_buff;
+struct bpf_sock_ops_kern;
 
 #ifdef CONFIG_CGROUP_BPF
 
@@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 int __cgroup_bpf_run_filter_sk(struct sock *sk,
 			       enum bpf_attach_type type);
 
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+				     struct bpf_sock_ops_kern *sock_ops,
+				     enum bpf_attach_type type);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled && (sock_ops)->sk) {	       \
+		typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);	       \
+		if (sk_fullsock(__sk))					       \
+			__ret = __cgroup_bpf_run_filter_sock_ops(__sk,	       \
+								 sock_ops,     \
+							 BPF_CGROUP_SOCK_OPS); \
+	}								       \
+	__ret;								       \
+})
 #else
 
 struct cgroup_bpf {};
@@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 
 #endif /* CONFIG_CGROUP_BPF */
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 03bf223f18be..3d137c33d664 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
 #endif
 #ifdef CONFIG_BPF_EVENTS
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1fa26dc562ce..738f8b14f025 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -898,4 +898,13 @@ static inline int bpf_tell_extensions(void)
 	return SKF_AD_MAX;
 }
 
+struct bpf_sock_ops_kern {
+	struct	sock *sk;
+	u32	op;
+	union {
+		u32 reply;
+		u32 replylong[4];
+	};
+};
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d0751b79d99c..e58500825006 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
 
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf-cgroup.h>
+
 extern struct inet_hashinfo tcp_hashinfo;
 
 extern struct percpu_counter tcp_orphan_count;
@@ -2021,4 +2025,36 @@ int tcp_set_ulp(struct sock *sk, const char *name);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
+/* Call BPF_SOCK_OPS program that returns an int. If the return value
+ * is < 0, then the BPF op failed (for example if the loaded BPF
+ * program does not support the chosen operation or there is no BPF
+ * program loaded).
+ */
+#ifdef CONFIG_BPF
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+	struct bpf_sock_ops_kern sock_ops;
+	int ret;
+
+	if (sk_fullsock(sk))
+		sock_owned_by_me(sk);
+
+	memset(&sock_ops, 0, sizeof(sock_ops));
+	sock_ops.sk = sk;
+	sock_ops.op = op;
+
+	ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+	if (ret == 0)
+		ret = sock_ops.reply;
+	else
+		ret = -1;
+	return ret;
+}
+#else
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+	return -EPERM;
+}
+#endif
+
 #endif	/* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f94b48b168dc..01cd485ccd4f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
+	BPF_PROG_TYPE_SOCK_OPS,
 };
 
 enum bpf_attach_type {
 	BPF_CGROUP_INET_INGRESS,
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
+	BPF_CGROUP_SOCK_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -720,4 +722,32 @@ struct bpf_map_info {
 	__u32 map_flags;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of this fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+	__u32 op;
+	union {
+		__u32 reply;
+		__u32 replylong[4];
+	};
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+	BPF_SOCK_OPS_VOID,
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ea6033cba947..546113430049 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
+
+/**
+ * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
+ * @sk: socket to get cgroup from
+ * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
+ * sk with connection information (IP addresses, etc.) May not contain
+ * cgroup info if it is a req sock.
+ * @type: The type of program to be exectuted
+ *
+ * socket passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock_ops
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+				     struct bpf_sock_ops_kern *sock_ops,
+				     enum bpf_attach_type type)
+{
+	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	struct bpf_prog *prog;
+	int ret = 0;
+
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog)
+		ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
+
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4409ccca8831..d4d47de75bba 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1079,6 +1079,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_SOCK_CREATE:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 		break;
+	case BPF_CGROUP_SOCK_OPS:
+		ptype = BPF_PROG_TYPE_SOCK_OPS;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1119,6 +1122,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_SOCK_OPS:
 		cgrp = cgroup_get_from_fd(attr->target_fd);
 		if (IS_ERR(cgrp))
 			return PTR_ERR(cgrp);
@@ -1133,6 +1137,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 
 	return ret;
 }
+
 #endif /* CONFIG_CGROUP_BPF */
 
 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
diff --git a/net/core/filter.c b/net/core/filter.c
index b39c869d22e3..1f6a26c4f8b9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3110,6 +3110,36 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+static bool __is_valid_sock_ops_access(int off, int size)
+{
+	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
+		return false;
+	/* The verifier guarantees that size > 0. */
+	if (off % size != 0)
+		return false;
+	if (size != sizeof(__u32))
+		return false;
+
+	return true;
+}
+
+static bool sock_ops_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct bpf_sock_ops, op) ...
+		     offsetof(struct bpf_sock_ops, replylong[3]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	return __is_valid_sock_ops_access(off, size);
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
@@ -3379,6 +3409,138 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
+				       const struct bpf_insn *si,
+				       struct bpf_insn *insn_buf,
+				       struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+	int off;
+
+	switch (si->off) {
+	case offsetof(struct bpf_sock_ops, op) ...
+	     offsetof(struct bpf_sock_ops, replylong[3]):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
+			     FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
+		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
+			     FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
+		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
+			     FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+		off = si->off;
+		off -= offsetof(struct bpf_sock_ops, op);
+		off += offsetof(struct bpf_sock_ops_kern, op);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+					      off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+					      off);
+		break;
+
+	case offsetof(struct bpf_sock_ops, family):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+					      struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_family));
+		break;
+
+	case offsetof(struct bpf_sock_ops, remote_ip4):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_daddr));
+		break;
+
+	case offsetof(struct bpf_sock_ops, local_ip4):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+					      struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_rcv_saddr));
+		break;
+
+	case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
+	     offsetof(struct bpf_sock_ops, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_v6_daddr.s6_addr32[0]) != 4);
+
+		off = si->off;
+		off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_v6_daddr.s6_addr32[0]) +
+				      off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
+	     offsetof(struct bpf_sock_ops, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+		off = si->off;
+		off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_v6_rcv_saddr.s6_addr32[0]) +
+				      off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock_ops, remote_port):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_dport));
+#ifndef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock_ops, local_port):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_num));
+		break;
+	}
+	return insn - insn_buf;
+}
+
 const struct bpf_verifier_ops sk_filter_prog_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
@@ -3428,6 +3590,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = {
 	.convert_ctx_access	= sock_filter_convert_ctx_access,
 };
 
+const struct bpf_verifier_ops sock_ops_prog_ops = {
+	.get_func_proto		= bpf_base_func_proto,
+	.is_valid_access	= sock_ops_is_valid_access,
+	.convert_ctx_access	= sock_ops_convert_ctx_access,
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index a91c57dd8571..a4be7cfa6519 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
 	bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
 	bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
+	bool is_sockops = strncmp(event, "sockops", 7) == 0;
 	size_t insns_cnt = size / sizeof(struct bpf_insn);
 	enum bpf_prog_type prog_type;
 	char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_CGROUP_SKB;
 	} else if (is_cgroup_sk) {
 		prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	} else if (is_sockops) {
+		prog_type = BPF_PROG_TYPE_SOCK_OPS;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
 		return 0;
 
-	if (is_socket) {
-		event += 6;
+	if (is_socket || is_sockops) {
+		if (is_socket)
+			event += 6;
+		else
+			event += 7;
 		if (*event != '/')
 			return 0;
 		event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		    memcmp(shname, "xdp", 3) == 0 ||
 		    memcmp(shname, "perf_event", 10) == 0 ||
 		    memcmp(shname, "socket", 6) == 0 ||
-		    memcmp(shname, "cgroup/", 7) == 0)
+		    memcmp(shname, "cgroup/", 7) == 0 ||
+		    memcmp(shname, "sockops", 7) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
 
-- 
cgit v1.2.3


From 5d6dec6fba38c3e2d408df108bb92ef4ac201f18 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 1 Jul 2017 11:01:29 -0700
Subject: locking/refcount: Remove the half-implemented refcount_sub() API

CONFIG_REFCOUNT_FULL=y (correctly) does not provide a refcount_sub(),
which should not be part of proper refcount design patterns.

Remove the erroneous extern and the later !CONFIG_REFCOUNT_FULL
accidental implementation.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 29dee3c03abc ("locking/refcounts: Out-of-line everything")
Link: http://lkml.kernel.org/r/20170701180129.GA17405@beast
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/refcount.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index bb71f2871dac..591792c8e5b0 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -49,7 +49,6 @@ extern __must_check bool refcount_inc_not_zero(refcount_t *r);
 extern void refcount_inc(refcount_t *r);
 
 extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
-extern void refcount_sub(unsigned int i, refcount_t *r);
 
 extern __must_check bool refcount_dec_and_test(refcount_t *r);
 extern void refcount_dec(refcount_t *r);
@@ -79,11 +78,6 @@ static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t
 	return atomic_sub_and_test(i, &r->refs);
 }
 
-static inline void refcount_sub(unsigned int i, refcount_t *r)
-{
-	atomic_sub(i, &r->refs);
-}
-
 static inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
 	return atomic_dec_and_test(&r->refs);
-- 
cgit v1.2.3


From 9ee8a1c4a0e232e9b86e03f7c628ff0286444e00 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 28 Jun 2017 15:14:01 -0500
Subject: PCI: Remove pci_scan_root_bus_msi()

The pci_scan_root_bus_bridge() function allows passing a parameterized
struct pci_host_bridge and scanning the resulting PCI bus; since the struct
msi_controller is part of the struct pci_host_bridge and the struct
pci_host_bridge can now be passed to pci_scan_root_bus_bridge() explicitly,
there is no need for a scan interface with a MSI controller parameter.

With all PCI host controller drivers and platform code relying on
pci_scan_root_bus_msi() converted over to pci_scan_root_bus_bridge() the
pci_scan_root_bus_msi() becomes obsolete and can be removed.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/probe.c | 27 +++++----------------------
 include/linux/pci.h |  4 ----
 2 files changed, 5 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5c457c17cf5c..bd42ed42c199 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2324,9 +2324,8 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
-static struct pci_bus *pci_create_root_bus_msi(struct device *parent,
-		int bus, struct pci_ops *ops, void *sysdata,
-		struct list_head *resources, struct msi_controller *msi)
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
 	int error;
 	struct pci_host_bridge *bridge;
@@ -2341,7 +2340,6 @@ static struct pci_bus *pci_create_root_bus_msi(struct device *parent,
 	bridge->sysdata = sysdata;
 	bridge->busnr = bus;
 	bridge->ops = ops;
-	bridge->msi = msi;
 
 	error = pci_register_host_bridge(bridge);
 	if (error < 0)
@@ -2353,13 +2351,6 @@ err_out:
 	kfree(bridge);
 	return NULL;
 }
-
-struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata, struct list_head *resources)
-{
-	return pci_create_root_bus_msi(parent, bus, ops, sysdata, resources,
-				       NULL);
-}
 EXPORT_SYMBOL_GPL(pci_create_root_bus);
 
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
@@ -2464,9 +2455,8 @@ int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge)
 }
 EXPORT_SYMBOL(pci_scan_root_bus_bridge);
 
-struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata,
-		struct list_head *resources, struct msi_controller *msi)
+struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
 	struct resource_entry *window;
 	bool found = false;
@@ -2479,7 +2469,7 @@ struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
 			break;
 		}
 
-	b = pci_create_root_bus_msi(parent, bus, ops, sysdata, resources, msi);
+	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
 	if (!b)
 		return NULL;
 
@@ -2497,13 +2487,6 @@ struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
 
 	return b;
 }
-
-struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata, struct list_head *resources)
-{
-	return pci_scan_root_bus_msi(parent, bus, ops, sysdata, resources,
-				     NULL);
-}
 EXPORT_SYMBOL(pci_scan_root_bus);
 
 struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b56dc13f47c2..9022b542556a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -849,10 +849,6 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
 int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
 void pci_bus_release_busn_res(struct pci_bus *b);
-struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
-				      struct pci_ops *ops, void *sysdata,
-				      struct list_head *resources,
-				      struct msi_controller *msi);
 struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 					     struct pci_ops *ops, void *sysdata,
 					     struct list_head *resources);
-- 
cgit v1.2.3


From 3aa8a41e0bf5565946082d23ae589c1a8559494a Mon Sep 17 00:00:00 2001
From: Matthew Minter <matt@masarand.com>
Date: Wed, 28 Jun 2017 15:14:02 -0500
Subject: PCI: Add IRQ mapping function pointers to pci_host_bridge struct

In order to defer IRQ assignment arches must be able to register functions
to map and swizzle interrupts.  These registered functions are stored in
the pci_host_bridge struct.

Signed-off-by: Matthew Minter <matt@masarand.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9022b542556a..3c5d8b026d6e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -432,6 +432,8 @@ struct pci_host_bridge {
 	void *sysdata;
 	int busnr;
 	struct list_head windows;	/* resource_entry */
+	u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* platform IRQ swizzler */
+	int (*map_irq)(const struct pci_dev *, u8, u8);
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
 	struct msi_controller *msi;
-- 
cgit v1.2.3


From 47a650f2795b00297a5a3eab7aaa46bdb2bbe304 Mon Sep 17 00:00:00 2001
From: Matthew Minter <matt@masarand.com>
Date: Wed, 28 Jun 2017 15:14:02 -0500
Subject: PCI: Add pci_assign_irq() function and have pci_fixup_irqs() use it

Here we delete the static pdev_fixup_irq() function which is currently what
pci_fixup_irqs() uses to actually assign the IRQs and replace it with the
pci_assign_irq() function which changes the interface and uses the new
function pointers stored in the host bridge structure.

Eventually this will allow pci_fixup_irqs() to be removed entirely and the
new deferred assignment code path will call pci_assign_irq() directly.
However to ensure current users continue to work, a new implementation of
pci_fixup_irqs() is introduced which simply wraps the functionality of
pci_assign_irq().

Signed-off-by: Matthew Minter <matt@masarand.com>
[lorenzo.pieralisi@arm.com: reworked comments/log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/setup-irq.c | 45 +++++++++++++++++++++++++++++++++++----------
 include/linux/pci.h     |  1 +
 2 files changed, 36 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index 95c225be49d1..81eda3d93a5d 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/ioport.h>
 #include <linux/cache.h>
+#include "pci.h"
 
 void __weak pcibios_update_irq(struct pci_dev *dev, int irq)
 {
@@ -22,12 +23,17 @@ void __weak pcibios_update_irq(struct pci_dev *dev, int irq)
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-static void pdev_fixup_irq(struct pci_dev *dev,
-			   u8 (*swizzle)(struct pci_dev *, u8 *),
-			   int (*map_irq)(const struct pci_dev *, u8, u8))
+void pci_assign_irq(struct pci_dev *dev)
 {
-	u8 pin, slot;
+	u8 pin;
+	u8 slot = -1;
 	int irq = 0;
+	struct pci_host_bridge *hbrg = pci_find_host_bridge(dev->bus);
+
+	if (!(hbrg->map_irq)) {
+		dev_dbg(&dev->dev, "runtime IRQ mapping not provided by arch\n");
+		return;
+	}
 
 	/* If this device is not on the primary bus, we need to figure out
 	   which interrupt pin it will come in on.   We know which slot it
@@ -40,17 +46,22 @@ static void pdev_fixup_irq(struct pci_dev *dev,
 	if (pin > 4)
 		pin = 1;
 
-	if (pin != 0) {
+	if (pin) {
 		/* Follow the chain of bridges, swizzling as we go.  */
-		slot = (*swizzle)(dev, &pin);
+		if (hbrg->swizzle_irq)
+			slot = (*(hbrg->swizzle_irq))(dev, &pin);
 
-		irq = (*map_irq)(dev, slot, pin);
+		/*
+		 * If a swizzling function is not used map_irq must
+		 * ignore slot
+		 */
+		irq = (*(hbrg->map_irq))(dev, slot, pin);
 		if (irq == -1)
 			irq = 0;
 	}
 	dev->irq = irq;
 
-	dev_dbg(&dev->dev, "fixup irq: got %d\n", dev->irq);
+	dev_dbg(&dev->dev, "assign IRQ: got %d\n", dev->irq);
 
 	/* Always tell the device, so the driver knows what is
 	   the real IRQ to use; the device does not use it. */
@@ -60,9 +71,23 @@ static void pdev_fixup_irq(struct pci_dev *dev,
 void pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
 		    int (*map_irq)(const struct pci_dev *, u8, u8))
 {
+	/*
+	 * Implement pci_fixup_irqs() through pci_assign_irq().
+	 * This code should be remove eventually, it is a wrapper
+	 * around pci_assign_irq() interface to keep current
+	 * pci_fixup_irqs() behaviour unchanged on architecture
+	 * code still relying on its interface.
+	 */
 	struct pci_dev *dev = NULL;
+	struct pci_host_bridge *hbrg = NULL;
 
-	for_each_pci_dev(dev)
-		pdev_fixup_irq(dev, swizzle, map_irq);
+	for_each_pci_dev(dev) {
+		hbrg = pci_find_host_bridge(dev->bus);
+		hbrg->swizzle_irq = swizzle;
+		hbrg->map_irq = map_irq;
+		pci_assign_irq(dev);
+		hbrg->swizzle_irq = NULL;
+		hbrg->map_irq = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(pci_fixup_irqs);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3c5d8b026d6e..5c1c0ae38dd3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1147,6 +1147,7 @@ void pdev_enable_device(struct pci_dev *);
 int pci_enable_resources(struct pci_dev *, int mask);
 void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
 		    int (*)(const struct pci_dev *, u8, u8));
+void pci_assign_irq(struct pci_dev *dev);
 struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res);
 #define HAVE_PCI_REQ_REGIONS	2
 int __must_check pci_request_regions(struct pci_dev *, const char *);
-- 
cgit v1.2.3


From 401e000ab90d7b81d8ea0735e3ff909548754876 Mon Sep 17 00:00:00 2001
From: Sylvain 'ythier' Hitier <sylvain.hitier@gmail.com>
Date: Sun, 2 Jul 2017 15:21:56 +0200
Subject: moduleparam: fix doc: hwparam_irq configures an IRQ

Signed-off-by: Sylvain 'ythier' Hitier <sylvain.hitier@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/moduleparam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 6be1949ebcdf..1ee7b30dafec 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -457,7 +457,7 @@ enum hwparam_type {
 	hwparam_ioport,		/* Module parameter configures an I/O port */
 	hwparam_iomem,		/* Module parameter configures an I/O mem address */
 	hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */
-	hwparam_irq,		/* Module parameter configures an I/O port */
+	hwparam_irq,		/* Module parameter configures an IRQ */
 	hwparam_dma,		/* Module parameter configures a DMA channel */
 	hwparam_dma_addr,	/* Module parameter configures a DMA buffer address */
 	hwparam_other,		/* Module parameter configures some other value */
-- 
cgit v1.2.3


From 334fd34d76f237c0ee58dfc400d2c4e34d660544 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 29 Jun 2017 11:43:20 -0700
Subject: vfs: Add page_cache_seek_hole_data helper

Both ext4 and xfs implement seeking for the next hole or piece of data
in unwritten extents by scanning the page cache, and both versions share
the same bug when iterating the buffers of a page: the start offset into
the page isn't taken into account, so when a page fits more than two
filesystem blocks, things will go wrong.  For example, on a filesystem
with a block size of 1k, the following command will fail:

  xfs_io -f -c "falloc 0 4k" \
            -c "pwrite 1k 1k" \
            -c "pwrite 3k 1k" \
            -c "seek -a -r 0" foo

In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048,
SEEK_DATA) will return the correct result.

Introduce a generic vfs helper for seeking in the page cache that gets
this right.  The next commits will replace the filesystem specific
implementations.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: dropped the export]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/buffer.c                 | 124 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/buffer_head.h |   2 +
 2 files changed, 126 insertions(+)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..b3674eb7c9c0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3492,6 +3492,130 @@ int bh_submit_read(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
+ *
+ * Returns the offset within the file on success, and -ENOENT otherwise.
+ */
+static loff_t
+page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
+{
+	loff_t offset = page_offset(page);
+	struct buffer_head *bh, *head;
+	bool seek_data = whence == SEEK_DATA;
+
+	if (lastoff < offset)
+		lastoff = offset;
+
+	bh = head = page_buffers(page);
+	do {
+		offset += bh->b_size;
+		if (lastoff >= offset)
+			continue;
+
+		/*
+		 * Unwritten extents that have data in the page cache covering
+		 * them can be identified by the BH_Unwritten state flag.
+		 * Pages with multiple buffers might have a mix of holes, data
+		 * and unwritten extents - any buffer with valid data in it
+		 * should have BH_Uptodate flag set on it.
+		 */
+
+		if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
+			return lastoff;
+
+		lastoff = offset;
+	} while ((bh = bh->b_this_page) != head);
+	return -ENOENT;
+}
+
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ *
+ * Within unwritten extents, the page cache determines which parts are holes
+ * and which are data: unwritten and uptodate buffer heads count as data;
+ * everything else counts as a hole.
+ *
+ * Returns the resulting offset on successs, and -ENOENT otherwise.
+ */
+loff_t
+page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
+			  int whence)
+{
+	pgoff_t index = offset >> PAGE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
+	loff_t lastoff = offset;
+	struct pagevec pvec;
+
+	if (length <= 0)
+		return -ENOENT;
+
+	pagevec_init(&pvec, 0);
+
+	do {
+		unsigned want, nr_pages, i;
+
+		want = min_t(unsigned, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
+			 * mapping.  However, page->index will not change
+			 * because we have a reference on the page.
+                         *
+			 * If current page offset is beyond where we've ended,
+			 * we've found a hole.
+                         */
+			if (whence == SEEK_HOLE &&
+			    lastoff < page_offset(page))
+				goto check_range;
+
+			/* Searching done if the page index is out of range. */
+			if (page->index >= end)
+				goto not_found;
+
+			lock_page(page);
+			if (likely(page->mapping == inode->i_mapping) &&
+			    page_has_buffers(page)) {
+				lastoff = page_seek_hole_data(page, lastoff, whence);
+				if (lastoff >= 0) {
+					unlock_page(page);
+					goto check_range;
+				}
+			}
+			unlock_page(page);
+			lastoff = page_offset(page) + PAGE_SIZE;
+		}
+
+		/* Searching done if fewer pages returned than wanted. */
+		if (nr_pages < want)
+			break;
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index < end);
+
+	/* When no page at lastoff and we are not done, we found a hole. */
+	if (whence != SEEK_HOLE)
+		goto not_found;
+
+check_range:
+	if (lastoff < offset + length)
+		goto out;
+not_found:
+	lastoff = -ENOENT;
+out:
+	pagevec_release(&pvec);
+	return lastoff;
+}
+
 void __init buffer_init(void)
 {
 	unsigned long nrpages;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index bd029e52ef5e..ad4e024ce17e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -201,6 +201,8 @@ void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
 int bh_uptodate_or_lock(struct buffer_head *bh);
 int bh_submit_read(struct buffer_head *bh);
+loff_t page_cache_seek_hole_data(struct inode *inode, loff_t offset,
+				 loff_t length, int whence);
 
 extern int buffer_heads_over_limit;
 
-- 
cgit v1.2.3


From 0ed3b0d45fd39142e418220f518c8959c1a5f596 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 29 Jun 2017 11:43:21 -0700
Subject: vfs: Add iomap_seek_hole and iomap_seek_data helpers

Filesystems can use this for implementing lseek SEEK_HOLE / SEEK_DATA
support via iomap.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: split functions, coding style cleanups]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |  4 +++
 2 files changed, 98 insertions(+)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..432eed8f091f 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -584,6 +584,100 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 }
 EXPORT_SYMBOL_GPL(iomap_fiemap);
 
+static loff_t
+iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+		      void *data, struct iomap *iomap)
+{
+	switch (iomap->type) {
+	case IOMAP_UNWRITTEN:
+		offset = page_cache_seek_hole_data(inode, offset, length,
+						   SEEK_HOLE);
+		if (offset < 0)
+			return length;
+		/* fall through */
+	case IOMAP_HOLE:
+		*(loff_t *)data = offset;
+		return 0;
+	default:
+		return length;
+	}
+}
+
+loff_t
+iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+	loff_t size = i_size_read(inode);
+	loff_t length = size - offset;
+	loff_t ret;
+
+	/* Nothing to be found beyond the end of the file. */
+	if (offset >= size)
+		return -ENXIO;
+
+	while (length > 0) {
+		ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
+				  &offset, iomap_seek_hole_actor);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			break;
+
+		offset += ret;
+		length -= ret;
+	}
+
+	return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_hole);
+
+static loff_t
+iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+		      void *data, struct iomap *iomap)
+{
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+		return length;
+	case IOMAP_UNWRITTEN:
+		offset = page_cache_seek_hole_data(inode, offset, length,
+						   SEEK_DATA);
+		if (offset < 0)
+			return length;
+		/*FALLTHRU*/
+	default:
+		*(loff_t *)data = offset;
+		return 0;
+	}
+}
+
+loff_t
+iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+	loff_t size = i_size_read(inode);
+	loff_t length = size - offset;
+	loff_t ret;
+
+	/* Nothing to be found beyond the end of the file. */
+	if (offset >= size)
+		return -ENXIO;
+
+	while (length > 0) {
+		ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
+				  &offset, iomap_seek_data_actor);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			break;
+
+		offset += ret;
+		length -= ret;
+	}
+
+	if (length <= 0)
+		return -ENXIO;
+	return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_data);
+
 /*
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e788da31..8a03f5dcd89b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -83,6 +83,10 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, const struct iomap_ops *ops);
+loff_t iomap_seek_hole(struct inode *inode, loff_t offset,
+		const struct iomap_ops *ops);
+loff_t iomap_seek_data(struct inode *inode, loff_t offset,
+		const struct iomap_ops *ops);
 
 /*
  * Flags for direct I/O ->end_io:
-- 
cgit v1.2.3


From 277036f05be242540b7bfe75f226107d04f51b06 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 2 Jun 2017 07:43:27 +0200
Subject: platform: Accept const properties

Aligns us with device_add_properties, the function we call.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
---
 drivers/base/platform.c         | 2 +-
 include/linux/platform_device.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index a102152301c8..71ea6f4d33c2 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(platform_device_add_data);
  * platform device is released.
  */
 int platform_device_add_properties(struct platform_device *pdev,
-				   struct property_entry *properties)
+				   const struct property_entry *properties)
 {
 	return device_add_properties(&pdev->dev, properties);
 }
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 98c2a7c7108e..49f634d96118 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -172,7 +172,7 @@ extern int platform_device_add_resources(struct platform_device *pdev,
 extern int platform_device_add_data(struct platform_device *pdev,
 				    const void *data, size_t size);
 extern int platform_device_add_properties(struct platform_device *pdev,
-					  struct property_entry *properties);
+				const struct property_entry *properties);
 extern int platform_device_add(struct platform_device *pdev);
 extern void platform_device_del(struct platform_device *pdev);
 extern void platform_device_put(struct platform_device *pdev);
-- 
cgit v1.2.3


From c851a9dc4359c6b19722de568e9f543c1c23481c Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:21 +0300
Subject: qed: Introduce iWARP personality

iWARP personality introduced the need for differentiating in several
places in the code whether we are RoCE, iWARP or either. This
leads to introducing new macros for querying the personality.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h      | 26 +++++++++++++++++++++++---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c  |  8 ++++----
 drivers/net/ethernet/qlogic/qed/qed_dev.c  | 12 +++++-------
 drivers/net/ethernet/qlogic/qed/qed_l2.c   |  3 +--
 drivers/net/ethernet/qlogic/qed/qed_ll2.c  |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_main.c | 17 ++++++++---------
 include/linux/qed/common_hsi.h             |  2 +-
 7 files changed, 43 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 14b08ee9e3ad..22e1171c317e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -210,14 +210,16 @@ struct qed_tunn_update_params {
 
 /* The PCI personality is not quite synonymous to protocol ID:
  * 1. All personalities need CORE connections
- * 2. The Ethernet personality may support also the RoCE protocol
+ * 2. The Ethernet personality may support also the RoCE/iWARP protocol
  */
 enum qed_pci_personality {
 	QED_PCI_ETH,
 	QED_PCI_FCOE,
 	QED_PCI_ISCSI,
 	QED_PCI_ETH_ROCE,
-	QED_PCI_DEFAULT /* default in shmem */
+	QED_PCI_ETH_IWARP,
+	QED_PCI_ETH_RDMA,
+	QED_PCI_DEFAULT, /* default in shmem */
 };
 
 /* All VFs are symmetric, all counters are PF + all VFs */
@@ -277,6 +279,7 @@ enum qed_dev_cap {
 	QED_DEV_CAP_FCOE,
 	QED_DEV_CAP_ISCSI,
 	QED_DEV_CAP_ROCE,
+	QED_DEV_CAP_IWARP,
 };
 
 enum qed_wol_support {
@@ -286,7 +289,24 @@ enum qed_wol_support {
 
 struct qed_hw_info {
 	/* PCI personality */
-	enum qed_pci_personality	personality;
+	enum qed_pci_personality personality;
+#define QED_IS_RDMA_PERSONALITY(dev)			    \
+	((dev)->hw_info.personality == QED_PCI_ETH_ROCE ||  \
+	 (dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+	 (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_ROCE_PERSONALITY(dev)			   \
+	((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \
+	 (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_IWARP_PERSONALITY(dev)			    \
+	((dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+	 (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_L2_PERSONALITY(dev)		      \
+	((dev)->hw_info.personality == QED_PCI_ETH || \
+	 QED_IS_RDMA_PERSONALITY(dev))
+#define QED_IS_FCOE_PERSONALITY(dev) \
+	((dev)->hw_info.personality == QED_PCI_FCOE)
+#define QED_IS_ISCSI_PERSONALITY(dev) \
+	((dev)->hw_info.personality == QED_PCI_ISCSI)
 
 	/* Resource Allocation scheme results */
 	u32				resc_start[QED_MAX_RESC];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index e201214764db..38716f77c21d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -853,7 +853,7 @@ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines)
 	if (!excess_lines)
 		return 0;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+	if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
 		return 0;
 
 	p_mngr = p_hwfn->p_cxt_mngr;
@@ -1033,7 +1033,7 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn,
 	u32 lines, line, sz_left, lines_to_skip = 0;
 
 	/* Special handling for RoCE that supports dynamic allocation */
-	if ((p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) &&
+	if (QED_IS_RDMA_PERSONALITY(p_hwfn) &&
 	    ((ilt_client == ILT_CLI_CDUT) || ilt_client == ILT_CLI_TSDM))
 		return 0;
 
@@ -1833,7 +1833,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn)
 		tm_offset += tm_iids.pf_tids[i];
 	}
 
-	if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE)
+	if (QED_IS_RDMA_PERSONALITY(p_hwfn))
 		active_seg_mask = 0;
 
 	STORE_RT_REG(p_hwfn, TM_REG_PF_ENABLE_TASK_RT_OFFSET, active_seg_mask);
@@ -2344,7 +2344,7 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
 		       last_cid_allocated - 1);
 
 		if (!p_hwfn->b_rdma_enabled_in_prs) {
-			/* Enable RoCE search */
+			/* Enable RDMA search */
 			qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
 			p_hwfn->b_rdma_enabled_in_prs = true;
 		}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 49667ad9042d..68e61823bfc0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -936,7 +936,7 @@ int qed_resc_alloc(struct qed_dev *cdev)
 
 		/* EQ */
 		n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain);
-		if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+		if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
 			num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
 							       PROTOCOLID_ROCE,
 							       NULL) * 2;
@@ -2057,7 +2057,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 	qed_int_get_num_sbs(p_hwfn, &sb_cnt);
 
 	if (IS_ENABLED(CONFIG_QED_RDMA) &&
-	    p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+	    QED_IS_RDMA_PERSONALITY(p_hwfn)) {
 		/* Roce CNQ each requires: 1 status block + 1 CNQ. We divide
 		 * the status blocks equally between L2 / RoCE but with
 		 * consideration as to how many l2 queues / cnqs we have.
@@ -2068,9 +2068,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 
 		non_l2_sbs = feat_num[QED_RDMA_CNQ];
 	}
-
-	if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE ||
-	    p_hwfn->hw_info.personality == QED_PCI_ETH) {
+	if (QED_IS_L2_PERSONALITY(p_hwfn)) {
 		/* Start by allocating VF queues, then PF's */
 		feat_num[QED_VF_L2_QUE] = min_t(u32,
 						RESC_NUM(p_hwfn, QED_L2_QUEUE),
@@ -2083,12 +2081,12 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 							 QED_VF_L2_QUE));
 	}
 
-	if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
+	if (QED_IS_FCOE_PERSONALITY(p_hwfn))
 		feat_num[QED_FCOE_CQ] =  min_t(u32, sb_cnt.cnt,
 					       RESC_NUM(p_hwfn,
 							QED_CMDQS_CQS));
 
-	if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
 		feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt,
 					       RESC_NUM(p_hwfn,
 							QED_CMDQS_CQS));
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e57699bfbdfa..27ea54ba7e1b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -79,8 +79,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn)
 	unsigned long **pp_qids;
 	u32 i;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
-	    p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+	if (!QED_IS_L2_PERSONALITY(p_hwfn))
 		return 0;
 
 	p_l2_info = kzalloc(sizeof(*p_l2_info), GFP_KERNEL);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 17f9b0a7b553..be66f19d577d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1421,7 +1421,7 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 	if (rc)
 		goto out;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+	if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
 		qed_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1);
 
 	qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 16cc30b11cce..b11399606990 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -237,6 +237,8 @@ err0:
 int qed_fill_dev_info(struct qed_dev *cdev,
 		      struct qed_dev_info *dev_info)
 {
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hw_info *hw_info = &p_hwfn->hw_info;
 	struct qed_tunnel_info *tun = &cdev->tunnel;
 	struct qed_ptt  *ptt;
 
@@ -260,11 +262,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 	dev_info->pci_mem_start = cdev->pci_params.mem_start;
 	dev_info->pci_mem_end = cdev->pci_params.mem_end;
 	dev_info->pci_irq = cdev->pci_params.irq;
-	dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
-				    QED_PCI_ETH_ROCE);
+	dev_info->rdma_supported = QED_IS_RDMA_PERSONALITY(p_hwfn);
 	dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
 	dev_info->dev_type = cdev->type;
-	ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
+	ether_addr_copy(dev_info->hw_mac, hw_info->hw_mac_addr);
 
 	if (IS_PF(cdev)) {
 		dev_info->fw_major = FW_MAJOR_VERSION;
@@ -274,8 +275,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 		dev_info->mf_mode = cdev->mf_mode;
 		dev_info->tx_switching = true;
 
-		if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
-		    QED_WOL_SUPPORT_PME)
+		if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
 			dev_info->wol_support = true;
 
 		dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
@@ -304,7 +304,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 				    &dev_info->mfw_rev, NULL);
 	}
 
-	dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+	dev_info->mtu = hw_info->mtu;
 
 	return 0;
 }
@@ -790,7 +790,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
 				       cdev->num_hwfns;
 
 	if (!IS_ENABLED(CONFIG_QED_RDMA) ||
-	    QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH_ROCE)
+	    !QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev)))
 		return 0;
 
 	for_each_hwfn(cdev, i)
@@ -931,8 +931,7 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 	/* In case we might support RDMA, don't allow qede to be greedy
 	 * with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn.
 	 */
-	if (QED_LEADING_HWFN(cdev)->hw_info.personality ==
-	    QED_PCI_ETH_ROCE) {
+	if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) {
 		u16 *num_cons;
 
 		num_cons = &params->eth_pf_params.num_cons;
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index a567cbf8c5b4..885ae1379b5a 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -778,7 +778,7 @@ enum protocol_type {
 	PROTOCOLID_ROCE,
 	PROTOCOLID_CORE,
 	PROTOCOLID_ETH,
-	PROTOCOLID_RESERVED4,
+	PROTOCOLID_IWARP,
 	PROTOCOLID_RESERVED5,
 	PROTOCOLID_PREROCE,
 	PROTOCOLID_COMMON,
-- 
cgit v1.2.3


From 67b40dccc45ff5d488aad17114e80e00029fd854 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:22 +0300
Subject: qed: Implement iWARP initialization, teardown and qp operations

This patch adds iWARP support for flows that have common code
between RoCE and iWARP, such as initialization, teardown and
qp setup verbs: create, destroy, modify, query.
It introduces the iWARP specific files qed_iwarp.[ch] and
iwarp_common.h

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/Makefile    |   2 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c   |   9 +-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h   |   1 +
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 531 ++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_iwarp.h |  85 +++++
 drivers/net/ethernet/qlogic/qed/qed_rdma.c  | 133 +++++--
 drivers/net/ethernet/qlogic/qed/qed_rdma.h  |   3 +
 drivers/net/ethernet/qlogic/qed/qed_roce.c  |  20 ++
 drivers/net/ethernet/qlogic/qed/qed_sp.h    |   5 +-
 include/linux/qed/iwarp_common.h            |  53 +++
 include/linux/qed/qed_rdma_if.h             |   1 +
 11 files changed, 803 insertions(+), 40 deletions(-)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_iwarp.c
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_iwarp.h
 create mode 100644 include/linux/qed/iwarp_common.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 67452380b60e..82dd47068e18 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -5,6 +5,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 	 qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
-qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o
+qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
 qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
 qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 68e61823bfc0..6c8505dc5c31 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -937,8 +937,15 @@ int qed_resc_alloc(struct qed_dev *cdev)
 		/* EQ */
 		n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain);
 		if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
+			enum protocol_type rdma_proto;
+
+			if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+				rdma_proto = PROTOCOLID_ROCE;
+			else
+				rdma_proto = PROTOCOLID_IWARP;
+
 			num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
-							       PROTOCOLID_ROCE,
+							       rdma_proto,
 							       NULL) * 2;
 			n_eqes += num_cons + 2 * MAX_NUM_VFS_BB;
 		} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 3bf3614b3084..31fb0bffa098 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -46,6 +46,7 @@
 #include <linux/qed/fcoe_common.h>
 #include <linux/qed/eth_common.h>
 #include <linux/qed/iscsi_common.h>
+#include <linux/qed/iwarp_common.h>
 #include <linux/qed/rdma_common.h>
 #include <linux/qed/roce_common.h>
 #include <linux/qed/qed_fcoe_if.h>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
new file mode 100644
index 000000000000..a8bd5f8edec6
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -0,0 +1,531 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "qed_cxt.h"
+#include "qed_hw.h"
+#include "qed_rdma.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+
+#define QED_IWARP_ORD_DEFAULT		32
+#define QED_IWARP_IRD_DEFAULT		32
+#define QED_IWARP_RCV_WND_SIZE_DEF	(256 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_MIN	(64 * 1024)
+#define QED_IWARP_TS_EN			BIT(0)
+#define QED_IWARP_PARAM_CRC_NEEDED	(1)
+#define QED_IWARP_PARAM_P2P		(1)
+
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+				 u8 fw_event_code, u16 echo,
+				 union event_ring_data *data,
+				 u8 fw_return_code);
+
+/* Override devinfo with iWARP specific values */
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn)
+{
+	struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+	dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE;
+	dev->max_qp = min_t(u32,
+			    IWARP_MAX_QPS,
+			    p_hwfn->p_rdma_info->num_qps);
+
+	dev->max_cq = dev->max_qp;
+
+	dev->max_qp_resp_rd_atomic_resc = QED_IWARP_IRD_DEFAULT;
+	dev->max_qp_req_rd_atomic_resc = QED_IWARP_ORD_DEFAULT;
+}
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP;
+	qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
+	p_hwfn->b_rdma_enabled_in_prs = true;
+}
+
+static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed in allocating iwarp cid\n");
+		return rc;
+	}
+	*cid += qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *cid);
+	if (rc)
+		qed_iwarp_cid_cleaned(p_hwfn, *cid);
+
+	return rc;
+}
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+			struct qed_rdma_qp *qp,
+			struct qed_rdma_create_qp_out_params *out_params)
+{
+	struct iwarp_create_qp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	u16 physical_queue;
+	u32 cid;
+	int rc;
+
+	qp->shared_queue = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					      IWARP_SHARED_QUEUE_PAGE_SIZE,
+					      &qp->shared_queue_phys_addr,
+					      GFP_KERNEL);
+	if (!qp->shared_queue)
+		return -ENOMEM;
+
+	out_params->sq_pbl_virt = (u8 *)qp->shared_queue +
+	    IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+	out_params->sq_pbl_phys = qp->shared_queue_phys_addr +
+	    IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+	out_params->rq_pbl_virt = (u8 *)qp->shared_queue +
+	    IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+	out_params->rq_pbl_phys = qp->shared_queue_phys_addr +
+	    IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+
+	rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+	if (rc)
+		goto err1;
+
+	qp->icid = (u16)cid;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.cid = qp->icid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_CREATE_QP,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		goto err2;
+
+	p_ramrod = &p_ent->ramrod.iwarp_create_qp;
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN,
+		  qp->fmr_and_reserved_lkey);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, qp->signal_all);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN,
+		  qp->incoming_rdma_read_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN,
+		  qp->incoming_rdma_write_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN,
+		  qp->incoming_atomic_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, qp->use_srq);
+
+	p_ramrod->pd = qp->pd;
+	p_ramrod->sq_num_pages = qp->sq_num_pages;
+	p_ramrod->rq_num_pages = qp->rq_num_pages;
+
+	p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
+	p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
+
+	p_ramrod->cq_cid_for_sq =
+	    cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id);
+	p_ramrod->cq_cid_for_rq =
+	    cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id);
+
+	p_ramrod->dpi = cpu_to_le16(qp->dpi);
+
+	physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+	p_ramrod->physical_q0 = cpu_to_le16(physical_queue);
+	physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+	p_ramrod->physical_q1 = cpu_to_le16(physical_queue);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err2;
+
+	return rc;
+
+err2:
+	qed_iwarp_cid_cleaned(p_hwfn, cid);
+err1:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  IWARP_SHARED_QUEUE_PAGE_SIZE,
+			  qp->shared_queue, qp->shared_queue_phys_addr);
+
+	return rc;
+}
+
+static int qed_iwarp_modify_fw(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	struct iwarp_modify_qp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	int rc;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MODIFY_QP,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iwarp_modify_qp;
+	SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN,
+		  0x1);
+	if (qp->iwarp_state == QED_IWARP_QP_STATE_CLOSING)
+		p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING;
+	else
+		p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x)rc=%d\n", qp->icid, rc);
+
+	return rc;
+}
+
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state)
+{
+	switch (state) {
+	case QED_ROCE_QP_STATE_RESET:
+	case QED_ROCE_QP_STATE_INIT:
+	case QED_ROCE_QP_STATE_RTR:
+		return QED_IWARP_QP_STATE_IDLE;
+	case QED_ROCE_QP_STATE_RTS:
+		return QED_IWARP_QP_STATE_RTS;
+	case QED_ROCE_QP_STATE_SQD:
+		return QED_IWARP_QP_STATE_CLOSING;
+	case QED_ROCE_QP_STATE_ERR:
+		return QED_IWARP_QP_STATE_ERROR;
+	case QED_ROCE_QP_STATE_SQE:
+		return QED_IWARP_QP_STATE_TERMINATE;
+	default:
+		return QED_IWARP_QP_STATE_ERROR;
+	}
+}
+
+static enum qed_roce_qp_state
+qed_iwarp2roce_state(enum qed_iwarp_qp_state state)
+{
+	switch (state) {
+	case QED_IWARP_QP_STATE_IDLE:
+		return QED_ROCE_QP_STATE_INIT;
+	case QED_IWARP_QP_STATE_RTS:
+		return QED_ROCE_QP_STATE_RTS;
+	case QED_IWARP_QP_STATE_TERMINATE:
+		return QED_ROCE_QP_STATE_SQE;
+	case QED_IWARP_QP_STATE_CLOSING:
+		return QED_ROCE_QP_STATE_SQD;
+	case QED_IWARP_QP_STATE_ERROR:
+		return QED_ROCE_QP_STATE_ERR;
+	default:
+		return QED_ROCE_QP_STATE_ERR;
+	}
+}
+
+const char *iwarp_state_names[] = {
+	"IDLE",
+	"RTS",
+	"TERMINATE",
+	"CLOSING",
+	"ERROR",
+};
+
+int
+qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn,
+		    struct qed_rdma_qp *qp,
+		    enum qed_iwarp_qp_state new_state, bool internal)
+{
+	enum qed_iwarp_qp_state prev_iw_state;
+	bool modify_fw = false;
+	int rc = 0;
+
+	/* modify QP can be called from upper-layer or as a result of async
+	 * RST/FIN... therefore need to protect
+	 */
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+	prev_iw_state = qp->iwarp_state;
+
+	if (prev_iw_state == new_state) {
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+		return 0;
+	}
+
+	switch (prev_iw_state) {
+	case QED_IWARP_QP_STATE_IDLE:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_RTS:
+			qp->iwarp_state = QED_IWARP_QP_STATE_RTS;
+			break;
+		case QED_IWARP_QP_STATE_ERROR:
+			qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+			if (!internal)
+				modify_fw = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	case QED_IWARP_QP_STATE_RTS:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_CLOSING:
+			if (!internal)
+				modify_fw = true;
+
+			qp->iwarp_state = QED_IWARP_QP_STATE_CLOSING;
+			break;
+		case QED_IWARP_QP_STATE_ERROR:
+			if (!internal)
+				modify_fw = true;
+			qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+			break;
+		default:
+			break;
+		}
+		break;
+	case QED_IWARP_QP_STATE_ERROR:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_IDLE:
+
+			qp->iwarp_state = new_state;
+			break;
+		case QED_IWARP_QP_STATE_CLOSING:
+			/* could happen due to race... do nothing.... */
+			break;
+		default:
+			rc = -EINVAL;
+		}
+		break;
+	case QED_IWARP_QP_STATE_TERMINATE:
+	case QED_IWARP_QP_STATE_CLOSING:
+		qp->iwarp_state = new_state;
+		break;
+	default:
+		break;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) %s --> %s%s\n",
+		   qp->icid,
+		   iwarp_state_names[prev_iw_state],
+		   iwarp_state_names[qp->iwarp_state],
+		   internal ? "internal" : "");
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+
+	if (modify_fw)
+		rc = qed_iwarp_modify_fw(p_hwfn, qp);
+
+	return rc;
+}
+
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	int rc;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_DESTROY_QP,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		return rc;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc);
+
+	return rc;
+}
+
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	int rc = 0;
+
+	if (qp->iwarp_state != QED_IWARP_QP_STATE_ERROR) {
+		rc = qed_iwarp_modify_qp(p_hwfn, qp,
+					 QED_IWARP_QP_STATE_ERROR, false);
+		if (rc)
+			return rc;
+	}
+
+	rc = qed_iwarp_fw_destroy(p_hwfn, qp);
+
+	if (qp->shared_queue)
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+				  IWARP_SHARED_QUEUE_PAGE_SIZE,
+				  qp->shared_queue, qp->shared_queue_phys_addr);
+
+	return rc;
+}
+
+#define QED_IWARP_MAX_CID_CLEAN_TIME  100
+#define QED_IWARP_MAX_NO_PROGRESS_CNT 5
+
+/* This function waits for all the bits of a bmap to be cleared, as long as
+ * there is progress ( i.e. the number of bits left to be cleared decreases )
+ * the function continues.
+ */
+static int
+qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap)
+{
+	int prev_weight = 0;
+	int wait_count = 0;
+	int weight = 0;
+
+	weight = bitmap_weight(bmap->bitmap, bmap->max_count);
+	prev_weight = weight;
+
+	while (weight) {
+		msleep(QED_IWARP_MAX_CID_CLEAN_TIME);
+
+		weight = bitmap_weight(bmap->bitmap, bmap->max_count);
+
+		if (prev_weight == weight) {
+			wait_count++;
+		} else {
+			prev_weight = weight;
+			wait_count = 0;
+		}
+
+		if (wait_count > QED_IWARP_MAX_NO_PROGRESS_CNT) {
+			DP_NOTICE(p_hwfn,
+				  "%s bitmap wait timed out (%d cids pending)\n",
+				  bmap->name, weight);
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+static int qed_iwarp_wait_for_all_cids(struct qed_hwfn *p_hwfn)
+{
+	/* Now wait for all cids to be completed */
+	return qed_iwarp_wait_cid_map_cleared(p_hwfn,
+					      &p_hwfn->p_rdma_info->cid_map);
+}
+
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
+{
+	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	return 0;
+}
+
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
+{
+}
+
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		    struct qed_rdma_start_in_params *params)
+{
+	struct qed_iwarp_info *iwarp_info;
+	u32 rcv_wnd_size;
+	int rc = 0;
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	iwarp_info->tcp_flags = QED_IWARP_TS_EN;
+	rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF;
+
+	/* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */
+	iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) -
+	    ilog2(QED_IWARP_RCV_WND_SIZE_MIN);
+	iwarp_info->crc_needed = QED_IWARP_PARAM_CRC_NEEDED;
+	iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+
+	iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
+
+	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+
+	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
+				  qed_iwarp_async_event);
+
+	return rc;
+}
+
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	int rc;
+
+	rc = qed_iwarp_wait_for_all_cids(p_hwfn);
+	if (rc)
+		return rc;
+
+	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
+
+	return 0;
+}
+
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+				 u8 fw_event_code, u16 echo,
+				 union event_ring_data *data,
+				 u8 fw_return_code)
+{
+	return 0;
+}
+
+void
+qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+		   struct qed_rdma_query_qp_out_params *out_params)
+{
+	out_params->state = qed_iwarp2roce_state(qp->iwarp_state);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
new file mode 100644
index 000000000000..05e5e45be6cf
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -0,0 +1,85 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_IWARP_H
+#define _QED_IWARP_H
+
+enum qed_iwarp_qp_state {
+	QED_IWARP_QP_STATE_IDLE,
+	QED_IWARP_QP_STATE_RTS,
+	QED_IWARP_QP_STATE_TERMINATE,
+	QED_IWARP_QP_STATE_CLOSING,
+	QED_IWARP_QP_STATE_ERROR,
+};
+
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
+
+struct qed_iwarp_info {
+	spinlock_t iw_lock;	/* for iwarp resources */
+	spinlock_t qp_lock;	/* for teardown races */
+	u32 rcv_wnd_scale;
+	u16 max_mtu;
+	u8 mac_addr[ETH_ALEN];
+	u8 crc_needed;
+	u8 tcp_flags;
+	u8 peer2peer;
+	enum mpa_negotiation_mode mpa_rev;
+	enum mpa_rtr_type rtr_type;
+};
+
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
+
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		    struct qed_rdma_start_in_params *params);
+
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+			struct qed_rdma_qp *qp,
+			struct qed_rdma_create_qp_out_params *out_params);
+
+int qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp,
+			enum qed_iwarp_qp_state new_state, bool internal);
+
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+void qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+			struct qed_rdma_query_qp_out_params *out_params);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index df76e212f86e..ee6887f8c260 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -161,7 +161,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 	num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
 					       NULL);
 
-	p_rdma_info->num_qps = num_cons / 2;
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		p_rdma_info->num_qps = num_cons;
+	else
+		p_rdma_info->num_qps = num_cons / 2; /* 2 cids per qp */
 
 	num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE);
 
@@ -252,6 +255,13 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 			   "Failed to allocate real cid bitmap, rc = %d\n", rc);
 		goto free_cid_map;
 	}
+
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		rc = qed_iwarp_alloc(p_hwfn);
+
+	if (rc)
+		goto free_cid_map;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
 	return 0;
 
@@ -329,6 +339,9 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
 {
 	struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
 
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_resc_free(p_hwfn);
+
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1);
@@ -470,6 +483,9 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
 
 	if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
 		SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
+
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_init_devinfo(p_hwfn);
 }
 
 static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
@@ -490,29 +506,17 @@ static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
 
 static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-	u32 ll2_ethertype_en;
+	int rc = 0;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n");
 	p_hwfn->b_rdma_enabled_in_prs = false;
 
-	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
-
-	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
-
-	/* We delay writing to this reg until first cid is allocated. See
-	 * qed_cxt_dynamic_ilt_alloc function for more details
-	 */
-	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
-	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
-	       (ll2_ethertype_en | 0x01));
-
-	if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
-		DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
-		return -EINVAL;
-	}
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_init_hw(p_hwfn, p_ptt);
+	else
+		rc = qed_roce_init_hw(p_hwfn, p_ptt);
 
-	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
-	return 0;
+	return rc;
 }
 
 static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
@@ -544,7 +548,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma;
+	else
+		p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
 
 	p_params_header = &p_ramrod->params_header;
 	p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
@@ -641,7 +648,15 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	qed_roce_setup(p_hwfn);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+		if (rc)
+			return rc;
+	} else {
+		rc = qed_roce_setup(p_hwfn);
+		if (rc)
+			return rc;
+	}
 
 	return qed_rdma_start_fw(p_hwfn, params, p_ptt);
 }
@@ -675,7 +690,16 @@ int qed_rdma_stop(void *rdma_cxt)
 	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
 	       (ll2_ethertype_en & 0xFFFE));
 
-	qed_roce_stop(p_hwfn);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		rc = qed_iwarp_stop(p_hwfn, p_ptt);
+		if (rc) {
+			qed_ptt_release(p_hwfn, p_ptt);
+			return rc;
+		}
+	} else {
+		qed_roce_stop(p_hwfn);
+	}
+
 	qed_ptt_release(p_hwfn, p_ptt);
 
 	/* Get SPQ entry */
@@ -810,7 +834,9 @@ static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
 
 	memset(info, 0, sizeof(*info));
 
-	info->rdma_type = QED_RDMA_TYPE_ROCE;
+	info->rdma_type = QED_IS_ROCE_PERSONALITY(p_hwfn) ?
+	    QED_RDMA_TYPE_ROCE : QED_RDMA_TYPE_IWARP;
+
 	info->user_dpm_enabled = (p_hwfn->db_bar_no_edpm == 0);
 
 	qed_fill_dev_info(cdev, &info->common);
@@ -1112,7 +1138,7 @@ static int qed_rdma_query_qp(void *rdma_cxt,
 			     struct qed_rdma_query_qp_out_params *out_params)
 {
 	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-	int rc;
+	int rc = 0;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
@@ -1138,7 +1164,10 @@ static int qed_rdma_query_qp(void *rdma_cxt,
 	out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp;
 	out_params->sqd_async = qp->sqd_async;
 
-	rc = qed_roce_query_qp(p_hwfn, qp, out_params);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_query_qp(qp, out_params);
+	else
+		rc = qed_roce_query_qp(p_hwfn, qp, out_params);
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc);
 	return rc;
@@ -1151,7 +1180,10 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
-	rc = qed_roce_destroy_qp(p_hwfn, qp);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		rc = qed_iwarp_destroy_qp(p_hwfn, qp);
+	else
+		rc = qed_roce_destroy_qp(p_hwfn, qp);
 
 	/* free qp params struct */
 	kfree(qp);
@@ -1190,20 +1222,27 @@ qed_rdma_create_qp(void *rdma_cxt,
 		return NULL;
 	}
 
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		if (in_params->sq_num_pages * sizeof(struct regpair) >
+		    IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) {
+			DP_NOTICE(p_hwfn->cdev,
+				  "Sq num pages: %d exceeds maximum\n",
+				  in_params->sq_num_pages);
+			return NULL;
+		}
+		if (in_params->rq_num_pages * sizeof(struct regpair) >
+		    IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) {
+			DP_NOTICE(p_hwfn->cdev,
+				  "Rq num pages: %d exceeds maximum\n",
+				  in_params->rq_num_pages);
+			return NULL;
+		}
+	}
+
 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 	if (!qp)
 		return NULL;
 
-	rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
-	qp->qpid = ((0xFF << 16) | qp->icid);
-
-	DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid);
-
-	if (rc) {
-		kfree(qp);
-		return NULL;
-	}
-
 	qp->cur_state = QED_ROCE_QP_STATE_RESET;
 	qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi);
 	qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo);
@@ -1226,6 +1265,19 @@ qed_rdma_create_qp(void *rdma_cxt,
 	qp->e2e_flow_control_en = qp->use_srq ? false : true;
 	qp->stats_queue = in_params->stats_queue;
 
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		rc = qed_iwarp_create_qp(p_hwfn, qp, out_params);
+		qp->qpid = qp->icid;
+	} else {
+		rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
+		qp->qpid = ((0xFF << 16) | qp->icid);
+	}
+
+	if (rc) {
+		kfree(qp);
+		return NULL;
+	}
+
 	out_params->icid = qp->icid;
 	out_params->qp_id = qp->qpid;
 
@@ -1324,7 +1376,14 @@ static int qed_rdma_modify_qp(void *rdma_cxt,
 			   qp->cur_state);
 	}
 
-	rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		enum qed_iwarp_qp_state new_state =
+		    qed_roce2iwarp_state(qp->cur_state);
+
+		rc = qed_iwarp_modify_qp(p_hwfn, qp, new_state, 0);
+	} else {
+		rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc);
 	return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index d91e5c4069a6..90e4e0f13727 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -42,6 +42,7 @@
 #include "qed.h"
 #include "qed_dev_api.h"
 #include "qed_hsi.h"
+#include "qed_iwarp.h"
 #include "qed_roce.h"
 
 #define QED_RDMA_MAX_FMR                    (RDMA_MAX_TIDS)
@@ -97,6 +98,7 @@ struct qed_rdma_info {
 	u16 queue_zone_base;
 	u16 max_queue_zones;
 	enum protocol_type proto;
+	struct qed_iwarp_info iwarp;
 };
 
 struct qed_rdma_qp {
@@ -105,6 +107,7 @@ struct qed_rdma_qp {
 	u32 qpid;
 	u16 icid;
 	enum qed_roce_qp_state cur_state;
+	enum qed_iwarp_qp_state iwarp_state;
 	bool use_srq;
 	bool signal_all;
 	bool fmr_and_reserved_lkey;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index e53adc3d009b..fb7c2d1562ae 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -1149,3 +1149,23 @@ int qed_roce_setup(struct qed_hwfn *p_hwfn)
 					 qed_roce_async_event);
 }
 
+int qed_roce_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 ll2_ethertype_en;
+
+	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
+
+	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+	       (ll2_ethertype_en | 0x01));
+
+	if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
+		DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
+	return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 56c95fb9a26d..c3752c56e54d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -104,12 +104,15 @@ union ramrod_data {
 	struct roce_query_qp_req_ramrod_data roce_query_qp_req;
 	struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp;
 	struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req;
+	struct roce_init_func_ramrod_data roce_init_func;
 	struct rdma_create_cq_ramrod_data rdma_create_cq;
 	struct rdma_destroy_cq_ramrod_data rdma_destroy_cq;
 	struct rdma_srq_create_ramrod_data rdma_create_srq;
 	struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
 	struct rdma_srq_modify_ramrod_data rdma_modify_srq;
-	struct roce_init_func_ramrod_data roce_init_func;
+	struct iwarp_create_qp_ramrod_data iwarp_create_qp;
+	struct iwarp_modify_qp_ramrod_data iwarp_modify_qp;
+	struct iwarp_init_func_ramrod_data iwarp_init_func;
 	struct fcoe_init_ramrod_params fcoe_init;
 	struct fcoe_conn_offload_ramrod_params fcoe_conn_ofld;
 	struct fcoe_conn_terminate_ramrod_params fcoe_conn_terminate;
diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h
new file mode 100644
index 000000000000..b8b3e1cfae90
--- /dev/null
+++ b/include/linux/qed/iwarp_common.h
@@ -0,0 +1,53 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWARP_COMMON__
+#define __IWARP_COMMON__
+#include <linux/qed/rdma_common.h>
+/************************/
+/* IWARP FW CONSTANTS	*/
+/************************/
+
+#define IWARP_ACTIVE_MODE 0
+#define IWARP_PASSIVE_MODE 1
+
+#define IWARP_SHARED_QUEUE_PAGE_SIZE		(0x8000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET   (0x4000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE (0x1000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET   (0x5000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE (0x3000)
+
+#define IWARP_REQ_MAX_INLINE_DATA_SIZE          (128)
+#define IWARP_REQ_MAX_SINGLE_SQ_WQE_SIZE        (176)
+
+#define IWARP_MAX_QPS                           (64 * 1024)
+
+#endif /* __IWARP_COMMON__ */
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index ff9be01b5f53..5b4bb09a3354 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -491,6 +491,7 @@ struct qed_roce_ll2_packet {
 
 enum qed_rdma_type {
 	QED_RDMA_TYPE_ROCE,
+	QED_RDMA_TYPE_IWARP
 };
 
 struct qed_dev_rdma_info {
-- 
cgit v1.2.3


From 526d1d05e456c9cfc077694d18b5f521e2338f18 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:23 +0300
Subject: qed: Rename some ll2 related defines

Make some names more generic as they will be used by iWARP too.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 29 ++++++++++++++---------------
 include/linux/qed/qed_ll2_if.h            |  2 +-
 3 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 22e1171c317e..fd8cd5e17121 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -779,7 +779,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
 }
 
 #define PURE_LB_TC 8
-#define OOO_LB_TC 9
+#define PKT_LB_TC 9
 
 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate);
 void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index be66f19d577d..e235fb267ab9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -309,7 +309,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 		list_del(&p_pkt->list_entry);
 		b_last_packet = list_empty(&p_tx->active_descq);
 		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
-		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
 			struct qed_ooo_buffer *p_buffer;
 
 			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -532,7 +532,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
 		list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
 			struct qed_ooo_buffer *p_buffer;
 
 			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -893,8 +893,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
 	p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
 	p_ramrod->queue_id = p_ll2_conn->queue_id;
-	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
-									  : 1;
+	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
 
 	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
 	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
@@ -924,7 +923,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
 		return 0;
 
-	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
 		p_ll2_conn->tx_stats_en = 0;
 	else
 		p_ll2_conn->tx_stats_en = 1;
@@ -955,10 +954,10 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 
 	switch (p_ll2_conn->input.tx_tc) {
-	case LB_TC:
+	case PURE_LB_TC:
 		pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
 		break;
-	case OOO_LB_TC:
+	case PKT_LB_TC:
 		pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO);
 		break;
 	default:
@@ -973,7 +972,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->conn_type = PROTOCOLID_FCOE;
 		break;
 	case QED_LL2_TYPE_ISCSI:
-	case QED_LL2_TYPE_ISCSI_OOO:
+	case QED_LL2_TYPE_OOO:
 		p_ramrod->conn_type = PROTOCOLID_ISCSI;
 		break;
 	case QED_LL2_TYPE_ROCE:
@@ -1142,7 +1141,7 @@ qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
 	u16 buf_idx;
 	int rc = 0;
 
-	if (p_ll2_info->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_info->input.conn_type != QED_LL2_TYPE_OOO)
 		return rc;
 
 	/* Correct number of requested OOO buffers if needed */
@@ -1280,7 +1279,7 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
 		goto q_allocate_fail;
 
 	/* Register callbacks for the Rx/Tx queues */
-	if (data->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+	if (data->input.conn_type == QED_LL2_TYPE_OOO) {
 		comp_rx_cb = qed_ll2_lb_rxq_completion;
 		comp_tx_cb = qed_ll2_lb_txq_completion;
 	} else {
@@ -1339,7 +1338,7 @@ static void
 qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
 				 struct qed_ll2_info *p_ll2_conn)
 {
-	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
 		return;
 
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -1794,7 +1793,7 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
 		qed_ll2_rxq_flush(p_hwfn, connection_handle);
 	}
 
-	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
 		qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
 
 	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
@@ -1816,7 +1815,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
 {
 	struct qed_ooo_buffer *p_buffer;
 
-	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
 		return;
 
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -2063,7 +2062,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
 	ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
 
 	if (lb) {
-		data->input.tx_tc = OOO_LB_TC;
+		data->input.tx_tc = PKT_LB_TC;
 		data->input.tx_dest = QED_LL2_TX_DEST_LB;
 	} else {
 		data->input.tx_tc = 0;
@@ -2080,7 +2079,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
 	int rc;
 
 	qed_ll2_set_conn_data(cdev, &data, params,
-			      QED_LL2_TYPE_ISCSI_OOO, handle, true);
+			      QED_LL2_TYPE_OOO, handle, true);
 
 	rc = qed_ll2_acquire_connection(hwfn, &data);
 	if (rc) {
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 5958b45eb699..c9c56bc42a82 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -47,7 +47,7 @@ enum qed_ll2_conn_type {
 	QED_LL2_TYPE_FCOE,
 	QED_LL2_TYPE_ISCSI,
 	QED_LL2_TYPE_TEST,
-	QED_LL2_TYPE_ISCSI_OOO,
+	QED_LL2_TYPE_OOO,
 	QED_LL2_TYPE_RESERVED2,
 	QED_LL2_TYPE_ROCE,
 	QED_LL2_TYPE_RESERVED3,
-- 
cgit v1.2.3


From cc4ad324e7e247bb4979791dd4f2ff11419d9742 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:24 +0300
Subject: qed: Add iWARP support in ll2 connections

Add a new connection type for iWARP ll2 connections for setting
correct ll2 filters and connection type to FW.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 13 +++++++++++--
 include/linux/qed/qed_ll2_if.h            |  1 +
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index e235fb267ab9..c06ad4f0758e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -896,7 +896,8 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
 
 	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
-	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
+	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
+	    (conn_type != QED_LL2_TYPE_IWARP)) {
 		p_ramrod->mf_si_bcast_accept_all = 1;
 		p_ramrod->mf_si_mcast_accept_all = 1;
 	} else {
@@ -972,12 +973,20 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->conn_type = PROTOCOLID_FCOE;
 		break;
 	case QED_LL2_TYPE_ISCSI:
-	case QED_LL2_TYPE_OOO:
 		p_ramrod->conn_type = PROTOCOLID_ISCSI;
 		break;
 	case QED_LL2_TYPE_ROCE:
 		p_ramrod->conn_type = PROTOCOLID_ROCE;
 		break;
+	case QED_LL2_TYPE_IWARP:
+		p_ramrod->conn_type = PROTOCOLID_IWARP;
+		break;
+	case QED_LL2_TYPE_OOO:
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+			p_ramrod->conn_type = PROTOCOLID_ISCSI;
+		else
+			p_ramrod->conn_type = PROTOCOLID_IWARP;
+		break;
 	default:
 		p_ramrod->conn_type = PROTOCOLID_ETH;
 		DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type);
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index c9c56bc42a82..dd7a3b86bb9e 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -50,6 +50,7 @@ enum qed_ll2_conn_type {
 	QED_LL2_TYPE_OOO,
 	QED_LL2_TYPE_RESERVED2,
 	QED_LL2_TYPE_ROCE,
+	QED_LL2_TYPE_IWARP,
 	QED_LL2_TYPE_RESERVED3,
 	MAX_QED_LL2_RX_CONN_TYPE
 };
-- 
cgit v1.2.3


From 65a91a6cdb868a28b919ca133c0f9d9dfd9a635a Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:26 +0300
Subject: qed: iWARP CM add listener functions and initial SYN processing

This patch adds the ability to add and remove listeners and identify
whether the SYN packet received is intended for iWARP or not. If
a listener is not found the SYN packet is posted back to the chip.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 269 +++++++++++++++++++++++++++-
 drivers/net/ethernet/qlogic/qed/qed_iwarp.h |  23 +++
 drivers/net/ethernet/qlogic/qed/qed_rdma.c  |   2 +
 include/linux/qed/qed_rdma_if.h             |  52 ++++++
 4 files changed, 343 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index f3b4b32100f5..2bab57c6bae8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -31,6 +31,10 @@
  */
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/spinlock.h>
+#include <linux/tcp.h>
 #include "qed_cxt.h"
 #include "qed_hw.h"
 #include "qed_ll2.h"
@@ -477,6 +481,31 @@ void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
 {
 }
 
+static void
+qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn,
+			struct qed_iwarp_cm_info *cm_info)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "ip_version = %d\n",
+		   cm_info->ip_version);
+
+	if (cm_info->ip_version == QED_TCP_IPV4)
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "remote_ip %pI4h:%x, local_ip %pI4h:%x vlan=%x\n",
+			   cm_info->remote_ip, cm_info->remote_port,
+			   cm_info->local_ip, cm_info->local_port,
+			   cm_info->vlan);
+	else
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n",
+			   cm_info->remote_ip, cm_info->remote_port,
+			   cm_info->local_ip, cm_info->local_port,
+			   cm_info->vlan);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "private_data_len = %x ord = %d, ird = %d\n",
+		   cm_info->private_data_len, cm_info->ord, cm_info->ird);
+}
+
 static int
 qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn,
 		      struct qed_iwarp_ll2_buff *buf, u8 handle)
@@ -497,11 +526,147 @@ qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+static struct qed_iwarp_listener *
+qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_cm_info *cm_info)
+{
+	struct qed_iwarp_listener *listener = NULL;
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	bool found = false;
+
+	qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+	list_for_each_entry(listener,
+			    &p_hwfn->p_rdma_info->iwarp.listen_list,
+			    list_entry) {
+		if (listener->port == cm_info->local_port) {
+			if (!memcmp(listener->ip_addr,
+				    ip_zero, sizeof(ip_zero))) {
+				found = true;
+				break;
+			}
+
+			if (!memcmp(listener->ip_addr,
+				    cm_info->local_ip,
+				    sizeof(cm_info->local_ip)) &&
+			    (listener->vlan == cm_info->vlan)) {
+				found = true;
+				break;
+			}
+		}
+	}
+
+	if (found) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener found = %p\n",
+			   listener);
+		return listener;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener not found\n");
+	return NULL;
+}
+
+static int
+qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_cm_info *cm_info,
+		       void *buf,
+		       u8 *remote_mac_addr,
+		       u8 *local_mac_addr,
+		       int *payload_len, int *tcp_start_offset)
+{
+	struct vlan_ethhdr *vethh;
+	bool vlan_valid = false;
+	struct ipv6hdr *ip6h;
+	struct ethhdr *ethh;
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	int eth_hlen;
+	int ip_hlen;
+	int eth_type;
+	int i;
+
+	ethh = buf;
+	eth_type = ntohs(ethh->h_proto);
+	if (eth_type == ETH_P_8021Q) {
+		vlan_valid = true;
+		vethh = (struct vlan_ethhdr *)ethh;
+		cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK;
+		eth_type = ntohs(vethh->h_vlan_encapsulated_proto);
+	}
+
+	eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0);
+
+	memcpy(remote_mac_addr, ethh->h_source, ETH_ALEN);
+
+	memcpy(local_mac_addr, ethh->h_dest, ETH_ALEN);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_type =%d source mac: %pM\n",
+		   eth_type, ethh->h_source);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_hlen=%d destination mac: %pM\n",
+		   eth_hlen, ethh->h_dest);
+
+	iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
+
+	if (eth_type == ETH_P_IP) {
+		cm_info->local_ip[0] = ntohl(iph->daddr);
+		cm_info->remote_ip[0] = ntohl(iph->saddr);
+		cm_info->ip_version = TCP_IPV4;
+
+		ip_hlen = (iph->ihl) * sizeof(u32);
+		*payload_len = ntohs(iph->tot_len) - ip_hlen;
+	} else if (eth_type == ETH_P_IPV6) {
+		ip6h = (struct ipv6hdr *)iph;
+		for (i = 0; i < 4; i++) {
+			cm_info->local_ip[i] =
+			    ntohl(ip6h->daddr.in6_u.u6_addr32[i]);
+			cm_info->remote_ip[i] =
+			    ntohl(ip6h->saddr.in6_u.u6_addr32[i]);
+		}
+		cm_info->ip_version = TCP_IPV6;
+
+		ip_hlen = sizeof(*ip6h);
+		*payload_len = ntohs(ip6h->payload_len);
+	} else {
+		DP_NOTICE(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type);
+		return -EINVAL;
+	}
+
+	tcph = (struct tcphdr *)((u8 *)iph + ip_hlen);
+
+	if (!tcph->syn) {
+		DP_NOTICE(p_hwfn,
+			  "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n",
+			  iph->ihl, tcph->source, tcph->dest);
+		return -EINVAL;
+	}
+
+	cm_info->local_port = ntohs(tcph->dest);
+	cm_info->remote_port = ntohs(tcph->source);
+
+	qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+	*tcp_start_offset = eth_hlen + ip_hlen;
+
+	return 0;
+}
+
 static void
 qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 {
 	struct qed_iwarp_ll2_buff *buf = data->cookie;
+	struct qed_iwarp_listener *listener;
+	struct qed_ll2_tx_pkt_info tx_pkt;
+	struct qed_iwarp_cm_info cm_info;
 	struct qed_hwfn *p_hwfn = cxt;
+	u8 remote_mac_addr[ETH_ALEN];
+	u8 local_mac_addr[ETH_ALEN];
+	int tcp_start_offset;
+	u8 ll2_syn_handle;
+	int payload_len;
+	int rc;
+
+	memset(&cm_info, 0, sizeof(cm_info));
 
 	if (GET_FIELD(data->parse_flags,
 		      PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) &&
@@ -510,11 +675,52 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 		goto err;
 	}
 
-	/* Process SYN packet - added later on in series */
+	rc = qed_iwarp_parse_rx_pkt(p_hwfn, &cm_info, (u8 *)(buf->data) +
+				    data->u.placement_offset, remote_mac_addr,
+				    local_mac_addr, &payload_len,
+				    &tcp_start_offset);
+	if (rc)
+		goto err;
+
+	/* Check if there is a listener for this 4-tuple+vlan */
+	ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+	listener = qed_iwarp_get_listener(p_hwfn, &cm_info);
+	if (!listener) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "SYN received on tuple not listened on parse_flags=%d packet len=%d\n",
+			   data->parse_flags, data->length.packet_length);
+
+		memset(&tx_pkt, 0, sizeof(tx_pkt));
+		tx_pkt.num_of_bds = 1;
+		tx_pkt.vlan = data->vlan;
+
+		if (GET_FIELD(data->parse_flags,
+			      PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
+			SET_FIELD(tx_pkt.bd_flags,
+				  CORE_TX_BD_DATA_VLAN_INSERTION, 1);
+
+		tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2;
+		tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+		tx_pkt.first_frag = buf->data_phys_addr +
+				    data->u.placement_offset;
+		tx_pkt.first_frag_len = data->length.packet_length;
+		tx_pkt.cookie = buf;
+
+		rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_syn_handle,
+					       &tx_pkt, true);
+
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Can't post SYN back to chip rc=%d\n", rc);
+			goto err;
+		}
+		return;
+	}
 
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Received syn on listening port\n");
 err:
-	qed_iwarp_ll2_post_rx(p_hwfn, buf,
-			      p_hwfn->p_rdma_info->iwarp.ll2_syn_handle);
+	qed_iwarp_ll2_post_rx(p_hwfn, buf, ll2_syn_handle);
 }
 
 static void qed_iwarp_ll2_rel_rx_pkt(void *cxt, u8 connection_handle,
@@ -700,6 +906,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
 
 	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.listen_list);
 
 	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
 				  qed_iwarp_async_event);
@@ -728,6 +935,62 @@ static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+			struct qed_iwarp_listen_in *iparams,
+			struct qed_iwarp_listen_out *oparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_listener *listener;
+
+	listener = kzalloc(sizeof(*listener), GFP_KERNEL);
+	if (!listener)
+		return -ENOMEM;
+
+	listener->ip_version = iparams->ip_version;
+	memcpy(listener->ip_addr, iparams->ip_addr, sizeof(listener->ip_addr));
+	listener->port = iparams->port;
+	listener->vlan = iparams->vlan;
+
+	listener->event_cb = iparams->event_cb;
+	listener->cb_context = iparams->cb_context;
+	listener->max_backlog = iparams->max_backlog;
+	oparams->handle = listener;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&listener->list_entry,
+		      &p_hwfn->p_rdma_info->iwarp.listen_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n",
+		   listener->event_cb,
+		   listener,
+		   listener->ip_addr[0],
+		   listener->ip_addr[1],
+		   listener->ip_addr[2],
+		   listener->ip_addr[3], listener->port, listener->vlan);
+
+	return 0;
+}
+
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle)
+{
+	struct qed_iwarp_listener *listener = handle;
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "handle=%p\n", handle);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&listener->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	kfree(listener);
+
+	return 0;
+}
+
 void
 qed_iwarp_query_qp(struct qed_rdma_qp *qp,
 		   struct qed_rdma_query_qp_out_params *out_params)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index 068b8594f1c5..29005ac83a6a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -54,6 +54,7 @@ struct qed_iwarp_ll2_buff {
 };
 
 struct qed_iwarp_info {
+	struct list_head listen_list;	/* qed_iwarp_listener */
 	spinlock_t iw_lock;	/* for iwarp resources */
 	spinlock_t qp_lock;	/* for teardown races */
 	u32 rcv_wnd_scale;
@@ -67,6 +68,21 @@ struct qed_iwarp_info {
 	enum mpa_rtr_type rtr_type;
 };
 
+struct qed_iwarp_listener {
+	struct list_head list_entry;
+
+	/* The event_cb function is called for connection requests.
+	 * The cb_context is passed to the event_cb function.
+	 */
+	iwarp_event_handler event_cb;
+	void *cb_context;
+	u32 max_backlog;
+	u32 ip_addr[4];
+	u16 port;
+	u16 vlan;
+	u8 ip_version;
+};
+
 int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
 
 int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
@@ -94,4 +110,11 @@ int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
 void qed_iwarp_query_qp(struct qed_rdma_qp *qp,
 			struct qed_rdma_query_qp_out_params *out_params);
 
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+			struct qed_iwarp_listen_in *iparams,
+			struct qed_iwarp_listen_out *oparams);
+
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index ee6887f8c260..29de915007a0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1772,6 +1772,8 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
 	.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
 	.ll2_get_stats = &qed_ll2_get_stats,
+	.iwarp_create_listen = &qed_iwarp_create_listen,
+	.iwarp_destroy_listen = &qed_iwarp_destroy_listen,
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void)
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index 5b4bb09a3354..28df5688ad0c 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -470,6 +470,52 @@ struct qed_rdma_counters_out_params {
 #define QED_ROCE_TX_HEAD_FAILURE        (1)
 #define QED_ROCE_TX_FRAG_FAILURE        (2)
 
+enum qed_iwarp_event_type {
+	QED_IWARP_EVENT_MPA_REQUEST,	/* Passive side request received */
+};
+
+enum qed_tcp_ip_version {
+	QED_TCP_IPV4,
+	QED_TCP_IPV6,
+};
+
+struct qed_iwarp_cm_info {
+	enum qed_tcp_ip_version ip_version;
+	u32 remote_ip[4];
+	u32 local_ip[4];
+	u16 remote_port;
+	u16 local_port;
+	u16 vlan;
+	u8 ord;
+	u8 ird;
+	u16 private_data_len;
+	const void *private_data;
+};
+
+struct qed_iwarp_cm_event_params {
+	enum qed_iwarp_event_type event;
+	const struct qed_iwarp_cm_info *cm_info;
+	void *ep_context;	/* To be passed to accept call */
+	int status;
+};
+
+typedef int (*iwarp_event_handler) (void *context,
+				    struct qed_iwarp_cm_event_params *event);
+
+struct qed_iwarp_listen_in {
+	iwarp_event_handler event_cb;
+	void *cb_context;	/* passed to event_cb */
+	u32 max_backlog;
+	enum qed_tcp_ip_version ip_version;
+	u32 ip_addr[4];
+	u16 port;
+	u16 vlan;
+};
+
+struct qed_iwarp_listen_out {
+	void *handle;
+};
+
 struct qed_roce_ll2_header {
 	void *vaddr;
 	dma_addr_t baddr;
@@ -576,6 +622,12 @@ struct qed_rdma_ops {
 	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
 				  u8 *old_mac_address, u8 *new_mac_address);
 
+	int (*iwarp_create_listen)(void *rdma_cxt,
+				   struct qed_iwarp_listen_in *iparams,
+				   struct qed_iwarp_listen_out *oparams);
+
+	int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle);
+
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void);
-- 
cgit v1.2.3


From 456a584947d5b92d5e5a62cc68125ab5f150aa8c Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:27 +0300
Subject: qed: iWARP CM add passive side connect

This patch implements the passive side connect.
It addresses pre-allocating resources, creating a connection
element upon valid SYN packet received. Calling upper layer and
implementation of the accept/reject calls.

Error handling is not part of this patch.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h       |   2 +
 drivers/net/ethernet/qlogic/qed/qed_dev.c   |  11 +
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 939 +++++++++++++++++++++++++++-
 drivers/net/ethernet/qlogic/qed/qed_iwarp.h |  62 ++
 drivers/net/ethernet/qlogic/qed/qed_l2.c    |  13 -
 drivers/net/ethernet/qlogic/qed/qed_rdma.h  |   2 +
 drivers/net/ethernet/qlogic/qed/qed_sp.h    |   2 +
 include/linux/qed/common_hsi.h              |   2 +
 include/linux/qed/qed_rdma_if.h             |  26 +-
 9 files changed, 1039 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index fd8cd5e17121..91003bc6f00b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -789,6 +789,8 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
 void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 int qed_device_num_engines(struct qed_dev *cdev);
 int qed_device_get_port_id(struct qed_dev *cdev);
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
 
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6c8505dc5c31..4060a6ad9be3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -4127,3 +4127,14 @@ int qed_device_get_port_id(struct qed_dev *cdev)
 {
 	return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev);
 }
+
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac)
+{
+	((u8 *)fw_msb)[0] = mac[1];
+	((u8 *)fw_msb)[1] = mac[0];
+	((u8 *)fw_mid)[0] = mac[3];
+	((u8 *)fw_mid)[1] = mac[2];
+	((u8 *)fw_lsb)[0] = mac[5];
+	((u8 *)fw_lsb)[1] = mac[4];
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 2bab57c6bae8..a6dadae1f998 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -44,9 +44,30 @@
 
 #define QED_IWARP_ORD_DEFAULT		32
 #define QED_IWARP_IRD_DEFAULT		32
+#define QED_IWARP_MAX_FW_MSS		4120
+
+#define QED_EP_SIG 0xecabcdef
+
+struct mpa_v2_hdr {
+	__be16 ird;
+	__be16 ord;
+};
+
+#define MPA_V2_PEER2PEER_MODEL  0x8000
+#define MPA_V2_SEND_RTR         0x4000	/* on ird */
+#define MPA_V2_READ_RTR         0x4000	/* on ord */
+#define MPA_V2_WRITE_RTR        0x8000
+#define MPA_V2_IRD_ORD_MASK     0x3FFF
+
+#define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
+
+#define QED_IWARP_INVALID_TCP_CID	0xffffffff
 #define QED_IWARP_RCV_WND_SIZE_DEF	(256 * 1024)
 #define QED_IWARP_RCV_WND_SIZE_MIN	(64 * 1024)
+#define TIMESTAMP_HEADER_SIZE		(12)
+
 #define QED_IWARP_TS_EN			BIT(0)
+#define QED_IWARP_DA_EN			BIT(1)
 #define QED_IWARP_PARAM_CRC_NEEDED	(1)
 #define QED_IWARP_PARAM_P2P		(1)
 
@@ -63,7 +84,8 @@ void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn)
 	dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE;
 	dev->max_qp = min_t(u32,
 			    IWARP_MAX_QPS,
-			    p_hwfn->p_rdma_info->num_qps);
+			    p_hwfn->p_rdma_info->num_qps) -
+		      QED_IWARP_PREALLOC_CNT;
 
 	dev->max_cq = dev->max_qp;
 
@@ -78,12 +100,22 @@ void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	p_hwfn->b_rdma_enabled_in_prs = true;
 }
 
+/* We have two cid maps, one for tcp which should be used only from passive
+ * syn processing and replacing a pre-allocated ep in the list. The second
+ * for active tcp and for QPs.
+ */
 static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
 {
 	cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
 
 	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
-	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+
+	if (cid < QED_IWARP_PREALLOC_CNT)
+		qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+				    cid);
+	else
+		qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+
 	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 }
 
@@ -107,6 +139,45 @@ static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
 	return rc;
 }
 
+static void qed_iwarp_set_tcp_cid(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+/* This function allocates a cid for passive tcp (called from syn receive)
+ * the reason it's separate from the regular cid allocation is because it
+ * is assured that these cids already have ilt allocated. They are preallocated
+ * to ensure that we won't need to allocate memory during syn processing
+ */
+static int qed_iwarp_alloc_tcp_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+	rc = qed_rdma_bmap_alloc_id(p_hwfn,
+				    &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "can't allocate iwarp tcp cid max-count=%d\n",
+			   p_hwfn->p_rdma_info->tcp_cid_map.max_count);
+
+		*cid = QED_IWARP_INVALID_TCP_CID;
+		return rc;
+	}
+
+	*cid += qed_cxt_get_proto_cid_start(p_hwfn,
+					    p_hwfn->p_rdma_info->proto);
+	return 0;
+}
+
 int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
 			struct qed_rdma_qp *qp,
 			struct qed_rdma_create_qp_out_params *out_params)
@@ -403,6 +474,26 @@ int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 	return rc;
 }
 
+static void qed_iwarp_destroy_ep(struct qed_hwfn *p_hwfn,
+				 struct qed_iwarp_ep *ep,
+				 bool remove_from_active_list)
+{
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(*ep->ep_buffer_virt),
+			  ep->ep_buffer_virt, ep->ep_buffer_phys);
+
+	if (remove_from_active_list) {
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_del(&ep->list_entry);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+
+	if (ep->qp)
+		ep->qp->ep = NULL;
+
+	kfree(ep);
+}
+
 int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 {
 	int rc = 0;
@@ -424,6 +515,507 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 	return rc;
 }
 
+static int
+qed_iwarp_create_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep **ep_out)
+{
+	struct qed_iwarp_ep *ep;
+	int rc;
+
+	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	ep->state = QED_IWARP_EP_INIT;
+
+	ep->ep_buffer_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+						sizeof(*ep->ep_buffer_virt),
+						&ep->ep_buffer_phys,
+						GFP_KERNEL);
+	if (!ep->ep_buffer_virt) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	ep->sig = QED_EP_SIG;
+
+	*ep_out = ep;
+
+	return 0;
+
+err:
+	kfree(ep);
+	return rc;
+}
+
+static void
+qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
+			   struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "local_mac=%x %x %x, remote_mac=%x %x %x\n",
+		   p_tcp_ramrod->tcp.local_mac_addr_lo,
+		   p_tcp_ramrod->tcp.local_mac_addr_mid,
+		   p_tcp_ramrod->tcp.local_mac_addr_hi,
+		   p_tcp_ramrod->tcp.remote_mac_addr_lo,
+		   p_tcp_ramrod->tcp.remote_mac_addr_mid,
+		   p_tcp_ramrod->tcp.remote_mac_addr_hi);
+
+	if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "local_ip=%pI4h:%x, remote_ip=%pI4h%x, vlan=%x\n",
+			   p_tcp_ramrod->tcp.local_ip,
+			   p_tcp_ramrod->tcp.local_port,
+			   p_tcp_ramrod->tcp.remote_ip,
+			   p_tcp_ramrod->tcp.remote_port,
+			   p_tcp_ramrod->tcp.vlan_id);
+	} else {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "local_ip=%pI6h:%x, remote_ip=%pI6h:%x, vlan=%x\n",
+			   p_tcp_ramrod->tcp.local_ip,
+			   p_tcp_ramrod->tcp.local_port,
+			   p_tcp_ramrod->tcp.remote_ip,
+			   p_tcp_ramrod->tcp.remote_port,
+			   p_tcp_ramrod->tcp.vlan_id);
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "flow_label=%x, ttl=%x, tos_or_tc=%x, mss=%x, rcv_wnd_scale=%x, connect_mode=%x, flags=%x\n",
+		   p_tcp_ramrod->tcp.flow_label,
+		   p_tcp_ramrod->tcp.ttl,
+		   p_tcp_ramrod->tcp.tos_or_tc,
+		   p_tcp_ramrod->tcp.mss,
+		   p_tcp_ramrod->tcp.rcv_wnd_scale,
+		   p_tcp_ramrod->tcp.connect_mode,
+		   p_tcp_ramrod->tcp.flags);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "syn_ip_payload_length=%x, lo=%x, hi=%x\n",
+		   p_tcp_ramrod->tcp.syn_ip_payload_length,
+		   p_tcp_ramrod->tcp.syn_phy_addr_lo,
+		   p_tcp_ramrod->tcp.syn_phy_addr_hi);
+}
+
+static int
+qed_iwarp_tcp_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod;
+	struct tcp_offload_params_opt2 *tcp;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t async_output_phys;
+	dma_addr_t in_pdata_phys;
+	u16 physical_q;
+	u8 tcp_flags;
+	int rc;
+	int i;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = ep->tcp_cid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_CB;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_TCP_OFFLOAD,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		return rc;
+
+	p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload;
+
+	in_pdata_phys = ep->ep_buffer_phys +
+			offsetof(struct qed_iwarp_ep_memory, in_pdata);
+	DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr,
+		       in_pdata_phys);
+
+	p_tcp_ramrod->iwarp.incoming_ulp_buffer.len =
+	    cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+
+	async_output_phys = ep->ep_buffer_phys +
+			    offsetof(struct qed_iwarp_ep_memory, async_output);
+	DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.async_eqe_output_buf,
+		       async_output_phys);
+
+	p_tcp_ramrod->iwarp.handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+	p_tcp_ramrod->iwarp.handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+	p_tcp_ramrod->iwarp.physical_q0 = cpu_to_le16(physical_q);
+	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+	p_tcp_ramrod->iwarp.physical_q1 = cpu_to_le16(physical_q);
+	p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev;
+
+	tcp = &p_tcp_ramrod->tcp;
+	qed_set_fw_mac_addr(&tcp->remote_mac_addr_hi,
+			    &tcp->remote_mac_addr_mid,
+			    &tcp->remote_mac_addr_lo, ep->remote_mac_addr);
+	qed_set_fw_mac_addr(&tcp->local_mac_addr_hi, &tcp->local_mac_addr_mid,
+			    &tcp->local_mac_addr_lo, ep->local_mac_addr);
+
+	tcp->vlan_id = cpu_to_le16(ep->cm_info.vlan);
+
+	tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags;
+	tcp->flags = 0;
+	SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_TS_EN,
+		  !!(tcp_flags & QED_IWARP_TS_EN));
+
+	SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_DA_EN,
+		  !!(tcp_flags & QED_IWARP_DA_EN));
+
+	tcp->ip_version = ep->cm_info.ip_version;
+
+	for (i = 0; i < 4; i++) {
+		tcp->remote_ip[i] = cpu_to_le32(ep->cm_info.remote_ip[i]);
+		tcp->local_ip[i] = cpu_to_le32(ep->cm_info.local_ip[i]);
+	}
+
+	tcp->remote_port = cpu_to_le16(ep->cm_info.remote_port);
+	tcp->local_port = cpu_to_le16(ep->cm_info.local_port);
+	tcp->mss = cpu_to_le16(ep->mss);
+	tcp->flow_label = 0;
+	tcp->ttl = 0x40;
+	tcp->tos_or_tc = 0;
+
+	tcp->rcv_wnd_scale = (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale;
+	tcp->connect_mode = ep->connect_mode;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		tcp->syn_ip_payload_length =
+			cpu_to_le16(ep->syn_ip_payload_length);
+		tcp->syn_phy_addr_hi = DMA_HI_LE(ep->syn_phy_addr);
+		tcp->syn_phy_addr_lo = DMA_LO_LE(ep->syn_phy_addr);
+	}
+
+	qed_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "EP(0x%x) Offload completed rc=%d\n", ep->tcp_cid, rc);
+
+	return rc;
+}
+
+static void
+qed_iwarp_mpa_received(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_iwarp_cm_event_params params;
+	struct mpa_v2_hdr *mpa_v2;
+	union async_output *async_data;
+	u16 mpa_ord, mpa_ird;
+	u8 mpa_hdr_size = 0;
+	u8 mpa_rev;
+
+	async_data = &ep->ep_buffer_virt->async_output;
+
+	mpa_rev = async_data->mpa_request.mpa_handshake_mode;
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "private_data_len=%x handshake_mode=%x private_data=(%x)\n",
+		   async_data->mpa_request.ulp_data_len,
+		   mpa_rev, *((u32 *)((u8 *)ep->ep_buffer_virt->in_pdata)));
+
+	if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+		/* Read ord/ird values from private data buffer */
+		mpa_v2 = (struct mpa_v2_hdr *)ep->ep_buffer_virt->in_pdata;
+		mpa_hdr_size = sizeof(*mpa_v2);
+
+		mpa_ord = ntohs(mpa_v2->ord);
+		mpa_ird = ntohs(mpa_v2->ird);
+
+		/* Temprary store in cm_info incoming ord/ird requested, later
+		 * replace with negotiated value during accept
+		 */
+		ep->cm_info.ord = (u8)min_t(u16,
+					    (mpa_ord & MPA_V2_IRD_ORD_MASK),
+					    QED_IWARP_ORD_DEFAULT);
+
+		ep->cm_info.ird = (u8)min_t(u16,
+					    (mpa_ird & MPA_V2_IRD_ORD_MASK),
+					    QED_IWARP_IRD_DEFAULT);
+
+		/* Peer2Peer negotiation */
+		ep->rtr_type = MPA_RTR_TYPE_NONE;
+		if (mpa_ird & MPA_V2_PEER2PEER_MODEL) {
+			if (mpa_ord & MPA_V2_WRITE_RTR)
+				ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE;
+
+			if (mpa_ord & MPA_V2_READ_RTR)
+				ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ;
+
+			if (mpa_ird & MPA_V2_SEND_RTR)
+				ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND;
+
+			ep->rtr_type &= iwarp_info->rtr_type;
+
+			/* if we're left with no match send our capabilities */
+			if (ep->rtr_type == MPA_RTR_TYPE_NONE)
+				ep->rtr_type = iwarp_info->rtr_type;
+		}
+
+		ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+	} else {
+		ep->cm_info.ord = QED_IWARP_ORD_DEFAULT;
+		ep->cm_info.ird = QED_IWARP_IRD_DEFAULT;
+		ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n",
+		   mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type,
+		   async_data->mpa_request.ulp_data_len, mpa_hdr_size);
+
+	/* Strip mpa v2 hdr from private data before sending to upper layer */
+	ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_hdr_size;
+
+	ep->cm_info.private_data_len = async_data->mpa_request.ulp_data_len -
+				       mpa_hdr_size;
+
+	params.event = QED_IWARP_EVENT_MPA_REQUEST;
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+	params.status = 0;
+
+	ep->state = QED_IWARP_EP_MPA_REQ_RCVD;
+	ep->event_cb(ep->cb_context, &params);
+}
+
+static int
+qed_iwarp_mpa_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod;
+	struct qed_sp_init_data init_data;
+	dma_addr_t async_output_phys;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t out_pdata_phys;
+	dma_addr_t in_pdata_phys;
+	struct qed_rdma_qp *qp;
+	bool reject;
+	int rc;
+
+	if (!ep)
+		return -EINVAL;
+
+	qp = ep->qp;
+	reject = !qp;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = reject ? ep->tcp_cid : qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		return rc;
+
+	p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload;
+	out_pdata_phys = ep->ep_buffer_phys +
+			 offsetof(struct qed_iwarp_ep_memory, out_pdata);
+	DMA_REGPAIR_LE(p_mpa_ramrod->common.outgoing_ulp_buffer.addr,
+		       out_pdata_phys);
+	p_mpa_ramrod->common.outgoing_ulp_buffer.len =
+	    ep->cm_info.private_data_len;
+	p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed;
+
+	p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord;
+	p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird;
+
+	p_mpa_ramrod->tcp_cid = p_hwfn->hw_info.opaque_fid << 16 | ep->tcp_cid;
+
+	in_pdata_phys = ep->ep_buffer_phys +
+			offsetof(struct qed_iwarp_ep_memory, in_pdata);
+	p_mpa_ramrod->tcp_connect_side = ep->connect_mode;
+	DMA_REGPAIR_LE(p_mpa_ramrod->incoming_ulp_buffer.addr,
+		       in_pdata_phys);
+	p_mpa_ramrod->incoming_ulp_buffer.len =
+	    cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+	async_output_phys = ep->ep_buffer_phys +
+			    offsetof(struct qed_iwarp_ep_memory, async_output);
+	DMA_REGPAIR_LE(p_mpa_ramrod->async_eqe_output_buf,
+		       async_output_phys);
+	p_mpa_ramrod->handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+	p_mpa_ramrod->handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+	if (!reject) {
+		DMA_REGPAIR_LE(p_mpa_ramrod->shared_queue_addr,
+			       qp->shared_queue_phys_addr);
+		p_mpa_ramrod->stats_counter_id =
+		    RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + qp->stats_queue;
+	} else {
+		p_mpa_ramrod->common.reject = 1;
+	}
+
+	p_mpa_ramrod->mode = ep->mpa_rev;
+	SET_FIELD(p_mpa_ramrod->rtr_pref,
+		  IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, ep->rtr_type);
+
+	ep->state = QED_IWARP_EP_MPA_OFFLOADED;
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (!reject)
+		ep->cid = qp->icid;	/* Now they're migrated. */
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n",
+		   reject ? 0xffff : qp->icid,
+		   ep->tcp_cid,
+		   rc,
+		   ep->cm_info.ird,
+		   ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject);
+	return rc;
+}
+
+static void
+qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	ep->state = QED_IWARP_EP_INIT;
+	if (ep->qp)
+		ep->qp->ep = NULL;
+	ep->qp = NULL;
+	memset(&ep->cm_info, 0, sizeof(ep->cm_info));
+
+	if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) {
+		/* We don't care about the return code, it's ok if tcp_cid
+		 * remains invalid...in this case we'll defer allocation
+		 */
+		qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid);
+	}
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	list_del(&ep->list_entry);
+	list_add_tail(&ep->list_entry,
+		      &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+}
+
+#define QED_IWARP_CONNECT_MODE_STRING(ep) \
+	((ep)->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active"
+
+/* Called as a result of the event:
+ * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE
+ */
+static void
+qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	params.event = QED_IWARP_EVENT_PASSIVE_COMPLETE;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+		   ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+	params.cm_info = &ep->cm_info;
+
+	params.ep_context = ep;
+
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	switch (fw_return_code) {
+	case RDMA_RETURN_OK:
+		ep->qp->max_rd_atomic_req = ep->cm_info.ord;
+		ep->qp->max_rd_atomic_resp = ep->cm_info.ird;
+		qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_RTS, 1);
+		ep->state = QED_IWARP_EP_ESTABLISHED;
+		params.status = 0;
+		break;
+	default:
+		params.status = -ECONNRESET;
+		break;
+	}
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
+static void
+qed_iwarp_mpa_v2_set_private(struct qed_hwfn *p_hwfn,
+			     struct qed_iwarp_ep *ep, u8 *mpa_data_size)
+{
+	struct mpa_v2_hdr *mpa_v2_params;
+	u16 mpa_ird, mpa_ord;
+
+	*mpa_data_size = 0;
+	if (MPA_REV2(ep->mpa_rev)) {
+		mpa_v2_params =
+		    (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata;
+		*mpa_data_size = sizeof(*mpa_v2_params);
+
+		mpa_ird = (u16)ep->cm_info.ird;
+		mpa_ord = (u16)ep->cm_info.ord;
+
+		if (ep->rtr_type != MPA_RTR_TYPE_NONE) {
+			mpa_ird |= MPA_V2_PEER2PEER_MODEL;
+
+			if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND)
+				mpa_ird |= MPA_V2_SEND_RTR;
+
+			if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE)
+				mpa_ord |= MPA_V2_WRITE_RTR;
+
+			if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ)
+				mpa_ord |= MPA_V2_READ_RTR;
+		}
+
+		mpa_v2_params->ird = htons(mpa_ird);
+		mpa_v2_params->ord = htons(mpa_ord);
+
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n",
+			   mpa_v2_params->ird,
+			   mpa_v2_params->ord,
+			   *((u32 *)mpa_v2_params),
+			   mpa_ord & MPA_V2_IRD_ORD_MASK,
+			   mpa_ird & MPA_V2_IRD_ORD_MASK,
+			   !!(mpa_ird & MPA_V2_PEER2PEER_MODEL),
+			   !!(mpa_ird & MPA_V2_SEND_RTR),
+			   !!(mpa_ord & MPA_V2_WRITE_RTR),
+			   !!(mpa_ord & MPA_V2_READ_RTR));
+	}
+}
+
+static struct qed_iwarp_ep *qed_iwarp_get_free_ep(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_ep *ep = NULL;
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	if (list_empty(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) {
+		DP_ERR(p_hwfn, "Ep list is empty\n");
+		goto out;
+	}
+
+	ep = list_first_entry(&p_hwfn->p_rdma_info->iwarp.ep_free_list,
+			      struct qed_iwarp_ep, list_entry);
+
+	/* in some cases we could have failed allocating a tcp cid when added
+	 * from accept / failure... retry now..this is not the common case.
+	 */
+	if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) {
+		rc = qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid);
+
+		/* if we fail we could look for another entry with a valid
+		 * tcp_cid, but since we don't expect to reach this anyway
+		 * it's not worth the handling
+		 */
+		if (rc) {
+			ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+			ep = NULL;
+			goto out;
+		}
+	}
+
+	list_del(&ep->list_entry);
+
+out:
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	return ep;
+}
+
 #define QED_IWARP_MAX_CID_CLEAN_TIME  100
 #define QED_IWARP_MAX_NO_PROGRESS_CNT 5
 
@@ -465,20 +1057,213 @@ qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap)
 
 static int qed_iwarp_wait_for_all_cids(struct qed_hwfn *p_hwfn)
 {
+	int rc;
+	int i;
+
+	rc = qed_iwarp_wait_cid_map_cleared(p_hwfn,
+					    &p_hwfn->p_rdma_info->tcp_cid_map);
+	if (rc)
+		return rc;
+
+	/* Now free the tcp cids from the main cid map */
+	for (i = 0; i < QED_IWARP_PREALLOC_CNT; i++)
+		qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, i);
+
 	/* Now wait for all cids to be completed */
 	return qed_iwarp_wait_cid_map_cleared(p_hwfn,
 					      &p_hwfn->p_rdma_info->cid_map);
 }
 
+static void qed_iwarp_free_prealloc_ep(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_ep *ep;
+
+	while (!list_empty(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) {
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+		ep = list_first_entry(&p_hwfn->p_rdma_info->iwarp.ep_free_list,
+				      struct qed_iwarp_ep, list_entry);
+
+		if (!ep) {
+			spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+			break;
+		}
+		list_del(&ep->list_entry);
+
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+		if (ep->tcp_cid != QED_IWARP_INVALID_TCP_CID)
+			qed_iwarp_cid_cleaned(p_hwfn, ep->tcp_cid);
+
+		qed_iwarp_destroy_ep(p_hwfn, ep, false);
+	}
+}
+
+static int qed_iwarp_prealloc_ep(struct qed_hwfn *p_hwfn, bool init)
+{
+	struct qed_iwarp_ep *ep;
+	int rc = 0;
+	int count;
+	u32 cid;
+	int i;
+
+	count = init ? QED_IWARP_PREALLOC_CNT : 1;
+	for (i = 0; i < count; i++) {
+		rc = qed_iwarp_create_ep(p_hwfn, &ep);
+		if (rc)
+			return rc;
+
+		/* During initialization we allocate from the main pool,
+		 * afterwards we allocate only from the tcp_cid.
+		 */
+		if (init) {
+			rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+			if (rc)
+				goto err;
+			qed_iwarp_set_tcp_cid(p_hwfn, cid);
+		} else {
+			/* We don't care about the return code, it's ok if
+			 * tcp_cid remains invalid...in this case we'll
+			 * defer allocation
+			 */
+			qed_iwarp_alloc_tcp_cid(p_hwfn, &cid);
+		}
+
+		ep->tcp_cid = cid;
+
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_add_tail(&ep->list_entry,
+			      &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+
+	return rc;
+
+err:
+	qed_iwarp_destroy_ep(p_hwfn, ep, false);
+
+	return rc;
+}
+
 int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
 {
+	int rc;
+
+	/* Allocate bitmap for tcp cid. These are used by passive side
+	 * to ensure it can allocate a tcp cid during dpc that was
+	 * pre-acquired and doesn't require dynamic allocation of ilt
+	 */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+				 QED_IWARP_PREALLOC_CNT, "TCP_CID");
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate tcp cid, rc = %d\n", rc);
+		return rc;
+	}
+
+	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list);
 	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 
-	return 0;
+	return qed_iwarp_prealloc_ep(p_hwfn, true);
 }
 
 void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
 {
+	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1);
+}
+
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+	int rc;
+
+	ep = (struct qed_iwarp_ep *)iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+		   iparams->qp->icid, ep->tcp_cid);
+
+	if ((iparams->ord > QED_IWARP_ORD_DEFAULT) ||
+	    (iparams->ird > QED_IWARP_IRD_DEFAULT)) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+			   iparams->qp->icid,
+			   ep->tcp_cid, iparams->ord, iparams->ord);
+		return -EINVAL;
+	}
+
+	qed_iwarp_prealloc_ep(p_hwfn, false);
+
+	ep->cb_context = iparams->cb_context;
+	ep->qp = iparams->qp;
+	ep->qp->ep = ep;
+
+	if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+		/* Negotiate ord/ird: if upperlayer requested ord larger than
+		 * ird advertised by remote, we need to decrease our ord
+		 */
+		if (iparams->ord > ep->cm_info.ird)
+			iparams->ord = ep->cm_info.ird;
+
+		if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) &&
+		    (iparams->ird == 0))
+			iparams->ird = 1;
+	}
+
+	/* Update cm_info ord/ird to be negotiated values */
+	ep->cm_info.ord = iparams->ord;
+	ep->cm_info.ird = iparams->ird;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->private_data, iparams->private_data_len);
+
+	rc = qed_iwarp_mpa_offload(p_hwfn, ep);
+	if (rc)
+		qed_iwarp_modify_qp(p_hwfn,
+				    iparams->qp, QED_IWARP_QP_STATE_ERROR, 1);
+
+	return rc;
+}
+
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+
+	ep = iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid);
+
+	ep->cb_context = iparams->cb_context;
+	ep->qp = NULL;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->private_data, iparams->private_data_len);
+
+	return qed_iwarp_mpa_offload(p_hwfn, ep);
 }
 
 static void
@@ -526,6 +1311,38 @@ qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+static bool
+qed_iwarp_ep_exists(struct qed_hwfn *p_hwfn, struct qed_iwarp_cm_info *cm_info)
+{
+	struct qed_iwarp_ep *ep = NULL;
+	bool found = false;
+
+	list_for_each_entry(ep,
+			    &p_hwfn->p_rdma_info->iwarp.ep_list,
+			    list_entry) {
+		if ((ep->cm_info.local_port == cm_info->local_port) &&
+		    (ep->cm_info.remote_port == cm_info->remote_port) &&
+		    (ep->cm_info.vlan == cm_info->vlan) &&
+		    !memcmp(&ep->cm_info.local_ip, cm_info->local_ip,
+			    sizeof(cm_info->local_ip)) &&
+		    !memcmp(&ep->cm_info.remote_ip, cm_info->remote_ip,
+			    sizeof(cm_info->remote_ip))) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {
+		DP_NOTICE(p_hwfn,
+			  "SYN received on active connection - dropping\n");
+		qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+		return true;
+	}
+
+	return false;
+}
+
 static struct qed_iwarp_listener *
 qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
 		       struct qed_iwarp_cm_info *cm_info)
@@ -596,9 +1413,8 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
 
 	eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0);
 
-	memcpy(remote_mac_addr, ethh->h_source, ETH_ALEN);
-
-	memcpy(local_mac_addr, ethh->h_dest, ETH_ALEN);
+	ether_addr_copy(remote_mac_addr, ethh->h_source);
+	ether_addr_copy(local_mac_addr, ethh->h_dest);
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_type =%d source mac: %pM\n",
 		   eth_type, ethh->h_source);
@@ -661,9 +1477,12 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 	struct qed_hwfn *p_hwfn = cxt;
 	u8 remote_mac_addr[ETH_ALEN];
 	u8 local_mac_addr[ETH_ALEN];
+	struct qed_iwarp_ep *ep;
 	int tcp_start_offset;
+	u8 ts_hdr_size = 0;
 	u8 ll2_syn_handle;
 	int payload_len;
+	u32 hdr_size;
 	int rc;
 
 	memset(&cm_info, 0, sizeof(cm_info));
@@ -719,6 +1538,49 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Received syn on listening port\n");
+	/* There may be an open ep on this connection if this is a syn
+	 * retrasnmit... need to make sure there isn't...
+	 */
+	if (qed_iwarp_ep_exists(p_hwfn, &cm_info))
+		goto err;
+
+	ep = qed_iwarp_get_free_ep(p_hwfn);
+	if (!ep)
+		goto err;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ether_addr_copy(ep->remote_mac_addr, remote_mac_addr);
+	ether_addr_copy(ep->local_mac_addr, local_mac_addr);
+
+	memcpy(&ep->cm_info, &cm_info, sizeof(ep->cm_info));
+
+	if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN)
+		ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+	hdr_size = ((cm_info.ip_version == QED_TCP_IPV4) ? 40 : 60) +
+		   ts_hdr_size;
+	ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size;
+	ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+	ep->event_cb = listener->event_cb;
+	ep->cb_context = listener->cb_context;
+	ep->connect_mode = TCP_CONNECT_PASSIVE;
+
+	ep->syn = buf;
+	ep->syn_ip_payload_length = (u16)payload_len;
+	ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset +
+			   tcp_start_offset;
+
+	rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+	if (rc) {
+		qed_iwarp_return_ep(p_hwfn, ep);
+		goto err;
+	}
+
+	return;
 err:
 	qed_iwarp_ll2_post_rx(p_hwfn, buf, ll2_syn_handle);
 }
@@ -905,7 +1767,12 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 
 	iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
 
+	iwarp_info->rtr_type =  MPA_RTR_TYPE_ZERO_SEND |
+				MPA_RTR_TYPE_ZERO_WRITE |
+				MPA_RTR_TYPE_ZERO_READ;
+
 	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_list);
 	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.listen_list);
 
 	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
@@ -918,6 +1785,7 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	int rc;
 
+	qed_iwarp_free_prealloc_ep(p_hwfn);
 	rc = qed_iwarp_wait_for_all_cids(p_hwfn);
 	if (rc)
 		return rc;
@@ -927,11 +1795,70 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
 }
 
+void
+qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
+			   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	/* Done with the SYN packet, post back to ll2 rx */
+	qed_iwarp_ll2_post_rx(p_hwfn, ep->syn,
+			      p_hwfn->p_rdma_info->iwarp.ll2_syn_handle);
+	ep->syn = NULL;
+
+	/* If connect failed - upper layer doesn't know about it */
+	qed_iwarp_mpa_received(p_hwfn, ep);
+}
+
+static inline bool
+qed_iwarp_check_ep_ok(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	if (!ep || (ep->sig != QED_EP_SIG)) {
+		DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep);
+		return false;
+	}
+
+	return true;
+}
+
 static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
 				 u8 fw_event_code, u16 echo,
 				 union event_ring_data *data,
 				 u8 fw_return_code)
 {
+	struct regpair *fw_handle = &data->rdma_data.async_handle;
+	struct qed_iwarp_ep *ep = NULL;
+	u16 cid;
+
+	ep = (struct qed_iwarp_ep *)(uintptr_t)HILO_64(fw_handle->hi,
+						       fw_handle->lo);
+
+	switch (fw_event_code) {
+	case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE:
+		/* Async completion after TCP 3-way handshake */
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n",
+			   ep->tcp_cid, fw_return_code);
+		qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE:
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_mpa_complete(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED:
+		cid = (u16)le32_to_cpu(fw_handle->lo);
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", cid);
+		qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+		break;
+	}
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index 29005ac83a6a..bedac98c834c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -42,6 +42,8 @@ enum qed_iwarp_qp_state {
 
 enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
 
+#define QED_IWARP_PREALLOC_CNT  (256)
+
 #define QED_IWARP_LL2_SYN_TX_SIZE       (128)
 #define QED_IWARP_LL2_SYN_RX_SIZE       (256)
 #define QED_IWARP_MAX_SYN_PKT_SIZE      (128)
@@ -55,6 +57,8 @@ struct qed_iwarp_ll2_buff {
 
 struct qed_iwarp_info {
 	struct list_head listen_list;	/* qed_iwarp_listener */
+	struct list_head ep_list;	/* qed_iwarp_ep */
+	struct list_head ep_free_list;	/* pre-allocated ep's */
 	spinlock_t iw_lock;	/* for iwarp resources */
 	spinlock_t qp_lock;	/* for teardown races */
 	u32 rcv_wnd_scale;
@@ -68,6 +72,61 @@ struct qed_iwarp_info {
 	enum mpa_rtr_type rtr_type;
 };
 
+enum qed_iwarp_ep_state {
+	QED_IWARP_EP_INIT,
+	QED_IWARP_EP_MPA_REQ_RCVD,
+	QED_IWARP_EP_MPA_OFFLOADED,
+	QED_IWARP_EP_ESTABLISHED,
+	QED_IWARP_EP_CLOSED
+};
+
+union async_output {
+	struct iwarp_eqe_data_mpa_async_completion mpa_response;
+	struct iwarp_eqe_data_tcp_async_completion mpa_request;
+};
+
+#define QED_MAX_PRIV_DATA_LEN (512)
+struct qed_iwarp_ep_memory {
+	u8 in_pdata[QED_MAX_PRIV_DATA_LEN];
+	u8 out_pdata[QED_MAX_PRIV_DATA_LEN];
+	union async_output async_output;
+};
+
+/* Endpoint structure represents a TCP connection. This connection can be
+ * associated with a QP or not (in which case QP==NULL)
+ */
+struct qed_iwarp_ep {
+	struct list_head list_entry;
+	struct qed_rdma_qp *qp;
+	struct qed_iwarp_ep_memory *ep_buffer_virt;
+	dma_addr_t ep_buffer_phys;
+	enum qed_iwarp_ep_state state;
+	int sig;
+	struct qed_iwarp_cm_info cm_info;
+	enum tcp_connect_mode connect_mode;
+	enum mpa_rtr_type rtr_type;
+	enum mpa_negotiation_mode mpa_rev;
+	u32 tcp_cid;
+	u32 cid;
+	u16 mss;
+	u8 remote_mac_addr[6];
+	u8 local_mac_addr[6];
+	bool mpa_reply_processed;
+
+	/* For Passive side - syn packet related data */
+	u16 syn_ip_payload_length;
+	struct qed_iwarp_ll2_buff *syn;
+	dma_addr_t syn_phy_addr;
+
+	/* The event_cb function is called for asynchrounous events associated
+	 * with the ep. It is initialized at different entry points depending
+	 * on whether the ep is the tcp connection active side or passive side
+	 * The cb_context is passed to the event_cb function.
+	 */
+	iwarp_event_handler event_cb;
+	void *cb_context;
+};
+
 struct qed_iwarp_listener {
 	struct list_head list_entry;
 
@@ -115,6 +174,9 @@ qed_iwarp_create_listen(void *rdma_cxt,
 			struct qed_iwarp_listen_in *iparams,
 			struct qed_iwarp_listen_out *oparams);
 
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams);
+
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams);
 int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle);
 
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 27ea54ba7e1b..0ba5ec8a9814 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1227,19 +1227,6 @@ static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
 	return action;
 }
 
-static void qed_set_fw_mac_addr(__le16 *fw_msb,
-				__le16 *fw_mid,
-				__le16 *fw_lsb,
-				u8 *mac)
-{
-	((u8 *)fw_msb)[0] = mac[1];
-	((u8 *)fw_msb)[1] = mac[0];
-	((u8 *)fw_mid)[0] = mac[3];
-	((u8 *)fw_mid)[1] = mac[2];
-	((u8 *)fw_lsb)[0] = mac[5];
-	((u8 *)fw_lsb)[1] = mac[4];
-}
-
 static int
 qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
 			u16 opaque_fid,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index 90e4e0f13727..18ec9cbd84f5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -85,6 +85,7 @@ struct qed_rdma_info {
 	struct qed_bmap qp_map;
 	struct qed_bmap srq_map;
 	struct qed_bmap cid_map;
+	struct qed_bmap tcp_cid_map;
 	struct qed_bmap real_cid_map;
 	struct qed_bmap dpi_map;
 	struct qed_bmap toggle_bits;
@@ -167,6 +168,7 @@ struct qed_rdma_qp {
 
 	void *shared_queue;
 	dma_addr_t shared_queue_phys_addr;
+	struct qed_iwarp_ep *ep;
 };
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index c3752c56e54d..ab4ad8a1e2a5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -111,6 +111,8 @@ union ramrod_data {
 	struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
 	struct rdma_srq_modify_ramrod_data rdma_modify_srq;
 	struct iwarp_create_qp_ramrod_data iwarp_create_qp;
+	struct iwarp_tcp_offload_ramrod_data iwarp_tcp_offload;
+	struct iwarp_mpa_offload_ramrod_data iwarp_mpa_offload;
 	struct iwarp_modify_qp_ramrod_data iwarp_modify_qp;
 	struct iwarp_init_func_ramrod_data iwarp_init_func;
 	struct fcoe_init_ramrod_params fcoe_init;
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 885ae1379b5a..39e2a2ac2471 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -38,6 +38,8 @@
 #include <linux/slab.h>
 
 /* dma_addr_t manip */
+#define PTR_LO(x)               ((u32)(((uintptr_t)(x)) & 0xffffffff))
+#define PTR_HI(x)               ((u32)((((uintptr_t)(x)) >> 16) >> 16))
 #define DMA_LO_LE(x)		cpu_to_le32(lower_32_bits(x))
 #define DMA_HI_LE(x)		cpu_to_le32(upper_32_bits(x))
 #define DMA_REGPAIR_LE(x, val)	do { \
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index 28df5688ad0c..c4c241fe2579 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -471,7 +471,8 @@ struct qed_rdma_counters_out_params {
 #define QED_ROCE_TX_FRAG_FAILURE        (2)
 
 enum qed_iwarp_event_type {
-	QED_IWARP_EVENT_MPA_REQUEST,	/* Passive side request received */
+	QED_IWARP_EVENT_MPA_REQUEST,	  /* Passive side request received */
+	QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */
 };
 
 enum qed_tcp_ip_version {
@@ -516,6 +517,23 @@ struct qed_iwarp_listen_out {
 	void *handle;
 };
 
+struct qed_iwarp_accept_in {
+	void *ep_context;
+	void *cb_context;
+	struct qed_rdma_qp *qp;
+	const void *private_data;
+	u16 private_data_len;
+	u8 ord;
+	u8 ird;
+};
+
+struct qed_iwarp_reject_in {
+	void *ep_context;
+	void *cb_context;
+	const void *private_data;
+	u16 private_data_len;
+};
+
 struct qed_roce_ll2_header {
 	void *vaddr;
 	dma_addr_t baddr;
@@ -626,6 +644,12 @@ struct qed_rdma_ops {
 				   struct qed_iwarp_listen_in *iparams,
 				   struct qed_iwarp_listen_out *oparams);
 
+	int (*iwarp_accept)(void *rdma_cxt,
+			    struct qed_iwarp_accept_in *iparams);
+
+	int (*iwarp_reject)(void *rdma_cxt,
+			    struct qed_iwarp_reject_in *iparams);
+
 	int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle);
 
 };
-- 
cgit v1.2.3


From 4b0fdd7c8b757125ac7996617d914bbdb9e0348c Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:28 +0300
Subject: qed: iWARP CM add active side connect

This patch implements the active side connect.
Offload a connection, process MPA reply and send RTR.
In some of the common passive/active functions, the active side
will work in blocking mode.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 240 ++++++++++++++++++++++++++--
 drivers/net/ethernet/qlogic/qed/qed_iwarp.h |   7 +
 drivers/net/ethernet/qlogic/qed/qed_rdma.c  |   4 +
 include/linux/qed/qed_rdma_if.h             |  26 +++
 4 files changed, 265 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index a6dadae1f998..a5da9fc6f454 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -611,7 +611,10 @@ qed_iwarp_tcp_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
 	memset(&init_data, 0, sizeof(init_data));
 	init_data.cid = ep->tcp_cid;
 	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
-	init_data.comp_mode = QED_SPQ_MODE_CB;
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE)
+		init_data.comp_mode = QED_SPQ_MODE_CB;
+	else
+		init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 IWARP_RAMROD_CMD_ID_TCP_OFFLOAD,
@@ -711,7 +714,7 @@ qed_iwarp_mpa_received(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 		   "private_data_len=%x handshake_mode=%x private_data=(%x)\n",
 		   async_data->mpa_request.ulp_data_len,
-		   mpa_rev, *((u32 *)((u8 *)ep->ep_buffer_virt->in_pdata)));
+		   mpa_rev, *((u32 *)(ep->ep_buffer_virt->in_pdata)));
 
 	if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
 		/* Read ord/ird values from private data buffer */
@@ -801,7 +804,10 @@ qed_iwarp_mpa_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
 	init_data.cid = reject ? ep->tcp_cid : qp->icid;
 	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
 
-	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+	if (ep->connect_mode == TCP_CONNECT_ACTIVE)
+		init_data.comp_mode = QED_SPQ_MODE_CB;
+	else
+		init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
@@ -890,6 +896,59 @@ qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
 	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 }
 
+void
+qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct mpa_v2_hdr *mpa_v2_params;
+	union async_output *async_data;
+	u16 mpa_ird, mpa_ord;
+	u8 mpa_data_size = 0;
+
+	if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) {
+		mpa_v2_params =
+			(struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata);
+		mpa_data_size = sizeof(*mpa_v2_params);
+		mpa_ird = ntohs(mpa_v2_params->ird);
+		mpa_ord = ntohs(mpa_v2_params->ord);
+
+		ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK);
+		ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK);
+	}
+	async_data = &ep->ep_buffer_virt->async_output;
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size;
+	ep->cm_info.private_data_len = async_data->mpa_response.ulp_data_len -
+				       mpa_data_size;
+}
+
+void
+qed_iwarp_mpa_reply_arrived(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		DP_NOTICE(p_hwfn,
+			  "MPA reply event not expected on passive side!\n");
+		return;
+	}
+
+	params.event = QED_IWARP_EVENT_ACTIVE_MPA_REPLY;
+
+	qed_iwarp_parse_private_data(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+		   ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+	params.status = 0;
+
+	ep->mpa_reply_processed = true;
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
 #define QED_IWARP_CONNECT_MODE_STRING(ep) \
 	((ep)->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active"
 
@@ -902,7 +961,13 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
 {
 	struct qed_iwarp_cm_event_params params;
 
-	params.event = QED_IWARP_EVENT_PASSIVE_COMPLETE;
+	if (ep->connect_mode == TCP_CONNECT_ACTIVE)
+		params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+	else
+		params.event = QED_IWARP_EVENT_PASSIVE_COMPLETE;
+
+	if (ep->connect_mode == TCP_CONNECT_ACTIVE && !ep->mpa_reply_processed)
+		qed_iwarp_parse_private_data(p_hwfn, ep);
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
@@ -977,6 +1042,102 @@ qed_iwarp_mpa_v2_set_private(struct qed_hwfn *p_hwfn,
 	}
 }
 
+int qed_iwarp_connect(void *rdma_cxt,
+		      struct qed_iwarp_connect_in *iparams,
+		      struct qed_iwarp_connect_out *oparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_info *iwarp_info;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+	u8 ts_hdr_size = 0;
+	u32 cid;
+	int rc;
+
+	if ((iparams->cm_info.ord > QED_IWARP_ORD_DEFAULT) ||
+	    (iparams->cm_info.ird > QED_IWARP_IRD_DEFAULT)) {
+		DP_NOTICE(p_hwfn,
+			  "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+			  iparams->qp->icid, iparams->cm_info.ord,
+			  iparams->cm_info.ird);
+
+		return -EINVAL;
+	}
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	/* Allocate ep object */
+	rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+	if (rc)
+		return rc;
+
+	rc = qed_iwarp_create_ep(p_hwfn, &ep);
+	if (rc)
+		goto err;
+
+	ep->tcp_cid = cid;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ep->qp = iparams->qp;
+	ep->qp->ep = ep;
+	ether_addr_copy(ep->remote_mac_addr, iparams->remote_mac_addr);
+	ether_addr_copy(ep->local_mac_addr, iparams->local_mac_addr);
+	memcpy(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info));
+
+	ep->cm_info.ord = iparams->cm_info.ord;
+	ep->cm_info.ird = iparams->cm_info.ird;
+
+	ep->rtr_type = iwarp_info->rtr_type;
+	if (!iwarp_info->peer2peer)
+		ep->rtr_type = MPA_RTR_TYPE_NONE;
+
+	if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && (ep->cm_info.ord == 0))
+		ep->cm_info.ord = 1;
+
+	ep->mpa_rev = iwarp_info->mpa_rev;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->cm_info.private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->cm_info.private_data,
+	       iparams->cm_info.private_data_len);
+
+	if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN)
+		ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+	ep->mss = iparams->mss - ts_hdr_size;
+	ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+	ep->event_cb = iparams->event_cb;
+	ep->cb_context = iparams->cb_context;
+	ep->connect_mode = TCP_CONNECT_ACTIVE;
+
+	oparams->ep_context = ep;
+
+	rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n",
+		   iparams->qp->icid, ep->tcp_cid, rc);
+
+	if (rc) {
+		qed_iwarp_destroy_ep(p_hwfn, ep, true);
+		goto err;
+	}
+
+	return rc;
+err:
+	qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+	return rc;
+}
+
 static struct qed_iwarp_ep *qed_iwarp_get_free_ep(struct qed_hwfn *p_hwfn)
 {
 	struct qed_iwarp_ep *ep = NULL;
@@ -1174,12 +1335,12 @@ void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
 
 int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
 {
-	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct qed_hwfn *p_hwfn = rdma_cxt;
 	struct qed_iwarp_ep *ep;
 	u8 mpa_data_size = 0;
 	int rc;
 
-	ep = (struct qed_iwarp_ep *)iparams->ep_context;
+	ep = iparams->ep_context;
 	if (!ep) {
 		DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n");
 		return -EINVAL;
@@ -1799,13 +1960,19 @@ void
 qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
 			   struct qed_iwarp_ep *ep, u8 fw_return_code)
 {
-	/* Done with the SYN packet, post back to ll2 rx */
-	qed_iwarp_ll2_post_rx(p_hwfn, ep->syn,
-			      p_hwfn->p_rdma_info->iwarp.ll2_syn_handle);
-	ep->syn = NULL;
+	u8 ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		/* Done with the SYN packet, post back to ll2 rx */
+		qed_iwarp_ll2_post_rx(p_hwfn, ep->syn, ll2_syn_handle);
 
-	/* If connect failed - upper layer doesn't know about it */
-	qed_iwarp_mpa_received(p_hwfn, ep);
+		ep->syn = NULL;
+
+		/* If connect failed - upper layer doesn't know about it */
+		qed_iwarp_mpa_received(p_hwfn, ep);
+	} else {
+		qed_iwarp_mpa_offload(p_hwfn, ep);
+	}
 }
 
 static inline bool
@@ -1842,6 +2009,16 @@ static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
 			   ep->tcp_cid, fw_return_code);
 		qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code);
 		break;
+		/* Async event for active side only */
+	case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED:
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_mpa_reply_arrived(p_hwfn, ep);
+		break;
 	case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE:
 		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
 			return -EINVAL;
@@ -1918,6 +2095,45 @@ int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle)
 	return 0;
 }
 
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	struct qed_iwarp_ep *ep;
+	struct qed_rdma_qp *qp;
+	int rc;
+
+	ep = iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n");
+		return -EINVAL;
+	}
+
+	qp = ep->qp;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+		   qp->icid, ep->tcp_cid);
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_CB;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR,
+				 PROTOCOLID_IWARP, &init_data);
+
+	if (rc)
+		return rc;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = 0x%x\n", rc);
+
+	return rc;
+}
+
 void
 qed_iwarp_query_qp(struct qed_rdma_qp *qp,
 		   struct qed_rdma_query_qp_out_params *out_params)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index bedac98c834c..148ef3c33a5d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -169,6 +169,11 @@ int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
 void qed_iwarp_query_qp(struct qed_rdma_qp *qp,
 			struct qed_rdma_query_qp_out_params *out_params);
 
+int
+qed_iwarp_connect(void *rdma_cxt,
+		  struct qed_iwarp_connect_in *iparams,
+		  struct qed_iwarp_connect_out *oparams);
+
 int
 qed_iwarp_create_listen(void *rdma_cxt,
 			struct qed_iwarp_listen_in *iparams,
@@ -179,4 +184,6 @@ int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams);
 int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams);
 int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle);
 
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 29de915007a0..6fb99518a61f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1772,8 +1772,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
 	.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
 	.ll2_get_stats = &qed_ll2_get_stats,
+	.iwarp_connect = &qed_iwarp_connect,
 	.iwarp_create_listen = &qed_iwarp_create_listen,
 	.iwarp_destroy_listen = &qed_iwarp_destroy_listen,
+	.iwarp_accept = &qed_iwarp_accept,
+	.iwarp_reject = &qed_iwarp_reject,
+	.iwarp_send_rtr = &qed_iwarp_send_rtr,
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void)
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index c4c241fe2579..e9514a69b03f 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -473,6 +473,8 @@ struct qed_rdma_counters_out_params {
 enum qed_iwarp_event_type {
 	QED_IWARP_EVENT_MPA_REQUEST,	  /* Passive side request received */
 	QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */
+	QED_IWARP_EVENT_ACTIVE_COMPLETE,  /* Active side reply received */
+	QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
 };
 
 enum qed_tcp_ip_version {
@@ -503,6 +505,20 @@ struct qed_iwarp_cm_event_params {
 typedef int (*iwarp_event_handler) (void *context,
 				    struct qed_iwarp_cm_event_params *event);
 
+struct qed_iwarp_connect_in {
+	iwarp_event_handler event_cb;
+	void *cb_context;
+	struct qed_rdma_qp *qp;
+	struct qed_iwarp_cm_info cm_info;
+	u16 mss;
+	u8 remote_mac_addr[ETH_ALEN];
+	u8 local_mac_addr[ETH_ALEN];
+};
+
+struct qed_iwarp_connect_out {
+	void *ep_context;
+};
+
 struct qed_iwarp_listen_in {
 	iwarp_event_handler event_cb;
 	void *cb_context;	/* passed to event_cb */
@@ -534,6 +550,10 @@ struct qed_iwarp_reject_in {
 	u16 private_data_len;
 };
 
+struct qed_iwarp_send_rtr_in {
+	void *ep_context;
+};
+
 struct qed_roce_ll2_header {
 	void *vaddr;
 	dma_addr_t baddr;
@@ -640,6 +660,10 @@ struct qed_rdma_ops {
 	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
 				  u8 *old_mac_address, u8 *new_mac_address);
 
+	int (*iwarp_connect)(void *rdma_cxt,
+			     struct qed_iwarp_connect_in *iparams,
+			     struct qed_iwarp_connect_out *oparams);
+
 	int (*iwarp_create_listen)(void *rdma_cxt,
 				   struct qed_iwarp_listen_in *iparams,
 				   struct qed_iwarp_listen_out *oparams);
@@ -652,6 +676,8 @@ struct qed_rdma_ops {
 
 	int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle);
 
+	int (*iwarp_send_rtr)(void *rdma_cxt,
+			      struct qed_iwarp_send_rtr_in *iparams);
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void);
-- 
cgit v1.2.3


From fc4c6065e661224df3db50780219ac53fee56e2b Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:29 +0300
Subject: qed: iWARP implement disconnect flows

This patch takes care of active/passive disconnect flows.
Disconnect flows can be initiated remotely, in which case a async event
will arrive from peer and indicated to qedr driver. These
are referred to as exceptions. When a QP is destroyed, it needs to check
that it's associated ep has been closed.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 90 ++++++++++++++++++++++++++++-
 include/linux/qed/qed_rdma_if.h             |  2 +
 2 files changed, 91 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index a5da9fc6f454..84bcda314cd2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -496,6 +496,8 @@ static void qed_iwarp_destroy_ep(struct qed_hwfn *p_hwfn,
 
 int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 {
+	struct qed_iwarp_ep *ep = qp->ep;
+	int wait_count = 0;
 	int rc = 0;
 
 	if (qp->iwarp_state != QED_IWARP_QP_STATE_ERROR) {
@@ -505,6 +507,18 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 			return rc;
 	}
 
+	/* Make sure ep is closed before returning and freeing memory. */
+	if (ep) {
+		while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+			msleep(100);
+
+		if (ep->state != QED_IWARP_EP_CLOSED)
+			DP_NOTICE(p_hwfn, "ep state close timeout state=%x\n",
+				  ep->state);
+
+		qed_iwarp_destroy_ep(p_hwfn, ep, false);
+	}
+
 	rc = qed_iwarp_fw_destroy(p_hwfn, qp);
 
 	if (qp->shared_queue)
@@ -1956,6 +1970,61 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
 }
 
+void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
+			   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_ERROR, true);
+
+	params.event = QED_IWARP_EVENT_CLOSE;
+	params.ep_context = ep;
+	params.cm_info = &ep->cm_info;
+	params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ?
+			 0 : -ECONNRESET;
+
+	ep->state = QED_IWARP_EP_CLOSED;
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&ep->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
+void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
+				  struct qed_iwarp_ep *ep, int fw_ret_code)
+{
+	struct qed_iwarp_cm_event_params params;
+	bool event_cb = false;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n",
+		   ep->cid, fw_ret_code);
+
+	switch (fw_ret_code) {
+	case IWARP_EXCEPTION_DETECTED_LLP_CLOSED:
+		params.status = 0;
+		params.event = QED_IWARP_EVENT_DISCONNECT;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LLP_RESET:
+		params.status = -ECONNRESET;
+		params.event = QED_IWARP_EVENT_DISCONNECT;
+		event_cb = true;
+		break;
+	default:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Unhandled exception received...fw_ret_code=%d\n",
+			   fw_ret_code);
+		break;
+	}
+
+	if (event_cb) {
+		params.ep_context = ep;
+		params.cm_info = &ep->cm_info;
+		ep->event_cb(ep->cb_context, &params);
+	}
+}
+
 void
 qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
 			   struct qed_iwarp_ep *ep, u8 fw_return_code)
@@ -2009,8 +2078,27 @@ static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
 			   ep->tcp_cid, fw_return_code);
 		qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code);
 		break;
-		/* Async event for active side only */
+	case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED:
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_exception_received(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE:
+		/* Async completion for Close Connection ramrod */
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_qp_in_error(p_hwfn, ep, fw_return_code);
+		break;
 	case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED:
+		/* Async event for active side only */
 		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
 			return -EINVAL;
 		DP_VERBOSE(p_hwfn,
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index e9514a69b03f..01966c3a3e5d 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -474,6 +474,8 @@ enum qed_iwarp_event_type {
 	QED_IWARP_EVENT_MPA_REQUEST,	  /* Passive side request received */
 	QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */
 	QED_IWARP_EVENT_ACTIVE_COMPLETE,  /* Active side reply received */
+	QED_IWARP_EVENT_DISCONNECT,
+	QED_IWARP_EVENT_CLOSE,
 	QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
 };
 
-- 
cgit v1.2.3


From 9816b614346925feac1198e33d2dc5201c4ef74e Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 2 Jul 2017 10:29:30 +0300
Subject: qed: iWARP CM add error handling

This patch introduces error handling for errors that occurred during
connection establishment.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 183 +++++++++++++++++++++++++++-
 include/linux/qed/qed_rdma_if.h             |   9 ++
 2 files changed, 190 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 84bcda314cd2..5cd20da2d4e0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1001,12 +1001,75 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
 		ep->state = QED_IWARP_EP_ESTABLISHED;
 		params.status = 0;
 		break;
+	case IWARP_CONN_ERROR_MPA_TIMEOUT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA timeout\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -EBUSY;
+		break;
+	case IWARP_CONN_ERROR_MPA_ERROR_REJECT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Reject\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_RST:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid,
+			  ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_MPA_FIN:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA received FIN\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INSUF_IRD:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA insufficient ird\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_RTR_MISMATCH:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA RTR MISMATCH\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_LOCAL_ERROR:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Local Error\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_TERMINATE:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA TERMINATE\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
 	default:
 		params.status = -ECONNRESET;
 		break;
 	}
 
 	ep->event_cb(ep->cb_context, &params);
+
+	/* on passive side, if there is no associated QP (REJECT) we need to
+	 * return the ep to the pool, (in the regular case we add an element
+	 * in accept instead of this one.
+	 * In both cases we need to remove it from the ep_list.
+	 */
+	if (fw_return_code != RDMA_RETURN_OK) {
+		ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+		if ((ep->connect_mode == TCP_CONNECT_PASSIVE) &&
+		    (!ep->qp)) {	/* Rejected */
+			qed_iwarp_return_ep(p_hwfn, ep);
+		} else {
+			spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+			list_del(&ep->list_entry);
+			spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		}
+	}
 }
 
 static void
@@ -2011,6 +2074,42 @@ void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
 		params.event = QED_IWARP_EVENT_DISCONNECT;
 		event_cb = true;
 		break;
+	case IWARP_EXCEPTION_DETECTED_RQ_EMPTY:
+		params.event = QED_IWARP_EVENT_RQ_EMPTY;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_IRQ_FULL:
+		params.event = QED_IWARP_EVENT_IRQ_FULL;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT:
+		params.event = QED_IWARP_EVENT_LLP_TIMEOUT;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR:
+		params.event = QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW:
+		params.event = QED_IWARP_EVENT_CQ_OVERFLOW;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC:
+		params.event = QED_IWARP_EVENT_QP_CATASTROPHIC;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR:
+		params.event = QED_IWARP_EVENT_LOCAL_ACCESS_ERROR;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR:
+		params.event = QED_IWARP_EVENT_REMOTE_OPERATION_ERROR;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED:
+		params.event = QED_IWARP_EVENT_TERMINATE_RECEIVED;
+		event_cb = true;
+		break;
 	default:
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 			   "Unhandled exception received...fw_ret_code=%d\n",
@@ -2025,6 +2124,66 @@ void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
 	}
 }
 
+static void
+qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn,
+				   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	memset(&params, 0, sizeof(params));
+	params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+	params.ep_context = ep;
+	params.cm_info = &ep->cm_info;
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	switch (fw_return_code) {
+	case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "%s(0x%x) TCP connect got invalid packet\n",
+			   QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_TCP_CONNECTION_RST:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "%s(0x%x) TCP Connection Reset\n",
+			   QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) TCP timeout\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -EBUSY;
+		break;
+	case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA not supported VER\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	default:
+		DP_ERR(p_hwfn,
+		       "%s(0x%x) Unexpected return code tcp connect: %d\n",
+		       QED_IWARP_CONNECT_MODE_STRING(ep),
+		       ep->tcp_cid, fw_return_code);
+		params.status = -ECONNRESET;
+		break;
+	}
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+		qed_iwarp_return_ep(p_hwfn, ep);
+	} else {
+		ep->event_cb(ep->cb_context, &params);
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_del(&ep->list_entry);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+}
+
 void
 qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
 			   struct qed_iwarp_ep *ep, u8 fw_return_code)
@@ -2038,9 +2197,17 @@ qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
 		ep->syn = NULL;
 
 		/* If connect failed - upper layer doesn't know about it */
-		qed_iwarp_mpa_received(p_hwfn, ep);
+		if (fw_return_code == RDMA_RETURN_OK)
+			qed_iwarp_mpa_received(p_hwfn, ep);
+		else
+			qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep,
+							   fw_return_code);
 	} else {
-		qed_iwarp_mpa_offload(p_hwfn, ep);
+		if (fw_return_code == RDMA_RETURN_OK)
+			qed_iwarp_mpa_offload(p_hwfn, ep);
+		else
+			qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep,
+							   fw_return_code);
 	}
 }
 
@@ -2123,6 +2290,18 @@ static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
 		qed_iwarp_cid_cleaned(p_hwfn, cid);
 
 		break;
+	case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW:
+		DP_NOTICE(p_hwfn, "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n");
+
+		p_hwfn->p_rdma_info->events.affiliated_event(
+			p_hwfn->p_rdma_info->events.context,
+			QED_IWARP_EVENT_CQ_OVERFLOW,
+			(void *)fw_handle);
+		break;
+	default:
+		DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n",
+		       fw_event_code);
+		return -EINVAL;
 	}
 	return 0;
 }
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index 01966c3a3e5d..4dd72ba210f5 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -476,7 +476,16 @@ enum qed_iwarp_event_type {
 	QED_IWARP_EVENT_ACTIVE_COMPLETE,  /* Active side reply received */
 	QED_IWARP_EVENT_DISCONNECT,
 	QED_IWARP_EVENT_CLOSE,
+	QED_IWARP_EVENT_IRQ_FULL,
+	QED_IWARP_EVENT_RQ_EMPTY,
+	QED_IWARP_EVENT_LLP_TIMEOUT,
+	QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR,
+	QED_IWARP_EVENT_CQ_OVERFLOW,
+	QED_IWARP_EVENT_QP_CATASTROPHIC,
 	QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
+	QED_IWARP_EVENT_LOCAL_ACCESS_ERROR,
+	QED_IWARP_EVENT_REMOTE_OPERATION_ERROR,
+	QED_IWARP_EVENT_TERMINATE_RECEIVED
 };
 
 enum qed_tcp_ip_version {
-- 
cgit v1.2.3


From e1069bbfcf3bcf4feb264397f3451184fd66b907 Mon Sep 17 00:00:00 2001
From: Jim Baxter <jim_baxter@mentor.com>
Date: Wed, 28 Jun 2017 21:35:29 +0100
Subject: net: cdc_ncm: Reduce memory use when kernel memory low
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CDC-NCM driver can require large amounts of memory to create
skb's and this can be a problem when the memory becomes fragmented.

This especially affects embedded systems that have constrained
resources but wish to maximise the throughput of CDC-NCM with 16KiB
NTB's.

The issue is after running for a while the kernel memory can become
fragmented and it needs compacting.
If the NTB allocation is needed before the memory has been compacted
the atomic allocation can fail which can cause increased latency,
large re-transmissions or disconnections depending upon the data
being transmitted at the time.
This situation occurs for less than a second until the kernel has
compacted the memory but the failed devices can take a lot longer to
recover from the failed TX packets.

To ease this temporary situation I modified the CDC-NCM TX path to
temporarily switch into a reduced memory mode which allocates an NTB
that will fit into a USB_CDC_NCM_NTB_MIN_OUT_SIZE (default 2048 Bytes)
sized memory block and only transmit NTB's with a single network frame
until the memory situation is resolved.
Each time this issue occurs we wait for an increasing number of
reduced size allocations before requesting a full size one to not
put additional pressure on a low memory system.

Once the memory is compacted the CDC-NCM data can resume transmitting
at the normal tx_max rate once again.

Signed-off-by: Jim Baxter <jim_baxter@mentor.com>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 54 +++++++++++++++++++++++++++++++++++----------
 include/linux/usb/cdc_ncm.h |  3 +++
 2 files changed, 45 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2067743f51ca..d103a1d4fb36 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -89,6 +89,8 @@ static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
 	CDC_NCM_SIMPLE_STAT(rx_ntbs),
 };
 
+#define CDC_NCM_LOW_MEM_MAX_CNT 10
+
 static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
 {
 	switch (sset) {
@@ -1055,10 +1057,10 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 
 	/* align new NDP */
 	if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END))
-		cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+		cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
 
 	/* verify that there is room for the NDP and the datagram (reserve) */
-	if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
+	if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size)
 		return NULL;
 
 	/* link to it */
@@ -1111,13 +1113,41 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
 	/* allocate a new OUT skb */
 	if (!skb_out) {
-		skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC);
+		if (ctx->tx_low_mem_val == 0) {
+			ctx->tx_curr_size = ctx->tx_max;
+			skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+			/* If the memory allocation fails we will wait longer
+			 * each time before attempting another full size
+			 * allocation again to not overload the system
+			 * further.
+			 */
+			if (skb_out == NULL) {
+				ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
+							      (unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
+				ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
+			}
+		}
 		if (skb_out == NULL) {
-			if (skb != NULL) {
-				dev_kfree_skb_any(skb);
-				dev->net->stats.tx_dropped++;
+			/* See if a very small allocation is possible.
+			 * We will send this packet immediately and hope
+			 * that there is more memory available later.
+			 */
+			if (skb)
+				ctx->tx_curr_size = max(skb->len,
+					(u32)USB_CDC_NCM_NTB_MIN_OUT_SIZE);
+			else
+				ctx->tx_curr_size = USB_CDC_NCM_NTB_MIN_OUT_SIZE;
+			skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+
+			/* No allocation possible so we will abort */
+			if (skb_out == NULL) {
+				if (skb != NULL) {
+					dev_kfree_skb_any(skb);
+					dev->net->stats.tx_dropped++;
+				}
+				goto exit_no_skb;
 			}
-			goto exit_no_skb;
+			ctx->tx_low_mem_val--;
 		}
 		/* fill out the initial 16-bit NTB header */
 		nth16 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth16));
@@ -1148,10 +1178,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ndp16 = cdc_ncm_ndp(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder);
 
 		/* align beginning of next frame */
-		cdc_ncm_align_tail(skb_out,  ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max);
+		cdc_ncm_align_tail(skb_out,  ctx->tx_modulus, ctx->tx_remainder, ctx->tx_curr_size);
 
 		/* check if we had enough room left for both NDP and frame */
-		if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) {
+		if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_curr_size) {
 			if (n == 0) {
 				/* won't fit, MTU problem? */
 				dev_kfree_skb_any(skb);
@@ -1227,7 +1257,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	/* If requested, put NDP at end of frame. */
 	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
 		nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
-		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
 		nth16->wNdpIndex = cpu_to_le16(skb_out->len);
 		skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size);
 
@@ -1246,9 +1276,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 */
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
 	    skb_out->len > ctx->min_tx_pkt) {
-		padding_count = ctx->tx_max - skb_out->len;
+		padding_count = ctx->tx_curr_size - skb_out->len;
 		skb_put_zero(skb_out, padding_count);
-	} else if (skb_out->len < ctx->tx_max &&
+	} else if (skb_out->len < ctx->tx_curr_size &&
 		   (skb_out->len % dev->maxpacket) == 0) {
 		skb_put_u8(skb_out, 0);	/* force short packet */
 	}
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 00d232406f18..021f7a88f52c 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -117,6 +117,9 @@ struct cdc_ncm_ctx {
 	u32 tx_curr_frame_num;
 	u32 rx_max;
 	u32 tx_max;
+	u32 tx_curr_size;
+	u32 tx_low_mem_max_cnt;
+	u32 tx_low_mem_val;
 	u32 max_datagram_size;
 	u16 tx_max_datagrams;
 	u16 tx_remainder;
-- 
cgit v1.2.3


From f0f9b4ed23381d97cde2ac64248198bc43608e6d Mon Sep 17 00:00:00 2001
From: Lin Yun Sheng <linyunsheng@huawei.com>
Date: Fri, 30 Jun 2017 17:44:15 +0800
Subject: net: phy: Add phy loopback support in net phy framework

This patch add set_loopback in phy_driver, which is used by MAC
driver to enable or disable phy loopback. it also add a generic
genphy_loopback function, which use BMCR loopback bit to enable
or disable loopback.

Signed-off-by: Lin Yun Sheng <linyunsheng@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c    |  1 +
 drivers/net/phy/phy_device.c | 51 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h          |  5 +++++
 3 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 8400403b3f62..5d314f143aea 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2171,6 +2171,7 @@ static struct phy_driver marvell_drivers[] = {
 		.get_sset_count = marvell_get_sset_count,
 		.get_strings = marvell_get_strings,
 		.get_stats = marvell_get_stats,
+		.set_loopback = genphy_loopback,
 	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E1540,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index acf00f071c9a..1790f7fec125 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1136,6 +1136,39 @@ int phy_resume(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_resume);
 
+int phy_loopback(struct phy_device *phydev, bool enable)
+{
+	struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
+	int ret = 0;
+
+	mutex_lock(&phydev->lock);
+
+	if (enable && phydev->loopback_enabled) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (!enable && !phydev->loopback_enabled) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (phydev->drv && phydrv->set_loopback)
+		ret = phydrv->set_loopback(phydev, enable);
+	else
+		ret = -EOPNOTSUPP;
+
+	if (ret)
+		goto out;
+
+	phydev->loopback_enabled = enable;
+
+out:
+	mutex_unlock(&phydev->lock);
+	return ret;
+}
+EXPORT_SYMBOL(phy_loopback);
+
 /* Generic PHY support and helper functions */
 
 /**
@@ -1584,6 +1617,23 @@ int genphy_resume(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_resume);
 
+int genphy_loopback(struct phy_device *phydev, bool enable)
+{
+	int value;
+
+	value = phy_read(phydev, MII_BMCR);
+	if (value < 0)
+		return value;
+
+	if (enable)
+		value |= BMCR_LOOPBACK;
+	else
+		value &= ~BMCR_LOOPBACK;
+
+	return phy_write(phydev, MII_BMCR, value);
+}
+EXPORT_SYMBOL(genphy_loopback);
+
 static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
 	/* The default values for phydev->supported are provided by the PHY
@@ -1829,6 +1879,7 @@ static struct phy_driver genphy_driver = {
 	.read_status	= genphy_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
+	.set_loopback   = genphy_loopback,
 };
 
 static int __init phy_init(void)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1d8d70193782..2a9567bb8186 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -372,6 +372,7 @@ struct phy_c45_device_ids {
  * has_fixups: Set to true if this phy has fixups/quirks.
  * suspended: Set to true if this phy has been suspended successfully.
  * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
+ * loopback_enabled: Set true if this phy has been loopbacked successfully.
  * state: state of the PHY for management purposes
  * dev_flags: Device-specific flags used by the PHY driver.
  * link_timeout: The number of timer firings to wait before the
@@ -409,6 +410,7 @@ struct phy_device {
 	bool has_fixups;
 	bool suspended;
 	bool sysfs_links;
+	bool loopback_enabled;
 
 	enum phy_state state;
 
@@ -648,6 +650,7 @@ struct phy_driver {
 	int (*set_tunable)(struct phy_device *dev,
 			    struct ethtool_tunable *tuna,
 			    const void *data);
+	int (*set_loopback)(struct phy_device *dev, bool enable);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),		\
 				      struct phy_driver, mdiodrv)
@@ -793,6 +796,7 @@ void phy_device_remove(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
+int phy_loopback(struct phy_device *phydev, bool enable);
 struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
 			      phy_interface_t interface);
 struct phy_device *phy_find_first(struct mii_bus *bus);
@@ -847,6 +851,7 @@ int genphy_update_link(struct phy_device *phydev);
 int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
+int genphy_loopback(struct phy_device *phydev, bool enable);
 int genphy_soft_reset(struct phy_device *phydev);
 static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From 0daf4349406074fc03e4889ba5e97e6fb5311bab Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 2 Jul 2017 02:13:25 +0200
Subject: bpf, net: add skb_mac_header_len helper

Add a small skb_mac_header_len() helper similarly as the
skb_network_header_len() we have and replace open coded
places in BPF's bpf_skb_change_proto() helper. Will also
be used in upcoming work.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 net/core/filter.c      | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d0b9f3846eab..3d3ceaac13b1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2206,6 +2206,11 @@ static inline int skb_mac_offset(const struct sk_buff *skb)
 	return skb_mac_header(skb) - skb->data;
 }
 
+static inline u32 skb_mac_header_len(const struct sk_buff *skb)
+{
+	return skb->network_header - skb->mac_header;
+}
+
 static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 {
 	return skb->mac_header != (typeof(skb->mac_header))~0U;
diff --git a/net/core/filter.c b/net/core/filter.c
index e5c280aa5de6..68d8cd865c4a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2012,7 +2012,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
 static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
-	u32 off = skb->network_header - skb->mac_header;
+	u32 off = skb_mac_header_len(skb);
 	int ret;
 
 	ret = skb_cow(skb, len_diff);
@@ -2048,7 +2048,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
-	u32 off = skb->network_header - skb->mac_header;
+	u32 off = skb_mac_header_len(skb);
 	int ret;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
-- 
cgit v1.2.3


From f96da09473b52c09125cc9bf7d7d4576ae8229e0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 2 Jul 2017 02:13:27 +0200
Subject: bpf: simplify narrower ctx access

This work tries to make the semantics and code around the
narrower ctx access a bit easier to follow. Right now
everything is done inside the .is_valid_access(). Offset
matching is done differently for read/write types, meaning
writes don't support narrower access and thus matching only
on offsetof(struct foo, bar) is enough whereas for read
case that supports narrower access we must check for
offsetof(struct foo, bar) + offsetof(struct foo, bar) +
sizeof(<bar>) - 1 for each of the cases. For read cases of
individual members that don't support narrower access (like
packet pointers or skb->cb[] case which has its own narrow
access logic), we check as usual only offsetof(struct foo,
bar) like in write case. Then, for the case where narrower
access is allowed, we also need to set the aux info for the
access. Meaning, ctx_field_size and converted_op_size have
to be set. First is the original field size e.g. sizeof(<bar>)
as in above example from the user facing ctx, and latter
one is the target size after actual rewrite happened, thus
for the kernel facing ctx. Also here we need the range match
and we need to keep track changing convert_ctx_access() and
converted_op_size from is_valid_access() as both are not at
the same location.

We can simplify the code a bit: check_ctx_access() becomes
simpler in that we only store ctx_field_size as a meta data
and later in convert_ctx_accesses() we fetch the target_size
right from the location where we do convert. Should the verifier
be misconfigured we do reject for BPF_WRITE cases or target_size
that are not provided. For the subsystems, we always work on
ranges in is_valid_access() and add small helpers for ranges
and narrow access, convert_ctx_accesses() sets target_size
for the relevant instruction.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Cc: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |   9 +-
 include/linux/filter.h   |  47 ++++++++++
 kernel/bpf/verifier.c    |  78 +++++++---------
 kernel/trace/bpf_trace.c |  31 +++---
 net/core/filter.c        | 239 +++++++++++++++++++++--------------------------
 5 files changed, 209 insertions(+), 195 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5175729270d7..b69e7a5869ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -156,9 +156,14 @@ struct bpf_prog;
 struct bpf_insn_access_aux {
 	enum bpf_reg_type reg_type;
 	int ctx_field_size;
-	int converted_op_size;
 };
 
+static inline void
+bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
+{
+	aux->ctx_field_size = size;
+}
+
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -173,7 +178,7 @@ struct bpf_verifier_ops {
 	u32 (*convert_ctx_access)(enum bpf_access_type type,
 				  const struct bpf_insn *src,
 				  struct bpf_insn *dst,
-				  struct bpf_prog *prog);
+				  struct bpf_prog *prog, u32 *target_size);
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
 };
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 738f8b14f025..f1fc9baa3509 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -337,6 +337,22 @@ struct bpf_prog_aux;
 	bpf_size;						\
 })
 
+#define bpf_size_to_bytes(bpf_size)				\
+({								\
+	int bytes = -EINVAL;					\
+								\
+	if (bpf_size == BPF_B)					\
+		bytes = sizeof(u8);				\
+	else if (bpf_size == BPF_H)				\
+		bytes = sizeof(u16);				\
+	else if (bpf_size == BPF_W)				\
+		bytes = sizeof(u32);				\
+	else if (bpf_size == BPF_DW)				\
+		bytes = sizeof(u64);				\
+								\
+	bytes;							\
+})
+
 #define BPF_SIZEOF(type)					\
 	({							\
 		const int __size = bytes_to_bpf_size(sizeof(type)); \
@@ -351,6 +367,13 @@ struct bpf_prog_aux;
 		__size;						\
 	})
 
+#define BPF_LDST_BYTES(insn)					\
+	({							\
+		const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \
+		WARN_ON(__size < 0);				\
+		__size;						\
+	})
+
 #define __BPF_MAP_0(m, v, ...) v
 #define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
 #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
@@ -401,6 +424,18 @@ struct bpf_prog_aux;
 #define BPF_CALL_4(name, ...)	BPF_CALL_x(4, name, __VA_ARGS__)
 #define BPF_CALL_5(name, ...)	BPF_CALL_x(5, name, __VA_ARGS__)
 
+#define bpf_ctx_range(TYPE, MEMBER)						\
+	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)				\
+	offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+
+#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)				\
+	({									\
+		BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE));		\
+		*(PTR_SIZE) = (SIZE);						\
+		offsetof(TYPE, MEMBER);						\
+	})
+
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
@@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 	return prog->type == BPF_PROG_TYPE_UNSPEC;
 }
 
+static inline bool
+bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default)
+{
+	bool off_ok;
+#ifdef __LITTLE_ENDIAN
+	off_ok = (off & (size_default - 1)) == 0;
+#else
+	off_ok = (off & (size_default - 1)) + size == size_default;
+#endif
+	return off_ok && size <= size_default && (size & (size - 1)) == 0;
+}
+
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6ea2adcb233b..6f820a044079 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 	return 0;
 }
 
-static int bpf_size_to_bytes(int bpf_size)
-{
-	if (bpf_size == BPF_W)
-		return 4;
-	else if (bpf_size == BPF_H)
-		return 2;
-	else if (bpf_size == BPF_B)
-		return 1;
-	else if (bpf_size == BPF_DW)
-		return 8;
-	else
-		return -EINVAL;
-}
-
 static bool is_spillable_regtype(enum bpf_reg_type type)
 {
 	switch (type) {
@@ -761,7 +747,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
-	struct bpf_insn_access_aux info = { .reg_type = *reg_type };
+	struct bpf_insn_access_aux info = {
+		.reg_type = *reg_type,
+	};
 
 	/* for analyzer ctx accesses are already validated and converted */
 	if (env->analyzer_ops)
@@ -769,25 +757,14 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 
 	if (env->prog->aux->ops->is_valid_access &&
 	    env->prog->aux->ops->is_valid_access(off, size, t, &info)) {
-		/* a non zero info.ctx_field_size indicates:
-		 * . For this field, the prog type specific ctx conversion algorithm
-		 *   only supports whole field access.
-		 * . This ctx access is a candiate for later verifier transformation
-		 *   to load the whole field and then apply a mask to get correct result.
-		 * a non zero info.converted_op_size indicates perceived actual converted
-		 * value width in convert_ctx_access.
+		/* A non zero info.ctx_field_size indicates that this field is a
+		 * candidate for later verifier transformation to load the whole
+		 * field and then apply a mask when accessed with a narrower
+		 * access than actual ctx access size. A zero info.ctx_field_size
+		 * will only allow for whole field access and rejects any other
+		 * type of narrower access.
 		 */
-		if ((info.ctx_field_size && !info.converted_op_size) ||
-		    (!info.ctx_field_size &&  info.converted_op_size)) {
-			verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n",
-				env->prog->type, off, size);
-			return -EACCES;
-		}
-
-		if (info.ctx_field_size) {
-			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
-			env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size;
-		}
+		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 		*reg_type = info.reg_type;
 
 		/* remember the offset of last byte accessed in ctx */
@@ -3401,11 +3378,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
 	const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+	int i, cnt, size, ctx_field_size, delta = 0;
 	const int insn_cnt = env->prog->len;
 	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0;
+	bool is_narrower_load;
+	u32 target_size;
 
 	if (ops->gen_prologue) {
 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -3445,39 +3424,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
 			continue;
 
-		off = insn->off;
-		size = bpf_size_to_bytes(BPF_SIZE(insn->code));
 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
-		converted_op_size = env->insn_aux_data[i + delta].converted_op_size;
-		is_narrower_load = type == BPF_READ && size < ctx_field_size;
+		size = BPF_LDST_BYTES(insn);
 
 		/* If the read access is a narrower load of the field,
 		 * convert to a 4/8-byte load, to minimum program type specific
 		 * convert_ctx_access changes. If conversion is successful,
 		 * we will apply proper mask to the result.
 		 */
+		is_narrower_load = size < ctx_field_size;
 		if (is_narrower_load) {
-			int size_code = BPF_H;
+			u32 off = insn->off;
+			u8 size_code;
+
+			if (type == BPF_WRITE) {
+				verbose("bpf verifier narrow ctx access misconfigured\n");
+				return -EINVAL;
+			}
 
+			size_code = BPF_H;
 			if (ctx_field_size == 4)
 				size_code = BPF_W;
 			else if (ctx_field_size == 8)
 				size_code = BPF_DW;
+
 			insn->off = off & ~(ctx_field_size - 1);
 			insn->code = BPF_LDX | BPF_MEM | size_code;
 		}
-		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
-		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+
+		target_size = 0;
+		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
+					      &target_size);
+		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
+		    (ctx_field_size && !target_size)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
 		}
-		if (is_narrower_load && size < converted_op_size) {
+
+		if (is_narrower_load && size < target_size) {
 			if (ctx_field_size <= 4)
 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
-							(1 << size * 8) - 1);
+								(1 << size * 8) - 1);
 			else
 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
-							(1 << size * 8) - 1);
+								(1 << size * 8) - 1);
 		}
 
 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 97c46b440cd6..5c6d538dbf43 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -583,7 +583,8 @@ const struct bpf_verifier_ops tracepoint_prog_ops = {
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    struct bpf_insn_access_aux *info)
 {
-	int sample_period_off;
+	const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
+					 sample_period);
 
 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
 		return false;
@@ -592,43 +593,35 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 	if (off % size != 0)
 		return false;
 
-	/* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */
-	sample_period_off = offsetof(struct bpf_perf_event_data, sample_period);
-	if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) {
-		int allowed;
-
-#ifdef __LITTLE_ENDIAN
-		allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
-#else
-		allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
-#endif
-		if (!allowed)
+	switch (off) {
+	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
+		bpf_ctx_record_field_size(info, size_sp);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
 			return false;
-		info->ctx_field_size = 8;
-		info->converted_op_size = 8;
-	} else {
+		break;
+	default:
 		if (size != sizeof(long))
 			return false;
 	}
+
 	return true;
 }
 
 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
 				      const struct bpf_insn *si,
 				      struct bpf_insn *insn_buf,
-				      struct bpf_prog *prog)
+				      struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
 	switch (si->off) {
 	case offsetof(struct bpf_perf_event_data, sample_period):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
-
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
 						       data), si->dst_reg, si->src_reg,
 				      offsetof(struct bpf_perf_event_data_kern, data));
 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
-				      offsetof(struct perf_sample_data, period));
+				      bpf_target_off(struct perf_sample_data, period, 8,
+						     target_size));
 		break;
 	default:
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
diff --git a/net/core/filter.c b/net/core/filter.c
index 29620df45b7c..94169572d002 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3088,38 +3088,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info)
+static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+				    struct bpf_insn_access_aux *info)
 {
-	info->ctx_field_size = 4;
-	switch (off) {
-	case offsetof(struct __sk_buff, pkt_type) ...
-	     offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, vlan_present) ...
-	     offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1:
-		info->converted_op_size = 1;
-		break;
-	case offsetof(struct __sk_buff, queue_mapping) ...
-	     offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, protocol) ...
-	     offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, vlan_tci) ...
-	     offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, vlan_proto) ...
-	     offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, tc_index) ...
-	     offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, tc_classid) ...
-	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
-		info->converted_op_size = 2;
-		break;
-	default:
-		info->converted_op_size = 4;
-	}
-}
+	const int size_default = sizeof(__u32);
 
-static bool __is_valid_access(int off, int size, enum bpf_access_type type,
-			      struct bpf_insn_access_aux *info)
-{
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
 
@@ -3128,40 +3101,24 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type,
 		return false;
 
 	switch (off) {
-	case offsetof(struct __sk_buff, cb[0]) ...
-	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
-		if (off + size >
-		    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
+	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+		if (off + size > offsetofend(struct __sk_buff, cb[4]))
 			return false;
 		break;
-	case offsetof(struct __sk_buff, data) ...
-	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
-		if (size != sizeof(__u32))
+	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		if (size != size_default)
 			return false;
-		info->reg_type = PTR_TO_PACKET;
-		break;
-	case offsetof(struct __sk_buff, data_end) ...
-	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
-		if (size != sizeof(__u32))
-			return false;
-		info->reg_type = PTR_TO_PACKET_END;
 		break;
 	default:
+		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
-			if (size != sizeof(__u32))
+			if (size != size_default)
 				return false;
 		} else {
-			int allowed;
-
-			/* permit narrower load for not cb/data/data_end fields */
-#ifdef __LITTLE_ENDIAN
-			allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0;
-#else
-			allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0;
-#endif
-			if (!allowed)
+			bpf_ctx_record_field_size(info, size_default);
+			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
 				return false;
-			__set_access_aux_info(off, info);
 		}
 	}
 
@@ -3173,26 +3130,22 @@ static bool sk_filter_is_valid_access(int off, int size,
 				      struct bpf_insn_access_aux *info)
 {
 	switch (off) {
-	case offsetof(struct __sk_buff, tc_classid) ...
-	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, data) ...
-	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, data_end) ...
-	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_end):
 		return false;
 	}
 
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		default:
 			return false;
 		}
 	}
 
-	return __is_valid_access(off, size, type, info);
+	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
 static bool lwt_is_valid_access(int off, int size,
@@ -3200,24 +3153,31 @@ static bool lwt_is_valid_access(int off, int size,
 				struct bpf_insn_access_aux *info)
 {
 	switch (off) {
-	case offsetof(struct __sk_buff, tc_classid) ...
-	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
 		return false;
 	}
 
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct __sk_buff, mark):
-		case offsetof(struct __sk_buff, priority):
-		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		default:
 			return false;
 		}
 	}
 
-	return __is_valid_access(off, size, type, info);
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
@@ -3289,19 +3249,27 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct __sk_buff, mark):
-		case offsetof(struct __sk_buff, tc_index):
-		case offsetof(struct __sk_buff, priority):
-		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
-		case offsetof(struct __sk_buff, tc_classid):
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, tc_index):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range(struct __sk_buff, tc_classid):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		default:
 			return false;
 		}
 	}
 
-	return __is_valid_access(off, size, type, info);
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
 static bool __is_valid_xdp_access(int off, int size)
@@ -3374,98 +3342,108 @@ static bool sock_ops_is_valid_access(int off, int size,
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
-				  struct bpf_prog *prog)
+				  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, len):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, len));
+				      bpf_target_off(struct sk_buff, len, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, protocol):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, protocol));
+				      bpf_target_off(struct sk_buff, protocol, 2,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, vlan_proto):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
-
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, vlan_proto));
+				      bpf_target_off(struct sk_buff, vlan_proto, 2,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, priority):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
-
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, priority));
+					      bpf_target_off(struct sk_buff, priority, 4,
+							     target_size));
 		else
 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, priority));
+					      bpf_target_off(struct sk_buff, priority, 4,
+							     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, ingress_ifindex):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, skb_iif));
+				      bpf_target_off(struct sk_buff, skb_iif, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, ifindex):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct sk_buff, dev));
 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
-				      offsetof(struct net_device, ifindex));
+				      bpf_target_off(struct net_device, ifindex, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, hash):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, hash));
+				      bpf_target_off(struct sk_buff, hash, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, mark):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, mark));
+					      bpf_target_off(struct sk_buff, mark, 4,
+							     target_size));
 		else
 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, mark));
+					      bpf_target_off(struct sk_buff, mark, 4,
+							     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, pkt_type):
-		return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
-					  si->src_reg, insn);
+		*target_size = 1;
+		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+				      PKT_TYPE_OFFSET());
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
+#endif
+		break;
 
 	case offsetof(struct __sk_buff, queue_mapping):
-		return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
-					  si->src_reg, insn);
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+				      bpf_target_off(struct sk_buff, queue_mapping, 2,
+						     target_size));
+		break;
 
 	case offsetof(struct __sk_buff, vlan_present):
-		return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
-					  si->dst_reg, si->src_reg, insn);
-
 	case offsetof(struct __sk_buff, vlan_tci):
-		return convert_skb_access(SKF_AD_VLAN_TAG,
-					  si->dst_reg, si->src_reg, insn);
+		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+				      bpf_target_off(struct sk_buff, vlan_tci, 2,
+						     target_size));
+		if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
+			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
+						~VLAN_TAG_PRESENT);
+		} else {
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+		}
+		break;
 
 	case offsetof(struct __sk_buff, cb[0]) ...
-	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+	     offsetofend(struct __sk_buff, cb[4]) - 1:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
 		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
 			      offsetof(struct qdisc_skb_cb, data)) %
@@ -3491,6 +3469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		off -= offsetof(struct __sk_buff, tc_classid);
 		off += offsetof(struct sk_buff, cb);
 		off += offsetof(struct qdisc_skb_cb, tc_classid);
+		*target_size = 2;
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
 					      si->src_reg, off);
@@ -3516,14 +3495,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 
 	case offsetof(struct __sk_buff, tc_index):
 #ifdef CONFIG_NET_SCHED
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
-
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, tc_index));
+					      bpf_target_off(struct sk_buff, tc_index, 2,
+							     target_size));
 		else
 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, tc_index));
+					      bpf_target_off(struct sk_buff, tc_index, 2,
+							     target_size));
 #else
 		if (type == BPF_WRITE)
 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
@@ -3534,10 +3513,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 
 	case offsetof(struct __sk_buff, napi_id):
 #if defined(CONFIG_NET_RX_BUSY_POLL)
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, napi_id));
+				      bpf_target_off(struct sk_buff, napi_id, 4,
+						     target_size));
 		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #else
@@ -3552,7 +3530,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 					  const struct bpf_insn *si,
 					  struct bpf_insn *insn_buf,
-					  struct bpf_prog *prog)
+					  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -3596,22 +3574,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
 					 const struct bpf_insn *si,
 					 struct bpf_insn *insn_buf,
-					 struct bpf_prog *prog)
+					 struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, ifindex):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct sk_buff, dev));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
-				      offsetof(struct net_device, ifindex));
+				      bpf_target_off(struct net_device, ifindex, 4,
+						     target_size));
 		break;
 	default:
-		return bpf_convert_ctx_access(type, si, insn_buf, prog);
+		return bpf_convert_ctx_access(type, si, insn_buf, prog,
+					      target_size);
 	}
 
 	return insn - insn_buf;
@@ -3620,7 +3598,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
 static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
-				  struct bpf_prog *prog)
+				  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -3643,7 +3621,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 				       const struct bpf_insn *si,
 				       struct bpf_insn *insn_buf,
-				       struct bpf_prog *prog)
+				       struct bpf_prog *prog,
+				       u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 	int off;
-- 
cgit v1.2.3


From 775755ed3c65fb2d31f9268162495d76eaa2c281 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 1 Jun 2017 13:10:38 +0200
Subject: PCI: Split ->reset_notify() method into ->reset_prepare() and
 ->reset_done()

The pci_error_handlers->reset_notify() method had a flag to indicate
whether to prepare for or clean up after a reset.  The prepare and done
cases have no shared functionality whatsoever, so split them into separate
methods.

[bhelgaas: changelog, update locking comments]
Link: http://lkml.kernel.org/r/20170601111039.8913-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 36 +++++--------
 drivers/net/wireless/marvell/mwifiex/pcie.c  | 75 ++++++++++++++++------------
 drivers/nvme/host/pci.c                      | 15 +++---
 drivers/pci/pci.c                            | 34 ++++++-------
 include/linux/pci.h                          |  3 +-
 5 files changed, 82 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 3e26d27ad213..63784576ae8b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -2348,30 +2348,19 @@ static void fm10k_io_resume(struct pci_dev *pdev)
 		netif_device_attach(netdev);
 }
 
-/**
- * fm10k_io_reset_notify - called when PCI function is reset
- * @pdev: Pointer to PCI device
- *
- * This callback is called when the PCI function is reset such as from
- * /sys/class/net/<enpX>/device/reset or similar. When prepare is true, it
- * means we should prepare for a function reset. If prepare is false, it means
- * the function reset just occurred.
- */
-static void fm10k_io_reset_notify(struct pci_dev *pdev, bool prepare)
+static void fm10k_io_reset_prepare(struct pci_dev *pdev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
-	int err = 0;
-
-	if (prepare) {
-		/* warn incase we have any active VF devices */
-		if (pci_num_vf(pdev))
-			dev_warn(&pdev->dev,
-				 "PCIe FLR may cause issues for any active VF devices\n");
+	/* warn incase we have any active VF devices */
+	if (pci_num_vf(pdev))
+		dev_warn(&pdev->dev,
+			 "PCIe FLR may cause issues for any active VF devices\n");
+	fm10k_prepare_suspend(pci_get_drvdata(pdev));
+}
 
-		fm10k_prepare_suspend(interface);
-	} else {
-		err = fm10k_handle_resume(interface);
-	}
+static void fm10k_io_reset_done(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	int err = fm10k_handle_resume(interface);
 
 	if (err) {
 		dev_warn(&pdev->dev,
@@ -2384,7 +2373,8 @@ static const struct pci_error_handlers fm10k_err_handler = {
 	.error_detected = fm10k_io_error_detected,
 	.slot_reset = fm10k_io_slot_reset,
 	.resume = fm10k_io_resume,
-	.reset_notify = fm10k_io_reset_notify,
+	.reset_prepare = fm10k_io_reset_prepare,
+	.reset_done = fm10k_io_reset_done,
 };
 
 static struct pci_driver fm10k_driver = {
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index ac62bce50e96..279adf124fc9 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -346,11 +346,13 @@ static const struct pci_device_id mwifiex_ids[] = {
 
 MODULE_DEVICE_TABLE(pci, mwifiex_ids);
 
-static void mwifiex_pcie_reset_notify(struct pci_dev *pdev, bool prepare)
+/*
+ * Cleanup all software without cleaning anything related to PCIe and HW.
+ */
+static void mwifiex_pcie_reset_prepare(struct pci_dev *pdev)
 {
 	struct pcie_service_card *card = pci_get_drvdata(pdev);
 	struct mwifiex_adapter *adapter = card->adapter;
-	int ret;
 
 	if (!adapter) {
 		dev_err(&pdev->dev, "%s: adapter structure is not valid\n",
@@ -359,37 +361,48 @@ static void mwifiex_pcie_reset_notify(struct pci_dev *pdev, bool prepare)
 	}
 
 	mwifiex_dbg(adapter, INFO,
-		    "%s: vendor=0x%4.04x device=0x%4.04x rev=%d %s\n",
-		    __func__, pdev->vendor, pdev->device,
-		    pdev->revision,
-		    prepare ? "Pre-FLR" : "Post-FLR");
-
-	if (prepare) {
-		/* Kernel would be performing FLR after this notification.
-		 * Cleanup all software without cleaning anything related to
-		 * PCIe and HW.
-		 */
-		mwifiex_shutdown_sw(adapter);
-		adapter->surprise_removed = true;
-		clear_bit(MWIFIEX_IFACE_WORK_DEVICE_DUMP, &card->work_flags);
-		clear_bit(MWIFIEX_IFACE_WORK_CARD_RESET, &card->work_flags);
-	} else {
-		/* Kernel stores and restores PCIe function context before and
-		 * after performing FLR respectively. Reconfigure the software
-		 * and firmware including firmware redownload
-		 */
-		adapter->surprise_removed = false;
-		ret = mwifiex_reinit_sw(adapter);
-		if (ret) {
-			dev_err(&pdev->dev, "reinit failed: %d\n", ret);
-			return;
-		}
-	}
+		    "%s: vendor=0x%4.04x device=0x%4.04x rev=%d Pre-FLR\n",
+		    __func__, pdev->vendor, pdev->device, pdev->revision);
+
+	mwifiex_shutdown_sw(adapter);
+	adapter->surprise_removed = true;
+	clear_bit(MWIFIEX_IFACE_WORK_DEVICE_DUMP, &card->work_flags);
+	clear_bit(MWIFIEX_IFACE_WORK_CARD_RESET, &card->work_flags);
 	mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
 }
 
-static const struct pci_error_handlers mwifiex_pcie_err_handler[] = {
-		{ .reset_notify = mwifiex_pcie_reset_notify, },
+/*
+ * Kernel stores and restores PCIe function context before and after performing
+ * FLR respectively. Reconfigure the software and firmware including firmware
+ * redownload.
+ */
+static void mwifiex_pcie_reset_done(struct pci_dev *pdev)
+{
+	struct pcie_service_card *card = pci_get_drvdata(pdev);
+	struct mwifiex_adapter *adapter = card->adapter;
+	int ret;
+
+	if (!adapter) {
+		dev_err(&pdev->dev, "%s: adapter structure is not valid\n",
+			__func__);
+		return;
+	}
+
+	mwifiex_dbg(adapter, INFO,
+		    "%s: vendor=0x%4.04x device=0x%4.04x rev=%d Post-FLR\n",
+		    __func__, pdev->vendor, pdev->device, pdev->revision);
+
+	adapter->surprise_removed = false;
+	ret = mwifiex_reinit_sw(adapter);
+	if (ret)
+		dev_err(&pdev->dev, "reinit failed: %d\n", ret);
+	else
+		mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
+}
+
+static const struct pci_error_handlers mwifiex_pcie_err_handler = {
+	.reset_prepare		= mwifiex_pcie_reset_prepare,
+	.reset_done		= mwifiex_pcie_reset_done,
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -410,7 +423,7 @@ static struct pci_driver __refdata mwifiex_pcie = {
 	},
 #endif
 	.shutdown = mwifiex_pcie_shutdown,
-	.err_handler = mwifiex_pcie_err_handler,
+	.err_handler = &mwifiex_pcie_err_handler,
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fed803232edc..9a3d69b8df98 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2145,14 +2145,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return result;
 }
 
-static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
+static void nvme_reset_prepare(struct pci_dev *pdev)
 {
-	struct nvme_dev *dev = pci_get_drvdata(pdev);
+	nvme_dev_disable(pci_get_drvdata(pdev), false);
+}
 
-	if (prepare)
-		nvme_dev_disable(dev, false);
-	else
-		nvme_reset(dev);
+static void nvme_reset_done(struct pci_dev *pdev)
+{
+	nvme_reset(pci_get_drvdata(pdev));
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -2275,7 +2275,8 @@ static const struct pci_error_handlers nvme_err_handler = {
 	.error_detected	= nvme_error_detected,
 	.slot_reset	= nvme_slot_reset,
 	.resume		= nvme_error_resume,
-	.reset_notify	= nvme_reset_notify,
+	.reset_prepare	= nvme_reset_prepare,
+	.reset_done	= nvme_reset_done,
 };
 
 static const struct pci_device_id nvme_id_table[] = {
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f4587f6f8739..56407eb1dc88 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4130,32 +4130,18 @@ static void pci_dev_unlock(struct pci_dev *dev)
 	pci_cfg_access_unlock(dev);
 }
 
-/**
- * pci_reset_notify - notify device driver of reset
- * @dev: device to be notified of reset
- * @prepare: 'true' if device is about to be reset; 'false' if reset attempt
- *           completed
- *
- * Must be called prior to device access being disabled and after device
- * access is restored.
- */
-static void pci_reset_notify(struct pci_dev *dev, bool prepare)
+static void pci_dev_save_and_disable(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
 			dev->driver ? dev->driver->err_handler : NULL;
 
 	/*
-	 * dev->driver->err_handler->reset_notify() is protected against
+	 * dev->driver->err_handler->reset_prepare() is protected against
 	 * races with ->remove() by the device lock, which must be held by
 	 * the caller.
 	 */
-	if (err_handler && err_handler->reset_notify)
-		err_handler->reset_notify(dev, prepare);
-}
-
-static void pci_dev_save_and_disable(struct pci_dev *dev)
-{
-	pci_reset_notify(dev, true);
+	if (err_handler && err_handler->reset_prepare)
+		err_handler->reset_prepare(dev);
 
 	/*
 	 * Wake-up device prior to save.  PM registers default to D0 after
@@ -4177,8 +4163,18 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 
 static void pci_dev_restore(struct pci_dev *dev)
 {
+	const struct pci_error_handlers *err_handler =
+			dev->driver ? dev->driver->err_handler : NULL;
+
 	pci_restore_state(dev);
-	pci_reset_notify(dev, false);
+
+	/*
+	 * dev->driver->err_handler->reset_done() is protected against
+	 * races with ->remove() by the device lock, which must be held by
+	 * the caller.
+	 */
+	if (err_handler && err_handler->reset_done)
+		err_handler->reset_done(dev);
 }
 
 static int pci_dev_reset(struct pci_dev *dev, int probe)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c7cfdff2529c..c5937ee7e774 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -698,7 +698,8 @@ struct pci_error_handlers {
 	pci_ers_result_t (*slot_reset)(struct pci_dev *dev);
 
 	/* PCI function reset prepare or completed */
-	void (*reset_notify)(struct pci_dev *dev, bool prepare);
+	void (*reset_prepare)(struct pci_dev *dev);
+	void (*reset_done)(struct pci_dev *dev);
 
 	/* Device driver may resume normal operations */
 	void (*resume)(struct pci_dev *dev);
-- 
cgit v1.2.3


From 468138d78510688fb5476f98d23f11ac6a63229a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 May 2017 19:52:17 -0400
Subject: binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail

on MMU targets EFAULT is possible here.  Make both return 0 or error,
passing what used to be the return value of flat_get_addr_from_rp()
by reference.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm/include/asm/flat.h        | 25 +++++++++++++++++++++---
 arch/blackfin/include/asm/flat.h   | 25 ++++++++++++++----------
 arch/blackfin/kernel/flat.c        | 13 ++++++-------
 arch/c6x/include/asm/flat.h        | 15 +++++++++++++--
 arch/h8300/include/asm/flat.h      | 24 +++++++++++++++++------
 arch/m32r/include/asm/flat.h       | 19 ++++++++-----------
 arch/m68k/include/asm/flat.h       | 23 +++++++++++++++++++---
 arch/microblaze/include/asm/flat.h | 34 ++++++++++++++++-----------------
 arch/sh/include/asm/flat.h         | 15 +++++++++++++--
 arch/xtensa/include/asm/flat.h     | 15 +++++++++++++--
 fs/binfmt_flat.c                   | 39 +++++++++++++++++++++-----------------
 include/linux/flat.h               |  2 +-
 12 files changed, 168 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index acf1d14b89a6..29d3a1524bce 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -5,12 +5,31 @@
 #ifndef __ARM_FLAT_H__
 #define __ARM_FLAT_H__
 
+#include <linux/uaccess.h>
+
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent) \
-	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
-#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
+
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
+#else
+	return get_user(*addr, rp);
+#endif
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
+#else
+	return put_user(addr, rp);
+#endif
+}
+
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		0
 
diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h
index c1314c56dd18..296d7f56fbfd 100644
--- a/arch/blackfin/include/asm/flat.h
+++ b/arch/blackfin/include/asm/flat.h
@@ -14,23 +14,28 @@
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 
-extern unsigned long bfin_get_addr_from_rp (unsigned long *ptr,
-					unsigned long relval,
-					unsigned long flags,
-					unsigned long *persistent);
+extern unsigned long bfin_get_addr_from_rp (u32 *ptr, u32 relval,
+					u32 flags, u32 *persistent);
 
-extern void bfin_put_addr_at_rp(unsigned long *ptr, unsigned long addr,
-		                unsigned long relval);
+extern void bfin_put_addr_at_rp(u32 *ptr, u32 addr, u32 relval);
 
 /* The amount by which a relocation can exceed the program image limits
    without being regarded as an error.  */
 
 #define	flat_reloc_valid(reloc, size)	((reloc) <= (size))
 
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent)	\
-	bfin_get_addr_from_rp(rp, relval, flags, persistent)
-#define	flat_put_addr_at_rp(rp, val, relval)	\
-	bfin_put_addr_at_rp(rp, val, relval)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = bfin_get_addr_from_rp(rp, relval, flags, persistent);
+	return 0;
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 val, u32 relval)
+{
+	bfin_put_addr_at_rp(rp, val, relval);
+	return 0;
+}
 
 /* Convert a relocation entry into an address.  */
 static inline unsigned long
diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c
index b5b658449616..d29ab6a2e909 100644
--- a/arch/blackfin/kernel/flat.c
+++ b/arch/blackfin/kernel/flat.c
@@ -13,14 +13,14 @@
 #define FLAT_BFIN_RELOC_TYPE_16H_BIT 1
 #define FLAT_BFIN_RELOC_TYPE_32_BIT 2
 
-unsigned long bfin_get_addr_from_rp(unsigned long *ptr,
-		unsigned long relval,
-		unsigned long flags,
-		unsigned long *persistent)
+unsigned long bfin_get_addr_from_rp(u32 *ptr,
+		u32 relval,
+		u32 flags,
+		u32 *persistent)
 {
 	unsigned short *usptr = (unsigned short *)ptr;
 	int type = (relval >> 26) & 7;
-	unsigned long val;
+	u32 val;
 
 	switch (type) {
 	case FLAT_BFIN_RELOC_TYPE_16_BIT:
@@ -59,8 +59,7 @@ EXPORT_SYMBOL(bfin_get_addr_from_rp);
  * Insert the address ADDR into the symbol reference at RP;
  * RELVAL is the raw relocation-table entry from which RP is derived
  */
-void bfin_put_addr_at_rp(unsigned long *ptr, unsigned long addr,
-		unsigned long relval)
+void bfin_put_addr_at_rp(u32 *ptr, u32 addr, u32 relval)
 {
 	unsigned short *usptr = (unsigned short *)ptr;
 	int type = (relval >> 26) & 7;
diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h
index a1858bd5f6c8..6f1feb00bd52 100644
--- a/arch/c6x/include/asm/flat.h
+++ b/arch/c6x/include/asm/flat.h
@@ -1,11 +1,22 @@
 #ifndef __ASM_C6X_FLAT_H
 #define __ASM_C6X_FLAT_H
 
+#include <asm/unaligned.h>
+
 #define flat_argvp_envp_on_stack()			0
 #define flat_old_ram_flag(flags)			(flags)
 #define flat_reloc_valid(reloc, size)			((reloc) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define flat_put_addr_at_rp(rp, val, relval)		put_unaligned(val, rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = get_unaligned((__force u32 *)rp);
+	return 0;
+}
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	put_unaligned(addr, (__force u32 *)rp);
+	return 0;
+}
 #define flat_get_relocate_addr(rel)			(rel)
 #define flat_set_persistent(relval, p)			0
 
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index a4898eccf2bf..18d024251738 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -5,6 +5,8 @@
 #ifndef __H8300_FLAT_H__
 #define __H8300_FLAT_H__
 
+#include <asm/unaligned.h>
+
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		1
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
@@ -18,11 +20,21 @@
  */
 
 #define	flat_get_relocate_addr(rel)		(rel & ~0x00000001)
-#define flat_get_addr_from_rp(rp, relval, flags, persistent) \
-	({(void)persistent; \
-		get_unaligned(rp) & (((flags) & FLAT_FLAG_GOTPIC) ?	\
-				     0xffffffff : 0x00ffffff); })
-#define flat_put_addr_at_rp(rp, addr, rel) \
-	put_unaligned(((*(char *)(rp)) << 24) | ((addr) & 0x00ffffff), (rp))
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	u32 val = get_unaligned((__force u32 *)rp);
+	if (!(flags & FLAT_FLAG_GOTPIC)
+		val &= 0x00ffffff;
+	*addr = val;
+	return 0;
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	u32 *p = (__force u32 *)rp;
+	put_unaligned((addr & 0x00ffffff) | (*(char *)p << 24), p);
+	return 0;
+}
 
 #endif /* __H8300_FLAT_H__ */
diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h
index 5d711c4688fb..455ce7ddbf14 100644
--- a/arch/m32r/include/asm/flat.h
+++ b/arch/m32r/include/asm/flat.h
@@ -17,11 +17,6 @@
 #define	flat_set_persistent(relval, p)		0
 #define	flat_reloc_valid(reloc, size)		\
 	(((reloc) - textlen_for_m32r_lo16_data) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, persistent) \
-	m32r_flat_get_addr_from_rp(rp, relval, (text_len) )
-
-#define flat_put_addr_at_rp(rp, addr, relval) \
-	m32r_flat_put_addr_at_rp(rp, addr, relval)
 
 /* Convert a relocation entry into an address.  */
 static inline unsigned long
@@ -57,9 +52,9 @@ flat_get_relocate_addr (unsigned long relval)
 
 static unsigned long textlen_for_m32r_lo16_data = 0;
 
-static inline unsigned long m32r_flat_get_addr_from_rp (unsigned long *rp,
-                                                        unsigned long relval,
-						        unsigned long textlen)
+static inline unsigned long m32r_flat_get_addr_from_rp (u32 *rp,
+                                                        u32 relval,
+						        u32 textlen)
 {
         unsigned int reloc = flat_m32r_get_reloc_type (relval);
 	textlen_for_m32r_lo16_data = 0;
@@ -100,9 +95,7 @@ static inline unsigned long m32r_flat_get_addr_from_rp (unsigned long *rp,
 	return ~0;      /* bogus value */
 }
 
-static inline void m32r_flat_put_addr_at_rp (unsigned long *rp,
-					     unsigned long addr,
-                                             unsigned long relval)
+static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
 {
         unsigned int reloc = flat_m32r_get_reloc_type (relval);
 	if (reloc & 0xf0) {
@@ -142,4 +135,8 @@ static inline void m32r_flat_put_addr_at_rp (unsigned long *rp,
 	}
 }
 
+// kludge - text_len is a local variable in the only user.
+#define flat_get_addr_from_rp(rp, relval, flags, addr, persistent) \
+	(m32r_flat_get_addr_from_rp(rp, relval, text_len), 0)
+
 #endif /* __ASM_M32R_FLAT_H */
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 00c392b0cabd..48b62790fe70 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -5,12 +5,29 @@
 #ifndef __M68KNOMMU_FLAT_H__
 #define __M68KNOMMU_FLAT_H__
 
+#include <linux/uaccess.h>
+
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, p) \
-	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
-#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
+	return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
+#else
+	return get_user(*addr, rp);
+#endif
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
+	return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
+#else
+	return put_user(addr, rp);
+#endif
+}
 #define	flat_get_relocate_addr(rel)		(rel)
 
 static inline int flat_set_persistent(unsigned long relval,
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index 6847c1512c7b..f23c3d266bae 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -32,29 +32,27 @@
  * reference
  */
 
-static inline unsigned long
-flat_get_addr_from_rp(unsigned long *rp, unsigned long relval,
-			unsigned long flags, unsigned long *persistent)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
 {
-	unsigned long addr;
-	(void)flags;
+	u32 *p = (__force u32 *)rp;
 
 	/* Is it a split 64/32 reference? */
 	if (relval & 0x80000000) {
 		/* Grab the two halves of the reference */
-		unsigned long val_hi, val_lo;
+		u32 val_hi, val_lo;
 
-		val_hi = get_unaligned(rp);
-		val_lo = get_unaligned(rp+1);
+		val_hi = get_unaligned(p);
+		val_lo = get_unaligned(p+1);
 
 		/* Crack the address out */
-		addr = ((val_hi & 0xffff) << 16) + (val_lo & 0xffff);
+		*addr = ((val_hi & 0xffff) << 16) + (val_lo & 0xffff);
 	} else {
 		/* Get the address straight out */
-		addr = get_unaligned(rp);
+		*addr = get_unaligned(p);
 	}
 
-	return addr;
+	return 0;
 }
 
 /*
@@ -63,25 +61,27 @@ flat_get_addr_from_rp(unsigned long *rp, unsigned long relval,
  */
 
 static inline void
-flat_put_addr_at_rp(unsigned long *rp, unsigned long addr, unsigned long relval)
+flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 relval)
 {
+	u32 *p = (__force u32 *)rp;
 	/* Is this a split 64/32 reloc? */
 	if (relval & 0x80000000) {
 		/* Get the two "halves" */
-		unsigned long val_hi = get_unaligned(rp);
-		unsigned long val_lo = get_unaligned(rp + 1);
+		unsigned long val_hi = get_unaligned(p);
+		unsigned long val_lo = get_unaligned(p + 1);
 
 		/* insert the address */
 		val_hi = (val_hi & 0xffff0000) | addr >> 16;
 		val_lo = (val_lo & 0xffff0000) | (addr & 0xffff);
 
 		/* store the two halves back into memory */
-		put_unaligned(val_hi, rp);
-		put_unaligned(val_lo, rp+1);
+		put_unaligned(val_hi, p);
+		put_unaligned(val_lo, p+1);
 	} else {
 		/* Put it straight in, no messing around */
-		put_unaligned(addr, rp);
+		put_unaligned(addr, p);
 	}
+	return 0;
 }
 
 #define	flat_get_relocate_addr(rel)	(rel & 0x7fffffff)
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index 5d84df5e27f6..275fcae23539 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -12,11 +12,22 @@
 #ifndef __ASM_SH_FLAT_H
 #define __ASM_SH_FLAT_H
 
+#include <asm/unaligned.h>
+
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define	flat_put_addr_at_rp(rp, val, relval)	put_unaligned(val,rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = get_unaligned((__force u32 *)rp);
+	return 0;
+}
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	put_unaligned(addr, (__force u32 *)rp);
+	return 0;
+}
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		({ (void)p; 0; })
 
diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h
index 94c44abf15e4..60e0d6a45795 100644
--- a/arch/xtensa/include/asm/flat.h
+++ b/arch/xtensa/include/asm/flat.h
@@ -1,11 +1,22 @@
 #ifndef __ASM_XTENSA_FLAT_H
 #define __ASM_XTENSA_FLAT_H
 
+#include <asm/unaligned.h>
+
 #define flat_argvp_envp_on_stack()			0
 #define flat_old_ram_flag(flags)			(flags)
 #define flat_reloc_valid(reloc, size)			((reloc) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define flat_put_addr_at_rp(rp, val, relval	)	put_unaligned(val, rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = get_unaligned((__force u32 *)rp);
+	return 0;
+}
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	put_unaligned(addr, (__force u32 *)rp);
+	return 0;
+}
 #define flat_get_relocate_addr(rel)			(rel)
 #define flat_set_persistent(relval, p)			0
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2edcefc0a294..69ec23daa25e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -422,9 +422,9 @@ static int load_flat_file(struct linux_binprm *bprm,
 {
 	struct flat_hdr *hdr;
 	unsigned long textpos, datapos, realdatastart;
-	unsigned long text_len, data_len, bss_len, stack_len, full_data, flags;
+	u32 text_len, data_len, bss_len, stack_len, full_data, flags;
 	unsigned long len, memp, memp_size, extra, rlim;
-	unsigned long __user *reloc, *rp;
+	u32 __user *reloc, *rp;
 	struct inode *inode;
 	int i, rev, relocs;
 	loff_t fpos;
@@ -596,13 +596,13 @@ static int load_flat_file(struct linux_binprm *bprm,
 			goto err;
 		}
 
-		reloc = (unsigned long __user *)
+		reloc = (u32 __user *)
 			(datapos + (ntohl(hdr->reloc_start) - text_len));
 		memp = realdatastart;
 		memp_size = len;
 	} else {
 
-		len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+		len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
 		len = PAGE_ALIGN(len);
 		textpos = vm_mmap(NULL, 0, len,
 			PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -618,10 +618,10 @@ static int load_flat_file(struct linux_binprm *bprm,
 
 		realdatastart = textpos + ntohl(hdr->data_start);
 		datapos = ALIGN(realdatastart +
-				MAX_SHARED_LIBS * sizeof(unsigned long),
+				MAX_SHARED_LIBS * sizeof(u32),
 				FLAT_DATA_ALIGN);
 
-		reloc = (unsigned long __user *)
+		reloc = (u32 __user *)
 			(datapos + (ntohl(hdr->reloc_start) - text_len));
 		memp = textpos;
 		memp_size = len;
@@ -694,7 +694,7 @@ static int load_flat_file(struct linux_binprm *bprm,
 			ret = result;
 			pr_err("Unable to read code+data+bss, errno %d\n", ret);
 			vm_munmap(textpos, text_len + data_len + extra +
-				MAX_SHARED_LIBS * sizeof(unsigned long));
+				MAX_SHARED_LIBS * sizeof(u32));
 			goto err;
 		}
 	}
@@ -754,8 +754,8 @@ static int load_flat_file(struct linux_binprm *bprm,
 	 * image.
 	 */
 	if (flags & FLAT_FLAG_GOTPIC) {
-		for (rp = (unsigned long __user *)datapos; ; rp++) {
-			unsigned long addr, rp_val;
+		for (rp = (u32 __user *)datapos; ; rp++) {
+			u32 addr, rp_val;
 			if (get_user(rp_val, rp))
 				return -EFAULT;
 			if (rp_val == 0xffffffff)
@@ -784,9 +784,9 @@ static int load_flat_file(struct linux_binprm *bprm,
 	 * __start to address 4 so that is okay).
 	 */
 	if (rev > OLD_FLAT_VERSION) {
-		unsigned long __maybe_unused persistent = 0;
+		u32 __maybe_unused persistent = 0;
 		for (i = 0; i < relocs; i++) {
-			unsigned long addr, relval;
+			u32 addr, relval;
 
 			/*
 			 * Get the address of the pointer to be
@@ -799,15 +799,18 @@ static int load_flat_file(struct linux_binprm *bprm,
 			if (flat_set_persistent(relval, &persistent))
 				continue;
 			addr = flat_get_relocate_addr(relval);
-			rp = (unsigned long __user *)calc_reloc(addr, libinfo, id, 1);
-			if (rp == (unsigned long __user *)RELOC_FAILED) {
+			rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1);
+			if (rp == (u32 __user *)RELOC_FAILED) {
 				ret = -ENOEXEC;
 				goto err;
 			}
 
 			/* Get the pointer's value.  */
-			addr = flat_get_addr_from_rp(rp, relval, flags,
-							&persistent);
+			ret = flat_get_addr_from_rp(rp, relval, flags,
+							&addr, &persistent);
+			if (unlikely(ret))
+				goto err;
+
 			if (addr != 0) {
 				/*
 				 * Do the relocation.  PIC relocs in the data section are
@@ -822,12 +825,14 @@ static int load_flat_file(struct linux_binprm *bprm,
 				}
 
 				/* Write back the relocated pointer.  */
-				flat_put_addr_at_rp(rp, addr, relval);
+				ret = flat_put_addr_at_rp(rp, addr, relval);
+				if (unlikely(ret))
+					goto err;
 			}
 		}
 	} else {
 		for (i = 0; i < relocs; i++) {
-			unsigned long relval;
+			u32 relval;
 			if (get_user(relval, reloc + i))
 				return -EFAULT;
 			relval = ntohl(relval);
diff --git a/include/linux/flat.h b/include/linux/flat.h
index 2c1eb15c4ba4..7d542dfd0def 100644
--- a/include/linux/flat.h
+++ b/include/linux/flat.h
@@ -9,8 +9,8 @@
 #ifndef _LINUX_FLAT_H
 #define _LINUX_FLAT_H
 
-#include <asm/flat.h>
 #include <uapi/linux/flat.h>
+#include <asm/flat.h>
 
 /*
  * While it would be nice to keep this header clean,  users of older
-- 
cgit v1.2.3


From fbd08e7673f950854679e5d79a30bb25e77a9d08 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:10 -0700
Subject: bio-integrity: fix interface for bio_integrity_trim

bio_integrity_trim inherent it's interface from bio_trim and accept
offset and size, but this API is error prone because data offset
must always be insync with bio's data offset. That is why we have
integrity update hook in bio_advance()

So only meaningful values are: offset == 0, sectors == bio_sectors(bio)
Let's just remove them completely.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 11 ++---------
 block/bio.c           |  4 ++--
 drivers/md/dm.c       |  2 +-
 include/linux/bio.h   |  5 ++---
 4 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8c2253c59edb..3a0d71199fb0 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -433,22 +433,15 @@ EXPORT_SYMBOL(bio_integrity_advance);
 /**
  * bio_integrity_trim - Trim integrity vector
  * @bio:	bio whose integrity vector to update
- * @offset:	offset to first data sector
- * @sectors:	number of data sectors
  *
  * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
  */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
-			unsigned int sectors)
+void bio_integrity_trim(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio_integrity_advance(bio, offset << 9);
-	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
 }
 EXPORT_SYMBOL(bio_integrity_trim);
 
diff --git a/block/bio.c b/block/bio.c
index 5b4b32a2f8d0..a6b225324a61 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1868,7 +1868,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	split->bi_iter.bi_size = sectors << 9;
 
 	if (bio_integrity(split))
-		bio_integrity_trim(split, 0, sectors);
+		bio_integrity_trim(split);
 
 	bio_advance(bio, split->bi_iter.bi_size);
 
@@ -1902,7 +1902,7 @@ void bio_trim(struct bio *bio, int offset, int size)
 	bio->bi_iter.bi_size = size;
 
 	if (bio_integrity(bio))
-		bio_integrity_trim(bio, 0, size);
+		bio_integrity_trim(bio);
 
 }
 EXPORT_SYMBOL_GPL(bio_trim);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 402946035308..13e714ea7a42 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1153,7 +1153,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (unlikely(bio_integrity(bio) != NULL))
-		bio_integrity_trim(clone, 0, len);
+		bio_integrity_trim(clone);
 
 	return 0;
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 664a27da276d..1d74f5120369 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -728,7 +728,7 @@ extern bool bio_integrity_enabled(struct bio *bio);
 extern int bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
@@ -778,8 +778,7 @@ static inline void bio_integrity_advance(struct bio *bio,
 	return;
 }
 
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
-				      unsigned int sectors)
+static inline void bio_integrity_trim(struct bio *bio)
 {
 	return;
 }
-- 
cgit v1.2.3


From e23947bd76f00701f9407af23e671f4da96f5f25 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:11 -0700
Subject: bio-integrity: fold bio_integrity_enabled to bio_integrity_prep

Currently all integrity prep hooks are open-coded, and if prepare fails
we ignore it's code and fail bio with EIO. Let's return real error to
upper layer, so later caller may react accordingly.

In fact no one want to use bio_integrity_prep() w/o bio_integrity_enabled,
so it is reasonable to fold it in to one function.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
[hch: merged with the latest block tree,
	return bool from bio_integrity_prep]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/data-integrity.txt |  6 +--
 block/bio-integrity.c                  | 88 +++++++++++++++-------------------
 block/blk-core.c                       |  5 +-
 block/blk-mq.c                         |  4 +-
 drivers/nvdimm/blk.c                   | 13 +----
 drivers/nvdimm/btt.c                   | 13 +----
 include/linux/bio.h                    | 12 ++---
 7 files changed, 50 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index f56ec97f0d14..934c44ea0c57 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -192,7 +192,7 @@ will require extra work due to the application tag.
     supported by the block device.
 
 
-    int bio_integrity_prep(bio);
+    bool bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
       filesystem must call bio_integrity_prep(bio).
@@ -201,9 +201,7 @@ will require extra work due to the application tag.
       sector must be set, and the bio should have all data pages
       added.  It is up to the caller to ensure that the bio does not
       change while I/O is in progress.
-
-      bio_integrity_prep() should only be called if
-      bio_integrity_enabled() returned 1.
+      Complete bio with error if prepare failed for some reson.
 
 
 5.3 PASSING EXISTING INTEGRITY METADATA
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 3a0d71199fb0..44c4c52681c2 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -159,44 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-/**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio:	bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not.	bio data direction and target device must be
- * set prior to calling.  The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-bool bio_integrity_enabled(struct bio *bio)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-		return false;
-
-	if (!bio_sectors(bio))
-		return false;
-
-	/* Already protected? */
-	if (bio_integrity(bio))
-		return false;
-
-	if (bi == NULL)
-		return false;
-
-	if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_VERIFY))
-		return true;
-
-	if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_GENERATE))
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
 /**
  * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:		blk_integrity profile for device
@@ -262,14 +224,15 @@ static blk_status_t bio_integrity_process(struct bio *bio,
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
  *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio.  The bio must have data
- * direction, target device and start sector set priot to calling.  In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function.  In the READ case, the buffer
+ * Description:  Checks if the bio already has an integrity payload attached.
+ * If it does, the payload has been generated by another kernel subsystem,
+ * and we just pass it through. Otherwise allocates integrity payload.
+ * The bio must have data direction, target device and start sector set priot
+ * to calling.  In the WRITE case, integrity metadata will be generated using
+ * the block device's integrity function.  In the READ case, the buffer
  * will be prepared for DMA and a suitable end_io handler set up.
  */
-int bio_integrity_prep(struct bio *bio)
+bool bio_integrity_prep(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct blk_integrity *bi;
@@ -279,20 +242,41 @@ int bio_integrity_prep(struct bio *bio)
 	unsigned int len, nr_pages;
 	unsigned int bytes, offset, i;
 	unsigned int intervals;
+	blk_status_t status;
 
 	bi = bdev_get_integrity(bio->bi_bdev);
 	q = bdev_get_queue(bio->bi_bdev);
-	BUG_ON(bi == NULL);
-	BUG_ON(bio_integrity(bio));
+	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
+		return true;
+
+	if (!bio_sectors(bio))
+		return true;
 
+	/* Already protected? */
+	if (bio_integrity(bio))
+		return true;
+
+	if (bi == NULL)
+		return true;
+
+	if (bio_data_dir(bio) == READ) {
+		if (!bi->profile->verify_fn ||
+		    !(bi->flags & BLK_INTEGRITY_VERIFY))
+			return true;
+	} else {
+		if (!bi->profile->generate_fn ||
+		    !(bi->flags & BLK_INTEGRITY_GENERATE))
+			return true;
+	}
 	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
 	len = intervals * bi->tuple_size;
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+	status = BLK_STS_RESOURCE;
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -ENOMEM;
+		goto err_end_io;
 	}
 
 	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -304,7 +288,8 @@ int bio_integrity_prep(struct bio *bio)
 	if (IS_ERR(bip)) {
 		printk(KERN_ERR "could not allocate data integrity bioset\n");
 		kfree(buf);
-		return PTR_ERR(bip);
+		status = BLK_STS_RESOURCE;
+		goto err_end_io;
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -349,8 +334,13 @@ int bio_integrity_prep(struct bio *bio)
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE)
 		bio_integrity_process(bio, bi->profile->generate_fn);
+	return true;
+
+err_end_io:
+	bio->bi_status = status;
+	bio_endio(bio);
+	return false;
 
-	return 0;
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index af393d5a9680..970b9c9638c5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		bio_endio(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (op_is_flush(bio->bi_opf)) {
 		spin_lock_irq(q->queue_lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ced2b000ca02..77617fb12661 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1550,10 +1550,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio_io_error(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index f12d23c49771..1a578b2a437b 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -179,16 +179,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0, rw;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	bip = bio_integrity(bio);
 	nsblk = q->queuedata;
@@ -212,7 +204,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
- out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b6ba0618ea46..b5caaee78bbf 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1203,16 +1203,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
@@ -1239,7 +1231,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
-out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1d74f5120369..b3b5f5a89a9c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -724,8 +724,7 @@ struct biovec_slab {
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_prep(struct bio *);
+extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *);
@@ -741,11 +740,6 @@ static inline void *bio_integrity(struct bio *bio)
 	return NULL;
 }
 
-static inline bool bio_integrity_enabled(struct bio *bio)
-{
-	return false;
-}
-
 static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
 	return 0;
@@ -756,9 +750,9 @@ static inline void bioset_integrity_free (struct bio_set *bs)
 	return;
 }
 
-static inline int bio_integrity_prep(struct bio *bio)
+static inline bool bio_integrity_prep(struct bio *bio)
 {
-	return 0;
+	return true;
 }
 
 static inline void bio_integrity_free(struct bio *bio)
-- 
cgit v1.2.3


From 128b6f9fdd9ace9e56cb3a263b4bc269658f9c40 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:12 -0700
Subject: t10-pi: Move opencoded contants to common header

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/t10-pi.c                   | 9 +++------
 drivers/scsi/lpfc/lpfc_scsi.c    | 5 +++--
 drivers/scsi/qla2xxx/qla_isr.c   | 8 ++++----
 drivers/target/target_core_sbc.c | 2 +-
 include/linux/t10-pi.h           | 2 ++
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/t10-pi.c b/block/t10-pi.c
index 3416dadf7b15..a98db384048f 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -28,9 +28,6 @@
 
 typedef __be16 (csum_fn) (void *, unsigned int);
 
-static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
-static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
-
 static __be16 t10_pi_crc_fn(void *data, unsigned int len)
 {
 	return cpu_to_be16(crc_t10dif(data, len));
@@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 		switch (type) {
 		case 1:
 		case 2:
-			if (pi->app_tag == APP_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE)
 				goto next;
 
 			if (be32_to_cpu(pi->ref_tag) !=
@@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 			}
 			break;
 		case 3:
-			if (pi->app_tag == APP_ESCAPE &&
-			    pi->ref_tag == REF_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE &&
+			    pi->ref_tag == T10_PI_REF_ESCAPE)
 				goto next;
 			break;
 		}
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 54fd0c81ceaf..99d2e990b231 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/unaligned.h>
+#include <linux/t10-pi.h>
 #include <linux/crc-t10dif.h>
 #include <net/checksum.h>
 
@@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 				 * First check to see if a protection data
 				 * check is valid
 				 */
-				if ((src->ref_tag == 0xffffffff) ||
-				    (src->app_tag == 0xffff)) {
+				if ((src->ref_tag == T10_PI_REF_ESCAPE) ||
+				    (src->app_tag == T10_PI_APP_ESCAPE)) {
 					start_ref_tag++;
 					goto skipit;
 				}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 2572121b765b..de031aed94f6 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1950,9 +1950,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 	 * For type     3: ref & app tag is all 'f's
 	 * For type 0,1,2: app tag is all 'f's
 	 */
-	if ((a_app_tag == 0xffff) &&
+	if ((a_app_tag == T10_PI_APP_ESCAPE) &&
 	    ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
-	     (a_ref_tag == 0xffffffff))) {
+	     (a_ref_tag == T10_PI_REF_ESCAPE))) {
 		uint32_t blocks_done, resid;
 		sector_t lba_s = scsi_get_lba(cmd);
 
@@ -1994,9 +1994,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 			spt = page_address(sg_page(sg)) + sg->offset;
 			spt += j;
 
-			spt->app_tag = 0xffff;
+			spt->app_tag = T10_PI_APP_ESCAPE;
 			if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
-				spt->ref_tag = 0xffffffff;
+				spt->ref_tag = T10_PI_REF_ESCAPE;
 		}
 
 		return 0;
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4316f7b65fb7..dc9456e7dac9 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1450,7 +1450,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
 				 (unsigned long long)sector, sdt->guard_tag,
 				 sdt->app_tag, be32_to_cpu(sdt->ref_tag));
 
-			if (sdt->app_tag == cpu_to_be16(0xffff)) {
+			if (sdt->app_tag == T10_PI_APP_ESCAPE) {
 				dsg_off += block_size;
 				goto next;
 			}
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 9375d23a24e7..635a3c5706bd 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -33,6 +33,8 @@ struct t10_pi_tuple {
 	__be32 ref_tag;		/* Target LBA or indirect LBA */
 };
 
+#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
+#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
-- 
cgit v1.2.3


From b1fb2c52b2d85f51f36f1661409f9aeef94265ff Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:13 -0700
Subject: block: guard bvec iteration logic

Currently if some one try to advance bvec beyond it's size we simply
dump WARN_ONCE and continue to iterate beyond bvec array boundaries.
This simply means that we endup dereferencing/corrupting random memory
region.

Sane reaction would be to propagate error back to calling context
But bvec_iter_advance's calling context is not always good for error
handling. For safity reason let truncate iterator size to zero which
will break external iteration loop which prevent us from unpredictable
memory range corruption. And even it caller ignores an error, it will
corrupt it's own bvecs, not others.

This patch does:
- Return error back to caller with hope that it will react on this
- Truncate iterator size

Code was added long time ago here 4550dd6c, luckily no one hit it
in real life :)

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
[hch: switch to true/false returns instead of errno values]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/blk.c |  3 ++-
 drivers/nvdimm/btt.c |  3 ++-
 include/linux/bio.h  |  4 +++-
 include/linux/bvec.h | 14 +++++++++-----
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 1a578b2a437b..345acca576b3 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -106,7 +106,8 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
 
 		len -= cur_len;
 		dev_offset += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return err;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b5caaee78bbf..d00c10f382f0 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -985,7 +985,8 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 
 		len -= cur_len;
 		meta_nsoff += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return ret;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b3b5f5a89a9c..d5e8689f86b8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -167,8 +167,10 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
-	else
+	else {
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		/* TODO: It is reasonable to complete bio with error here. */
+	}
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 89b65b82d98f..de317b4c13c1 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -22,6 +22,7 @@
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/errno.h>
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -66,12 +67,14 @@ struct bvec_iter {
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline void bvec_iter_advance(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned bytes)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
 {
-	WARN_ONCE(bytes > iter->bi_size,
-		  "Attempted to advance past end of bvec iter\n");
+	if (WARN_ONCE(bytes > iter->bi_size,
+		     "Attempted to advance past end of bvec iter\n")) {
+		iter->bi_size = 0;
+		return false;
+	}
 
 	while (bytes) {
 		unsigned iter_len = bvec_iter_len(bv, *iter);
@@ -86,6 +89,7 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
 			iter->bi_idx++;
 		}
 	}
+	return true;
 }
 
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
-- 
cgit v1.2.3


From f9df1cd99ebd82f05e8f5e0aa7e38cb8d3c791d7 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:14 -0700
Subject: bio: add bvec_iter rewind API

Some ->bi_end_io handlers (for example: pi_verify or decrypt handlers)
need to know original data vector, but after bio traverse io-stack it may
be advanced, splited and relocated many times so it is hard to guess
original iterator. Let's add 'bi_done' conter which accounts number
of bytes iterator was advanced during it's evolution. Later end_io handler
may easily restore original iterator by rewinding iterator to
iter->bi_done.

Note: this change makes sizeof (struct bvec_iter) multiple to 8

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
[hch: switched to true/false return]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h  | 19 +++++++++++++++++--
 include/linux/bvec.h | 27 +++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index d5e8689f86b8..1eba19580185 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -165,14 +165,29 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 {
 	iter->bi_sector += bytes >> 9;
 
-	if (bio_no_advance_iter(bio))
+	if (bio_no_advance_iter(bio)) {
 		iter->bi_size -= bytes;
-	else {
+		iter->bi_done += bytes;
+	} else {
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
 	}
 }
 
+static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
+		unsigned int bytes)
+{
+	iter->bi_sector -= bytes >> 9;
+
+	if (bio_no_advance_iter(bio)) {
+		iter->bi_size += bytes;
+		iter->bi_done -= bytes;
+		return true;
+	}
+
+	return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
+}
+
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index de317b4c13c1..ec8a4d7af6bd 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -40,6 +40,8 @@ struct bvec_iter {
 
 	unsigned int		bi_idx;		/* current index into bvl_vec */
 
+	unsigned int            bi_done;	/* number of bytes completed */
+
 	unsigned int            bi_bvec_done;	/* number of bytes completed in
 						   current bvec */
 };
@@ -83,6 +85,7 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
+		iter->bi_done += len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
@@ -92,6 +95,30 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	return true;
 }
 
+static inline bool bvec_iter_rewind(const struct bio_vec *bv,
+				     struct bvec_iter *iter,
+				     unsigned int bytes)
+{
+	while (bytes) {
+		unsigned len = min(bytes, iter->bi_bvec_done);
+
+		if (iter->bi_bvec_done == 0) {
+			if (WARN_ONCE(iter->bi_idx == 0,
+				      "Attempted to rewind iter beyond "
+				      "bvec's boundaries\n")) {
+				return false;
+			}
+			iter->bi_idx--;
+			iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
+			continue;
+		}
+		bytes -= len;
+		iter->bi_size += len;
+		iter->bi_bvec_done -= len;
+	}
+	return true;
+}
+
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
-- 
cgit v1.2.3


From 7c20f11680a441df09de7235206f70115fbf6290 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 3 Jul 2017 16:58:43 -0600
Subject: bio-integrity: stop abusing bi_end_io

And instead call directly into the integrity code from bio_end_io.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 39 ++++++++++++---------------------------
 block/bio.c           |  5 ++---
 block/blk.h           | 11 +++++++++++
 include/linux/bio.h   |  9 ---------
 4 files changed, 25 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8df4eb103ba9..f733beab6ca2 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio)
+static void bio_integrity_free(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_set *bs = bio->bi_pool;
@@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio)
 	}
 
 	bio->bi_integrity = NULL;
+	bio->bi_opf &= ~REQ_INTEGRITY;
 }
-EXPORT_SYMBOL(bio_integrity_free);
 
 /**
  * bio_integrity_add_page - Attach integrity metadata
@@ -326,12 +326,6 @@ bool bio_integrity_prep(struct bio *bio)
 		offset = 0;
 	}
 
-	/* Install custom I/O completion handler if read verify is enabled */
-	if (bio_data_dir(bio) == READ) {
-		bip->bip_end_io = bio->bi_end_io;
-		bio->bi_end_io = bio_integrity_endio;
-	}
-
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE) {
 		bio_integrity_process(bio, &bio->bi_iter,
@@ -375,13 +369,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 		bio->bi_status = BLK_STS_IOERR;
 	}
 
-	/* Restore original bio completion handler */
-	bio->bi_end_io = bip->bip_end_io;
+	bio_integrity_free(bio);
 	bio_endio(bio);
 }
 
 /**
- * bio_integrity_endio - Integrity I/O completion function
+ * __bio_integrity_endio - Integrity I/O completion function
  * @bio:	Protected bio
  * @error:	Pointer to errno
  *
@@ -392,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  * in process context.	This function postpones completion
  * accordingly.
  */
-void bio_integrity_endio(struct bio *bio)
+bool __bio_integrity_endio(struct bio *bio)
 {
-	struct bio_integrity_payload *bip = bio_integrity(bio);
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	BUG_ON(bip->bip_bio != bio);
-
-	/* In case of an I/O error there is no point in verifying the
-	 * integrity metadata.  Restore original bio end_io handler
-	 * and run it.
-	 */
-	if (bio->bi_status) {
-		bio->bi_end_io = bip->bip_end_io;
-		bio_endio(bio);
-
-		return;
+		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+		queue_work(kintegrityd_wq, &bip->bip_work);
+		return false;
 	}
 
-	INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
-	queue_work(kintegrityd_wq, &bip->bip_work);
+	bio_integrity_free(bio);
+	return true;
 }
-EXPORT_SYMBOL(bio_integrity_endio);
 
 /**
  * bio_integrity_advance - Advance integrity vector
diff --git a/block/bio.c b/block/bio.c
index a6b225324a61..9cabf5d0be20 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -243,9 +243,6 @@ fallback:
 void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
-
-	if (bio_integrity(bio))
-		bio_integrity_free(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -1813,6 +1810,8 @@ void bio_endio(struct bio *bio)
 again:
 	if (!bio_remaining_done(bio))
 		return;
+	if (!bio_integrity_endio(bio))
+		return;
 
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
diff --git a/block/blk.h b/block/blk.h
index 01ebb8185f6b..3a3d715bd725 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -81,10 +81,21 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
+bool __bio_integrity_endio(struct bio *);
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+	if (bio_integrity(bio))
+		return __bio_integrity_endio(bio);
+	return true;
+}
 #else
 static inline void blk_flush_integrity(void)
 {
 }
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+	return true;
+}
 #endif
 
 void blk_timeout_work(struct work_struct *work);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1eba19580185..7b1cf4ba0902 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -320,8 +320,6 @@ struct bio_integrity_payload {
 
 	struct bvec_iter	bip_iter;
 
-	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
-
 	unsigned short		bip_slab;	/* slab the bip came from */
 	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
 	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
@@ -739,10 +737,8 @@ struct biovec_slab {
 		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
 
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
 extern bool bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
@@ -772,11 +768,6 @@ static inline bool bio_integrity_prep(struct bio *bio)
 	return true;
 }
 
-static inline void bio_integrity_free(struct bio *bio)
-{
-	return;
-}
-
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 				      gfp_t gfp_mask)
 {
-- 
cgit v1.2.3


From 9114014cf4e6df0b22d764380ae1fc54f1a7a8b2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 29 Jun 2017 23:33:37 +0200
Subject: genirq: Add mutex to irq desc to serialize request/free_irq()

The irq_request/release_resources() callbacks ar currently invoked under
desc->lock with interrupts disabled. This is a source of problems on RT and
conceptually not required.

Add a seperate mutex to struct irq_desc which allows to serialize
request/free_irq(), which can be used to move the resource functions out of
the desc->lock held region.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Julia Cartwright <julia@ni.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Brian Norris <briannorris@chromium.org>
Cc: Doug Anderson <dianders@chromium.org>
Cc: linux-rockchip@lists.infradead.org
Cc: John Keeping <john@metanate.com>
Cc: linux-gpio@vger.kernel.org
Link: http://lkml.kernel.org/r/20170629214344.039220922@linutronix.de
---
 include/linux/irqdesc.h | 3 +++
 kernel/irq/irqdesc.c    | 1 +
 kernel/irq/manage.c     | 8 ++++++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index d425a3a09722..3e90a094798d 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -3,6 +3,7 @@
 
 #include <linux/rcupdate.h>
 #include <linux/kobject.h>
+#include <linux/mutex.h>
 
 /*
  * Core internal functions to deal with irq descriptors
@@ -45,6 +46,7 @@ struct pt_regs;
  *			IRQF_FORCE_RESUME set
  * @rcu:		rcu head for delayed free
  * @kobj:		kobject used to represent this struct in sysfs
+ * @request_mutex:	mutex to protect request/free before locking desc->lock
  * @dir:		/proc/irq/ procfs entry
  * @debugfs_file:	dentry for the debugfs file
  * @name:		flow handler name for /proc/interrupts output
@@ -96,6 +98,7 @@ struct irq_desc {
 	struct rcu_head		rcu;
 	struct kobject		kobj;
 #endif
+	struct mutex		request_mutex;
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 948b50e78549..906a67e58391 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -373,6 +373,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 
 	raw_spin_lock_init(&desc->lock);
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	mutex_init(&desc->request_mutex);
 	init_rcu_head(&desc->rcu);
 
 	desc_set_defaults(irq, desc, node, affinity, owner);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0934e02fa04e..0139908b8d53 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1167,6 +1167,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
 		new->flags &= ~IRQF_ONESHOT;
 
+	mutex_lock(&desc->request_mutex);
+
 	chip_bus_lock(desc);
 
 	/*
@@ -1350,6 +1352,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	chip_bus_sync_unlock(desc);
+	mutex_unlock(&desc->request_mutex);
 
 	irq_setup_timings(desc, new);
 
@@ -1383,6 +1386,8 @@ out_unlock:
 
 	chip_bus_sync_unlock(desc);
 
+	mutex_unlock(&desc->request_mutex);
+
 out_thread:
 	if (new->thread) {
 		struct task_struct *t = new->thread;
@@ -1446,6 +1451,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	if (!desc)
 		return NULL;
 
+	mutex_lock(&desc->request_mutex);
 	chip_bus_lock(desc);
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
@@ -1521,6 +1527,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		}
 	}
 
+	mutex_unlock(&desc->request_mutex);
+
 	irq_chip_pm_put(&desc->irq_data);
 	module_put(desc->owner);
 	kfree(action->secondary);
-- 
cgit v1.2.3


From 119d0312c766773ca3238b9d926077664eed22be Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 25 May 2017 16:28:49 -0400
Subject: kill __copy_in_user()

no users left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uaccess.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 201418d5e15c..97c93bc6f72a 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -180,12 +180,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 #ifdef CONFIG_COMPAT
 static __always_inline unsigned long __must_check
-__copy_in_user(void __user *to, const void *from, unsigned long n)
-{
-	might_fault();
-	return raw_copy_in_user(to, from, n);
-}
-static __always_inline unsigned long __must_check
 copy_in_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
-- 
cgit v1.2.3


From ad0af7104dadccd55cd2b390271677fac142650f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:32 +0300
Subject: vfs: introduce inode 'inuse' lock

Added an i_state flag I_INUSE and helpers to set/clear/test the bit.

The 'inuse' lock is an 'advisory' inode lock, that can be used to extend
exclusive create protection beyond parent->i_mutex lock among cooperating
users.

This is going to be used by overlayfs to get exclusive ownership on upper
and work dirs among overlayfs mounts.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/util.c      | 31 +++++++++++++++++++++++++++++++
 include/linux/fs.h       |  4 ++++
 3 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b1be3d39ac9d..5e958427463d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -220,6 +220,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
 void ovl_set_flag(unsigned long flag, struct inode *inode);
 bool ovl_test_flag(unsigned long flag, struct inode *inode);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
 
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index f093fcf2b4bd..adccd74162d6 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -347,3 +347,34 @@ bool ovl_test_flag(unsigned long flag, struct inode *inode)
 {
 	return test_bit(flag, &OVL_I(inode)->flags);
 }
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool locked = false;
+
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & I_OVL_INUSE)) {
+		inode->i_state |= I_OVL_INUSE;
+		locked = true;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+	if (dentry) {
+		struct inode *inode = d_inode(dentry);
+
+		spin_lock(&inode->i_lock);
+		WARN_ON(!(inode->i_state & I_OVL_INUSE));
+		inode->i_state &= ~I_OVL_INUSE;
+		spin_unlock(&inode->i_lock);
+	}
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e68cabb8457..75a5fafaf096 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1930,6 +1930,9 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
  *			wb stat updates to grab mapping->tree_lock.  See
  *			inode_switch_wb_work_fn() for details.
  *
+ * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
+ *			and work dirs among overlayfs mounts.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -1950,6 +1953,7 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH		(1 << 13)
+#define I_OVL_INUSE			(1 << 14)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
-- 
cgit v1.2.3


From 458bc30cec26c2716746ae215ed23773257e417d Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Tue, 4 Jul 2017 15:53:01 +0300
Subject: net, atm: convert atm_dev.refcnt from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmdev.h | 7 ++++---
 net/atm/proc.c         | 2 +-
 net/atm/resources.c    | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 4d97a89da066..0ec9bdb1cc9f 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -11,6 +11,7 @@
 #include <linux/uio.h>
 #include <net/sock.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <uapi/linux/atmdev.h>
 
 #ifdef CONFIG_PROC_FS
@@ -158,7 +159,7 @@ struct atm_dev {
 	struct k_atm_dev_stats stats;	/* statistics */
 	char		signal;		/* signal status (ATM_PHY_SIG_*) */
 	int		link_rate;	/* link rate (default: OC3) */
-	atomic_t	refcnt;		/* reference count */
+	refcount_t	refcnt;		/* reference count */
 	spinlock_t	lock;		/* protect internal members */
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc_entry; /* proc entry */
@@ -261,13 +262,13 @@ static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size)
 
 static inline void atm_dev_hold(struct atm_dev *dev)
 {
-	atomic_inc(&dev->refcnt);
+	refcount_inc(&dev->refcnt);
 }
 
 
 static inline void atm_dev_put(struct atm_dev *dev)
 {
-	if (atomic_dec_and_test(&dev->refcnt)) {
+	if (refcount_dec_and_test(&dev->refcnt)) {
 		BUG_ON(!test_bit(ATM_DF_REMOVED, &dev->flags));
 		if (dev->ops->dev_close)
 			dev->ops->dev_close(dev);
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 27c9c01c537d..4caca2a90ec4 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -61,7 +61,7 @@ static void atm_dev_info(struct seq_file *seq, const struct atm_dev *dev)
 	add_stats(seq, "0", &dev->stats.aal0);
 	seq_puts(seq, "  ");
 	add_stats(seq, "5", &dev->stats.aal5);
-	seq_printf(seq, "\t[%d]", atomic_read(&dev->refcnt));
+	seq_printf(seq, "\t[%d]", refcount_read(&dev->refcnt));
 	seq_putc(seq, '\n');
 }
 
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 0447d5d0b639..918244757b7d 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -109,7 +109,7 @@ struct atm_dev *atm_dev_register(const char *type, struct device *parent,
 	else
 		memset(&dev->flags, 0, sizeof(dev->flags));
 	memset(&dev->stats, 0, sizeof(dev->stats));
-	atomic_set(&dev->refcnt, 1);
+	refcount_set(&dev->refcnt, 1);
 
 	if (atm_proc_dev_register(dev) < 0) {
 		pr_err("atm_proc_dev_register failed for dev %s\n", type);
-- 
cgit v1.2.3


From 0fa104726b6cc7b1ebb4c60d55cb6abda745f4b6 Mon Sep 17 00:00:00 2001
From: "Reshetova, Elena" <elena.reshetova@intel.com>
Date: Tue, 4 Jul 2017 15:53:13 +0300
Subject: net, sunrpc: convert gss_cl_ctx.count from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sunrpc/auth_gss.h | 3 ++-
 net/sunrpc/auth_gss/auth_gss.c  | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 36eebc451b41..cebdf8745901 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -13,6 +13,7 @@
 #define _LINUX_SUNRPC_AUTH_GSS_H
 
 #ifdef __KERNEL__
+#include <linux/refcount.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/gss_api.h>
@@ -65,7 +66,7 @@ struct rpc_gss_init_res {
  * the wire when communicating with a server. */
 
 struct gss_cl_ctx {
-	atomic_t		count;
+	refcount_t		count;
 	enum rpc_gss_proc	gc_proc;
 	u32			gc_seq;
 	spinlock_t		gc_seq_lock;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4f16953e4954..72f129c74acd 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -117,14 +117,14 @@ static const struct rpc_pipe_ops gss_upcall_ops_v1;
 static inline struct gss_cl_ctx *
 gss_get_ctx(struct gss_cl_ctx *ctx)
 {
-	atomic_inc(&ctx->count);
+	refcount_inc(&ctx->count);
 	return ctx;
 }
 
 static inline void
 gss_put_ctx(struct gss_cl_ctx *ctx)
 {
-	if (atomic_dec_and_test(&ctx->count))
+	if (refcount_dec_and_test(&ctx->count))
 		gss_free_ctx(ctx);
 }
 
@@ -200,7 +200,7 @@ gss_alloc_context(void)
 		ctx->gc_proc = RPC_GSS_PROC_DATA;
 		ctx->gc_seq = 1;	/* NetApp 6.4R1 doesn't accept seq. no. 0 */
 		spin_lock_init(&ctx->gc_seq_lock);
-		atomic_set(&ctx->count,1);
+		refcount_set(&ctx->count,1);
 	}
 	return ctx;
 }
-- 
cgit v1.2.3


From cd0ae1d395a8bfc208437ce612413e58f5137499 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Wed, 21 Jun 2017 19:23:18 +0200
Subject: s390/crash: Remove unused KEXEC_NOTE_BYTES

After commmit 692f66f26a4c19 ("crash: move crashkernel parsing and vmcore
related code under CONFIG_CRASH_CORE") the KEXEC_NOTE_BYTES macro is not
used anymore and for s390 we create the ELF header in the new kernel
anyway. Therefore remove the macro.

Reported-by: Xunlei Pang <xpang@redhat.com>
Reviewed-by: Mikhail Zaslonko <zaslonko@linux.vnet.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/kexec.h | 18 ------------------
 include/linux/crash_core.h    |  5 +++++
 include/linux/kexec.h         |  9 ---------
 3 files changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 2f924bc30e35..dccf24ee26d3 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -41,24 +41,6 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_S390
 
-/*
- * Size for s390x ELF notes per CPU
- *
- * Seven notes plus zero note at the end: prstatus, fpregset, timer,
- * tod_cmp, tod_reg, control regs, and prefix
- */
-#define KEXEC_NOTE_BYTES \
-	(ALIGN(sizeof(struct elf_note), 4) * 8 + \
-	 ALIGN(sizeof("CORE"), 4) * 7 + \
-	 ALIGN(sizeof(struct elf_prstatus), 4) + \
-	 ALIGN(sizeof(elf_fpregset_t), 4) + \
-	 ALIGN(sizeof(u64), 4) + \
-	 ALIGN(sizeof(u64), 4) + \
-	 ALIGN(sizeof(u32), 4) + \
-	 ALIGN(sizeof(u64) * 16, 4) + \
-	 ALIGN(sizeof(u32), 4) \
-	)
-
 /* Provide a dummy definition to avoid build failures. */
 static inline void crash_setup_regs(struct pt_regs *newregs,
 					struct pt_regs *oldregs) { }
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 541a197ba4a2..4090a42578a8 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -10,6 +10,11 @@
 #define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
 #define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
 
+/*
+ * The per-cpu notes area is a list of notes terminated by a "NULL"
+ * note header.  For kdump, the code in vmcore.c runs in the context
+ * of the second kernel to combine them into one note.
+ */
 #define CRASH_CORE_NOTE_BYTES	   ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +	\
 				     CRASH_CORE_NOTE_NAME_BYTES +	\
 				     CRASH_CORE_NOTE_DESC_BYTES)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c9481ebcbc0c..65888418fb69 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -62,15 +62,6 @@
 
 #define KEXEC_CORE_NOTE_NAME	CRASH_CORE_NOTE_NAME
 
-/*
- * The per-cpu notes area is a list of notes terminated by a "NULL"
- * note header.  For kdump, the code in vmcore.c runs in the context
- * of the second kernel to combine them into one note.
- */
-#ifndef KEXEC_NOTE_BYTES
-#define KEXEC_NOTE_BYTES	CRASH_CORE_NOTE_BYTES
-#endif
-
 /*
  * This structure is used to hold the arguments that are used when loading
  * kernel binaries.
-- 
cgit v1.2.3


From 1c3eda01a79b8e9237d91c52c5a75b20983f47c6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 29 Jun 2017 19:15:07 +0200
Subject: vtime, sched/cputime: Remove vtime_account_user()

It's an unnecessary function between vtime_user_exit() and
account_user_time().

Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/vtime.h  |  9 +--------
 kernel/sched/cputime.c | 12 ++++++------
 2 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 0681fe25abeb..18b405e3cd93 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -67,19 +67,12 @@ static inline void vtime_account_system(struct task_struct *tsk) { }
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_user_enter(struct task_struct *tsk);
-
-static inline void vtime_user_exit(struct task_struct *tsk)
-{
-	vtime_account_user(tsk);
-}
-
+extern void vtime_user_exit(struct task_struct *tsk);
 extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
 extern void vtime_init_idle(struct task_struct *tsk, int cpu);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN  */
-static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 84a419bdf5aa..5adc896d0f64 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -724,21 +724,21 @@ void vtime_account_system(struct task_struct *tsk)
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
 
-void vtime_account_user(struct task_struct *tsk)
+void vtime_user_enter(struct task_struct *tsk)
 {
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	tsk->vtime_snap_whence = VTIME_SYS;
 	if (vtime_delta(tsk))
-		account_user_time(tsk, get_vtime_delta(tsk));
+		__vtime_account_system(tsk);
+	tsk->vtime_snap_whence = VTIME_USER;
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
 
-void vtime_user_enter(struct task_struct *tsk)
+void vtime_user_exit(struct task_struct *tsk)
 {
 	write_seqcount_begin(&tsk->vtime_seqcount);
+	tsk->vtime_snap_whence = VTIME_SYS;
 	if (vtime_delta(tsk))
-		__vtime_account_system(tsk);
-	tsk->vtime_snap_whence = VTIME_USER;
+		account_user_time(tsk, get_vtime_delta(tsk));
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
 
-- 
cgit v1.2.3


From 60a9ce57e7c5ac1df3a39fb941022bbfa40c0862 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 29 Jun 2017 19:15:09 +0200
Subject: sched/cputime: Rename vtime fields

The current "snapshot" based naming on vtime fields suggests we record
some past event but that's a low level picture of their actual purpose
which comes out blurry. The real point of these fields is to run a basic
state machine that tracks down cputime entry while switching between
contexts.

So lets reflect that with more meaningful names.

Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h |  4 ++--
 include/linux/sched.h     |  4 ++--
 kernel/fork.c             |  4 ++--
 kernel/sched/cputime.c    | 30 +++++++++++++++---------------
 4 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e049526bc188..3d537331cd4e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -171,8 +171,8 @@ extern struct cred init_cred;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 # define INIT_VTIME(tsk)						\
 	.vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount),	\
-	.vtime_snap = 0,				\
-	.vtime_snap_whence = VTIME_SYS,
+	.vtime_starttime = 0,				\
+	.vtime_state = VTIME_SYS,
 #else
 # define INIT_VTIME(tsk)
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c4ca7433d9d..ff001646549e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -689,7 +689,7 @@ struct task_struct {
 	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 	seqcount_t			vtime_seqcount;
-	unsigned long long		vtime_snap;
+	unsigned long long		vtime_starttime;
 	enum {
 		/* Task is sleeping or running in a CPU with VTIME inactive: */
 		VTIME_INACTIVE = 0,
@@ -697,7 +697,7 @@ struct task_struct {
 		VTIME_USER,
 		/* Task runs in kernelspace in a CPU with VTIME active: */
 		VTIME_SYS,
-	} vtime_snap_whence;
+	} vtime_state;
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
diff --git a/kernel/fork.c b/kernel/fork.c
index e53770d2bf95..83c4f9bf3e14 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1638,8 +1638,8 @@ static __latent_entropy struct task_struct *copy_process(
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 	seqcount_init(&p->vtime_seqcount);
-	p->vtime_snap = 0;
-	p->vtime_snap_whence = VTIME_INACTIVE;
+	p->vtime_starttime = 0;
+	p->vtime_state = VTIME_INACTIVE;
 #endif
 
 #if defined(SPLIT_RSS_COUNTING)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ab68927e8e94..8c64753067c5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -683,10 +683,10 @@ static u64 vtime_delta(struct task_struct *tsk)
 {
 	unsigned long now = READ_ONCE(jiffies);
 
-	if (time_before(now, (unsigned long)tsk->vtime_snap))
+	if (time_before(now, (unsigned long)tsk->vtime_starttime))
 		return 0;
 
-	return jiffies_to_nsecs(now - tsk->vtime_snap);
+	return jiffies_to_nsecs(now - tsk->vtime_starttime);
 }
 
 static u64 get_vtime_delta(struct task_struct *tsk)
@@ -701,10 +701,10 @@ static u64 get_vtime_delta(struct task_struct *tsk)
 	 * elapsed time. Limit account_other_time to prevent rounding
 	 * errors from causing elapsed vtime to go negative.
 	 */
-	delta = jiffies_to_nsecs(now - tsk->vtime_snap);
+	delta = jiffies_to_nsecs(now - tsk->vtime_starttime);
 	other = account_other_time(delta);
-	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
-	tsk->vtime_snap = now;
+	WARN_ON_ONCE(tsk->vtime_state == VTIME_INACTIVE);
+	tsk->vtime_starttime = now;
 
 	return delta - other;
 }
@@ -746,7 +746,7 @@ void vtime_guest_enter(struct task_struct *tsk)
 {
 	/*
 	 * The flags must be updated under the lock with
-	 * the vtime_snap flush and update.
+	 * the vtime_starttime flush and update.
 	 * That enforces a right ordering and update sequence
 	 * synchronization against the reader (task_gtime())
 	 * that can thus safely catch up with a tickless delta.
@@ -776,12 +776,12 @@ void vtime_account_idle(struct task_struct *tsk)
 void arch_vtime_task_switch(struct task_struct *prev)
 {
 	write_seqcount_begin(&prev->vtime_seqcount);
-	prev->vtime_snap_whence = VTIME_INACTIVE;
+	prev->vtime_state = VTIME_INACTIVE;
 	write_seqcount_end(&prev->vtime_seqcount);
 
 	write_seqcount_begin(&current->vtime_seqcount);
-	current->vtime_snap_whence = VTIME_SYS;
-	current->vtime_snap = jiffies;
+	current->vtime_state = VTIME_SYS;
+	current->vtime_starttime = jiffies;
 	write_seqcount_end(&current->vtime_seqcount);
 }
 
@@ -791,8 +791,8 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 
 	local_irq_save(flags);
 	write_seqcount_begin(&t->vtime_seqcount);
-	t->vtime_snap_whence = VTIME_SYS;
-	t->vtime_snap = jiffies;
+	t->vtime_state = VTIME_SYS;
+	t->vtime_starttime = jiffies;
 	write_seqcount_end(&t->vtime_seqcount);
 	local_irq_restore(flags);
 }
@@ -809,7 +809,7 @@ u64 task_gtime(struct task_struct *t)
 		seq = read_seqcount_begin(&t->vtime_seqcount);
 
 		gtime = t->gtime;
-		if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
+		if (t->vtime_state == VTIME_SYS && t->flags & PF_VCPU)
 			gtime += vtime_delta(t);
 
 	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
@@ -840,7 +840,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		*stime = t->stime;
 
 		/* Task is sleeping, nothing to add */
-		if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
+		if (t->vtime_state == VTIME_INACTIVE || is_idle_task(t))
 			continue;
 
 		delta = vtime_delta(t);
@@ -849,9 +849,9 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		 * Task runs either in user or kernel space, add pending nohz time to
 		 * the right place.
 		 */
-		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
+		if (t->vtime_state == VTIME_USER || t->flags & PF_VCPU)
 			*utime += delta;
-		else if (t->vtime_snap_whence == VTIME_SYS)
+		else if (t->vtime_state == VTIME_SYS)
 			*stime += delta;
 	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
 }
-- 
cgit v1.2.3


From bac5b6b6b11560f323e71d0ebac4061cfe5f56c0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 29 Jun 2017 19:15:10 +0200
Subject: sched/cputime: Move the vtime task fields to their own struct

We are about to add vtime accumulation fields to the task struct. Let's
avoid more bloatification and gather vtime information to their own
struct.

Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-5-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h |   6 +--
 include/linux/sched.h     |  26 ++++++-----
 kernel/fork.c             |   6 +--
 kernel/sched/cputime.c    | 112 ++++++++++++++++++++++++++--------------------
 4 files changed, 86 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3d537331cd4e..a2f6707e9fc0 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -170,9 +170,9 @@ extern struct cred init_cred;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 # define INIT_VTIME(tsk)						\
-	.vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount),	\
-	.vtime_starttime = 0,				\
-	.vtime_state = VTIME_SYS,
+	.vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount),		\
+	.vtime.starttime = 0,						\
+	.vtime.state = VTIME_SYS,
 #else
 # define INIT_VTIME(tsk)
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff001646549e..eeff8a024f0c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -223,6 +223,21 @@ struct task_cputime {
 #define prof_exp			stime
 #define sched_exp			sum_exec_runtime
 
+enum vtime_state {
+	/* Task is sleeping or running in a CPU with VTIME inactive: */
+	VTIME_INACTIVE = 0,
+	/* Task runs in userspace in a CPU with VTIME active: */
+	VTIME_USER,
+	/* Task runs in kernelspace in a CPU with VTIME active: */
+	VTIME_SYS,
+};
+
+struct vtime {
+	seqcount_t		seqcount;
+	unsigned long long	starttime;
+	enum vtime_state	state;
+};
+
 struct sched_info {
 #ifdef CONFIG_SCHED_INFO
 	/* Cumulative counters: */
@@ -688,16 +703,7 @@ struct task_struct {
 	u64				gtime;
 	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqcount_t			vtime_seqcount;
-	unsigned long long		vtime_starttime;
-	enum {
-		/* Task is sleeping or running in a CPU with VTIME inactive: */
-		VTIME_INACTIVE = 0,
-		/* Task runs in userspace in a CPU with VTIME active: */
-		VTIME_USER,
-		/* Task runs in kernelspace in a CPU with VTIME active: */
-		VTIME_SYS,
-	} vtime_state;
+	struct vtime			vtime;
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
diff --git a/kernel/fork.c b/kernel/fork.c
index 83c4f9bf3e14..d927ec11aa7a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1637,9 +1637,9 @@ static __latent_entropy struct task_struct *copy_process(
 	prev_cputime_init(&p->prev_cputime);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqcount_init(&p->vtime_seqcount);
-	p->vtime_starttime = 0;
-	p->vtime_state = VTIME_INACTIVE;
+	seqcount_init(&p->vtime.seqcount);
+	p->vtime.starttime = 0;
+	p->vtime.state = VTIME_INACTIVE;
 #endif
 
 #if defined(SPLIT_RSS_COUNTING)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8c64753067c5..9ee725edcbe0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -679,17 +679,17 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static u64 vtime_delta(struct task_struct *tsk)
+static u64 vtime_delta(struct vtime *vtime)
 {
 	unsigned long now = READ_ONCE(jiffies);
 
-	if (time_before(now, (unsigned long)tsk->vtime_starttime))
+	if (time_before(now, (unsigned long)vtime->starttime))
 		return 0;
 
-	return jiffies_to_nsecs(now - tsk->vtime_starttime);
+	return jiffies_to_nsecs(now - vtime->starttime);
 }
 
-static u64 get_vtime_delta(struct task_struct *tsk)
+static u64 get_vtime_delta(struct vtime *vtime)
 {
 	unsigned long now = READ_ONCE(jiffies);
 	u64 delta, other;
@@ -701,49 +701,56 @@ static u64 get_vtime_delta(struct task_struct *tsk)
 	 * elapsed time. Limit account_other_time to prevent rounding
 	 * errors from causing elapsed vtime to go negative.
 	 */
-	delta = jiffies_to_nsecs(now - tsk->vtime_starttime);
+	delta = jiffies_to_nsecs(now - vtime->starttime);
 	other = account_other_time(delta);
-	WARN_ON_ONCE(tsk->vtime_state == VTIME_INACTIVE);
-	tsk->vtime_starttime = now;
+	WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
+	vtime->starttime = now;
 
 	return delta - other;
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
 {
-	account_system_time(tsk, irq_count(), get_vtime_delta(tsk));
+	account_system_time(tsk, irq_count(), get_vtime_delta(&tsk->vtime));
 }
 
 void vtime_account_system(struct task_struct *tsk)
 {
-	if (!vtime_delta(tsk))
+	struct vtime *vtime = &tsk->vtime;
+
+	if (!vtime_delta(vtime))
 		return;
 
-	write_seqcount_begin(&tsk->vtime_seqcount);
+	write_seqcount_begin(&vtime->seqcount);
 	__vtime_account_system(tsk);
-	write_seqcount_end(&tsk->vtime_seqcount);
+	write_seqcount_end(&vtime->seqcount);
 }
 
 void vtime_user_enter(struct task_struct *tsk)
 {
-	write_seqcount_begin(&tsk->vtime_seqcount);
-	if (vtime_delta(tsk))
+	struct vtime *vtime = &tsk->vtime;
+
+	write_seqcount_begin(&vtime->seqcount);
+	if (vtime_delta(vtime))
 		__vtime_account_system(tsk);
-	tsk->vtime_snap_whence = VTIME_USER;
-	write_seqcount_end(&tsk->vtime_seqcount);
+	vtime->state = VTIME_USER;
+	write_seqcount_end(&vtime->seqcount);
 }
 
 void vtime_user_exit(struct task_struct *tsk)
 {
-	write_seqcount_begin(&tsk->vtime_seqcount);
-	if (vtime_delta(tsk))
-		account_user_time(tsk, get_vtime_delta(tsk));
-	tsk->vtime_snap_whence = VTIME_SYS;
-	write_seqcount_end(&tsk->vtime_seqcount);
+	struct vtime *vtime = &tsk->vtime;
+
+	write_seqcount_begin(&vtime->seqcount);
+	if (vtime_delta(vtime))
+		account_user_time(tsk, get_vtime_delta(vtime));
+	vtime->state = VTIME_SYS;
+	write_seqcount_end(&vtime->seqcount);
 }
 
 void vtime_guest_enter(struct task_struct *tsk)
 {
+	struct vtime *vtime = &tsk->vtime;
 	/*
 	 * The flags must be updated under the lock with
 	 * the vtime_starttime flush and update.
@@ -751,54 +758,62 @@ void vtime_guest_enter(struct task_struct *tsk)
 	 * synchronization against the reader (task_gtime())
 	 * that can thus safely catch up with a tickless delta.
 	 */
-	write_seqcount_begin(&tsk->vtime_seqcount);
-	if (vtime_delta(tsk))
+	write_seqcount_begin(&vtime->seqcount);
+	if (vtime_delta(vtime))
 		__vtime_account_system(tsk);
 	current->flags |= PF_VCPU;
-	write_seqcount_end(&tsk->vtime_seqcount);
+	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
 
 void vtime_guest_exit(struct task_struct *tsk)
 {
-	write_seqcount_begin(&tsk->vtime_seqcount);
+	struct vtime *vtime = &tsk->vtime;
+
+	write_seqcount_begin(&vtime->seqcount);
 	__vtime_account_system(tsk);
 	current->flags &= ~PF_VCPU;
-	write_seqcount_end(&tsk->vtime_seqcount);
+	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
-	account_idle_time(get_vtime_delta(tsk));
+	account_idle_time(get_vtime_delta(&tsk->vtime));
 }
 
 void arch_vtime_task_switch(struct task_struct *prev)
 {
-	write_seqcount_begin(&prev->vtime_seqcount);
-	prev->vtime_state = VTIME_INACTIVE;
-	write_seqcount_end(&prev->vtime_seqcount);
+	struct vtime *vtime = &prev->vtime;
 
-	write_seqcount_begin(&current->vtime_seqcount);
-	current->vtime_state = VTIME_SYS;
-	current->vtime_starttime = jiffies;
-	write_seqcount_end(&current->vtime_seqcount);
+	write_seqcount_begin(&vtime->seqcount);
+	vtime->state = VTIME_INACTIVE;
+	write_seqcount_end(&vtime->seqcount);
+
+	vtime = &current->vtime;
+
+	write_seqcount_begin(&vtime->seqcount);
+	vtime->state = VTIME_SYS;
+	vtime->starttime = jiffies;
+	write_seqcount_end(&vtime->seqcount);
 }
 
 void vtime_init_idle(struct task_struct *t, int cpu)
 {
+	struct vtime *vtime = &t->vtime;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	write_seqcount_begin(&t->vtime_seqcount);
-	t->vtime_state = VTIME_SYS;
-	t->vtime_starttime = jiffies;
-	write_seqcount_end(&t->vtime_seqcount);
+	write_seqcount_begin(&vtime->seqcount);
+	vtime->state = VTIME_SYS;
+	vtime->starttime = jiffies;
+	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
 
 u64 task_gtime(struct task_struct *t)
 {
+	struct vtime *vtime = &t->vtime;
 	unsigned int seq;
 	u64 gtime;
 
@@ -806,13 +821,13 @@ u64 task_gtime(struct task_struct *t)
 		return t->gtime;
 
 	do {
-		seq = read_seqcount_begin(&t->vtime_seqcount);
+		seq = read_seqcount_begin(&vtime->seqcount);
 
 		gtime = t->gtime;
-		if (t->vtime_state == VTIME_SYS && t->flags & PF_VCPU)
-			gtime += vtime_delta(t);
+		if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
+			gtime += vtime_delta(vtime);
 
-	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
 
 	return gtime;
 }
@@ -824,8 +839,9 @@ u64 task_gtime(struct task_struct *t)
  */
 void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 {
-	u64 delta;
+	struct vtime *vtime = &t->vtime;
 	unsigned int seq;
+	u64 delta;
 
 	if (!vtime_accounting_enabled()) {
 		*utime = t->utime;
@@ -834,25 +850,25 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 	}
 
 	do {
-		seq = read_seqcount_begin(&t->vtime_seqcount);
+		seq = read_seqcount_begin(&vtime->seqcount);
 
 		*utime = t->utime;
 		*stime = t->stime;
 
 		/* Task is sleeping, nothing to add */
-		if (t->vtime_state == VTIME_INACTIVE || is_idle_task(t))
+		if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
 			continue;
 
-		delta = vtime_delta(t);
+		delta = vtime_delta(vtime);
 
 		/*
 		 * Task runs either in user or kernel space, add pending nohz time to
 		 * the right place.
 		 */
-		if (t->vtime_state == VTIME_USER || t->flags & PF_VCPU)
+		if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
 			*utime += delta;
-		else if (t->vtime_state == VTIME_SYS)
+		else if (vtime->state == VTIME_SYS)
 			*stime += delta;
-	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
cgit v1.2.3


From 2a42eb9594a1480b4ead9e036e06ee1290e5fa6d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 29 Jun 2017 19:15:11 +0200
Subject: sched/cputime: Accumulate vtime on top of nsec clocksource

Currently the cputime source used by vtime is jiffies. When we cross
a context boundary and jiffies have changed since the last snapshot, the
pending cputime is accounted to the switching out context.

This system works ok if the ticks are not aligned across CPUs. If they
instead are aligned (ie: all fire at the same time) and the CPUs run in
userspace, the jiffies change is only observed on tick exit and therefore
the user cputime is accounted as system cputime. This is because the
CPU that maintains timekeeping fires its tick at the same time as the
others. It updates jiffies in the middle of the tick and the other CPUs
see that update on IRQ exit:

    CPU 0 (timekeeper)                  CPU 1
    -------------------              -------------
                      jiffies = N
    ...                              run in userspace for a jiffy
    tick entry                       tick entry (sees jiffies = N)
    set jiffies = N + 1
    tick exit                        tick exit (sees jiffies = N + 1)
                                                account 1 jiffy as stime

Fix this with using a nanosec clock source instead of jiffies. The
cputime is then accumulated and flushed everytime the pending delta
reaches a jiffy in order to mitigate the accounting overhead.

[ fweisbec: changelog, rebase on struct vtime, field renames, add delta
  on cputime readers, keep idle vtime as-is (low overhead accounting),
  harmonize clock sources. ]

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Luiz Capitulino <lcapitulino@redhat.com>
Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-6-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h  |  3 +++
 kernel/sched/cputime.c | 64 +++++++++++++++++++++++++++++++++-----------------
 2 files changed, 45 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eeff8a024f0c..4818126c5153 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -236,6 +236,9 @@ struct vtime {
 	seqcount_t		seqcount;
 	unsigned long long	starttime;
 	enum vtime_state	state;
+	u64			utime;
+	u64			stime;
+	u64			gtime;
 };
 
 struct sched_info {
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 9ee725edcbe0..6e3ea4ac1bda 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -681,18 +681,19 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 static u64 vtime_delta(struct vtime *vtime)
 {
-	unsigned long now = READ_ONCE(jiffies);
+	unsigned long long clock;
 
-	if (time_before(now, (unsigned long)vtime->starttime))
+	clock = sched_clock_cpu(smp_processor_id());
+	if (clock < vtime->starttime)
 		return 0;
 
-	return jiffies_to_nsecs(now - vtime->starttime);
+	return clock - vtime->starttime;
 }
 
 static u64 get_vtime_delta(struct vtime *vtime)
 {
-	unsigned long now = READ_ONCE(jiffies);
-	u64 delta, other;
+	u64 delta = vtime_delta(vtime);
+	u64 other;
 
 	/*
 	 * Unlike tick based timing, vtime based timing never has lost
@@ -701,17 +702,31 @@ static u64 get_vtime_delta(struct vtime *vtime)
 	 * elapsed time. Limit account_other_time to prevent rounding
 	 * errors from causing elapsed vtime to go negative.
 	 */
-	delta = jiffies_to_nsecs(now - vtime->starttime);
 	other = account_other_time(delta);
 	WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
-	vtime->starttime = now;
+	vtime->starttime += delta;
 
 	return delta - other;
 }
 
-static void __vtime_account_system(struct task_struct *tsk)
+static void __vtime_account_system(struct task_struct *tsk,
+				   struct vtime *vtime)
 {
-	account_system_time(tsk, irq_count(), get_vtime_delta(&tsk->vtime));
+	vtime->stime += get_vtime_delta(vtime);
+	if (vtime->stime >= TICK_NSEC) {
+		account_system_time(tsk, irq_count(), vtime->stime);
+		vtime->stime = 0;
+	}
+}
+
+static void vtime_account_guest(struct task_struct *tsk,
+				struct vtime *vtime)
+{
+	vtime->gtime += get_vtime_delta(vtime);
+	if (vtime->gtime >= TICK_NSEC) {
+		account_guest_time(tsk, vtime->gtime);
+		vtime->gtime = 0;
+	}
 }
 
 void vtime_account_system(struct task_struct *tsk)
@@ -722,7 +737,11 @@ void vtime_account_system(struct task_struct *tsk)
 		return;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk);
+	/* We might have scheduled out from guest path */
+	if (current->flags & PF_VCPU)
+		vtime_account_guest(tsk, vtime);
+	else
+		__vtime_account_system(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -731,8 +750,7 @@ void vtime_user_enter(struct task_struct *tsk)
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	if (vtime_delta(vtime))
-		__vtime_account_system(tsk);
+	__vtime_account_system(tsk, vtime);
 	vtime->state = VTIME_USER;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -742,8 +760,11 @@ void vtime_user_exit(struct task_struct *tsk)
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	if (vtime_delta(vtime))
-		account_user_time(tsk, get_vtime_delta(vtime));
+	vtime->utime += get_vtime_delta(vtime);
+	if (vtime->utime >= TICK_NSEC) {
+		account_user_time(tsk, vtime->utime);
+		vtime->utime = 0;
+	}
 	vtime->state = VTIME_SYS;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -759,8 +780,7 @@ void vtime_guest_enter(struct task_struct *tsk)
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&vtime->seqcount);
-	if (vtime_delta(vtime))
-		__vtime_account_system(tsk);
+	__vtime_account_system(tsk, vtime);
 	current->flags |= PF_VCPU;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -771,7 +791,7 @@ void vtime_guest_exit(struct task_struct *tsk)
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk);
+	vtime_account_guest(tsk, vtime);
 	current->flags &= ~PF_VCPU;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -794,7 +814,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
 
 	write_seqcount_begin(&vtime->seqcount);
 	vtime->state = VTIME_SYS;
-	vtime->starttime = jiffies;
+	vtime->starttime = sched_clock_cpu(smp_processor_id());
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -806,7 +826,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 	local_irq_save(flags);
 	write_seqcount_begin(&vtime->seqcount);
 	vtime->state = VTIME_SYS;
-	vtime->starttime = jiffies;
+	vtime->starttime = sched_clock_cpu(cpu);
 	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
@@ -825,7 +845,7 @@ u64 task_gtime(struct task_struct *t)
 
 		gtime = t->gtime;
 		if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
-			gtime += vtime_delta(vtime);
+			gtime += vtime->gtime + vtime_delta(vtime);
 
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 
@@ -866,9 +886,9 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		 * the right place.
 		 */
 		if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
-			*utime += delta;
+			*utime += vtime->utime + delta;
 		else if (vtime->state == VTIME_SYS)
-			*stime += delta;
+			*stime += vtime->stime + delta;
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
cgit v1.2.3


From 86d35afb8e07d99f8bfba4eadf93d918b4741f66 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 5 Jul 2017 10:14:38 +0200
Subject: MAINTAINERS: Add Frederic Weisbecker as nohz/dyntics maintainer

Frederic has been improving and maintaining the nohz/dynticks kernel features
for years, so make his de facto maintainership official.

Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 MAINTAINERS                | 14 ++++++++++++--
 include/linux/sched/nohz.h |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index d357695ee4fe..a7be0d553dba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6034,7 +6034,7 @@ F:	drivers/hid/hid-sensor-*
 F:	drivers/iio/*/hid-*
 F:	include/linux/hid-sensor-*
 
-HIGH-RESOLUTION TIMERS, CLOCKEVENTS, DYNTICKS
+HIGH-RESOLUTION TIMERS, CLOCKEVENTS
 M:	Thomas Gleixner <tglx@linutronix.de>
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
@@ -6042,7 +6042,6 @@ S:	Maintained
 F:	Documentation/timers/
 F:	kernel/time/hrtimer.c
 F:	kernel/time/clockevents.c
-F:	kernel/time/tick*.*
 F:	kernel/time/timer_*.c
 F:	include/linux/clockchips.h
 F:	include/linux/hrtimer.h
@@ -9090,6 +9089,17 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git
 S:	Maintained
 F:	arch/nios2/
 
+NOHZ, DYNTICKS SUPPORT
+M:	Frederic Weisbecker <fweisbec@gmail.com>
+M:	Thomas Gleixner <tglx@linutronix.de>
+M:	Ingo Molnar <mingo@kernel.org>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz
+S:	Maintained
+F:	kernel/time/tick*.*
+F:	include/linux/tick.h
+F:	include/linux/sched/nohz.h
+
 NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS)
 M:	Pavel Machek <pavel@ucw.cz>
 M:	Sakari Ailus <sakari.ailus@iki.fi>
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 7d3f75db23e5..028d17b918a7 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -2,7 +2,7 @@
 #define _LINUX_SCHED_NOHZ_H
 
 /*
- * This is the interface between the scheduler and nohz/dyntics:
+ * This is the interface between the scheduler and nohz/dynticks:
  */
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-- 
cgit v1.2.3


From 2b69c8280c8b29cdeb78b8e92e20ed35f730d319 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 5 Jul 2017 15:26:48 -0400
Subject: mm: drop "wait" parameter from write_one_page()

The callers all set it to 1.

Also, make it clear that this function will not set any sort of AS_*
error, and that the caller must do so if necessary.  No existing caller
uses this on normal files, so none of them need it.

Also, add __must_check here since, in general, the callers need to handle
an error here in some fashion.

Link: http://lkml.kernel.org/r/20170525103303.6524-1-jlayton@redhat.com
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/exofs/dir.c        |  2 +-
 fs/ext2/dir.c         |  2 +-
 fs/jfs/jfs_metapage.c |  4 ++--
 fs/minix/dir.c        |  2 +-
 fs/sysv/dir.c         |  2 +-
 fs/ufs/dir.c          |  2 +-
 include/linux/mm.h    |  2 +-
 mm/page-writeback.c   | 14 +++++++-------
 8 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 8eeb694332fe..98233a97b7b8 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -72,7 +72,7 @@ static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
 	set_page_dirty(page);
 
 	if (IS_DIRSYNC(dir))
-		err = write_one_page(page, 1);
+		err = write_one_page(page);
 	else
 		unlock_page(page);
 
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d9650c9508e4..e2709695b177 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -100,7 +100,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
 	}
 
 	if (IS_DIRSYNC(dir)) {
-		err = write_one_page(page, 1);
+		err = write_one_page(page);
 		if (!err)
 			err = sync_inode_metadata(dir, 1);
 	} else {
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..744fa3c079e6 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -711,7 +711,7 @@ void force_metapage(struct metapage *mp)
 	get_page(page);
 	lock_page(page);
 	set_page_dirty(page);
-	write_one_page(page, 1);
+	write_one_page(page);
 	clear_bit(META_forcewrite, &mp->flag);
 	put_page(page);
 }
@@ -756,7 +756,7 @@ void release_metapage(struct metapage * mp)
 		set_page_dirty(page);
 		if (test_bit(META_sync, &mp->flag)) {
 			clear_bit(META_sync, &mp->flag);
-			write_one_page(page, 1);
+			write_one_page(page);
 			lock_page(page); /* write_one_page unlocks the page */
 		}
 	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 7edc9b395700..baa9721f1299 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -57,7 +57,7 @@ static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
 		mark_inode_dirty(dir);
 	}
 	if (IS_DIRSYNC(dir))
-		err = write_one_page(page, 1);
+		err = write_one_page(page);
 	else
 		unlock_page(page);
 	return err;
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 5bdae85ceef7..f5191cb2c947 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -45,7 +45,7 @@ static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
 		mark_inode_dirty(dir);
 	}
 	if (IS_DIRSYNC(dir))
-		err = write_one_page(page, 1);
+		err = write_one_page(page);
 	else
 		unlock_page(page);
 	return err;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index de01b8f2aa78..48609f1d9580 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -53,7 +53,7 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
 		mark_inode_dirty(dir);
 	}
 	if (IS_DIRSYNC(dir))
-		err = write_one_page(page, 1);
+		err = write_one_page(page);
 	else
 		unlock_page(page);
 	return err;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cb17c6b97de..ca9c8b27cecb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2199,7 +2199,7 @@ extern void filemap_map_pages(struct vm_fault *vmf,
 extern int filemap_page_mkwrite(struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
-int write_one_page(struct page *page, int wait);
+int __must_check write_one_page(struct page *page);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 143c1c25d680..b901fe52b153 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2366,15 +2366,16 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 }
 
 /**
- * write_one_page - write out a single page and optionally wait on I/O
+ * write_one_page - write out a single page and wait on I/O
  * @page: the page to write
- * @wait: if true, wait on writeout
  *
  * The page must be locked by the caller and will be unlocked upon return.
  *
- * write_one_page() returns a negative error code if I/O failed.
+ * write_one_page() returns a negative error code if I/O failed. Note that
+ * the address_space is not marked for error. The caller must do this if
+ * needed.
  */
-int write_one_page(struct page *page, int wait)
+int write_one_page(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	int ret = 0;
@@ -2385,13 +2386,12 @@ int write_one_page(struct page *page, int wait)
 
 	BUG_ON(!PageLocked(page));
 
-	if (wait)
-		wait_on_page_writeback(page);
+	wait_on_page_writeback(page);
 
 	if (clear_page_dirty_for_io(page)) {
 		get_page(page);
 		ret = mapping->a_ops->writepage(page, &wbc);
-		if (ret == 0 && wait) {
+		if (ret == 0) {
 			wait_on_page_writeback(page);
 			if (PageError(page))
 				ret = -EIO;
-- 
cgit v1.2.3


From 0f41074a65757b46acbdd4293f0de8a70b147406 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 5 Jul 2017 15:26:50 -0400
Subject: fs: remove call_fsync helper function

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/sync.c          | 2 +-
 include/linux/fs.h | 6 ------
 ipc/shm.c          | 2 +-
 3 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sync.c b/fs/sync.c
index 11ba023434b1..2a54c1f22035 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 		spin_unlock(&inode->i_lock);
 		mark_inode_dirty_sync(inode);
 	}
-	return call_fsync(file, start, end, datasync);
+	return file->f_op->fsync(file, start, end, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync_range);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..2d9e71e2a308 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1739,12 +1739,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 	return file->f_op->mmap(file, vma);
 }
 
-static inline int call_fsync(struct file *file, loff_t start, loff_t end,
-			     int datasync)
-{
-	return file->f_op->fsync(file, start, end, datasync);
-}
-
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
diff --git a/ipc/shm.c b/ipc/shm.c
index 34c4344e8d4b..f45c7959b264 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	if (!sfd->file->f_op->fsync)
 		return -EINVAL;
-	return call_fsync(sfd->file, start, end, datasync);
+	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
 }
 
 static long shm_fallocate(struct file *file, int mode, loff_t offset,
-- 
cgit v1.2.3


From af65207c76ce8e6263a3b097ea35365dde9913d0 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Thu, 6 Jul 2017 00:01:59 -0400
Subject: ext4: fix __ext4_new_inode() journal credits calculation

ea_inode feature allows creating extended attributes that are up to
64k in size. Update __ext4_new_inode() to pick increased credit limits.

To avoid overallocating too many journal credits, update
__ext4_xattr_set_credits() to make a distinction between xattr create
vs update. This helps __ext4_new_inode() because all attributes are
known to be new, so we can save credits that are normally needed to
delete old values.

Also, have fscrypt specify its maximum context size so that we don't
end up allocating credits for 64k size.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/policy.c             |  1 +
 fs/ext4/acl.c                  | 13 ++++++-----
 fs/ext4/ialloc.c               | 52 ++++++++++++++++++++++++++++++++++++------
 fs/ext4/super.c                |  3 ++-
 fs/ext4/xattr.c                | 46 ++++++++++++++++++++++---------------
 fs/ext4/xattr.h                |  5 +++-
 include/linux/fscrypt_common.h |  3 +++
 7 files changed, 89 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 210976e7a269..94becf5a1519 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -260,6 +260,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
 	memcpy(ctx.master_key_descriptor, ci->ci_master_key,
 	       FS_KEY_DESCRIPTOR_SIZE);
 	get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+	BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
 	res = parent->i_sb->s_cop->set_context(child, &ctx,
 						sizeof(ctx), fs_data);
 	if (res)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 8db03e5c78bc..09441ae07a5b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type)
  */
 static int
 __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
-	     struct posix_acl *acl)
+	     struct posix_acl *acl, int xattr_flags)
 {
 	int name_index;
 	void *value = NULL;
@@ -218,7 +218,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	}
 
 	error = ext4_xattr_set_handle(handle, inode, name_index, "",
-				      value, size, 0);
+				      value, size, xattr_flags);
 
 	kfree(value);
 	if (!error)
@@ -238,7 +238,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	if (error)
 		return error;
 retry:
-	error = ext4_xattr_set_credits(inode, acl_size, &credits);
+	error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */,
+				       &credits);
 	if (error)
 		return error;
 
@@ -246,7 +247,7 @@ retry:
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	error = __ext4_set_acl(handle, inode, type, acl);
+	error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
 	ext4_journal_stop(handle);
 	if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
@@ -271,13 +272,13 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 
 	if (default_acl) {
 		error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
-				       default_acl);
+				       default_acl, XATTR_CREATE);
 		posix_acl_release(default_acl);
 	}
 	if (acl) {
 		if (!error)
 			error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
-					       acl);
+					       acl, XATTR_CREATE);
 		posix_acl_release(acl);
 	}
 	return error;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0c79e3efcaf7..507bfb3344d4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -766,11 +766,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 	if (!dir || !dir->i_nlink)
 		return ERR_PTR(-EPERM);
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+	sb = dir->i_sb;
+	sbi = EXT4_SB(sb);
+
+	if (unlikely(ext4_forced_shutdown(sbi)))
 		return ERR_PTR(-EIO);
 
-	if ((ext4_encrypted_inode(dir) ||
-	     DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
+	if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
 	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
 	    !(i_flags & EXT4_EA_INODE_FL)) {
 		err = fscrypt_get_encryption_info(dir);
@@ -778,19 +780,55 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 			return ERR_PTR(err);
 		if (!fscrypt_has_encryption_key(dir))
 			return ERR_PTR(-ENOKEY);
-		if (!handle)
-			nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
 		encrypt = 1;
 	}
 
-	sb = dir->i_sb;
+	if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) {
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+		struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
+
+		if (p) {
+			int acl_size = p->a_count * sizeof(ext4_acl_entry);
+
+			nblocks += (S_ISDIR(mode) ? 2 : 1) *
+				__ext4_xattr_set_credits(sb, NULL /* inode */,
+					NULL /* block_bh */, acl_size,
+					true /* is_create */);
+			posix_acl_release(p);
+		}
+#endif
+
+#ifdef CONFIG_SECURITY
+		{
+			int num_security_xattrs = 1;
+
+#ifdef CONFIG_INTEGRITY
+			num_security_xattrs++;
+#endif
+			/*
+			 * We assume that security xattrs are never
+			 * more than 1k.  In practice they are under
+			 * 128 bytes.
+			 */
+			nblocks += num_security_xattrs *
+				__ext4_xattr_set_credits(sb, NULL /* inode */,
+					NULL /* block_bh */, 1024,
+					true /* is_create */);
+		}
+#endif
+		if (encrypt)
+			nblocks += __ext4_xattr_set_credits(sb,
+					NULL /* inode */, NULL /* block_bh */,
+					FSCRYPT_SET_CONTEXT_MAX_SIZE,
+					true /* is_create */);
+	}
+
 	ngroups = ext4_get_groups_count(sb);
 	trace_ext4_request_inode(dir, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
-	sbi = EXT4_SB(sb);
 
 	/*
 	 * Initialize owners and quota early so that we don't have to account
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 56c971807df5..f666042a3d58 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1194,7 +1194,8 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 	if (res)
 		return res;
 retry:
-	res = ext4_xattr_set_credits(inode, len, &credits);
+	res = ext4_xattr_set_credits(inode, len, false /* is_create */,
+				     &credits);
 	if (res)
 		return res;
 
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 34fa37e7744c..cff4f41ced61 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -830,11 +830,10 @@ static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
 	dquot_free_inode(inode);
 }
 
-static int __ext4_xattr_set_credits(struct inode *inode,
-				    struct buffer_head *block_bh,
-				    size_t value_len)
+int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+			     struct buffer_head *block_bh, size_t value_len,
+			     bool is_create)
 {
-	struct super_block *sb = inode->i_sb;
 	int credits;
 	int blocks;
 
@@ -860,7 +859,7 @@ static int __ext4_xattr_set_credits(struct inode *inode,
 	 * In case of inline data, we may push out the data to a block,
 	 * so we need to reserve credits for this eventuality
 	 */
-	if (ext4_has_inline_data(inode))
+	if (inode && ext4_has_inline_data(inode))
 		credits += ext4_writepage_trans_blocks(inode) + 1;
 
 	/* We are done if ea_inode feature is not enabled. */
@@ -882,19 +881,23 @@ static int __ext4_xattr_set_credits(struct inode *inode,
 	/* Blocks themselves. */
 	credits += blocks;
 
-	/* Dereference ea_inode holding old xattr value.
-	 * Old ea_inode, inode map, block bitmap, group descriptor.
-	 */
-	credits += 4;
+	if (!is_create) {
+		/* Dereference ea_inode holding old xattr value.
+		 * Old ea_inode, inode map, block bitmap, group descriptor.
+		 */
+		credits += 4;
 
-	/* Data blocks for old ea_inode. */
-	blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
+		/* Data blocks for old ea_inode. */
+		blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
 
-	/* Indirection block or one level of extent tree for old ea_inode. */
-	blocks += 1;
+		/* Indirection block or one level of extent tree for old
+		 * ea_inode.
+		 */
+		blocks += 1;
 
-	/* Block bitmap and group descriptor updates for each block. */
-	credits += blocks * 2;
+		/* Block bitmap and group descriptor updates for each block. */
+		credits += blocks * 2;
+	}
 
 	/* We may need to clone the existing xattr block in which case we need
 	 * to increment ref counts for existing ea_inodes referenced by it.
@@ -2263,7 +2266,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 			goto cleanup;
 		}
 
-		credits = __ext4_xattr_set_credits(inode, bh, value_len);
+		credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
+						   value_len,
+						   flags & XATTR_CREATE);
 		brelse(bh);
 
 		if (!ext4_handle_has_enough_credits(handle, credits)) {
@@ -2370,7 +2375,8 @@ cleanup:
 	return error;
 }
 
-int ext4_xattr_set_credits(struct inode *inode, size_t value_len, int *credits)
+int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
+			   bool is_create, int *credits)
 {
 	struct buffer_head *bh;
 	int err;
@@ -2386,7 +2392,8 @@ int ext4_xattr_set_credits(struct inode *inode, size_t value_len, int *credits)
 	if (IS_ERR(bh)) {
 		err = PTR_ERR(bh);
 	} else {
-		*credits = __ext4_xattr_set_credits(inode, bh, value_len);
+		*credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
+						    value_len, is_create);
 		brelse(bh);
 		err = 0;
 	}
@@ -2417,7 +2424,8 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 		return error;
 
 retry:
-	error = ext4_xattr_set_credits(inode, value_len, &credits);
+	error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE,
+				       &credits);
 	if (error)
 		return error;
 
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 26119a67c8c3..0d2dde1fa87a 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -153,7 +153,10 @@ extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
-				  int *credits);
+				  bool is_create, int *credits);
+extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+				struct buffer_head *block_bh, size_t value_len,
+				bool is_create);
 
 extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
 				   struct ext4_xattr_inode_array **array,
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
index 0a30c106c1e5..82beaf70e7e2 100644
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -83,6 +83,9 @@ struct fscrypt_operations {
 	unsigned (*max_namelen)(struct inode *);
 };
 
+/* Maximum value for the third parameter of fscrypt_operations.set_context(). */
+#define FSCRYPT_SET_CONTEXT_MAX_SIZE	28
+
 static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
 {
 	if (inode->i_sb->s_cop->dummy_context &&
-- 
cgit v1.2.3


From f35157417215ec138c920320c746fdb3e04ef1d5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:02 +0100
Subject: Provide a function to create a NUL-terminated string from
 unterminated data

Provide a function, kmemdup_nul(), that will create a NUL-terminated string
from an unterminated character array where the length is known in advance.

This is better than kstrndup() in situations where we already know the
string length as the strnlen() in kstrndup() is superfluous.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/string.h |  1 +
 mm/util.c              | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 537918f8a98e..3dd944cfe171 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -131,6 +131,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
 extern const char *kstrdup_const(const char *s, gfp_t gfp);
 extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
 extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
 
 extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
 extern void argv_free(char **argv);
diff --git a/mm/util.c b/mm/util.c
index 26be6407abd7..21ddf90f883d 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL(kstrdup_const);
  * @s: the string to duplicate
  * @max: read at most @max chars from @s
  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Note: Use kmemdup_nul() instead if the size is known exactly.
  */
 char *kstrndup(const char *s, size_t max, gfp_t gfp)
 {
@@ -120,6 +122,28 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
 }
 EXPORT_SYMBOL(kmemdup);
 
+/**
+ * kmemdup_nul - Create a NUL-terminated string from unterminated data
+ * @s: The data to stringify
+ * @len: The size of the data
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ */
+char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
+{
+	char *buf;
+
+	if (!s)
+		return NULL;
+
+	buf = kmalloc_track_caller(len + 1, gfp);
+	if (buf) {
+		memcpy(buf, s, len);
+		buf[len] = '\0';
+	}
+	return buf;
+}
+EXPORT_SYMBOL(kmemdup_nul);
+
 /**
  * memdup_user - duplicate memory region from user space
  *
-- 
cgit v1.2.3


From ee416bcdba9975065de571e09de1f7ebfde2156a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:16 +0100
Subject: VFS: Make get_filesystem() return the affected filesystem

Make get_filesystem() return a pointer to the filesystem on which it just
got a ref.

Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/filesystems.c   | 3 ++-
 include/linux/fs.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/filesystems.c b/fs/filesystems.c
index cac75547d35c..591e52d23ed4 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -33,9 +33,10 @@ static struct file_system_type *file_systems;
 static DEFINE_RWLOCK(file_systems_lock);
 
 /* WARNING: This can be used only if we _already_ own a reference */
-void get_filesystem(struct file_system_type *fs)
+struct file_system_type *get_filesystem(struct file_system_type *fs)
 {
 	__module_get(fs->owner);
+	return fs;
 }
 
 void put_filesystem(struct file_system_type *fs)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..bc0c054894b9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2956,7 +2956,7 @@ extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
 
-extern void get_filesystem(struct file_system_type *fs);
+extern struct file_system_type *get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
-- 
cgit v1.2.3


From cdf01226b26e98c79c13b335fbe0cbbbe850cf44 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:22 +0100
Subject: VFS: Provide empty name qstr

Provide an empty name (ie. "") qstr for general use.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 8 ++++++--
 fs/gfs2/dir.c          | 3 +--
 fs/namei.c             | 3 +--
 fs/nsfs.c              | 3 +--
 fs/pipe.c              | 3 +--
 include/linux/dcache.h | 5 +++++
 6 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index a9f995f6859e..95efb7b2bf84 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -90,6 +90,11 @@ EXPORT_SYMBOL(rename_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
+const struct qstr empty_name = QSTR_INIT("", 0);
+EXPORT_SYMBOL(empty_name);
+const struct qstr slash_name = QSTR_INIT("/", 1);
+EXPORT_SYMBOL(slash_name);
+
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -1578,8 +1583,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	 */
 	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 	if (unlikely(!name)) {
-		static const struct qstr anon = QSTR_INIT("/", 1);
-		name = &anon;
+		name = &slash_name;
 		dname = dentry->d_iname;
 	} else if (name->len > DNAME_INLINE_LEN-1) {
 		size_t size = offsetof(struct external_name, name[1]);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 79113219be5f..a5dfff6a033e 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -872,7 +872,6 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
 	struct buffer_head *bh;
 	struct gfs2_leaf *leaf;
 	struct gfs2_dirent *dent;
-	struct qstr name = { .name = "" };
 	struct timespec tv = current_time(inode);
 
 	error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
@@ -896,7 +895,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
 	leaf->lf_sec = cpu_to_be64(tv.tv_sec);
 	memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2));
 	dent = (struct gfs2_dirent *)(leaf+1);
-	gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
+	gfs2_qstr2dirent(&empty_name, bh->b_size - sizeof(struct gfs2_leaf), dent);
 	*pbh = bh;
 	return leaf;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112e..0d35760fee00 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3400,7 +3400,6 @@ out:
 
 struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
 {
-	static const struct qstr name = QSTR_INIT("/", 1);
 	struct dentry *child = NULL;
 	struct inode *dir = dentry->d_inode;
 	struct inode *inode;
@@ -3414,7 +3413,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
 	if (!dir->i_op->tmpfile)
 		goto out_err;
 	error = -ENOMEM;
-	child = d_alloc(dentry, &name);
+	child = d_alloc(dentry, &slash_name);
 	if (unlikely(!child))
 		goto out_err;
 	error = dir->i_op->tmpfile(dir, child, mode);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index f3db56e83dd2..08127a2b8559 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -53,7 +53,6 @@ static void nsfs_evict(struct inode *inode)
 static void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
-	struct qstr qname = { .name = "", };
 	struct dentry *dentry;
 	struct inode *inode;
 	unsigned long d;
@@ -85,7 +84,7 @@ slow:
 	inode->i_fop = &ns_file_operations;
 	inode->i_private = ns;
 
-	dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
+	dentry = d_alloc_pseudo(mnt->mnt_sb, &empty_name);
 	if (!dentry) {
 		iput(inode);
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/pipe.c b/fs/pipe.c
index 73b84baf58f8..97e5be897753 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -739,13 +739,12 @@ int create_pipe_files(struct file **res, int flags)
 	struct inode *inode = get_pipe_inode();
 	struct file *f;
 	struct path path;
-	static struct qstr name = { .name = "" };
 
 	if (!inode)
 		return -ENFILE;
 
 	err = -ENOMEM;
-	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
+	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name);
 	if (!path.dentry)
 		goto err_inode;
 	path.mnt = mntget(pipe_mnt);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d2e38dc6172c..3f65a4fa72ed 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,6 +55,11 @@ struct qstr {
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
 
+extern const char empty_string[];
+extern const struct qstr empty_name;
+extern const char slash_string[];
+extern const struct qstr slash_name;
+
 struct dentry_stat_t {
 	long nr_dentry;
 	long nr_unused;
-- 
cgit v1.2.3


From 87d284443d071dc70344dda4b2fb43723686acdb Mon Sep 17 00:00:00 2001
From: Steven Feng <steven_feng@realsil.com.cn>
Date: Tue, 23 May 2017 15:13:24 +0800
Subject: mfd: rtsx: Do retry when DMA transfer error

The request should be resent when DMA transfer error occurred.
For rts5227, the clock rate needs to be reduced when error occurred.

Signed-off-by: Steven Feng <steven_feng@realsil.com.cn>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rtsx_pcr.c       | 17 +++++++++++++++--
 include/linux/mfd/rtsx_pci.h |  5 +++++
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 850590aac008..a0ac89dfdf0f 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -30,6 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/rtsx_pci.h>
+#include <linux/mmc/card.h>
 #include <asm/unaligned.h>
 
 #include "rtsx_pcr.h"
@@ -452,8 +453,12 @@ int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 	}
 
 	spin_lock_irqsave(&pcr->lock, flags);
-	if (pcr->trans_result == TRANS_RESULT_FAIL)
-		err = -EINVAL;
+	if (pcr->trans_result == TRANS_RESULT_FAIL) {
+		err = -EILSEQ;
+		if (pcr->dma_error_count < RTS_MAX_TIMES_FREQ_REDUCTION)
+			pcr->dma_error_count++;
+	}
+
 	else if (pcr->trans_result == TRANS_NO_DEVICE)
 		err = -ENODEV;
 	spin_unlock_irqrestore(&pcr->lock, flags);
@@ -659,6 +664,13 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 	if (err < 0)
 		return err;
 
+	/* Reduce card clock by 20MHz each time a DMA transfer error occurs */
+	if (card_clock == UHS_SDR104_MAX_DTR &&
+	    pcr->dma_error_count &&
+	    PCI_PID(pcr) == RTS5227_DEVICE_ID)
+		card_clock = UHS_SDR104_MAX_DTR -
+			(pcr->dma_error_count * 20000000);
+
 	card_clock /= 1000000;
 	pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock);
 
@@ -894,6 +906,7 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
 			pcr->card_removed |= SD_EXIST;
 			pcr->card_inserted &= ~SD_EXIST;
 		}
+		pcr->dma_error_count = 0;
 	}
 
 	if (int_reg & MS_INT) {
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 7eb7cbac0a9a..116816fb9110 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -850,6 +850,9 @@
 
 #define rtsx_pci_init_cmd(pcr)		((pcr)->ci = 0)
 
+#define RTS5227_DEVICE_ID		0x5227
+#define RTS_MAX_TIMES_FREQ_REDUCTION	8
+
 struct rtsx_pcr;
 
 struct pcr_handle {
@@ -957,6 +960,8 @@ struct rtsx_pcr {
 
 	int				num_slots;
 	struct rtsx_slot		*slots;
+
+	u8				dma_error_count;
 };
 
 #define CHK_PCI_PID(pcr, pid)		((pcr)->pci->device == (pid))
-- 
cgit v1.2.3


From 1e3496000c11ec1ec56cf664b6a01d66de423507 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 13 Jun 2017 10:28:40 +0530
Subject: mfd: Add LP87565 PMIC support

The LP87565 chip is a power management IC for Portable Navigation Systems
and Tablet Computing devices. It contains the following components:

        - Configurable Bucks(Single and multi-phase).
        - Configurable General Purpose Output Signals (GPO).

The LP87565-Q1 variant device uses two 2-phase outputs configuration,
Buck0 is master for Buck0/1 output and Buck2 is master for Buck2/3
output.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/lp87565.txt |  43 ++++
 drivers/mfd/Kconfig                               |  14 ++
 drivers/mfd/Makefile                              |   1 +
 drivers/mfd/lp87565.c                             | 100 ++++++++
 include/linux/mfd/lp87565.h                       | 270 ++++++++++++++++++++++
 5 files changed, 428 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/lp87565.txt
 create mode 100644 drivers/mfd/lp87565.c
 create mode 100644 include/linux/mfd/lp87565.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/lp87565.txt b/Documentation/devicetree/bindings/mfd/lp87565.txt
new file mode 100644
index 000000000000..a48df7c08ab0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/lp87565.txt
@@ -0,0 +1,43 @@
+TI LP87565 PMIC MFD driver
+
+Required properties:
+  - compatible:	"ti,lp87565", "ti,lp87565-q1"
+  - reg:		I2C slave address.
+  - gpio-controller:	Marks the device node as a GPIO Controller.
+  - #gpio-cells:	Should be two.  The first cell is the pin number and
+			the second cell is used to specify flags.
+			See ../gpio/gpio.txt for more information.
+  - xxx-in-supply:	Phandle to parent supply node of each regulator
+			populated under regulators node. xxx should match
+			the supply_name populated in driver.
+Example:
+
+lp87565_pmic: pmic@60 {
+	compatible = "ti,lp87565-q1";
+	reg = <0x60>;
+	gpio-controller;
+	#gpio-cells = <2>;
+
+	buck10-in-supply = <&vsys_3v3>;
+	buck23-in-supply = <&vsys_3v3>;
+
+	regulators: regulators {
+		buck10_reg: buck10 {
+			/* VDD_MPU */
+			regulator-name = "buck10";
+			regulator-min-microvolt = <850000>;
+			regulator-max-microvolt = <1250000>;
+			regulator-always-on;
+			regulator-boot-on;
+		};
+
+		buck23_reg: buck23 {
+			/* VDD_GPU */
+			regulator-name = "buck23";
+			regulator-min-microvolt = <850000>;
+			regulator-max-microvolt = <1250000>;
+			regulator-boot-on;
+			regulator-always-on;
+		};
+	};
+};
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 1428814c33f9..94ad2c1c3d90 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1351,6 +1351,20 @@ config MFD_TI_LP873X
 	  This driver can also be built as a module. If so, the module
 	  will be called lp873x.
 
+config MFD_TI_LP87565
+	tristate "TI LP87565 Power Management IC"
+	depends on I2C && OF
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  If you say yes here then you get support for the LP87565 series of
+	  Power Management Integrated Circuits (PMIC).
+	  These include voltage regulators, thermal protection, configurable
+	  General Purpose Outputs (GPO) that are used in portable devices.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called lp87565.
+
 config MFD_TPS65218
 	tristate "TI TPS65218 Power Management chips"
 	depends on I2C
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 6f6aed8cfccc..080793b3fd0e 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
 
 obj-$(CONFIG_MFD_TI_LP873X)	+= lp873x.o
+obj-$(CONFIG_MFD_TI_LP87565)	+= lp87565.o
 
 obj-$(CONFIG_MFD_DAVINCI_VOICECODEC)	+= davinci_voicecodec.o
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
diff --git a/drivers/mfd/lp87565.c b/drivers/mfd/lp87565.c
new file mode 100644
index 000000000000..340ad0c63744
--- /dev/null
+++ b/drivers/mfd/lp87565.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * Author: Keerthy <j-keerthy@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/lp87565.h>
+
+static const struct regmap_config lp87565_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = LP87565_REG_MAX,
+};
+
+static const struct mfd_cell lp87565_cells[] = {
+	{ .name = "lp87565-q1-regulator", },
+	{ .name = "lp87565-q1-gpio", },
+};
+
+static const struct of_device_id of_lp87565_match_table[] = {
+	{ .compatible = "ti,lp87565", },
+	{
+		.compatible = "ti,lp87565-q1",
+		.data = (void *)LP87565_DEVICE_TYPE_LP87565_Q1,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, of_lp87565_match_table);
+
+static int lp87565_probe(struct i2c_client *client,
+			 const struct i2c_device_id *ids)
+{
+	struct lp87565 *lp87565;
+	const struct of_device_id *of_id;
+	int ret;
+	unsigned int otpid;
+
+	lp87565 = devm_kzalloc(&client->dev, sizeof(*lp87565), GFP_KERNEL);
+	if (!lp87565)
+		return -ENOMEM;
+
+	lp87565->dev = &client->dev;
+
+	lp87565->regmap = devm_regmap_init_i2c(client, &lp87565_regmap_config);
+	if (IS_ERR(lp87565->regmap)) {
+		ret = PTR_ERR(lp87565->regmap);
+		dev_err(lp87565->dev,
+			"Failed to initialize register map: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(lp87565->regmap, LP87565_REG_OTP_REV, &otpid);
+	if (ret) {
+		dev_err(lp87565->dev, "Failed to read OTP ID\n");
+		return ret;
+	}
+
+	lp87565->rev = otpid & LP87565_OTP_REV_OTP_ID;
+
+	of_id = of_match_device(of_lp87565_match_table, &client->dev);
+	if (of_id)
+		lp87565->dev_type = (enum lp87565_device_type)of_id->data;
+
+	i2c_set_clientdata(client, lp87565);
+
+	ret = mfd_add_devices(lp87565->dev, PLATFORM_DEVID_AUTO, lp87565_cells,
+			      ARRAY_SIZE(lp87565_cells), NULL, 0, NULL);
+
+	return ret;
+}
+
+static const struct i2c_device_id lp87565_id_table[] = {
+	{ "lp87565-q1", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, lp87565_id_table);
+
+static struct i2c_driver lp87565_driver = {
+	.driver	= {
+		.name	= "lp87565",
+		.of_match_table = of_lp87565_match_table,
+	},
+	.probe = lp87565_probe,
+	.id_table = lp87565_id_table,
+};
+module_i2c_driver(lp87565_driver);
+
+MODULE_AUTHOR("J Keerthy <j-keerthy@ti.com>");
+MODULE_DESCRIPTION("lp87565 chip family Multi-Function Device driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h
new file mode 100644
index 000000000000..d0c91ba65525
--- /dev/null
+++ b/include/linux/mfd/lp87565.h
@@ -0,0 +1,270 @@
+/*
+ * Functions to access LP87565 power management chip.
+ *
+ * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ */
+
+#ifndef __LINUX_MFD_LP87565_H
+#define __LINUX_MFD_LP87565_H
+
+#include <linux/i2c.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+
+enum lp87565_device_type {
+	LP87565_DEVICE_TYPE_UNKNOWN	= 0,
+	LP87565_DEVICE_TYPE_LP87565_Q1,
+};
+
+/* All register addresses */
+#define LP87565_REG_DEV_REV		0X00
+#define LP87565_REG_OTP_REV		0X01
+#define LP87565_REG_BUCK0_CTRL_1		0X02
+#define LP87565_REG_BUCK0_CTRL_2		0X03
+
+#define LP87565_REG_BUCK1_CTRL_1		0X04
+#define LP87565_REG_BUCK1_CTRL_2		0X05
+
+#define LP87565_REG_BUCK2_CTRL_1		0X06
+#define LP87565_REG_BUCK2_CTRL_2		0X07
+
+#define LP87565_REG_BUCK3_CTRL_1		0X08
+#define LP87565_REG_BUCK3_CTRL_2		0X09
+
+#define LP87565_REG_BUCK0_VOUT			0X0A
+#define LP87565_REG_BUCK0_FLOOR_VOUT		0X0B
+
+#define LP87565_REG_BUCK1_VOUT			0X0C
+#define LP87565_REG_BUCK1_FLOOR_VOUT		0X0D
+
+#define LP87565_REG_BUCK2_VOUT			0X0E
+#define LP87565_REG_BUCK2_FLOOR_VOUT		0X0F
+
+#define LP87565_REG_BUCK3_VOUT			0X10
+#define LP87565_REG_BUCK3_FLOOR_VOUT		0X11
+
+#define LP87565_REG_BUCK0_DELAY			0X12
+#define LP87565_REG_BUCK1_DELAY			0X13
+
+#define LP87565_REG_BUCK2_DELAY			0X14
+#define LP87565_REG_BUCK3_DELAY			0X15
+
+#define LP87565_REG_GPO2_DELAY			0X16
+#define LP87565_REG_GPO3_DELAY			0X17
+#define LP87565_REG_RESET			0X18
+#define LP87565_REG_CONFIG			0X19
+
+#define LP87565_REG_INT_TOP_1			0X1A
+#define LP87565_REG_INT_TOP_2			0X1B
+
+#define LP87565_REG_INT_BUCK_0_1		0X1C
+#define LP87565_REG_INT_BUCK_2_3		0X1D
+#define LP87565_REG_TOP_STAT			0X1E
+#define LP87565_REG_BUCK_0_1_STAT		0X1F
+#define LP87565_REG_BUCK_2_3_STAT		0x20
+
+#define LP87565_REG_TOP_MASK_1			0x21
+#define LP87565_REG_TOP_MASK_2			0x22
+
+#define LP87565_REG_BUCK_0_1_MASK		0x23
+#define LP87565_REG_BUCK_2_3_MASK		0x24
+#define LP87565_REG_SEL_I_LOAD			0x25
+
+#define LP87565_REG_I_LOAD_2			0x26
+#define LP87565_REG_I_LOAD_1			0x27
+
+#define LP87565_REG_PGOOD_CTRL1			0x28
+#define LP87565_REG_PGOOD_CTRL2			0x29
+#define LP87565_REG_PGOOD_FLT			0x2A
+#define LP87565_REG_PLL_CTRL			0x2B
+#define LP87565_REG_PIN_FUNCTION		0x2C
+#define LP87565_REG_GPIO_CONFIG			0x2D
+#define LP87565_REG_GPIO_IN			0x2E
+#define LP87565_REG_GPIO_OUT			0x2F
+
+#define LP87565_REG_MAX			LP87565_REG_GPIO_OUT
+
+/* Register field definitions */
+#define LP87565_DEV_REV_DEV_ID			0xC0
+#define LP87565_DEV_REV_ALL_LAYER		0x30
+#define LP87565_DEV_REV_METAL_LAYER		0x0F
+
+#define LP87565_OTP_REV_OTP_ID			0xFF
+
+#define LP87565_BUCK_CTRL_1_EN			BIT(7)
+#define LP87565_BUCK_CTRL_1_EN_PIN_CTRL		BIT(6)
+#define LP87565_BUCK_CTRL_1_PIN_SELECT_EN	0x30
+
+#define LP87565_BUCK_CTRL_1_ROOF_FLOOR_EN	BIT(3)
+#define LP87565_BUCK_CTRL_1_RDIS_EN		BIT(2)
+#define LP87565_BUCK_CTRL_1_FPWM		BIT(1)
+/* Bit0 is reserved for BUCK1 and BUCK3 and valid only for BUCK0 and BUCK2 */
+#define LP87565_BUCK_CTRL_1_FPWM_MP_0_2		BIT(0)
+
+#define LP87565_BUCK_CTRL_2_ILIM		0x38
+#define LP87565_BUCK_CTRL_2_SLEW_RATE		0x07
+
+#define LP87565_BUCK_VSET			0xFF
+#define LP87565_BUCK_FLOOR_VSET			0xFF
+
+#define LP87565_BUCK_SHUTDOWN_DELAY		0xF0
+#define LP87565_BUCK_STARTUP_DELAY		0x0F
+
+#define LP87565_GPIO_SHUTDOWN_DELAY		0xF0
+#define LP87565_GPIO_STARTUP_DELAY		0x0F
+
+#define LP87565_RESET_SW_RESET			BIT(0)
+
+#define LP87565_CONFIG_DOUBLE_DELAY		BIT(7)
+#define LP87565_CONFIG_CLKIN_PD			BIT(6)
+#define LP87565_CONFIG_EN4_PD			BIT(5)
+#define LP87565_CONFIG_EN3_PD			BIT(4)
+#define LP87565_CONFIG_TDIE_WARN_LEVEL		BIT(3)
+#define LP87565_CONFIG_EN2_PD			BIT(2)
+#define LP87565_CONFIG_EN1_PD			BIT(1)
+
+#define LP87565_INT_GPIO			BIT(7)
+#define LP87565_INT_BUCK23			BIT(6)
+#define LP87565_INT_BUCK01			BIT(5)
+#define LP87565_NO_SYNC_CLK			BIT(4)
+#define LP87565_TDIE_SD				BIT(3)
+#define LP87565_TDIE_WARN			BIT(2)
+#define LP87565_INT_OVP				BIT(1)
+#define LP87565_I_LOAD_READY			BIT(0)
+
+#define LP87565_INT_TOP2_RESET_REG		BIT(0)
+
+#define LP87565_BUCK1_PG_INT			BIT(6)
+#define LP87565_BUCK1_SC_INT			BIT(5)
+#define LP87565_BUCK1_ILIM_INT			BIT(4)
+#define LP87565_BUCK0_PG_INT			BIT(2)
+#define LP87565_BUCK0_SC_INT			BIT(1)
+#define LP87565_BUCK0_ILIM_INT			BIT(0)
+
+#define LP87565_BUCK3_PG_INT			BIT(6)
+#define LP87565_BUCK3_SC_INT			BIT(5)
+#define LP87565_BUCK3_ILIM_INT			BIT(4)
+#define LP87565_BUCK2_PG_INT			BIT(2)
+#define LP87565_BUCK2_SC_INT			BIT(1)
+#define LP87565_BUCK2_ILIM_INT			BIT(0)
+
+#define LP87565_SYNC_CLK_STAT			BIT(4)
+#define LP87565_TDIE_SD_STAT			BIT(3)
+#define LP87565_TDIE_WARN_STAT			BIT(2)
+#define LP87565_OVP_STAT			BIT(1)
+
+#define LP87565_BUCK1_STAT			BIT(7)
+#define LP87565_BUCK1_PG_STAT			BIT(6)
+#define LP87565_BUCK1_ILIM_STAT			BIT(4)
+#define LP87565_BUCK0_STAT			BIT(3)
+#define LP87565_BUCK0_PG_STAT			BIT(2)
+#define LP87565_BUCK0_ILIM_STAT			BIT(0)
+
+#define LP87565_BUCK3_STAT			BIT(7)
+#define LP87565_BUCK3_PG_STAT			BIT(6)
+#define LP87565_BUCK3_ILIM_STAT			BIT(4)
+#define LP87565_BUCK2_STAT			BIT(3)
+#define LP87565_BUCK2_PG_STAT			BIT(2)
+#define LP87565_BUCK2_ILIM_STAT			BIT(0)
+
+#define LPL87565_GPIO_MASK			BIT(7)
+#define LPL87565_SYNC_CLK_MASK			BIT(4)
+#define LPL87565_TDIE_WARN_MASK			BIT(2)
+#define LPL87565_I_LOAD_READY_MASK		BIT(0)
+
+#define LPL87565_RESET_REG_MASK			BIT(0)
+
+#define LPL87565_BUCK1_PG_MASK			BIT(6)
+#define LPL87565_BUCK1_ILIM_MASK		BIT(4)
+#define LPL87565_BUCK0_PG_MASK			BIT(2)
+#define LPL87565_BUCK0_ILIM_MASK		BIT(0)
+
+#define LPL87565_BUCK3_PG_MASK			BIT(6)
+#define LPL87565_BUCK3_ILIM_MASK		BIT(4)
+#define LPL87565_BUCK2_PG_MASK			BIT(2)
+#define LPL87565_BUCK2_ILIM_MASK		BIT(0)
+
+#define LP87565_LOAD_CURRENT_BUCK_SELECT	0x3
+
+#define LP87565_I_LOAD2_BUCK_LOAD_CURRENT	0x3
+#define LP87565_I_LOAD1_BUCK_LOAD_CURRENT	0xFF
+
+#define LP87565_PG3_SEL				0xC0
+#define LP87565_PG2_SEL				0x30
+#define LP87565_PG1_SEL				0x0C
+#define LP87565_PG0_SEL				0x03
+
+#define LP87565_HALF_DAY			BIT(7)
+#define LP87565_EN_PG0_NINT			BIT(6)
+#define LP87565_PGOOD_SET_DELAY			BIT(5)
+#define LP87565_EN_PGFLT_STAT			BIT(4)
+#define LP87565_PGOOD_WINDOW			BIT(2)
+#define LP87565_PGOOD_OD			BIT(1)
+#define LP87565_PGOOD_POL			BIT(0)
+
+#define LP87565_PG3_FLT				BIT(3)
+#define LP87565_PG2_FLT				BIT(2)
+#define LP87565_PG1_FLT				BIT(1)
+#define LP87565_PG0_FLT				BIT(0)
+
+#define LP87565_PLL_MODE			0xC0
+#define LP87565_EXT_CLK_FREQ			0x1F
+
+#define LP87565_EN_SPREAD_SPEC			BIT(7)
+#define LP87565_EN_PIN_CTRL_GPIO3		BIT(6)
+#define LP87565_EN_PIN_SELECT_GPIO3		BIT(5)
+#define LP87565_EN_PIN_CTRL_GPIO2		BIT(4)
+#define LP87565_EN_PIN_SELECT_GPIO2		BIT(3)
+#define LP87565_GPIO3_SEL			BIT(2)
+#define LP87565_GPIO2_SEL			BIT(1)
+#define LP87565_GPIO1_SEL			BIT(0)
+
+#define LP87565_GOIO3_OD			BIT(6)
+#define LP87565_GOIO2_OD			BIT(5)
+#define LP87565_GOIO1_OD			BIT(4)
+#define LP87565_GOIO3_DIR			BIT(2)
+#define LP87565_GOIO2_DIR			BIT(1)
+#define LP87565_GOIO1_DIR			BIT(0)
+
+#define LP87565_GOIO3_IN			BIT(2)
+#define LP87565_GOIO2_IN			BIT(1)
+#define LP87565_GOIO1_IN			BIT(0)
+
+#define LP87565_GOIO3_OUT			BIT(2)
+#define LP87565_GOIO2_OUT			BIT(1)
+#define LP87565_GOIO1_OUT			BIT(0)
+
+/* Number of step-down converters available */
+#define LP87565_NUM_BUCK		6
+
+enum LP87565_regulator_id {
+	/* BUCK's */
+	LP87565_BUCK_0,
+	LP87565_BUCK_1,
+	LP87565_BUCK_2,
+	LP87565_BUCK_3,
+	LP87565_BUCK_10,
+	LP87565_BUCK_23,
+};
+
+/**
+ * struct LP87565 - state holder for the LP87565 driver
+ * @dev: struct device pointer for MFD device
+ * @rev: revision of the LP87565
+ * @dev_type: The device type for example lp87565-q1
+ * @lock: lock guarding the data structure
+ * @regmap: register map of the LP87565 PMIC
+ *
+ * Device data may be used to access the LP87565 chip
+ */
+struct lp87565 {
+	struct device *dev;
+	u8 rev;
+	u8 dev_type;
+	struct regmap *regmap;
+};
+#endif /* __LINUX_MFD_LP87565_H */
-- 
cgit v1.2.3


From 4a25220d4e43bb2461823dbc7eb1502a34087958 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:18 +0100
Subject: hugetlbfs: Implement show_options

Implement the show_options superblock op for hugetlbfs as part of a bid to
get rid of s_options and generic_show_options() to make it easier to
implement a context-based mount where the mount options can be passed
individually over a file descriptor.

Note that the uid and gid should possibly be displayed relative to the
viewer's user namespace.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Nadia Yvette Chambers <nyc@holomorphy.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hugetlbfs/inode.c    | 70 +++++++++++++++++++++++++++++++++++++++----------
 include/linux/hugetlb.h |  3 +++
 2 files changed, 59 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d44f5456eb9b..99b3b9836575 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -46,13 +46,13 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
 static const struct inode_operations hugetlbfs_inode_operations;
 
 struct hugetlbfs_config {
-	kuid_t   uid;
-	kgid_t   gid;
-	umode_t mode;
-	long	max_hpages;
-	long	nr_inodes;
-	struct hstate *hstate;
-	long    min_hpages;
+	struct hstate		*hstate;
+	long			max_hpages;
+	long			nr_inodes;
+	long			min_hpages;
+	kuid_t			uid;
+	kgid_t			gid;
+	umode_t			mode;
 };
 
 struct hugetlbfs_inode_info {
@@ -851,6 +851,46 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
 	return MIGRATEPAGE_SUCCESS;
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int hugetlbfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(root->d_sb);
+	struct hugepage_subpool *spool = sbinfo->spool;
+	unsigned long hpage_size = huge_page_size(sbinfo->hstate);
+	unsigned hpage_shift = huge_page_shift(sbinfo->hstate);
+	char mod;
+
+	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, sbinfo->uid));
+	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, sbinfo->gid));
+	if (sbinfo->mode != 0755)
+		seq_printf(m, ",mode=%o", sbinfo->mode);
+	if (sbinfo->max_inodes != -1)
+		seq_printf(m, ",nr_inodes=%lu", sbinfo->max_inodes);
+
+	hpage_size /= 1024;
+	mod = 'K';
+	if (hpage_size >= 1024) {
+		hpage_size /= 1024;
+		mod = 'M';
+	}
+	seq_printf(m, ",pagesize=%lu%c", hpage_size, mod);
+	if (spool) {
+		if (spool->max_hpages != -1)
+			seq_printf(m, ",size=%llu",
+				   (unsigned long long)spool->max_hpages << hpage_shift);
+		if (spool->min_hpages != -1)
+			seq_printf(m, ",min_size=%llu",
+				   (unsigned long long)spool->min_hpages << hpage_shift);
+	}
+	return 0;
+}
+
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -1008,19 +1048,19 @@ static const struct super_operations hugetlbfs_ops = {
 	.evict_inode	= hugetlbfs_evict_inode,
 	.statfs		= hugetlbfs_statfs,
 	.put_super	= hugetlbfs_put_super,
-	.show_options	= generic_show_options,
+	.show_options	= hugetlbfs_show_options,
 };
 
-enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
+enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
 
 /*
  * Convert size option passed from command line to number of huge pages
  * in the pool specified by hstate.  Size option could be in bytes
  * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
  */
-static long long
+static long
 hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
-								int val_type)
+			 enum hugetlbfs_size_type val_type)
 {
 	if (val_type == NO_SIZE)
 		return -1;
@@ -1042,7 +1082,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 	substring_t args[MAX_OPT_ARGS];
 	int option;
 	unsigned long long max_size_opt = 0, min_size_opt = 0;
-	int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
+	enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -1156,8 +1196,6 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct hugetlbfs_config config;
 	struct hugetlbfs_sb_info *sbinfo;
 
-	save_mount_options(sb, data);
-
 	config.max_hpages = -1; /* No limit on size by default */
 	config.nr_inodes = -1; /* No limit on number of inodes by default */
 	config.uid = current_fsuid();
@@ -1178,6 +1216,10 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sbinfo->spool = NULL;
+	sbinfo->uid = config.uid;
+	sbinfo->gid = config.gid;
+	sbinfo->mode = config.mode;
+
 	/*
 	 * Allocate and initialize subpool if maximum or minimum size is
 	 * specified.  Any needed reservations (for minimim size) are taken
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b857fc8cc2ec..3b6eeaad2f77 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -262,6 +262,9 @@ struct hugetlbfs_sb_info {
 	spinlock_t	stat_lock;
 	struct hstate *hstate;
 	struct hugepage_subpool *spool;
+	kuid_t	uid;
+	kgid_t	gid;
+	umode_t mode;
 };
 
 static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
-- 
cgit v1.2.3


From 8476d6cde2cd11a2cb87bd7392fc318eec25c8a0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 May 2017 00:09:52 +0200
Subject: backlight: adp8860: Move header file out of I2C realm

include/linux/i2c is not for client devices.

Move the header file to a more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 arch/blackfin/mach-bf537/boards/stamp.c |   2 +-
 drivers/video/backlight/adp8860_bl.c    |   2 +-
 include/linux/i2c/adp8860.h             | 154 --------------------------------
 include/linux/platform_data/adp8860.h   | 154 ++++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+), 156 deletions(-)
 delete mode 100644 include/linux/i2c/adp8860.h
 create mode 100644 include/linux/platform_data/adp8860.h

(limited to 'include/linux')

diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index eaec7b4832a2..76983dfba324 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -2047,7 +2047,7 @@ static struct adp8870_backlight_platform_data adp8870_pdata = {
 #endif
 
 #if IS_ENABLED(CONFIG_BACKLIGHT_ADP8860)
-#include <linux/i2c/adp8860.h>
+#include <linux/platform_data/adp8860.h>
 static struct led_info adp8860_leds[] = {
 	{
 		.name = "adp8860-led7",
diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c
index 510e559c060e..e7315bf14d60 100644
--- a/drivers/video/backlight/adp8860_bl.c
+++ b/drivers/video/backlight/adp8860_bl.c
@@ -18,7 +18,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
-#include <linux/i2c/adp8860.h>
+#include <linux/platform_data/adp8860.h>
 #define ADP8860_EXT_FEATURES
 #define ADP8860_USE_LEDS
 
diff --git a/include/linux/i2c/adp8860.h b/include/linux/i2c/adp8860.h
deleted file mode 100644
index 0b4d39855c91..000000000000
--- a/include/linux/i2c/adp8860.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Definitions and platform data for Analog Devices
- * Backlight drivers ADP8860
- *
- * Copyright 2009-2010 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __LINUX_I2C_ADP8860_H
-#define __LINUX_I2C_ADP8860_H
-
-#include <linux/leds.h>
-#include <linux/types.h>
-
-#define ID_ADP8860		8860
-
-#define ADP8860_MAX_BRIGHTNESS	0x7F
-#define FLAG_OFFT_SHIFT 8
-
-/*
- * LEDs subdevice platform data
- */
-
-#define ADP8860_LED_DIS_BLINK	(0 << FLAG_OFFT_SHIFT)
-#define ADP8860_LED_OFFT_600ms	(1 << FLAG_OFFT_SHIFT)
-#define ADP8860_LED_OFFT_1200ms	(2 << FLAG_OFFT_SHIFT)
-#define ADP8860_LED_OFFT_1800ms	(3 << FLAG_OFFT_SHIFT)
-
-#define ADP8860_LED_ONT_200ms	0
-#define ADP8860_LED_ONT_600ms	1
-#define ADP8860_LED_ONT_800ms	2
-#define ADP8860_LED_ONT_1200ms	3
-
-#define ADP8860_LED_D7		(7)
-#define ADP8860_LED_D6		(6)
-#define ADP8860_LED_D5		(5)
-#define ADP8860_LED_D4		(4)
-#define ADP8860_LED_D3		(3)
-#define ADP8860_LED_D2		(2)
-#define ADP8860_LED_D1		(1)
-
-/*
- * Backlight subdevice platform data
- */
-
-#define ADP8860_BL_D7		(1 << 6)
-#define ADP8860_BL_D6		(1 << 5)
-#define ADP8860_BL_D5		(1 << 4)
-#define ADP8860_BL_D4		(1 << 3)
-#define ADP8860_BL_D3		(1 << 2)
-#define ADP8860_BL_D2		(1 << 1)
-#define ADP8860_BL_D1		(1 << 0)
-
-#define ADP8860_FADE_T_DIS	0	/* Fade Timer Disabled */
-#define ADP8860_FADE_T_300ms	1	/* 0.3 Sec */
-#define ADP8860_FADE_T_600ms	2
-#define ADP8860_FADE_T_900ms	3
-#define ADP8860_FADE_T_1200ms	4
-#define ADP8860_FADE_T_1500ms	5
-#define ADP8860_FADE_T_1800ms	6
-#define ADP8860_FADE_T_2100ms	7
-#define ADP8860_FADE_T_2400ms	8
-#define ADP8860_FADE_T_2700ms	9
-#define ADP8860_FADE_T_3000ms	10
-#define ADP8860_FADE_T_3500ms	11
-#define ADP8860_FADE_T_4000ms	12
-#define ADP8860_FADE_T_4500ms	13
-#define ADP8860_FADE_T_5000ms	14
-#define ADP8860_FADE_T_5500ms	15	/* 5.5 Sec */
-
-#define ADP8860_FADE_LAW_LINEAR	0
-#define ADP8860_FADE_LAW_SQUARE	1
-#define ADP8860_FADE_LAW_CUBIC1	2
-#define ADP8860_FADE_LAW_CUBIC2	3
-
-#define ADP8860_BL_AMBL_FILT_80ms	0	/* Light sensor filter time */
-#define ADP8860_BL_AMBL_FILT_160ms	1
-#define ADP8860_BL_AMBL_FILT_320ms	2
-#define ADP8860_BL_AMBL_FILT_640ms	3
-#define ADP8860_BL_AMBL_FILT_1280ms	4
-#define ADP8860_BL_AMBL_FILT_2560ms	5
-#define ADP8860_BL_AMBL_FILT_5120ms	6
-#define ADP8860_BL_AMBL_FILT_10240ms	7	/* 10.24 sec */
-
-/*
- * Blacklight current 0..30mA
- */
-#define ADP8860_BL_CUR_mA(I)		((I * 127) / 30)
-
-/*
- * L2 comparator current 0..1106uA
- */
-#define ADP8860_L2_COMP_CURR_uA(I)	((I * 255) / 1106)
-
-/*
- * L3 comparator current 0..138uA
- */
-#define ADP8860_L3_COMP_CURR_uA(I)	((I * 255) / 138)
-
-struct adp8860_backlight_platform_data {
-	u8 bl_led_assign;	/* 1 = Backlight 0 = Individual LED */
-
-	u8 bl_fade_in;		/* Backlight Fade-In Timer */
-	u8 bl_fade_out;		/* Backlight Fade-Out Timer */
-	u8 bl_fade_law;		/* fade-on/fade-off transfer characteristic */
-
-	u8 en_ambl_sens;	/* 1 = enable ambient light sensor */
-	u8 abml_filt;		/* Light sensor filter time */
-
-	u8 l1_daylight_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l1_daylight_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l2_office_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l2_office_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l3_dark_max;		/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l3_dark_dim;		/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-
-	u8 l2_trip;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
-	u8 l2_hyst;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
-	u8 l3_trip;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
-	u8 l3_hyst;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
-
-	/**
-	 * Independent Current Sinks / LEDS
-	 * Sinks not assigned to the Backlight can be exposed to
-	 * user space using the LEDS CLASS interface
-	 */
-
-	int num_leds;
-	struct led_info	*leds;
-	u8 led_fade_in;		/* LED Fade-In Timer */
-	u8 led_fade_out;	/* LED Fade-Out Timer */
-	u8 led_fade_law;	/* fade-on/fade-off transfer characteristic */
-	u8 led_on_time;
-
-	/**
-	 * Gain down disable. Setting this option does not allow the
-	 * charge pump to switch to lower gains. NOT AVAILABLE on ADP8860
-	 * 1 = the charge pump doesn't switch down in gain until all LEDs are 0.
-	 *  The charge pump switches up in gain as needed. This feature is
-	 *  useful if the ADP8863 charge pump is used to drive an external load.
-	 *  This feature must be used when utilizing small fly capacitors
-	 *  (0402 or smaller).
-	 * 0 = the charge pump automatically switches up and down in gain.
-	 *  This provides optimal efficiency, but is not suitable for driving
-	 *  loads that are not connected through the ADP8863 diode drivers.
-	 *  Additionally, the charge pump fly capacitors should be low ESR
-	 * and sized 0603 or greater.
-	 */
-
-	u8 gdwn_dis;
-};
-
-#endif /* __LINUX_I2C_ADP8860_H */
diff --git a/include/linux/platform_data/adp8860.h b/include/linux/platform_data/adp8860.h
new file mode 100644
index 000000000000..0b4d39855c91
--- /dev/null
+++ b/include/linux/platform_data/adp8860.h
@@ -0,0 +1,154 @@
+/*
+ * Definitions and platform data for Analog Devices
+ * Backlight drivers ADP8860
+ *
+ * Copyright 2009-2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __LINUX_I2C_ADP8860_H
+#define __LINUX_I2C_ADP8860_H
+
+#include <linux/leds.h>
+#include <linux/types.h>
+
+#define ID_ADP8860		8860
+
+#define ADP8860_MAX_BRIGHTNESS	0x7F
+#define FLAG_OFFT_SHIFT 8
+
+/*
+ * LEDs subdevice platform data
+ */
+
+#define ADP8860_LED_DIS_BLINK	(0 << FLAG_OFFT_SHIFT)
+#define ADP8860_LED_OFFT_600ms	(1 << FLAG_OFFT_SHIFT)
+#define ADP8860_LED_OFFT_1200ms	(2 << FLAG_OFFT_SHIFT)
+#define ADP8860_LED_OFFT_1800ms	(3 << FLAG_OFFT_SHIFT)
+
+#define ADP8860_LED_ONT_200ms	0
+#define ADP8860_LED_ONT_600ms	1
+#define ADP8860_LED_ONT_800ms	2
+#define ADP8860_LED_ONT_1200ms	3
+
+#define ADP8860_LED_D7		(7)
+#define ADP8860_LED_D6		(6)
+#define ADP8860_LED_D5		(5)
+#define ADP8860_LED_D4		(4)
+#define ADP8860_LED_D3		(3)
+#define ADP8860_LED_D2		(2)
+#define ADP8860_LED_D1		(1)
+
+/*
+ * Backlight subdevice platform data
+ */
+
+#define ADP8860_BL_D7		(1 << 6)
+#define ADP8860_BL_D6		(1 << 5)
+#define ADP8860_BL_D5		(1 << 4)
+#define ADP8860_BL_D4		(1 << 3)
+#define ADP8860_BL_D3		(1 << 2)
+#define ADP8860_BL_D2		(1 << 1)
+#define ADP8860_BL_D1		(1 << 0)
+
+#define ADP8860_FADE_T_DIS	0	/* Fade Timer Disabled */
+#define ADP8860_FADE_T_300ms	1	/* 0.3 Sec */
+#define ADP8860_FADE_T_600ms	2
+#define ADP8860_FADE_T_900ms	3
+#define ADP8860_FADE_T_1200ms	4
+#define ADP8860_FADE_T_1500ms	5
+#define ADP8860_FADE_T_1800ms	6
+#define ADP8860_FADE_T_2100ms	7
+#define ADP8860_FADE_T_2400ms	8
+#define ADP8860_FADE_T_2700ms	9
+#define ADP8860_FADE_T_3000ms	10
+#define ADP8860_FADE_T_3500ms	11
+#define ADP8860_FADE_T_4000ms	12
+#define ADP8860_FADE_T_4500ms	13
+#define ADP8860_FADE_T_5000ms	14
+#define ADP8860_FADE_T_5500ms	15	/* 5.5 Sec */
+
+#define ADP8860_FADE_LAW_LINEAR	0
+#define ADP8860_FADE_LAW_SQUARE	1
+#define ADP8860_FADE_LAW_CUBIC1	2
+#define ADP8860_FADE_LAW_CUBIC2	3
+
+#define ADP8860_BL_AMBL_FILT_80ms	0	/* Light sensor filter time */
+#define ADP8860_BL_AMBL_FILT_160ms	1
+#define ADP8860_BL_AMBL_FILT_320ms	2
+#define ADP8860_BL_AMBL_FILT_640ms	3
+#define ADP8860_BL_AMBL_FILT_1280ms	4
+#define ADP8860_BL_AMBL_FILT_2560ms	5
+#define ADP8860_BL_AMBL_FILT_5120ms	6
+#define ADP8860_BL_AMBL_FILT_10240ms	7	/* 10.24 sec */
+
+/*
+ * Blacklight current 0..30mA
+ */
+#define ADP8860_BL_CUR_mA(I)		((I * 127) / 30)
+
+/*
+ * L2 comparator current 0..1106uA
+ */
+#define ADP8860_L2_COMP_CURR_uA(I)	((I * 255) / 1106)
+
+/*
+ * L3 comparator current 0..138uA
+ */
+#define ADP8860_L3_COMP_CURR_uA(I)	((I * 255) / 138)
+
+struct adp8860_backlight_platform_data {
+	u8 bl_led_assign;	/* 1 = Backlight 0 = Individual LED */
+
+	u8 bl_fade_in;		/* Backlight Fade-In Timer */
+	u8 bl_fade_out;		/* Backlight Fade-Out Timer */
+	u8 bl_fade_law;		/* fade-on/fade-off transfer characteristic */
+
+	u8 en_ambl_sens;	/* 1 = enable ambient light sensor */
+	u8 abml_filt;		/* Light sensor filter time */
+
+	u8 l1_daylight_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l1_daylight_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_office_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_office_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l3_dark_max;		/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l3_dark_dim;		/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+
+	u8 l2_trip;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
+	u8 l2_hyst;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
+	u8 l3_trip;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
+	u8 l3_hyst;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
+
+	/**
+	 * Independent Current Sinks / LEDS
+	 * Sinks not assigned to the Backlight can be exposed to
+	 * user space using the LEDS CLASS interface
+	 */
+
+	int num_leds;
+	struct led_info	*leds;
+	u8 led_fade_in;		/* LED Fade-In Timer */
+	u8 led_fade_out;	/* LED Fade-Out Timer */
+	u8 led_fade_law;	/* fade-on/fade-off transfer characteristic */
+	u8 led_on_time;
+
+	/**
+	 * Gain down disable. Setting this option does not allow the
+	 * charge pump to switch to lower gains. NOT AVAILABLE on ADP8860
+	 * 1 = the charge pump doesn't switch down in gain until all LEDs are 0.
+	 *  The charge pump switches up in gain as needed. This feature is
+	 *  useful if the ADP8863 charge pump is used to drive an external load.
+	 *  This feature must be used when utilizing small fly capacitors
+	 *  (0402 or smaller).
+	 * 0 = the charge pump automatically switches up and down in gain.
+	 *  This provides optimal efficiency, but is not suitable for driving
+	 *  loads that are not connected through the ADP8863 diode drivers.
+	 *  Additionally, the charge pump fly capacitors should be low ESR
+	 * and sized 0603 or greater.
+	 */
+
+	u8 gdwn_dis;
+};
+
+#endif /* __LINUX_I2C_ADP8860_H */
-- 
cgit v1.2.3


From 056d6ff470a8e782648fd020940c04d0d4a0d761 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 May 2017 00:09:53 +0200
Subject: video: adp8870: move header file out of I2C realm

include/linux/i2c is not for client devices. Move the header file to a
more appropriate location.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 arch/blackfin/mach-bf537/boards/stamp.c |   2 +-
 drivers/video/backlight/adp8870_bl.c    |   2 +-
 include/linux/i2c/adp8870.h             | 153 --------------------------------
 include/linux/platform_data/adp8870.h   | 153 ++++++++++++++++++++++++++++++++
 4 files changed, 155 insertions(+), 155 deletions(-)
 delete mode 100644 include/linux/i2c/adp8870.h
 create mode 100644 include/linux/platform_data/adp8870.h

(limited to 'include/linux')

diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 76983dfba324..388b91e21b74 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -1995,7 +1995,7 @@ static struct adp5588_gpio_platform_data adp5588_gpio_data = {
 #endif
 
 #if IS_ENABLED(CONFIG_BACKLIGHT_ADP8870)
-#include <linux/i2c/adp8870.h>
+#include <linux/platform_data/adp8870.h>
 static struct led_info adp8870_leds[] = {
 	{
 		.name = "adp8870-led7",
diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c
index 21acac90fd77..058d1def2d1f 100644
--- a/drivers/video/backlight/adp8870_bl.c
+++ b/drivers/video/backlight/adp8870_bl.c
@@ -18,7 +18,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 
-#include <linux/i2c/adp8870.h>
+#include <linux/platform_data/adp8870.h>
 #define ADP8870_EXT_FEATURES
 #define ADP8870_USE_LEDS
 
diff --git a/include/linux/i2c/adp8870.h b/include/linux/i2c/adp8870.h
deleted file mode 100644
index 624dceccbd5b..000000000000
--- a/include/linux/i2c/adp8870.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Definitions and platform data for Analog Devices
- * Backlight drivers ADP8870
- *
- * Copyright 2009-2010 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __LINUX_I2C_ADP8870_H
-#define __LINUX_I2C_ADP8870_H
-
-#define ID_ADP8870		8870
-
-#define ADP8870_MAX_BRIGHTNESS	0x7F
-#define FLAG_OFFT_SHIFT 8
-
-/*
- * LEDs subdevice platform data
- */
-
-#define ADP8870_LED_DIS_BLINK	(0 << FLAG_OFFT_SHIFT)
-#define ADP8870_LED_OFFT_600ms	(1 << FLAG_OFFT_SHIFT)
-#define ADP8870_LED_OFFT_1200ms	(2 << FLAG_OFFT_SHIFT)
-#define ADP8870_LED_OFFT_1800ms	(3 << FLAG_OFFT_SHIFT)
-
-#define ADP8870_LED_ONT_200ms	0
-#define ADP8870_LED_ONT_600ms	1
-#define ADP8870_LED_ONT_800ms	2
-#define ADP8870_LED_ONT_1200ms	3
-
-#define ADP8870_LED_D7		(7)
-#define ADP8870_LED_D6		(6)
-#define ADP8870_LED_D5		(5)
-#define ADP8870_LED_D4		(4)
-#define ADP8870_LED_D3		(3)
-#define ADP8870_LED_D2		(2)
-#define ADP8870_LED_D1		(1)
-
-/*
- * Backlight subdevice platform data
- */
-
-#define ADP8870_BL_D7		(1 << 6)
-#define ADP8870_BL_D6		(1 << 5)
-#define ADP8870_BL_D5		(1 << 4)
-#define ADP8870_BL_D4		(1 << 3)
-#define ADP8870_BL_D3		(1 << 2)
-#define ADP8870_BL_D2		(1 << 1)
-#define ADP8870_BL_D1		(1 << 0)
-
-#define ADP8870_FADE_T_DIS	0	/* Fade Timer Disabled */
-#define ADP8870_FADE_T_300ms	1	/* 0.3 Sec */
-#define ADP8870_FADE_T_600ms	2
-#define ADP8870_FADE_T_900ms	3
-#define ADP8870_FADE_T_1200ms	4
-#define ADP8870_FADE_T_1500ms	5
-#define ADP8870_FADE_T_1800ms	6
-#define ADP8870_FADE_T_2100ms	7
-#define ADP8870_FADE_T_2400ms	8
-#define ADP8870_FADE_T_2700ms	9
-#define ADP8870_FADE_T_3000ms	10
-#define ADP8870_FADE_T_3500ms	11
-#define ADP8870_FADE_T_4000ms	12
-#define ADP8870_FADE_T_4500ms	13
-#define ADP8870_FADE_T_5000ms	14
-#define ADP8870_FADE_T_5500ms	15	/* 5.5 Sec */
-
-#define ADP8870_FADE_LAW_LINEAR	0
-#define ADP8870_FADE_LAW_SQUARE	1
-#define ADP8870_FADE_LAW_CUBIC1	2
-#define ADP8870_FADE_LAW_CUBIC2	3
-
-#define ADP8870_BL_AMBL_FILT_80ms	0	/* Light sensor filter time */
-#define ADP8870_BL_AMBL_FILT_160ms	1
-#define ADP8870_BL_AMBL_FILT_320ms	2
-#define ADP8870_BL_AMBL_FILT_640ms	3
-#define ADP8870_BL_AMBL_FILT_1280ms	4
-#define ADP8870_BL_AMBL_FILT_2560ms	5
-#define ADP8870_BL_AMBL_FILT_5120ms	6
-#define ADP8870_BL_AMBL_FILT_10240ms	7	/* 10.24 sec */
-
-/*
- * Blacklight current 0..30mA
- */
-#define ADP8870_BL_CUR_mA(I)		((I * 127) / 30)
-
-/*
- * L2 comparator current 0..1106uA
- */
-#define ADP8870_L2_COMP_CURR_uA(I)	((I * 255) / 1106)
-
-/*
- * L3 comparator current 0..551uA
- */
-#define ADP8870_L3_COMP_CURR_uA(I)	((I * 255) / 551)
-
-/*
- * L4 comparator current 0..275uA
- */
-#define ADP8870_L4_COMP_CURR_uA(I)	((I * 255) / 275)
-
-/*
- * L5 comparator current 0..138uA
- */
-#define ADP8870_L5_COMP_CURR_uA(I)	((I * 255) / 138)
-
-struct adp8870_backlight_platform_data {
-	u8 bl_led_assign;	/* 1 = Backlight 0 = Individual LED */
-	u8 pwm_assign;		/* 1 = Enables PWM mode */
-
-	u8 bl_fade_in;		/* Backlight Fade-In Timer */
-	u8 bl_fade_out;		/* Backlight Fade-Out Timer */
-	u8 bl_fade_law;		/* fade-on/fade-off transfer characteristic */
-
-	u8 en_ambl_sens;	/* 1 = enable ambient light sensor */
-	u8 abml_filt;		/* Light sensor filter time */
-
-	u8 l1_daylight_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l1_daylight_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l2_bright_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l2_bright_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l3_office_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l3_office_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l4_indoor_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l4_indor_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l5_dark_max;		/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
-	u8 l5_dark_dim;		/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
-
-	u8 l2_trip;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
-	u8 l2_hyst;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
-	u8 l3_trip;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
-	u8 l3_hyst;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
-	u8 l4_trip;		/* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */
-	u8 l4_hyst;		/* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */
-	u8 l5_trip;		/* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */
-	u8 l5_hyst;		/* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */
-
-	/**
-	 * Independent Current Sinks / LEDS
-	 * Sinks not assigned to the Backlight can be exposed to
-	 * user space using the LEDS CLASS interface
-	 */
-
-	int num_leds;
-	struct led_info	*leds;
-	u8 led_fade_in;		/* LED Fade-In Timer */
-	u8 led_fade_out;	/* LED Fade-Out Timer */
-	u8 led_fade_law;	/* fade-on/fade-off transfer characteristic */
-	u8 led_on_time;
-};
-
-#endif /* __LINUX_I2C_ADP8870_H */
diff --git a/include/linux/platform_data/adp8870.h b/include/linux/platform_data/adp8870.h
new file mode 100644
index 000000000000..624dceccbd5b
--- /dev/null
+++ b/include/linux/platform_data/adp8870.h
@@ -0,0 +1,153 @@
+/*
+ * Definitions and platform data for Analog Devices
+ * Backlight drivers ADP8870
+ *
+ * Copyright 2009-2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __LINUX_I2C_ADP8870_H
+#define __LINUX_I2C_ADP8870_H
+
+#define ID_ADP8870		8870
+
+#define ADP8870_MAX_BRIGHTNESS	0x7F
+#define FLAG_OFFT_SHIFT 8
+
+/*
+ * LEDs subdevice platform data
+ */
+
+#define ADP8870_LED_DIS_BLINK	(0 << FLAG_OFFT_SHIFT)
+#define ADP8870_LED_OFFT_600ms	(1 << FLAG_OFFT_SHIFT)
+#define ADP8870_LED_OFFT_1200ms	(2 << FLAG_OFFT_SHIFT)
+#define ADP8870_LED_OFFT_1800ms	(3 << FLAG_OFFT_SHIFT)
+
+#define ADP8870_LED_ONT_200ms	0
+#define ADP8870_LED_ONT_600ms	1
+#define ADP8870_LED_ONT_800ms	2
+#define ADP8870_LED_ONT_1200ms	3
+
+#define ADP8870_LED_D7		(7)
+#define ADP8870_LED_D6		(6)
+#define ADP8870_LED_D5		(5)
+#define ADP8870_LED_D4		(4)
+#define ADP8870_LED_D3		(3)
+#define ADP8870_LED_D2		(2)
+#define ADP8870_LED_D1		(1)
+
+/*
+ * Backlight subdevice platform data
+ */
+
+#define ADP8870_BL_D7		(1 << 6)
+#define ADP8870_BL_D6		(1 << 5)
+#define ADP8870_BL_D5		(1 << 4)
+#define ADP8870_BL_D4		(1 << 3)
+#define ADP8870_BL_D3		(1 << 2)
+#define ADP8870_BL_D2		(1 << 1)
+#define ADP8870_BL_D1		(1 << 0)
+
+#define ADP8870_FADE_T_DIS	0	/* Fade Timer Disabled */
+#define ADP8870_FADE_T_300ms	1	/* 0.3 Sec */
+#define ADP8870_FADE_T_600ms	2
+#define ADP8870_FADE_T_900ms	3
+#define ADP8870_FADE_T_1200ms	4
+#define ADP8870_FADE_T_1500ms	5
+#define ADP8870_FADE_T_1800ms	6
+#define ADP8870_FADE_T_2100ms	7
+#define ADP8870_FADE_T_2400ms	8
+#define ADP8870_FADE_T_2700ms	9
+#define ADP8870_FADE_T_3000ms	10
+#define ADP8870_FADE_T_3500ms	11
+#define ADP8870_FADE_T_4000ms	12
+#define ADP8870_FADE_T_4500ms	13
+#define ADP8870_FADE_T_5000ms	14
+#define ADP8870_FADE_T_5500ms	15	/* 5.5 Sec */
+
+#define ADP8870_FADE_LAW_LINEAR	0
+#define ADP8870_FADE_LAW_SQUARE	1
+#define ADP8870_FADE_LAW_CUBIC1	2
+#define ADP8870_FADE_LAW_CUBIC2	3
+
+#define ADP8870_BL_AMBL_FILT_80ms	0	/* Light sensor filter time */
+#define ADP8870_BL_AMBL_FILT_160ms	1
+#define ADP8870_BL_AMBL_FILT_320ms	2
+#define ADP8870_BL_AMBL_FILT_640ms	3
+#define ADP8870_BL_AMBL_FILT_1280ms	4
+#define ADP8870_BL_AMBL_FILT_2560ms	5
+#define ADP8870_BL_AMBL_FILT_5120ms	6
+#define ADP8870_BL_AMBL_FILT_10240ms	7	/* 10.24 sec */
+
+/*
+ * Blacklight current 0..30mA
+ */
+#define ADP8870_BL_CUR_mA(I)		((I * 127) / 30)
+
+/*
+ * L2 comparator current 0..1106uA
+ */
+#define ADP8870_L2_COMP_CURR_uA(I)	((I * 255) / 1106)
+
+/*
+ * L3 comparator current 0..551uA
+ */
+#define ADP8870_L3_COMP_CURR_uA(I)	((I * 255) / 551)
+
+/*
+ * L4 comparator current 0..275uA
+ */
+#define ADP8870_L4_COMP_CURR_uA(I)	((I * 255) / 275)
+
+/*
+ * L5 comparator current 0..138uA
+ */
+#define ADP8870_L5_COMP_CURR_uA(I)	((I * 255) / 138)
+
+struct adp8870_backlight_platform_data {
+	u8 bl_led_assign;	/* 1 = Backlight 0 = Individual LED */
+	u8 pwm_assign;		/* 1 = Enables PWM mode */
+
+	u8 bl_fade_in;		/* Backlight Fade-In Timer */
+	u8 bl_fade_out;		/* Backlight Fade-Out Timer */
+	u8 bl_fade_law;		/* fade-on/fade-off transfer characteristic */
+
+	u8 en_ambl_sens;	/* 1 = enable ambient light sensor */
+	u8 abml_filt;		/* Light sensor filter time */
+
+	u8 l1_daylight_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l1_daylight_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_bright_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_bright_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l3_office_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l3_office_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l4_indoor_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l4_indor_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l5_dark_max;		/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l5_dark_dim;		/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+
+	u8 l2_trip;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
+	u8 l2_hyst;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */
+	u8 l3_trip;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
+	u8 l3_hyst;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */
+	u8 l4_trip;		/* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */
+	u8 l4_hyst;		/* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */
+	u8 l5_trip;		/* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */
+	u8 l5_hyst;		/* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */
+
+	/**
+	 * Independent Current Sinks / LEDS
+	 * Sinks not assigned to the Backlight can be exposed to
+	 * user space using the LEDS CLASS interface
+	 */
+
+	int num_leds;
+	struct led_info	*leds;
+	u8 led_fade_in;		/* LED Fade-In Timer */
+	u8 led_fade_out;	/* LED Fade-Out Timer */
+	u8 led_fade_law;	/* fade-on/fade-off transfer characteristic */
+	u8 led_on_time;
+};
+
+#endif /* __LINUX_I2C_ADP8870_H */
-- 
cgit v1.2.3


From 87354e5de04fe727227ff619af164202adcfa4d4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 6 Jul 2017 07:02:21 -0400
Subject: buffer: set errors in mapping at the time that the error occurs

I noticed on xfs that I could still sometimes get back an error on fsync
on a fd that was opened after the error condition had been cleared.

The problem is that the buffer code sets the write_io_error flag and
then later checks that flag to set the error in the mapping. That flag
perisists for quite a while however. If the file is later opened with
O_TRUNC, the buffers will then be invalidated and the mapping's error
set such that a subsequent fsync will return error. I think this is
incorrect, as there was no writeback between the open and fsync.

Add a new mark_buffer_write_io_error operation that sets the flag and
the error in the mapping at the same time. Replace all calls to
set_buffer_write_io_error with mark_buffer_write_io_error, and remove
the places that check this flag in order to set the error in the
mapping.

This sets the error in the mapping earlier, at the time that it's first
detected.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/buffer.c                 | 20 +++++++++++++-------
 fs/gfs2/lops.c              |  2 +-
 include/linux/buffer_head.h |  1 +
 3 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 4be8b914a222..b946149e8214 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -178,7 +178,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 		set_buffer_uptodate(bh);
 	} else {
 		buffer_io_error(bh, ", lost sync page write");
-		set_buffer_write_io_error(bh);
+		mark_buffer_write_io_error(bh);
 		clear_buffer_uptodate(bh);
 	}
 	unlock_buffer(bh);
@@ -352,8 +352,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 		set_buffer_uptodate(bh);
 	} else {
 		buffer_io_error(bh, ", lost async page write");
-		mapping_set_error(page->mapping, -EIO);
-		set_buffer_write_io_error(bh);
+		mark_buffer_write_io_error(bh);
 		clear_buffer_uptodate(bh);
 		SetPageError(page);
 	}
@@ -481,8 +480,6 @@ static void __remove_assoc_queue(struct buffer_head *bh)
 {
 	list_del_init(&bh->b_assoc_buffers);
 	WARN_ON(!bh->b_assoc_map);
-	if (buffer_write_io_error(bh))
-		mapping_set_error(bh->b_assoc_map, -EIO);
 	bh->b_assoc_map = NULL;
 }
 
@@ -1181,6 +1178,17 @@ void mark_buffer_dirty(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(mark_buffer_dirty);
 
+void mark_buffer_write_io_error(struct buffer_head *bh)
+{
+	set_buffer_write_io_error(bh);
+	/* FIXME: do we need to set this in both places? */
+	if (bh->b_page && bh->b_page->mapping)
+		mapping_set_error(bh->b_page->mapping, -EIO);
+	if (bh->b_assoc_map)
+		mapping_set_error(bh->b_assoc_map, -EIO);
+}
+EXPORT_SYMBOL(mark_buffer_write_io_error);
+
 /*
  * Decrement a buffer_head's reference count.  If all buffers against a page
  * have zero reference count, are clean and unlocked, and if the page is clean
@@ -3279,8 +3287,6 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 
 	bh = head;
 	do {
-		if (buffer_write_io_error(bh) && page->mapping)
-			mapping_set_error(page->mapping, -EIO);
 		if (buffer_busy(bh))
 			goto failed;
 		bh = bh->b_this_page;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144b42c7..cd7857ab1a6a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -182,7 +182,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
 		bh = bh->b_this_page;
 	do {
 		if (error)
-			set_buffer_write_io_error(bh);
+			mark_buffer_write_io_error(bh);
 		unlock_buffer(bh);
 		next = bh->b_this_page;
 		size -= bh->b_size;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index bd029e52ef5e..e0abeba3ced7 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -149,6 +149,7 @@ void buffer_check_dirty_writeback(struct page *page,
  */
 
 void mark_buffer_dirty(struct buffer_head *bh);
+void mark_buffer_write_io_error(struct buffer_head *bh);
 void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
 void touch_buffer(struct buffer_head *bh);
 void set_bh_page(struct buffer_head *bh,
-- 
cgit v1.2.3


From 76341cabbdad65c10a4162e9dfa82a6342afc02f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 6 Jul 2017 07:02:22 -0400
Subject: jbd2: don't clear and reset errors after waiting on writeback

Resetting this flag is almost certainly racy, and will be problematic
with some coming changes.

Make filemap_fdatawait_keep_errors return int, but not clear the flag(s).
Have jbd2 call it instead of filemap_fdatawait and don't attempt to
re-set the error flag if it fails.

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/jbd2/commit.c   | 16 ++++------------
 include/linux/fs.h |  2 +-
 mm/filemap.c       | 16 ++++++++++++++--
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b6b194ec1b4f..3c1c31321d9b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -263,18 +263,10 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 			continue;
 		jinode->i_flags |= JI_COMMIT_RUNNING;
 		spin_unlock(&journal->j_list_lock);
-		err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
-		if (err) {
-			/*
-			 * Because AS_EIO is cleared by
-			 * filemap_fdatawait_range(), set it again so
-			 * that user process can get -EIO from fsync().
-			 */
-			mapping_set_error(jinode->i_vfs_inode->i_mapping, -EIO);
-
-			if (!ret)
-				ret = err;
-		}
+		err = filemap_fdatawait_keep_errors(
+				jinode->i_vfs_inode->i_mapping);
+		if (!ret)
+			ret = err;
 		spin_lock(&journal->j_list_lock);
 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
 		smp_mb();
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..8ac8df1b3550 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2514,7 +2514,7 @@ extern int write_inode_now(struct inode *, int);
 extern int filemap_fdatawrite(struct address_space *);
 extern int filemap_flush(struct address_space *);
 extern int filemap_fdatawait(struct address_space *);
-extern void filemap_fdatawait_keep_errors(struct address_space *);
+extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
 				   loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..e5711b2728f4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -309,6 +309,16 @@ int filemap_check_errors(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_check_errors);
 
+static int filemap_check_and_keep_errors(struct address_space *mapping)
+{
+	/* Check for outstanding write errors */
+	if (test_bit(AS_EIO, &mapping->flags))
+		return -EIO;
+	if (test_bit(AS_ENOSPC, &mapping->flags))
+		return -ENOSPC;
+	return 0;
+}
+
 /**
  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping:	address space structure to write
@@ -453,15 +463,17 @@ EXPORT_SYMBOL(filemap_fdatawait_range);
  * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
  * fsfreeze(8)
  */
-void filemap_fdatawait_keep_errors(struct address_space *mapping)
+int filemap_fdatawait_keep_errors(struct address_space *mapping)
 {
 	loff_t i_size = i_size_read(mapping->host);
 
 	if (i_size == 0)
-		return;
+		return 0;
 
 	__filemap_fdatawait_range(mapping, 0, i_size - 1);
+	return filemap_check_and_keep_errors(mapping);
 }
+EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
 
 /**
  * filemap_fdatawait - wait for all under-writeback pages to complete
-- 
cgit v1.2.3


From 84cbadadc6eafc4798513773a2c8fce37dcd2fb8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 6 Jul 2017 07:02:24 -0400
Subject: lib: add errseq_t type and infrastructure for handling it

An errseq_t is a way of recording errors in one place, and allowing any
number of "subscribers" to tell whether an error has been set again
since a previous time.

It's implemented as an unsigned 32-bit value that is managed with atomic
operations. The low order bits are designated to hold an error code
(max size of MAX_ERRNO). The upper bits are used as a counter.

The API works with consumers sampling an errseq_t value at a particular
point in time. Later, that value can be used to tell whether new errors
have been set since that time.

Note that there is a 1 in 512k risk of collisions here if new errors
are being recorded frequently, since we have so few bits to use as a
counter. To mitigate this, one bit is used as a flag to tell whether the
value has been sampled since a new value was recorded. That allows
us to avoid bumping the counter if no one has sampled it since it
was last bumped.

Later patches will build on this infrastructure to change how writeback
errors are tracked in the kernel.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: NeilBrown <neilb@suse.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 MAINTAINERS            |   6 ++
 include/linux/errseq.h |  19 +++++
 lib/Makefile           |   2 +-
 lib/errseq.c           | 208 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/errseq.h
 create mode 100644 lib/errseq.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9e984645c4b0..c2465dc21946 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4999,6 +4999,12 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kristoffer/linux-hpc.git
 F:	drivers/video/fbdev/s1d13xxxfb.c
 F:	include/video/s1d13xxxfb.h
 
+ERRSEQ ERROR TRACKING INFRASTRUCTURE
+M:	Jeff Layton <jlayton@poochiereds.net>
+S:	Maintained
+F:	lib/errseq.c
+F:	include/linux/errseq.h
+
 ET131X NETWORK DRIVER
 M:	Mark Einon <mark.einon@gmail.com>
 S:	Odd Fixes
diff --git a/include/linux/errseq.h b/include/linux/errseq.h
new file mode 100644
index 000000000000..9e0d444ac88d
--- /dev/null
+++ b/include/linux/errseq.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_ERRSEQ_H
+#define _LINUX_ERRSEQ_H
+
+/* See lib/errseq.c for more info */
+
+typedef u32	errseq_t;
+
+errseq_t __errseq_set(errseq_t *eseq, int err);
+static inline void errseq_set(errseq_t *eseq, int err)
+{
+	/* Optimize for the common case of no error */
+	if (unlikely(err))
+		__errseq_set(eseq, err);
+}
+
+errseq_t errseq_sample(errseq_t *eseq);
+int errseq_check(errseq_t *eseq, errseq_t since);
+int errseq_check_and_advance(errseq_t *eseq, errseq_t *since);
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 0166fbc0fa81..519782d9ca3f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -41,7 +41,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
-	 once.o refcount.o usercopy.o
+	 once.o refcount.o usercopy.o errseq.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += hexdump.o
diff --git a/lib/errseq.c b/lib/errseq.c
new file mode 100644
index 000000000000..841fa24e6e00
--- /dev/null
+++ b/lib/errseq.c
@@ -0,0 +1,208 @@
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/atomic.h>
+#include <linux/errseq.h>
+
+/*
+ * An errseq_t is a way of recording errors in one place, and allowing any
+ * number of "subscribers" to tell whether it has changed since a previous
+ * point where it was sampled.
+ *
+ * It's implemented as an unsigned 32-bit value. The low order bits are
+ * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits
+ * are used as a counter. This is done with atomics instead of locking so that
+ * these functions can be called from any context.
+ *
+ * The general idea is for consumers to sample an errseq_t value. That value
+ * can later be used to tell whether any new errors have occurred since that
+ * sampling was done.
+ *
+ * Note that there is a risk of collisions if new errors are being recorded
+ * frequently, since we have so few bits to use as a counter.
+ *
+ * To mitigate this, one bit is used as a flag to tell whether the value has
+ * been sampled since a new value was recorded. That allows us to avoid bumping
+ * the counter if no one has sampled it since the last time an error was
+ * recorded.
+ *
+ * A new errseq_t should always be zeroed out.  A errseq_t value of all zeroes
+ * is the special (but common) case where there has never been an error. An all
+ * zero value thus serves as the "epoch" if one wishes to know whether there
+ * has ever been an error set since it was first initialized.
+ */
+
+/* The low bits are designated for error code (max of MAX_ERRNO) */
+#define ERRSEQ_SHIFT		ilog2(MAX_ERRNO + 1)
+
+/* This bit is used as a flag to indicate whether the value has been seen */
+#define ERRSEQ_SEEN		(1 << ERRSEQ_SHIFT)
+
+/* The lowest bit of the counter */
+#define ERRSEQ_CTR_INC		(1 << (ERRSEQ_SHIFT + 1))
+
+/**
+ * __errseq_set - set a errseq_t for later reporting
+ * @eseq: errseq_t field that should be set
+ * @err: error to set
+ *
+ * This function sets the error in *eseq, and increments the sequence counter
+ * if the last sequence was sampled at some point in the past.
+ *
+ * Any error set will always overwrite an existing error.
+ *
+ * Most callers will want to use the errseq_set inline wrapper to efficiently
+ * handle the common case where err is 0.
+ *
+ * We do return an errseq_t here, primarily for debugging purposes. The return
+ * value should not be used as a previously sampled value in later calls as it
+ * will not have the SEEN flag set.
+ */
+errseq_t __errseq_set(errseq_t *eseq, int err)
+{
+	errseq_t cur, old;
+
+	/* MAX_ERRNO must be able to serve as a mask */
+	BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1);
+
+	/*
+	 * Ensure the error code actually fits where we want it to go. If it
+	 * doesn't then just throw a warning and don't record anything. We
+	 * also don't accept zero here as that would effectively clear a
+	 * previous error.
+	 */
+	old = READ_ONCE(*eseq);
+
+	if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO),
+				"err = %d\n", err))
+		return old;
+
+	for (;;) {
+		errseq_t new;
+
+		/* Clear out error bits and set new error */
+		new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err;
+
+		/* Only increment if someone has looked at it */
+		if (old & ERRSEQ_SEEN)
+			new += ERRSEQ_CTR_INC;
+
+		/* If there would be no change, then call it done */
+		if (new == old) {
+			cur = new;
+			break;
+		}
+
+		/* Try to swap the new value into place */
+		cur = cmpxchg(eseq, old, new);
+
+		/*
+		 * Call it success if we did the swap or someone else beat us
+		 * to it for the same value.
+		 */
+		if (likely(cur == old || cur == new))
+			break;
+
+		/* Raced with an update, try again */
+		old = cur;
+	}
+	return cur;
+}
+EXPORT_SYMBOL(__errseq_set);
+
+/**
+ * errseq_sample - grab current errseq_t value
+ * @eseq: pointer to errseq_t to be sampled
+ *
+ * This function allows callers to sample an errseq_t value, marking it as
+ * "seen" if required.
+ */
+errseq_t errseq_sample(errseq_t *eseq)
+{
+	errseq_t old = READ_ONCE(*eseq);
+	errseq_t new = old;
+
+	/*
+	 * For the common case of no errors ever having been set, we can skip
+	 * marking the SEEN bit. Once an error has been set, the value will
+	 * never go back to zero.
+	 */
+	if (old != 0) {
+		new |= ERRSEQ_SEEN;
+		if (old != new)
+			cmpxchg(eseq, old, new);
+	}
+	return new;
+}
+EXPORT_SYMBOL(errseq_sample);
+
+/**
+ * errseq_check - has an error occurred since a particular sample point?
+ * @eseq: pointer to errseq_t value to be checked
+ * @since: previously-sampled errseq_t from which to check
+ *
+ * Grab the value that eseq points to, and see if it has changed "since"
+ * the given value was sampled. The "since" value is not advanced, so there
+ * is no need to mark the value as seen.
+ *
+ * Returns the latest error set in the errseq_t or 0 if it hasn't changed.
+ */
+int errseq_check(errseq_t *eseq, errseq_t since)
+{
+	errseq_t cur = READ_ONCE(*eseq);
+
+	if (likely(cur == since))
+		return 0;
+	return -(cur & MAX_ERRNO);
+}
+EXPORT_SYMBOL(errseq_check);
+
+/**
+ * errseq_check_and_advance - check an errseq_t and advance to current value
+ * @eseq: pointer to value being checked and reported
+ * @since: pointer to previously-sampled errseq_t to check against and advance
+ *
+ * Grab the eseq value, and see whether it matches the value that "since"
+ * points to. If it does, then just return 0.
+ *
+ * If it doesn't, then the value has changed. Set the "seen" flag, and try to
+ * swap it into place as the new eseq value. Then, set that value as the new
+ * "since" value, and return whatever the error portion is set to.
+ *
+ * Note that no locking is provided here for concurrent updates to the "since"
+ * value. The caller must provide that if necessary. Because of this, callers
+ * may want to do a lockless errseq_check before taking the lock and calling
+ * this.
+ */
+int errseq_check_and_advance(errseq_t *eseq, errseq_t *since)
+{
+	int err = 0;
+	errseq_t old, new;
+
+	/*
+	 * Most callers will want to use the inline wrapper to check this,
+	 * so that the common case of no error is handled without needing
+	 * to take the lock that protects the "since" value.
+	 */
+	old = READ_ONCE(*eseq);
+	if (old != *since) {
+		/*
+		 * Set the flag and try to swap it into place if it has
+		 * changed.
+		 *
+		 * We don't care about the outcome of the swap here. If the
+		 * swap doesn't occur, then it has either been updated by a
+		 * writer who is altering the value in some way (updating
+		 * counter or resetting the error), or another reader who is
+		 * just setting the "seen" flag. Either outcome is OK, and we
+		 * can advance "since" and return an error based on what we
+		 * have.
+		 */
+		new = old | ERRSEQ_SEEN;
+		if (new != old)
+			cmpxchg(eseq, old, new);
+		*since = new;
+		err = -(new & MAX_ERRNO);
+	}
+	return err;
+}
+EXPORT_SYMBOL(errseq_check_and_advance);
-- 
cgit v1.2.3


From 5660e13d2fd6af1903d4b0b98020af95ca2d638a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 6 Jul 2017 07:02:25 -0400
Subject: fs: new infrastructure for writeback error handling and reporting

Most filesystems currently use mapping_set_error and
filemap_check_errors for setting and reporting/clearing writeback errors
at the mapping level. filemap_check_errors is indirectly called from
most of the filemap_fdatawait_* functions and from
filemap_write_and_wait*. These functions are called from all sorts of
contexts to wait on writeback to finish -- e.g. mostly in fsync, but
also in truncate calls, getattr, etc.

The non-fsync callers are problematic. We should be reporting writeback
errors during fsync, but many places spread over the tree clear out
errors before they can be properly reported, or report errors at
nonsensical times.

If I get -EIO on a stat() call, there is no reason for me to assume that
it is because some previous writeback failed. The fact that it also
clears out the error such that a subsequent fsync returns 0 is a bug,
and a nasty one since that's potentially silent data corruption.

This patch adds a small bit of new infrastructure for setting and
reporting errors during address_space writeback. While the above was my
original impetus for adding this, I think it's also the case that
current fsync semantics are just problematic for userland. Most
applications that call fsync do so to ensure that the data they wrote
has hit the backing store.

In the case where there are multiple writers to the file at the same
time, this is really hard to determine. The first one to call fsync will
see any stored error, and the rest get back 0. The processes with open
fds may not be associated with one another in any way. They could even
be in different containers, so ensuring coordination between all fsync
callers is not really an option.

One way to remedy this would be to track what file descriptor was used
to dirty the file, but that's rather cumbersome and would likely be
slow. However, there is a simpler way to improve the semantics here
without incurring too much overhead.

This set adds an errseq_t to struct address_space, and a corresponding
one is added to struct file. Writeback errors are recorded in the
mapping's errseq_t, and the one in struct file is used as the "since"
value.

This changes the semantics of the Linux fsync implementation such that
applications can now use it to determine whether there were any
writeback errors since fsync(fd) was last called (or since the file was
opened in the case of fsync having never been called).

Note that those writeback errors may have occurred when writing data
that was dirtied via an entirely different fd, but that's the case now
with the current mapping_set_error/filemap_check_error infrastructure.
This will at least prevent you from getting a false report of success.

The new behavior is still consistent with the POSIX spec, and is more
reliable for application developers. This patch just adds some basic
infrastructure for doing this, and ensures that the f_wb_err "cursor"
is properly set when a file is opened. Later patches will change the
existing code to use this new infrastructure for reporting errors at
fsync time.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 drivers/dax/device.c           |  1 +
 fs/block_dev.c                 |  1 +
 fs/file_table.c                |  1 +
 fs/open.c                      |  3 ++
 include/linux/fs.h             | 60 +++++++++++++++++++++++++++++-
 include/trace/events/filemap.h | 57 ++++++++++++++++++++++++++++
 mm/filemap.c                   | 84 ++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 206 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 006e657dfcb9..12943d19bfc4 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -499,6 +499,7 @@ static int dax_open(struct inode *inode, struct file *filp)
 	inode->i_mapping = __dax_inode->i_mapping;
 	inode->i_mapping->host = __dax_inode;
 	filp->f_mapping = inode->i_mapping;
+	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
 	filp->private_data = dev_dax;
 	inode->i_flags = S_DAX;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599dddd36..4d62fe771587 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1743,6 +1743,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 		return -ENOMEM;
 
 	filp->f_mapping = bdev->bd_inode->i_mapping;
+	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
 
 	return blkdev_get(bdev, filp->f_mode, filp);
 }
diff --git a/fs/file_table.c b/fs/file_table.c
index 954d510b765a..72e861a35a7f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -168,6 +168,7 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
+	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
 	if ((mode & FMODE_READ) &&
 	     likely(fop->read || fop->read_iter))
 		mode |= FMODE_CAN_READ;
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..280d4a963791 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -707,6 +707,9 @@ static int do_dentry_open(struct file *f,
 	f->f_inode = inode;
 	f->f_mapping = inode->i_mapping;
 
+	/* Ensure that we skip any errors that predate opening of the file */
+	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
+
 	if (unlikely(f->f_flags & O_PATH)) {
 		f->f_mode = FMODE_PATH;
 		f->f_op = &empty_fops;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8ac8df1b3550..78b5c2901712 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -30,7 +30,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
 #include <linux/delayed_call.h>
-
+#include <linux/errseq.h>
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
 
@@ -392,6 +392,7 @@ struct address_space {
 	gfp_t			gfp_mask;	/* implicit gfp mask for allocations */
 	struct list_head	private_list;	/* ditto */
 	void			*private_data;	/* ditto */
+	errseq_t		wb_err;
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
@@ -868,6 +869,7 @@ struct file {
 	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	errseq_t		f_wb_err;
 } __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
 struct file_handle {
@@ -2526,6 +2528,62 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end);
 extern int filemap_check_errors(struct address_space *mapping);
 
+extern void __filemap_set_wb_err(struct address_space *mapping, int err);
+extern int __must_check file_check_and_advance_wb_err(struct file *file);
+extern int __must_check file_write_and_wait_range(struct file *file,
+						loff_t start, loff_t end);
+
+/**
+ * filemap_set_wb_err - set a writeback error on an address_space
+ * @mapping: mapping in which to set writeback error
+ * @err: error to be set in mapping
+ *
+ * When writeback fails in some way, we must record that error so that
+ * userspace can be informed when fsync and the like are called.  We endeavor
+ * to report errors on any file that was open at the time of the error.  Some
+ * internal callers also need to know when writeback errors have occurred.
+ *
+ * When a writeback error occurs, most filesystems will want to call
+ * filemap_set_wb_err to record the error in the mapping so that it will be
+ * automatically reported whenever fsync is called on the file.
+ *
+ * FIXME: mention FS_* flag here?
+ */
+static inline void filemap_set_wb_err(struct address_space *mapping, int err)
+{
+	/* Fastpath for common case of no error */
+	if (unlikely(err))
+		__filemap_set_wb_err(mapping, err);
+}
+
+/**
+ * filemap_check_wb_error - has an error occurred since the mark was sampled?
+ * @mapping: mapping to check for writeback errors
+ * @since: previously-sampled errseq_t
+ *
+ * Grab the errseq_t value from the mapping, and see if it has changed "since"
+ * the given value was sampled.
+ *
+ * If it has then report the latest error set, otherwise return 0.
+ */
+static inline int filemap_check_wb_err(struct address_space *mapping,
+					errseq_t since)
+{
+	return errseq_check(&mapping->wb_err, since);
+}
+
+/**
+ * filemap_sample_wb_err - sample the current errseq_t to test for later errors
+ * @mapping: mapping to be sampled
+ *
+ * Writeback errors are always reported relative to a particular sample point
+ * in the past. This function provides those sample points.
+ */
+static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
+{
+	return errseq_sample(&mapping->wb_err);
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
 			   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index 42febb6bc1d5..ff91325b8123 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -10,6 +10,7 @@
 #include <linux/memcontrol.h>
 #include <linux/device.h>
 #include <linux/kdev_t.h>
+#include <linux/errseq.h>
 
 DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
 
@@ -52,6 +53,62 @@ DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache,
 	TP_ARGS(page)
 	);
 
+TRACE_EVENT(filemap_set_wb_err,
+		TP_PROTO(struct address_space *mapping, errseq_t eseq),
+
+		TP_ARGS(mapping, eseq),
+
+		TP_STRUCT__entry(
+			__field(unsigned long, i_ino)
+			__field(dev_t, s_dev)
+			__field(errseq_t, errseq)
+		),
+
+		TP_fast_assign(
+			__entry->i_ino = mapping->host->i_ino;
+			__entry->errseq = eseq;
+			if (mapping->host->i_sb)
+				__entry->s_dev = mapping->host->i_sb->s_dev;
+			else
+				__entry->s_dev = mapping->host->i_rdev;
+		),
+
+		TP_printk("dev=%d:%d ino=0x%lx errseq=0x%x",
+			MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+			__entry->i_ino, __entry->errseq)
+);
+
+TRACE_EVENT(file_check_and_advance_wb_err,
+		TP_PROTO(struct file *file, errseq_t old),
+
+		TP_ARGS(file, old),
+
+		TP_STRUCT__entry(
+			__field(struct file *, file);
+			__field(unsigned long, i_ino)
+			__field(dev_t, s_dev)
+			__field(errseq_t, old)
+			__field(errseq_t, new)
+		),
+
+		TP_fast_assign(
+			__entry->file = file;
+			__entry->i_ino = file->f_mapping->host->i_ino;
+			if (file->f_mapping->host->i_sb)
+				__entry->s_dev =
+					file->f_mapping->host->i_sb->s_dev;
+			else
+				__entry->s_dev =
+					file->f_mapping->host->i_rdev;
+			__entry->old = old;
+			__entry->new = file->f_wb_err;
+		),
+
+		TP_printk("file=%p dev=%d:%d ino=0x%lx old=0x%x new=0x%x",
+			__entry->file, MAJOR(__entry->s_dev),
+			MINOR(__entry->s_dev), __entry->i_ino, __entry->old,
+			__entry->new)
+);
 #endif /* _TRACE_FILEMAP_H */
 
 /* This part must be outside protection */
diff --git a/mm/filemap.c b/mm/filemap.c
index eb99b5f23c61..d7a30aefee0d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -553,6 +553,90 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(filemap_write_and_wait_range);
 
+void __filemap_set_wb_err(struct address_space *mapping, int err)
+{
+	errseq_t eseq = __errseq_set(&mapping->wb_err, err);
+
+	trace_filemap_set_wb_err(mapping, eseq);
+}
+EXPORT_SYMBOL(__filemap_set_wb_err);
+
+/**
+ * file_check_and_advance_wb_err - report wb error (if any) that was previously
+ * 				   and advance wb_err to current one
+ * @file: struct file on which the error is being reported
+ *
+ * When userland calls fsync (or something like nfsd does the equivalent), we
+ * want to report any writeback errors that occurred since the last fsync (or
+ * since the file was opened if there haven't been any).
+ *
+ * Grab the wb_err from the mapping. If it matches what we have in the file,
+ * then just quickly return 0. The file is all caught up.
+ *
+ * If it doesn't match, then take the mapping value, set the "seen" flag in
+ * it and try to swap it into place. If it works, or another task beat us
+ * to it with the new value, then update the f_wb_err and return the error
+ * portion. The error at this point must be reported via proper channels
+ * (a'la fsync, or NFS COMMIT operation, etc.).
+ *
+ * While we handle mapping->wb_err with atomic operations, the f_wb_err
+ * value is protected by the f_lock since we must ensure that it reflects
+ * the latest value swapped in for this file descriptor.
+ */
+int file_check_and_advance_wb_err(struct file *file)
+{
+	int err = 0;
+	errseq_t old = READ_ONCE(file->f_wb_err);
+	struct address_space *mapping = file->f_mapping;
+
+	/* Locklessly handle the common case where nothing has changed */
+	if (errseq_check(&mapping->wb_err, old)) {
+		/* Something changed, must use slow path */
+		spin_lock(&file->f_lock);
+		old = file->f_wb_err;
+		err = errseq_check_and_advance(&mapping->wb_err,
+						&file->f_wb_err);
+		trace_file_check_and_advance_wb_err(file, old);
+		spin_unlock(&file->f_lock);
+	}
+	return err;
+}
+EXPORT_SYMBOL(file_check_and_advance_wb_err);
+
+/**
+ * file_write_and_wait_range - write out & wait on a file range
+ * @file:	file pointing to address_space with pages
+ * @lstart:	offset in bytes where the range starts
+ * @lend:	offset in bytes where the range ends (inclusive)
+ *
+ * Write out and wait upon file offsets lstart->lend, inclusive.
+ *
+ * Note that @lend is inclusive (describes the last byte to be written) so
+ * that this function can be used to write to the very end-of-file (end = -1).
+ *
+ * After writing out and waiting on the data, we check and advance the
+ * f_wb_err cursor to the latest value, and return any errors detected there.
+ */
+int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
+{
+	int err = 0, err2;
+	struct address_space *mapping = file->f_mapping;
+
+	if ((!dax_mapping(mapping) && mapping->nrpages) ||
+	    (dax_mapping(mapping) && mapping->nrexceptional)) {
+		err = __filemap_fdatawrite_range(mapping, lstart, lend,
+						 WB_SYNC_ALL);
+		/* See comment of filemap_write_and_wait() */
+		if (err != -EIO)
+			__filemap_fdatawait_range(mapping, lstart, lend);
+	}
+	err2 = file_check_and_advance_wb_err(file);
+	if (!err)
+		err = err2;
+	return err;
+}
+EXPORT_SYMBOL(file_write_and_wait_range);
+
 /**
  * replace_page_cache_page - replace a pagecache page with a new one
  * @old:	page to be replaced
-- 
cgit v1.2.3


From 8ed1e46aaf1bec6a12f4c89637f2c3ef4c70f18e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 6 Jul 2017 07:02:26 -0400
Subject: mm: set both AS_EIO/AS_ENOSPC and errseq_t in mapping_set_error

When a writeback error occurs, we want later callers to be able to pick
up that fact when they go to wait on that writeback to complete.
Traditionally, we've used AS_EIO/AS_ENOSPC flags to track that, but
that's problematic since only one "checker" will be informed when an
error occurs.

In later patches, we're going to want to convert many of these callers
to check for errors since a well-defined point in time. For now, ensure
that we can handle both sorts of checks by both setting errors in both
places when there is a writeback failure.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/pagemap.h | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 316a19f6b635..28acc94e0f81 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -28,14 +28,33 @@ enum mapping_flags {
 	AS_NO_WRITEBACK_TAGS = 5,
 };
 
+/**
+ * mapping_set_error - record a writeback error in the address_space
+ * @mapping - the mapping in which an error should be set
+ * @error - the error to set in the mapping
+ *
+ * When writeback fails in some way, we must record that error so that
+ * userspace can be informed when fsync and the like are called.  We endeavor
+ * to report errors on any file that was open at the time of the error.  Some
+ * internal callers also need to know when writeback errors have occurred.
+ *
+ * When a writeback error occurs, most filesystems will want to call
+ * mapping_set_error to record the error in the mapping so that it can be
+ * reported when the application calls fsync(2).
+ */
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
-	if (unlikely(error)) {
-		if (error == -ENOSPC)
-			set_bit(AS_ENOSPC, &mapping->flags);
-		else
-			set_bit(AS_EIO, &mapping->flags);
-	}
+	if (likely(!error))
+		return;
+
+	/* Record in wb_err for checkers using errseq_t based tracking */
+	filemap_set_wb_err(mapping, error);
+
+	/* Record it in flags for now, for legacy callers */
+	if (error == -ENOSPC)
+		set_bit(AS_ENOSPC, &mapping->flags);
+	else
+		set_bit(AS_EIO, &mapping->flags);
 }
 
 static inline void mapping_set_unevictable(struct address_space *mapping)
-- 
cgit v1.2.3


From 60934b200ddd62187b149a7f32cc0f160c08a7ed Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:13 +0300
Subject: NTB: Make link-state API being declared first

Since link operations are usually performed before memory window access
operations, it's logically better to declare link-related API before any
of MW/Doorbell/Scratchpad methods.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 137 ++++++++++++++++++++++++++--------------------------
 1 file changed, 69 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index de87ceac110e..d7ab3e1ec88f 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -179,13 +179,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 
 /**
  * struct ntb_ctx_ops - ntb device operations
+ * @link_is_up:		See ntb_link_is_up().
+ * @link_enable:	See ntb_link_enable().
+ * @link_disable:	See ntb_link_disable().
  * @mw_count:		See ntb_mw_count().
  * @mw_get_range:	See ntb_mw_get_range().
  * @mw_set_trans:	See ntb_mw_set_trans().
  * @mw_clear_trans:	See ntb_mw_clear_trans().
- * @link_is_up:		See ntb_link_is_up().
- * @link_enable:	See ntb_link_enable().
- * @link_disable:	See ntb_link_disable().
  * @db_is_unsafe:	See ntb_db_is_unsafe().
  * @db_valid_mask:	See ntb_db_valid_mask().
  * @db_vector_count:	See ntb_db_vector_count().
@@ -212,6 +212,12 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_write:	See ntb_peer_spad_write().
  */
 struct ntb_dev_ops {
+	int (*link_is_up)(struct ntb_dev *ntb,
+			  enum ntb_speed *speed, enum ntb_width *width);
+	int (*link_enable)(struct ntb_dev *ntb,
+			   enum ntb_speed max_speed, enum ntb_width max_width);
+	int (*link_disable)(struct ntb_dev *ntb);
+
 	int (*mw_count)(struct ntb_dev *ntb);
 	int (*mw_get_range)(struct ntb_dev *ntb, int idx,
 			    phys_addr_t *base, resource_size_t *size,
@@ -220,12 +226,6 @@ struct ntb_dev_ops {
 			    dma_addr_t addr, resource_size_t size);
 	int (*mw_clear_trans)(struct ntb_dev *ntb, int idx);
 
-	int (*link_is_up)(struct ntb_dev *ntb,
-			  enum ntb_speed *speed, enum ntb_width *width);
-	int (*link_enable)(struct ntb_dev *ntb,
-			   enum ntb_speed max_speed, enum ntb_width max_width);
-	int (*link_disable)(struct ntb_dev *ntb);
-
 	int (*db_is_unsafe)(struct ntb_dev *ntb);
 	u64 (*db_valid_mask)(struct ntb_dev *ntb);
 	int (*db_vector_count)(struct ntb_dev *ntb);
@@ -265,13 +265,14 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
+		ops->link_is_up				&&
+		ops->link_enable			&&
+		ops->link_disable			&&
 		ops->mw_count				&&
 		ops->mw_get_range			&&
 		ops->mw_set_trans			&&
 		/* ops->mw_clear_trans			&& */
-		ops->link_is_up				&&
-		ops->link_enable			&&
-		ops->link_disable			&&
+
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
 
@@ -440,6 +441,62 @@ void ntb_link_event(struct ntb_dev *ntb);
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
+/**
+ * ntb_link_is_up() - get the current ntb link state
+ * @ntb:	NTB device context.
+ * @speed:	OUT - The link speed expressed as PCIe generation number.
+ * @width:	OUT - The link width expressed as the number of PCIe lanes.
+ *
+ * Get the current state of the ntb link.  It is recommended to query the link
+ * state once after every link event.  It is safe to query the link state in
+ * the context of the link event callback.
+ *
+ * Return: One if the link is up, zero if the link is down, otherwise a
+ *		negative value indicating the error number.
+ */
+static inline int ntb_link_is_up(struct ntb_dev *ntb,
+				 enum ntb_speed *speed, enum ntb_width *width)
+{
+	return ntb->ops->link_is_up(ntb, speed, width);
+}
+
+/**
+ * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * @ntb:	NTB device context.
+ * @max_speed:	The maximum link speed expressed as PCIe generation number.
+ * @max_width:	The maximum link width expressed as the number of PCIe lanes.
+ *
+ * Enable the link on the secondary side of the ntb.  This can only be done
+ * from the primary side of the ntb in primary or b2b topology.  The ntb device
+ * should train the link to its maximum speed and width, or the requested speed
+ * and width, whichever is smaller, if supported.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_link_enable(struct ntb_dev *ntb,
+				  enum ntb_speed max_speed,
+				  enum ntb_width max_width)
+{
+	return ntb->ops->link_enable(ntb, max_speed, max_width);
+}
+
+/**
+ * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * @ntb:	NTB device context.
+ *
+ * Disable the link on the secondary side of the ntb.  This can only be
+ * done from the primary side of the ntb in primary or b2b topology.  The ntb
+ * device should disable the link.  Returning from this call must indicate that
+ * a barrier has passed, though with no more writes may pass in either
+ * direction across the link, except if this call returns an error number.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_link_disable(struct ntb_dev *ntb)
+{
+	return ntb->ops->link_disable(ntb);
+}
+
 /**
  * ntb_mw_count() - get the number of memory windows
  * @ntb:	NTB device context.
@@ -516,62 +573,6 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
 	return ntb->ops->mw_clear_trans(ntb, idx);
 }
 
-/**
- * ntb_link_is_up() - get the current ntb link state
- * @ntb:	NTB device context.
- * @speed:	OUT - The link speed expressed as PCIe generation number.
- * @width:	OUT - The link width expressed as the number of PCIe lanes.
- *
- * Get the current state of the ntb link.  It is recommended to query the link
- * state once after every link event.  It is safe to query the link state in
- * the context of the link event callback.
- *
- * Return: One if the link is up, zero if the link is down, otherwise a
- *		negative value indicating the error number.
- */
-static inline int ntb_link_is_up(struct ntb_dev *ntb,
-				 enum ntb_speed *speed, enum ntb_width *width)
-{
-	return ntb->ops->link_is_up(ntb, speed, width);
-}
-
-/**
- * ntb_link_enable() - enable the link on the secondary side of the ntb
- * @ntb:	NTB device context.
- * @max_speed:	The maximum link speed expressed as PCIe generation number.
- * @max_width:	The maximum link width expressed as the number of PCIe lanes.
- *
- * Enable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
- *
- * Return: Zero on success, otherwise an error number.
- */
-static inline int ntb_link_enable(struct ntb_dev *ntb,
-				  enum ntb_speed max_speed,
-				  enum ntb_width max_width)
-{
-	return ntb->ops->link_enable(ntb, max_speed, max_width);
-}
-
-/**
- * ntb_link_disable() - disable the link on the secondary side of the ntb
- * @ntb:	NTB device context.
- *
- * Disable the link on the secondary side of the ntb.  This can only be
- * done from the primary side of the ntb in primary or b2b topology.  The ntb
- * device should disable the link.  Returning from this call must indicate that
- * a barrier has passed, though with no more writes may pass in either
- * direction across the link, except if this call returns an error number.
- *
- * Return: Zero on success, otherwise an error number.
- */
-static inline int ntb_link_disable(struct ntb_dev *ntb)
-{
-	return ntb->ops->link_disable(ntb);
-}
-
 /**
  * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell
  * @ntb:	NTB device context.
-- 
cgit v1.2.3


From 1e5301196a88961b02fe43c73a952f78b2c84712 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:14 +0300
Subject: NTB: Add indexed ports NTB API

There is some NTB hardware, which can combine more than just two domains
over NTB. For instance, some IDT PCIe-switches can have NTB-functions
activated on more than two-ports. The different domains are distinguished
by ports they are connected to. So the new port-related methods are added to
the NTB API:
 ntb_port_number() - return local port
 ntb_peer_port_count() - return number of peers local port can connect to
 ntb_peer_port_number(pdix) - return port number by it index
 ntb_peer_port_idx(port) - return port index by it number

Current test-drivers aren't changed much. They still support two-ports devices
for the time being while multi-ports hardware drivers aren't added.

By default port-related API is declared for two-ports hardware.
So corresponding hardware drivers won't need to implement it.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/ntb.c               |  54 ++++++++++++++
 drivers/ntb/ntb_transport.c     |   6 ++
 drivers/ntb/test/ntb_perf.c     |   4 ++
 drivers/ntb/test/ntb_pingpong.c |   6 ++
 drivers/ntb/test/ntb_tool.c     |   5 ++
 include/linux/ntb.h             | 156 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 231 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 2e2530743831..1e92e52813aa 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c
@@ -191,6 +191,60 @@ void ntb_db_event(struct ntb_dev *ntb, int vector)
 }
 EXPORT_SYMBOL(ntb_db_event);
 
+int ntb_default_port_number(struct ntb_dev *ntb)
+{
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_PRI_USD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_SEC_DSD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_port_number);
+
+int ntb_default_peer_port_count(struct ntb_dev *ntb)
+{
+	return NTB_DEF_PEER_CNT;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_count);
+
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_SEC_DSD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_PRI_USD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_number);
+
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	int peer_port = ntb_default_peer_port_number(ntb, NTB_DEF_PEER_IDX);
+
+	if (peer_port == -EINVAL || port != peer_port)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_idx);
+
 static int ntb_probe(struct device *dev)
 {
 	struct ntb_dev *ntb;
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 10e5bf460139..cc6ae35109b1 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -95,6 +95,9 @@ MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
 
 static struct dentry *nt_debugfs_dir;
 
+/* Only two-ports NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
 struct ntb_queue_entry {
 	/* ntb_queue list reference */
 	struct list_head entry;
@@ -1064,6 +1067,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 		dev_dbg(&ndev->dev,
 			"scratchpad is unsafe, proceed anyway...\n");
 
+	if (ntb_peer_port_count(ndev) != NTB_DEF_PEER_CNT)
+		dev_warn(&ndev->dev, "Multi-port NTB devices unsupported\n");
+
 	node = dev_to_node(&ndev->dev);
 
 	nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 5cab2831ce99..e52cc4eace90 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -76,6 +76,7 @@
 #define DMA_RETRIES		20
 #define SZ_4G			(1ULL << 32)
 #define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
+#define PIDX			NTB_DEF_PEER_IDX
 
 MODULE_LICENSE(DRIVER_LICENSE);
 MODULE_VERSION(DRIVER_VERSION);
@@ -766,6 +767,9 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 		return -EIO;
 	}
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
+
 	node = dev_to_node(&pdev->dev);
 
 	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 435861189d97..12f8b40cb11a 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -90,6 +90,9 @@ static unsigned long db_init = 0x7;
 module_param(db_init, ulong, 0644);
 MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer");
 
+/* Only two-ports NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
 struct pp_ctx {
 	struct ntb_dev			*ntb;
 	u64				db_bits;
@@ -230,6 +233,9 @@ static int pp_probe(struct ntb_client *client,
 		}
 	}
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
 	pp = kmalloc(sizeof(*pp), GFP_KERNEL);
 	if (!pp) {
 		rc = -ENOMEM;
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 61bf2ef87e0e..690862d90411 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -120,6 +120,8 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
 
 #define MAX_MWS 16
+/* Only two-ports devices are supported */
+#define PIDX	NTB_DEF_PEER_IDX
 
 static struct dentry *tool_dbgfs;
 
@@ -919,6 +921,9 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	if (ntb_spad_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
 	tc = kzalloc(sizeof(*tc), GFP_KERNEL);
 	if (!tc) {
 		rc = -ENOMEM;
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index d7ab3e1ec88f..d23483bae6f3 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -139,6 +139,20 @@ enum ntb_width {
 	NTB_WIDTH_32 = 32,
 };
 
+/**
+ * enum ntb_default_port - NTB default port number
+ * @NTB_PORT_PRI_USD:	Default port of the NTB_TOPO_PRI/NTB_TOPO_B2B_USD
+ *			topologies
+ * @NTB_PORT_SEC_DSD:	Default port of the NTB_TOPO_SEC/NTB_TOPO_B2B_DSD
+ *			topologies
+ */
+enum ntb_default_port {
+	NTB_PORT_PRI_USD,
+	NTB_PORT_SEC_DSD
+};
+#define NTB_DEF_PEER_CNT	(1)
+#define NTB_DEF_PEER_IDX	(0)
+
 /**
  * struct ntb_client_ops - ntb client operations
  * @probe:		Notify client of a new device.
@@ -179,6 +193,10 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 
 /**
  * struct ntb_ctx_ops - ntb device operations
+ * @port_number:	See ntb_port_number().
+ * @peer_port_count:	See ntb_peer_port_count().
+ * @peer_port_number:	See ntb_peer_port_number().
+ * @peer_port_idx:	See ntb_peer_port_idx().
  * @link_is_up:		See ntb_link_is_up().
  * @link_enable:	See ntb_link_enable().
  * @link_disable:	See ntb_link_disable().
@@ -212,6 +230,11 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_write:	See ntb_peer_spad_write().
  */
 struct ntb_dev_ops {
+	int (*port_number)(struct ntb_dev *ntb);
+	int (*peer_port_count)(struct ntb_dev *ntb);
+	int (*peer_port_number)(struct ntb_dev *ntb, int pidx);
+	int (*peer_port_idx)(struct ntb_dev *ntb, int port);
+
 	int (*link_is_up)(struct ntb_dev *ntb,
 			  enum ntb_speed *speed, enum ntb_width *width);
 	int (*link_enable)(struct ntb_dev *ntb,
@@ -265,6 +288,9 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
+		!ops->peer_port_count == !ops->port_number	&&
+		!ops->peer_port_number == !ops->port_number	&&
+		!ops->peer_port_idx == !ops->port_number	&&
 		ops->link_is_up				&&
 		ops->link_enable			&&
 		ops->link_disable			&&
@@ -441,6 +467,136 @@ void ntb_link_event(struct ntb_dev *ntb);
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
+/**
+ * ntb_default_port_number() - get the default local port number
+ * @ntb:	NTB device context.
+ *
+ * If hardware driver doesn't specify port_number() callback method, the NTB
+ * is considered with just two ports. So this method returns default local
+ * port number in compliance with topology.
+ *
+ * NOTE Don't call this method directly. The ntb_port_number() function should
+ * be used instead.
+ *
+ * Return: the default local port number
+ */
+int ntb_default_port_number(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_port_count() - get the default number of peer device ports
+ * @ntb:	NTB device context.
+ *
+ * By default hardware driver supports just one peer device.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_count() function
+ * should be used instead.
+ *
+ * Return: the default number of peer ports
+ */
+int ntb_default_peer_port_count(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_peer_port_number() - get the default peer port by given index
+ * @ntb:	NTB device context.
+ * @idx:	Peer port index (should not differ from zero).
+ *
+ * By default hardware driver supports just one peer device, so this method
+ * shall return the corresponding value from enum ntb_default_port.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_number() function
+ * should be used instead.
+ *
+ * Return: the peer device port or negative value indicating an error
+ */
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx);
+
+/**
+ * ntb_default_peer_port_idx() - get the default peer device port index by
+ *				 given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number (should be one of enum ntb_default_port).
+ *
+ * By default hardware driver supports just one peer device, so while
+ * specified port-argument indicates peer port from enum ntb_default_port,
+ * the return value shall be zero.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_idx() function
+ * should be used instead.
+ *
+ * Return: the peer port index or negative value indicating an error
+ */
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port);
+
+/**
+ * ntb_port_number() - get the local port number
+ * @ntb:	NTB device context.
+ *
+ * Hardware must support at least simple two-ports ntb connection
+ *
+ * Return: the local port number
+ */
+static inline int ntb_port_number(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->port_number)
+		return ntb_default_port_number(ntb);
+
+	return ntb->ops->port_number(ntb);
+}
+
+/**
+ * ntb_peer_port_count() - get the number of peer device ports
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support an access to memory of several remote domains
+ * over multi-port NTB devices. This method returns the number of peers,
+ * local device can have shared memory with.
+ *
+ * Return: the number of peer ports
+ */
+static inline int ntb_peer_port_count(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->peer_port_count)
+		return ntb_default_peer_port_count(ntb);
+
+	return ntb->ops->peer_port_count(ntb);
+}
+
+/**
+ * ntb_peer_port_number() - get the peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * Peer ports are continuously enumerated by NTB API logic, so this method
+ * lets to retrieve port real number by its index.
+ *
+ * Return: the peer device port or negative value indicating an error
+ */
+static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (!ntb->ops->peer_port_number)
+		return ntb_default_peer_port_number(ntb, pidx);
+
+	return ntb->ops->peer_port_number(ntb, pidx);
+}
+
+/**
+ * ntb_peer_port_idx() - get the peer device port index by given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number.
+ *
+ * Inverse operation of ntb_peer_port_number(), so one can get port index
+ * by specified port number.
+ *
+ * Return: the peer port index or negative value indicating an error
+ */
+static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	if (!ntb->ops->peer_port_idx)
+		return ntb_default_peer_port_idx(ntb, port);
+
+	return ntb->ops->peer_port_idx(ntb, port);
+}
+
 /**
  * ntb_link_is_up() - get the current ntb link state
  * @ntb:	NTB device context.
-- 
cgit v1.2.3


From 4e8c11b7fd29f70eb7af43bae908297689f2c3da Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:15 +0300
Subject: NTB: Alter link-state API to support multi-port devices

Multi-port devices permit the NTB connections between multiple domains,
so a local device can have NTB link being up with one peer and being
down with another. NTB link-state API is appropriately altered to return
a bitfield of the link-states between the local device and possible peers.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c     |  2 +-
 drivers/ntb/hw/intel/ntb_hw_intel.c |  2 +-
 include/linux/ntb.h                 | 31 ++++++++++++++++---------------
 3 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 019a158e1128..e71ab4c1fe0e 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -212,7 +212,7 @@ static int amd_link_is_up(struct amd_ntb_dev *ndev)
 	return 0;
 }
 
-static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+static u64 amd_ntb_link_is_up(struct ntb_dev *ntb,
 			      enum ntb_speed *speed,
 			      enum ntb_width *width)
 {
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 7b3b6fd63d7d..6b25bf816f28 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -1171,7 +1171,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	return 0;
 }
 
-static int intel_ntb_link_is_up(struct ntb_dev *ntb,
+static u64 intel_ntb_link_is_up(struct ntb_dev *ntb,
 				enum ntb_speed *speed,
 				enum ntb_width *width)
 {
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index d23483bae6f3..b2b2924f5f43 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -235,7 +235,7 @@ struct ntb_dev_ops {
 	int (*peer_port_number)(struct ntb_dev *ntb, int pidx);
 	int (*peer_port_idx)(struct ntb_dev *ntb, int port);
 
-	int (*link_is_up)(struct ntb_dev *ntb,
+	u64 (*link_is_up)(struct ntb_dev *ntb,
 			  enum ntb_speed *speed, enum ntb_width *width);
 	int (*link_enable)(struct ntb_dev *ntb,
 			   enum ntb_speed max_speed, enum ntb_width max_width);
@@ -607,25 +607,26 @@ static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port)
  * state once after every link event.  It is safe to query the link state in
  * the context of the link event callback.
  *
- * Return: One if the link is up, zero if the link is down, otherwise a
- *		negative value indicating the error number.
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ *         link is up/down respectively.
  */
-static inline int ntb_link_is_up(struct ntb_dev *ntb,
+static inline u64 ntb_link_is_up(struct ntb_dev *ntb,
 				 enum ntb_speed *speed, enum ntb_width *width)
 {
 	return ntb->ops->link_is_up(ntb, speed, width);
 }
 
 /**
- * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * ntb_link_enable() - enable the local port ntb connection
  * @ntb:	NTB device context.
  * @max_speed:	The maximum link speed expressed as PCIe generation number.
  * @max_width:	The maximum link width expressed as the number of PCIe lanes.
  *
- * Enable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
+ * Enable the NTB/PCIe link on the local or remote (for bridge-to-bridge
+ * topology) side of the bridge. If it's supported the ntb device should train
+ * the link to its maximum speed and width, or the requested speed and width,
+ * whichever is smaller. Some hardware doesn't support PCIe link training, so
+ * the last two arguments will be ignored then.
  *
  * Return: Zero on success, otherwise an error number.
  */
@@ -637,14 +638,14 @@ static inline int ntb_link_enable(struct ntb_dev *ntb,
 }
 
 /**
- * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * ntb_link_disable() - disable the local port ntb connection
  * @ntb:	NTB device context.
  *
- * Disable the link on the secondary side of the ntb.  This can only be
- * done from the primary side of the ntb in primary or b2b topology.  The ntb
- * device should disable the link.  Returning from this call must indicate that
- * a barrier has passed, though with no more writes may pass in either
- * direction across the link, except if this call returns an error number.
+ * Disable the link on the local or remote (for b2b topology) of the ntb.
+ * The ntb device should disable the link.  Returning from this call must
+ * indicate that a barrier has passed, though with no more writes may pass in
+ * either direction across the link, except if this call returns an error
+ * number.
  *
  * Return: Zero on success, otherwise an error number.
  */
-- 
cgit v1.2.3


From 443b9a14ecbe811071467d54d6f2f1182835cc4d Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 11 Jan 2017 03:11:33 +0300
Subject: NTB: Alter MW API to support multi-ports devices

Multi-port NTB devices permit to share a memory between all accessible peers.
Memory Windows API is altered to correspondingly initialize and map memory
windows for such devices:
 ntb_mw_count(pidx); - number of inbound memory windows, which can be allocated
for shared buffer with specified peer device.
 ntb_mw_get_align(pidx, widx); - get alignment and size restriction parameters
to properly allocate inbound memory region.
 ntb_peer_mw_count(); - get number of outbound memory windows.
 ntb_peer_mw_get_addr(widx); - get mapping address of an outbound memory window

If hardware supports inbound translation configured on the local ntb port:
 ntb_mw_set_trans(pidx, widx); - set translation address of allocated inbound
memory window so a peer device could access it.
 ntb_mw_clear_trans(pidx, widx); - clear the translation address of an inbound
memory window.

If hardware supports outbound translation configured on the peer ntb port:
 ntb_peer_mw_set_trans(pidx, widx); - set translation address of a memory
window retrieved from a peer device
 ntb_peer_mw_clear_trans(pidx, widx); - clear the translation address of an
outbound memory window

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c     |  68 +++++++++---
 drivers/ntb/hw/intel/ntb_hw_intel.c |  90 ++++++++++++----
 drivers/ntb/ntb.c                   |   2 +
 drivers/ntb/ntb_transport.c         |  21 +++-
 drivers/ntb/test/ntb_perf.c         |  17 ++-
 drivers/ntb/test/ntb_tool.c         |  43 +++++---
 include/linux/ntb.h                 | 208 ++++++++++++++++++++++++++++--------
 7 files changed, 342 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index e71ab4c1fe0e..bcefe1df9ce3 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -13,6 +14,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -79,40 +81,42 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
 	return 1 << idx;
 }
 
-static int amd_ntb_mw_count(struct ntb_dev *ntb)
+static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	return ntb_ndev(ntb)->mw_count;
 }
 
-static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				phys_addr_t *base,
-				resource_size_t *size,
-				resource_size_t *align,
-				resource_size_t *align_size)
+static int amd_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	bar = ndev_mw_to_bar(ndev, idx);
 	if (bar < 0)
 		return bar;
 
-	if (base)
-		*base = pci_resource_start(ndev->ntb.pdev, bar);
-
-	if (size)
-		*size = pci_resource_len(ndev->ntb.pdev, bar);
+	if (addr_align)
+		*addr_align = SZ_4K;
 
-	if (align)
-		*align = SZ_4K;
+	if (size_align)
+		*size_align = 1;
 
-	if (align_size)
-		*align_size = 1;
+	if (size_max)
+		*size_max = pci_resource_len(ndev->ntb.pdev, bar);
 
 	return 0;
 }
 
-static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				dma_addr_t addr, resource_size_t size)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -122,6 +126,9 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base_addr, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	bar = ndev_mw_to_bar(ndev, idx);
 	if (bar < 0)
 		return bar;
@@ -284,6 +291,31 @@ static int amd_ntb_link_disable(struct ntb_dev *ntb)
 	return 0;
 }
 
+static int amd_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* The same as for inbound MWs */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	return 0;
+}
+
 static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
 {
 	return ntb_ndev(ntb)->db_valid_mask;
@@ -431,8 +463,10 @@ static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
 
 static const struct ntb_dev_ops amd_ntb_ops = {
 	.mw_count		= amd_ntb_mw_count,
-	.mw_get_range		= amd_ntb_mw_get_range,
+	.mw_get_align		= amd_ntb_mw_get_align,
 	.mw_set_trans		= amd_ntb_mw_set_trans,
+	.peer_mw_count		= amd_ntb_peer_mw_count,
+	.peer_mw_get_addr	= amd_ntb_peer_mw_get_addr,
 	.link_is_up		= amd_ntb_link_is_up,
 	.link_enable		= amd_ntb_link_enable,
 	.link_disable		= amd_ntb_link_disable,
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 6b25bf816f28..a9b4ed4d7b52 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -6,6 +6,7 @@
  *
  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -15,6 +16,7 @@
  *
  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -1035,20 +1037,26 @@ static void ndev_deinit_debugfs(struct intel_ntb_dev *ndev)
 	debugfs_remove_recursive(ndev->debugfs_dir);
 }
 
-static int intel_ntb_mw_count(struct ntb_dev *ntb)
+static int intel_ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	return ntb_ndev(ntb)->mw_count;
 }
 
-static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				  phys_addr_t *base,
-				  resource_size_t *size,
-				  resource_size_t *align,
-				  resource_size_t *align_size)
+static int intel_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				  resource_size_t *addr_align,
+				  resource_size_t *size_align,
+				  resource_size_t *size_max)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	resource_size_t bar_size, mw_size;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1056,24 +1064,26 @@ static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
 	if (bar < 0)
 		return bar;
 
-	if (base)
-		*base = pci_resource_start(ndev->ntb.pdev, bar) +
-			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+	bar_size = pci_resource_len(ndev->ntb.pdev, bar);
 
-	if (size)
-		*size = pci_resource_len(ndev->ntb.pdev, bar) -
-			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+	if (idx == ndev->b2b_idx)
+		mw_size = bar_size - ndev->b2b_off;
+	else
+		mw_size = bar_size;
+
+	if (addr_align)
+		*addr_align = pci_resource_len(ndev->ntb.pdev, bar);
 
-	if (align)
-		*align = pci_resource_len(ndev->ntb.pdev, bar);
+	if (size_align)
+		*size_align = 1;
 
-	if (align_size)
-		*align_size = 1;
+	if (size_max)
+		*size_max = mw_size;
 
 	return 0;
 }
 
-static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				  dma_addr_t addr, resource_size_t size)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -1083,6 +1093,9 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1249,6 +1262,36 @@ static int intel_ntb_link_disable(struct ntb_dev *ntb)
 	return 0;
 }
 
+static int intel_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* Numbers of inbound and outbound memory windows match */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int intel_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				     phys_addr_t *base, resource_size_t *size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar) +
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar) -
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	return 0;
+}
+
 static int intel_ntb_db_is_unsafe(struct ntb_dev *ntb)
 {
 	return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_DB);
@@ -1902,7 +1945,7 @@ static int intel_ntb3_link_enable(struct ntb_dev *ntb,
 
 	return 0;
 }
-static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				   dma_addr_t addr, resource_size_t size)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -1912,6 +1955,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -2906,8 +2952,10 @@ static const struct intel_ntb_xlat_reg skx_sec_xlat = {
 /* operations for primary side of local ntb */
 static const struct ntb_dev_ops intel_ntb_ops = {
 	.mw_count		= intel_ntb_mw_count,
-	.mw_get_range		= intel_ntb_mw_get_range,
+	.mw_get_align		= intel_ntb_mw_get_align,
 	.mw_set_trans		= intel_ntb_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
 	.link_is_up		= intel_ntb_link_is_up,
 	.link_enable		= intel_ntb_link_enable,
 	.link_disable		= intel_ntb_link_disable,
@@ -2932,8 +2980,10 @@ static const struct ntb_dev_ops intel_ntb_ops = {
 
 static const struct ntb_dev_ops intel_ntb3_ops = {
 	.mw_count		= intel_ntb_mw_count,
-	.mw_get_range		= intel_ntb_mw_get_range,
+	.mw_get_align		= intel_ntb_mw_get_align,
 	.mw_set_trans		= intel_ntb3_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
 	.link_is_up		= intel_ntb_link_is_up,
 	.link_enable		= intel_ntb3_link_enable,
 	.link_disable		= intel_ntb_link_disable,
diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 1e92e52813aa..2551bb2ff4a4 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c
@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index cc6ae35109b1..771b469cebf0 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -673,7 +673,7 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 	if (!mw->virt_addr)
 		return;
 
-	ntb_mw_clear_trans(nt->ndev, num_mw);
+	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
 	dma_free_coherent(&pdev->dev, mw->buff_size,
 			  mw->virt_addr, mw->dma_addr);
 	mw->xlat_size = 0;
@@ -730,7 +730,8 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 	}
 
 	/* Notify HW the memory location of the receive buffer */
-	rc = ntb_mw_set_trans(nt->ndev, num_mw, mw->dma_addr, mw->xlat_size);
+	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
+			      mw->xlat_size);
 	if (rc) {
 		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
 		ntb_free_mw(nt, num_mw);
@@ -1058,7 +1059,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	int node;
 	int rc, i;
 
-	mw_count = ntb_mw_count(ndev);
+	mw_count = ntb_mw_count(ndev, PIDX);
+
+	if (!ndev->ops->mw_set_trans) {
+		dev_err(&ndev->dev, "Inbound MW based NTB API is required\n");
+		return -EINVAL;
+	}
 
 	if (ntb_db_is_unsafe(ndev))
 		dev_dbg(&ndev->dev,
@@ -1100,8 +1106,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	for (i = 0; i < mw_count; i++) {
 		mw = &nt->mw_vec[i];
 
-		rc = ntb_mw_get_range(ndev, i, &mw->phys_addr, &mw->phys_size,
-				      &mw->xlat_align, &mw->xlat_align_size);
+		rc = ntb_mw_get_align(ndev, PIDX, i, &mw->xlat_align,
+				      &mw->xlat_align_size, NULL);
+		if (rc)
+			goto err1;
+
+		rc = ntb_peer_mw_get_addr(ndev, i, &mw->phys_addr,
+					  &mw->phys_size);
 		if (rc)
 			goto err1;
 
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index e52cc4eace90..7f89cceaf350 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -455,7 +455,7 @@ static void perf_free_mw(struct perf_ctx *perf)
 	if (!mw->virt_addr)
 		return;
 
-	ntb_mw_clear_trans(perf->ntb, 0);
+	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
 	dma_free_coherent(&pdev->dev, mw->buf_size,
 			  mw->virt_addr, mw->dma_addr);
 	mw->xlat_size = 0;
@@ -491,7 +491,7 @@ static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
 		mw->buf_size = 0;
 	}
 
-	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
 	if (rc) {
 		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
 		perf_free_mw(perf);
@@ -562,8 +562,12 @@ static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
 
 	mw = &perf->mw;
 
-	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
-			      &mw->xlat_align, &mw->xlat_align_size);
+	rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
+			      &mw->xlat_align_size, NULL);
+	if (rc)
+		return rc;
+
+	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
 	if (rc)
 		return rc;
 
@@ -767,6 +771,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 		return -EIO;
 	}
 
+	if (!ntb->ops->mw_set_trans) {
+		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
+		return -EINVAL;
+	}
+
 	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
 		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
 
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 690862d90411..cb692473d457 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -119,7 +119,8 @@ MODULE_VERSION(DRIVER_VERSION);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
 
-#define MAX_MWS 16
+/* It is rare to have hadrware with greater than six MWs */
+#define MAX_MWS	6
 /* Only two-ports devices are supported */
 #define PIDX	NTB_DEF_PEER_IDX
 
@@ -670,28 +671,27 @@ static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size)
 {
 	int rc;
 	struct tool_mw *mw = &tc->mws[idx];
-	phys_addr_t base;
-	resource_size_t size, align, align_size;
+	resource_size_t size, align_addr, align_size;
 	char buf[16];
 
 	if (mw->peer)
 		return 0;
 
-	rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align,
-			      &align_size);
+	rc = ntb_mw_get_align(tc->ntb, PIDX, idx, &align_addr,
+				&align_size, &size);
 	if (rc)
 		return rc;
 
 	mw->size = min_t(resource_size_t, req_size, size);
-	mw->size = round_up(mw->size, align);
+	mw->size = round_up(mw->size, align_addr);
 	mw->size = round_up(mw->size, align_size);
 	mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size,
 				      &mw->peer_dma, GFP_KERNEL);
 
-	if (!mw->peer)
+	if (!mw->peer || !IS_ALIGNED(mw->peer_dma, align_addr))
 		return -ENOMEM;
 
-	rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size);
+	rc = ntb_mw_set_trans(tc->ntb, PIDX, idx, mw->peer_dma, mw->size);
 	if (rc)
 		goto err_free_dma;
 
@@ -718,7 +718,7 @@ static void tool_free_mw(struct tool_ctx *tc, int idx)
 	struct tool_mw *mw = &tc->mws[idx];
 
 	if (mw->peer) {
-		ntb_mw_clear_trans(tc->ntb, idx);
+		ntb_mw_clear_trans(tc->ntb, PIDX, idx);
 		dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
 				  mw->peer,
 				  mw->peer_dma);
@@ -744,8 +744,9 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 
 	phys_addr_t base;
 	resource_size_t mw_size;
-	resource_size_t align;
+	resource_size_t align_addr;
 	resource_size_t align_size;
+	resource_size_t max_size;
 
 	buf_size = min_t(size_t, size, 512);
 
@@ -753,8 +754,9 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 	if (!buf)
 		return -ENOMEM;
 
-	ntb_mw_get_range(mw->tc->ntb, mw->idx,
-			 &base, &mw_size, &align, &align_size);
+	ntb_mw_get_align(mw->tc->ntb, PIDX, mw->idx,
+			 &align_addr, &align_size, &max_size);
+	ntb_peer_mw_get_addr(mw->tc->ntb, mw->idx, &base, &mw_size);
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Peer MW %d Information:\n", mw->idx);
@@ -769,12 +771,16 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Alignment             \t%lld\n",
-			 (unsigned long long)align);
+			 (unsigned long long)align_addr);
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Size Alignment        \t%lld\n",
 			 (unsigned long long)align_size);
 
+	off += scnprintf(buf + off, buf_size - off,
+			 "Size Max              \t%lld\n",
+			 (unsigned long long)max_size);
+
 	off += scnprintf(buf + off, buf_size - off,
 			 "Ready                 \t%c\n",
 			 (mw->peer) ? 'Y' : 'N');
@@ -829,8 +835,7 @@ static int tool_init_mw(struct tool_ctx *tc, int idx)
 	phys_addr_t base;
 	int rc;
 
-	rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size,
-			      NULL, NULL);
+	rc = ntb_peer_mw_get_addr(tc->ntb, idx, &base, &mw->win_size);
 	if (rc)
 		return rc;
 
@@ -915,6 +920,12 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	int rc;
 	int i;
 
+	if (!ntb->ops->mw_set_trans) {
+		dev_dbg(&ntb->dev, "need inbound MW based NTB API\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
 	if (ntb_db_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
 
@@ -933,7 +944,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	tc->ntb = ntb;
 	init_waitqueue_head(&tc->link_wq);
 
-	tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS);
+	tc->mw_count = min(ntb_mw_count(tc->ntb, PIDX), MAX_MWS);
 	for (i = 0; i < tc->mw_count; i++) {
 		rc = tool_init_mw(tc, i);
 		if (rc)
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index b2b2924f5f43..2ea83f91a236 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -201,9 +203,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @link_enable:	See ntb_link_enable().
  * @link_disable:	See ntb_link_disable().
  * @mw_count:		See ntb_mw_count().
- * @mw_get_range:	See ntb_mw_get_range().
+ * @mw_get_align:	See ntb_mw_get_align().
  * @mw_set_trans:	See ntb_mw_set_trans().
  * @mw_clear_trans:	See ntb_mw_clear_trans().
+ * @peer_mw_count:	See ntb_peer_mw_count().
+ * @peer_mw_get_addr:	See ntb_peer_mw_get_addr().
+ * @peer_mw_set_trans:	See ntb_peer_mw_set_trans().
+ * @peer_mw_clear_trans:See ntb_peer_mw_clear_trans().
  * @db_is_unsafe:	See ntb_db_is_unsafe().
  * @db_valid_mask:	See ntb_db_valid_mask().
  * @db_vector_count:	See ntb_db_vector_count().
@@ -241,13 +247,20 @@ struct ntb_dev_ops {
 			   enum ntb_speed max_speed, enum ntb_width max_width);
 	int (*link_disable)(struct ntb_dev *ntb);
 
-	int (*mw_count)(struct ntb_dev *ntb);
-	int (*mw_get_range)(struct ntb_dev *ntb, int idx,
-			    phys_addr_t *base, resource_size_t *size,
-			resource_size_t *align, resource_size_t *align_size);
-	int (*mw_set_trans)(struct ntb_dev *ntb, int idx,
+	int (*mw_count)(struct ntb_dev *ntb, int pidx);
+	int (*mw_get_align)(struct ntb_dev *ntb, int pidx, int widx,
+			    resource_size_t *addr_align,
+			    resource_size_t *size_align,
+			    resource_size_t *size_max);
+	int (*mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
 			    dma_addr_t addr, resource_size_t size);
-	int (*mw_clear_trans)(struct ntb_dev *ntb, int idx);
+	int (*mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
+	int (*peer_mw_count)(struct ntb_dev *ntb);
+	int (*peer_mw_get_addr)(struct ntb_dev *ntb, int widx,
+				phys_addr_t *base, resource_size_t *size);
+	int (*peer_mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
+				 u64 addr, resource_size_t size);
+	int (*peer_mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
 
 	int (*db_is_unsafe)(struct ntb_dev *ntb);
 	u64 (*db_valid_mask)(struct ntb_dev *ntb);
@@ -295,9 +308,13 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		ops->link_enable			&&
 		ops->link_disable			&&
 		ops->mw_count				&&
-		ops->mw_get_range			&&
-		ops->mw_set_trans			&&
+		ops->mw_get_align			&&
+		(ops->mw_set_trans			||
+		 ops->peer_mw_set_trans)		&&
 		/* ops->mw_clear_trans			&& */
+		ops->peer_mw_count			&&
+		ops->peer_mw_get_addr			&&
+		/* ops->peer_mw_clear_trans		&& */
 
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
@@ -655,79 +672,180 @@ static inline int ntb_link_disable(struct ntb_dev *ntb)
 }
 
 /**
- * ntb_mw_count() - get the number of memory windows
+ * ntb_mw_count() - get the number of inbound memory windows, which could
+ *                  be created for a specified peer device
  * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
  *
  * Hardware and topology may support a different number of memory windows.
+ * Moreover different peer devices can support different number of memory
+ * windows. Simply speaking this method returns the number of possible inbound
+ * memory windows to share with specified peer device.
  *
  * Return: the number of memory windows.
  */
-static inline int ntb_mw_count(struct ntb_dev *ntb)
+static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
-	return ntb->ops->mw_count(ntb);
+	return ntb->ops->mw_count(ntb, pidx);
 }
 
 /**
- * ntb_mw_get_range() - get the range of a memory window
+ * ntb_mw_get_align() - get the restriction parameters of inbound memory window
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
- * @base:	OUT - the base address for mapping the memory window
- * @size:	OUT - the size for mapping the memory window
- * @align:	OUT - the base alignment for translating the memory window
- * @align_size:	OUT - the size alignment for translating the memory window
- *
- * Get the range of a memory window.  NULL may be given for any output
- * parameter if the value is not needed.  The base and size may be used for
- * mapping the memory window, to access the peer memory.  The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
- *
- * Return: Zero on success, otherwise an error number.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr_align:	OUT - the base alignment for translating the memory window
+ * @size_align:	OUT - the size alignment for translating the memory window
+ * @size_max:	OUT - the maximum size of the memory window
+ *
+ * Get the alignments of an inbound memory window with specified index.
+ * NULL may be given for any output parameter if the value is not needed.
+ * The alignment and size parameters may be used for allocation of proper
+ * shared memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
  */
-static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				   phys_addr_t *base, resource_size_t *size,
-		resource_size_t *align, resource_size_t *align_size)
+static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+				   resource_size_t *addr_align,
+				   resource_size_t *size_align,
+				   resource_size_t *size_max)
 {
-	return ntb->ops->mw_get_range(ntb, idx, base, size,
-			align, align_size);
+	return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align,
+				      size_max);
 }
 
 /**
- * ntb_mw_set_trans() - set the translation of a memory window
+ * ntb_mw_set_trans() - set the translation of an inbound memory window
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
- * @addr:	The dma address local memory to expose to the peer.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of local memory to expose to the peer.
  * @size:	The size of the local memory to expose to the peer.
  *
  * Set the translation of a memory window.  The peer may access local memory
  * through the window starting at the address, up to the size.  The address
- * must be aligned to the alignment specified by ntb_mw_get_range().  The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().
+ * and size must be aligned in compliance with restrictions of
+ * ntb_mw_get_align(). The region size should not exceed the size_max parameter
+ * of that method.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
 				   dma_addr_t addr, resource_size_t size)
 {
-	return ntb->ops->mw_set_trans(ntb, idx, addr, size);
+	if (!ntb->ops->mw_set_trans)
+		return 0;
+
+	return ntb->ops->mw_set_trans(ntb, pidx, widx, addr, size);
 }
 
 /**
- * ntb_mw_clear_trans() - clear the translation of a memory window
+ * ntb_mw_clear_trans() - clear the translation address of an inbound memory
+ *                        window
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
  *
- * Clear the translation of a memory window.  The peer may no longer access
- * local memory through the window.
+ * Clear the translation of an inbound memory window.  The peer may no longer
+ * access local memory through the window.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
+static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int pidx, int widx)
 {
 	if (!ntb->ops->mw_clear_trans)
-		return ntb->ops->mw_set_trans(ntb, idx, 0, 0);
+		return ntb_mw_set_trans(ntb, pidx, widx, 0, 0);
+
+	return ntb->ops->mw_clear_trans(ntb, pidx, widx);
+}
+
+/**
+ * ntb_peer_mw_count() - get the number of outbound memory windows, which could
+ *                       be mapped to access a shared memory
+ * @ntb:	NTB device context.
+ *
+ * Hardware and topology may support a different number of memory windows.
+ * This method returns the number of outbound memory windows supported by
+ * local device.
+ *
+ * Return: the number of memory windows.
+ */
+static inline int ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	return ntb->ops->peer_mw_count(ntb);
+}
+
+/**
+ * ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ * @ntb:	NTB device context.
+ * @widx:	Memory window index (within ntb_peer_mw_count() return value).
+ * @base:	OUT - the base address of mapping region.
+ * @size:	OUT - the size of mapping region.
+ *
+ * Get base and size of memory region to map.  NULL may be given for any output
+ * parameter if the value is not needed.  The base and size may be used for
+ * mapping the memory window, to access the peer memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+				      phys_addr_t *base, resource_size_t *size)
+{
+	return ntb->ops->peer_mw_get_addr(ntb, widx, base, size);
+}
+
+/**
+ * ntb_peer_mw_set_trans() - set a translation address of a memory window
+ *                           retrieved from a peer device
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device the translation address received from.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of the shared memory to access.
+ * @size:	The size of the shared memory to access.
+ *
+ * Set the translation of an outbound memory window.  The local device may
+ * access shared memory allocated by a peer device sent the address.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface, so a translation address can be only set on the side,
+ * where shared memory (inbound memory windows) is allocated.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+					u64 addr, resource_size_t size)
+{
+	if (!ntb->ops->peer_mw_set_trans)
+		return 0;
+
+	return ntb->ops->peer_mw_set_trans(ntb, pidx, widx, addr, size);
+}
+
+/**
+ * ntb_peer_mw_clear_trans() - clear the translation address of an outbound
+ *                             memory window
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ *
+ * Clear the translation of a outbound memory window.  The local device may no
+ * longer access a shared memory through the window.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+					  int widx)
+{
+	if (!ntb->ops->peer_mw_clear_trans)
+		return ntb_peer_mw_set_trans(ntb, pidx, widx, 0, 0);
 
-	return ntb->ops->mw_clear_trans(ntb, idx);
+	return ntb->ops->peer_mw_clear_trans(ntb, pidx, widx);
 }
 
 /**
-- 
cgit v1.2.3


From d67288a39584daad11edee9b03d53264ba147453 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 11 Jan 2017 03:13:20 +0300
Subject: NTB: Alter Scratchpads API to support multi-ports devices

Even though there is no any real NTB hardware, which would have both more
than two ports and Scratchpad registers, it is logically correct to have
Scratchpad API accepting a peer port index as well. Intel/AMD drivers utilize
Primary and Secondary topology to split Scratchpad between connected root
devices. Since port-index API introduced, Intel/AMD NTB hardware drivers can
use device port to determine which Scratchpad registers actually belong to
local and peer devices. The same approach can be used if some potential
hardware in future will be multi-port and have some set of Scratchpads.
Here are the brief of changes in the API:
 ntb_spad_count() - return number of Scratchpads per each port
 ntb_peer_spad_addr(pidx, sidx) - address of Scratchpad register of the
peer device with pidx-index
 ntb_peer_spad_read(pidx, sidx) - read specified Scratchpad register of the
peer with pidx-index
 ntb_peer_spad_write(pidx, sidx) - write data to Scratchpad register of the
peer with pidx-index

Since there is hardware which doesn't support Scratchpad registers, the
corresponding API methods are now made optional.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c     | 14 +++----
 drivers/ntb/hw/intel/ntb_hw_intel.c | 14 +++----
 drivers/ntb/ntb_transport.c         | 15 ++++----
 drivers/ntb/test/ntb_perf.c         |  6 +--
 drivers/ntb/test/ntb_pingpong.c     |  8 +++-
 drivers/ntb/test/ntb_tool.c         | 21 +++++++++--
 include/linux/ntb.h                 | 73 +++++++++++++++++++++++--------------
 7 files changed, 94 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index bcefe1df9ce3..891beb9f26c2 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -432,30 +432,30 @@ static int amd_ntb_spad_write(struct ntb_dev *ntb,
 	return 0;
 }
 
-static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	void __iomem *mmio = ndev->self_mmio;
 	u32 offset;
 
-	if (idx < 0 || idx >= ndev->spad_count)
+	if (sidx < 0 || sidx >= ndev->spad_count)
 		return -EINVAL;
 
-	offset = ndev->peer_spad + (idx << 2);
+	offset = ndev->peer_spad + (sidx << 2);
 	return readl(mmio + AMD_SPAD_OFFSET + offset);
 }
 
-static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
-				   int idx, u32 val)
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				   int sidx, u32 val)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	void __iomem *mmio = ndev->self_mmio;
 	u32 offset;
 
-	if (idx < 0 || idx >= ndev->spad_count)
+	if (sidx < 0 || sidx >= ndev->spad_count)
 		return -EINVAL;
 
-	offset = ndev->peer_spad + (idx << 2);
+	offset = ndev->peer_spad + (sidx << 2);
 	writel(val, mmio + AMD_SPAD_OFFSET + offset);
 
 	return 0;
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index a9b4ed4d7b52..e00aa39a4544 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -1409,30 +1409,30 @@ static int intel_ntb_spad_write(struct ntb_dev *ntb,
 			       ndev->self_reg->spad);
 }
 
-static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
 				    phys_addr_t *spad_addr)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_addr(ndev, idx, spad_addr, ndev->peer_addr,
+	return ndev_spad_addr(ndev, sidx, spad_addr, ndev->peer_addr,
 			      ndev->peer_reg->spad);
 }
 
-static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_read(ndev, idx,
+	return ndev_spad_read(ndev, sidx,
 			      ndev->peer_mmio +
 			      ndev->peer_reg->spad);
 }
 
-static int intel_ntb_peer_spad_write(struct ntb_dev *ntb,
-				     int idx, u32 val)
+static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				     int sidx, u32 val)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_write(ndev, idx, val,
+	return ndev_spad_write(ndev, sidx, val,
 			       ndev->peer_mmio +
 			       ndev->peer_reg->spad);
 }
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 771b469cebf0..9a03c5871efe 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -862,17 +862,17 @@ static void ntb_transport_link_work(struct work_struct *work)
 			size = max_mw_size;
 
 		spad = MW0_SZ_HIGH + (i * 2);
-		ntb_peer_spad_write(ndev, spad, upper_32_bits(size));
+		ntb_peer_spad_write(ndev, PIDX, spad, upper_32_bits(size));
 
 		spad = MW0_SZ_LOW + (i * 2);
-		ntb_peer_spad_write(ndev, spad, lower_32_bits(size));
+		ntb_peer_spad_write(ndev, PIDX, spad, lower_32_bits(size));
 	}
 
-	ntb_peer_spad_write(ndev, NUM_MWS, nt->mw_count);
+	ntb_peer_spad_write(ndev, PIDX, NUM_MWS, nt->mw_count);
 
-	ntb_peer_spad_write(ndev, NUM_QPS, nt->qp_count);
+	ntb_peer_spad_write(ndev, PIDX, NUM_QPS, nt->qp_count);
 
-	ntb_peer_spad_write(ndev, VERSION, NTB_TRANSPORT_VERSION);
+	ntb_peer_spad_write(ndev, PIDX, VERSION, NTB_TRANSPORT_VERSION);
 
 	/* Query the remote side for its info */
 	val = ntb_spad_read(ndev, VERSION);
@@ -948,7 +948,7 @@ static void ntb_qp_link_work(struct work_struct *work)
 
 	val = ntb_spad_read(nt->ndev, QP_LINKS);
 
-	ntb_peer_spad_write(nt->ndev, QP_LINKS, val | BIT(qp->qp_num));
+	ntb_peer_spad_write(nt->ndev, PIDX, QP_LINKS, val | BIT(qp->qp_num));
 
 	/* query remote spad for qp ready bits */
 	dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val);
@@ -2108,8 +2108,7 @@ void ntb_transport_link_down(struct ntb_transport_qp *qp)
 
 	val = ntb_spad_read(qp->ndev, QP_LINKS);
 
-	ntb_peer_spad_write(qp->ndev, QP_LINKS,
-			    val & ~BIT(qp->qp_num));
+	ntb_peer_spad_write(qp->ndev, PIDX, QP_LINKS, val & ~BIT(qp->qp_num));
 
 	if (qp->link_is_up)
 		ntb_send_link_down(qp);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 7f89cceaf350..42756a98a728 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -518,9 +518,9 @@ static void perf_link_work(struct work_struct *work)
 	if (max_mw_size && size > max_mw_size)
 		size = max_mw_size;
 
-	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
-	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
-	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);
 
 	/* now read what peer wrote */
 	val = ntb_spad_read(ndev, VERSION);
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 12f8b40cb11a..938a18bcfc3f 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -138,7 +138,7 @@ static void pp_ping(unsigned long ctx)
 			"Ping bits %#llx read %#x write %#x\n",
 			db_bits, spad_rd, spad_wr);
 
-		ntb_peer_spad_write(pp->ntb, 0, spad_wr);
+		ntb_peer_spad_write(pp->ntb, PIDX, 0, spad_wr);
 		ntb_peer_db_set(pp->ntb, db_bits);
 		ntb_db_clear_mask(pp->ntb, db_mask);
 
@@ -225,6 +225,12 @@ static int pp_probe(struct ntb_client *client,
 		}
 	}
 
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_pp;
+	}
+
 	if (ntb_spad_is_unsafe(ntb)) {
 		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
 		if (!unsafe) {
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index cb692473d457..f002bf48a08d 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -462,13 +462,22 @@ static TOOL_FOPS_RDWR(tool_spad_fops,
 		      tool_spad_read,
 		      tool_spad_write);
 
+static u32 ntb_tool_peer_spad_read(struct ntb_dev *ntb, int sidx)
+{
+	return ntb_peer_spad_read(ntb, PIDX, sidx);
+}
+
 static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf,
 				   size_t size, loff_t *offp)
 {
 	struct tool_ctx *tc = filep->private_data;
 
-	return tool_spadfn_read(tc, ubuf, size, offp,
-				tc->ntb->ops->peer_spad_read);
+	return tool_spadfn_read(tc, ubuf, size, offp, ntb_tool_peer_spad_read);
+}
+
+static int ntb_tool_peer_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
+{
+	return ntb_peer_spad_write(ntb, PIDX, sidx, val);
 }
 
 static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
@@ -477,7 +486,7 @@ static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
 	struct tool_ctx *tc = filep->private_data;
 
 	return tool_spadfn_write(tc, ubuf, size, offp,
-				 tc->ntb->ops->peer_spad_write);
+				 ntb_tool_peer_spad_write);
 }
 
 static TOOL_FOPS_RDWR(tool_peer_spad_fops,
@@ -926,6 +935,12 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 		goto err_tc;
 	}
 
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
 	if (ntb_db_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
 
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 2ea83f91a236..4e3cd56af732 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -288,13 +288,14 @@ struct ntb_dev_ops {
 	int (*spad_is_unsafe)(struct ntb_dev *ntb);
 	int (*spad_count)(struct ntb_dev *ntb);
 
-	u32 (*spad_read)(struct ntb_dev *ntb, int idx);
-	int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+	u32 (*spad_read)(struct ntb_dev *ntb, int sidx);
+	int (*spad_write)(struct ntb_dev *ntb, int sidx, u32 val);
 
-	int (*peer_spad_addr)(struct ntb_dev *ntb, int idx,
+	int (*peer_spad_addr)(struct ntb_dev *ntb, int pidx, int sidx,
 			      phys_addr_t *spad_addr);
-	u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx);
-	int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+	u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx);
+	int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx,
+			       u32 val);
 };
 
 static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
@@ -335,13 +336,12 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* ops->peer_db_read_mask		&& */
 		/* ops->peer_db_set_mask		&& */
 		/* ops->peer_db_clear_mask		&& */
-		/* ops->spad_is_unsafe			&& */
-		ops->spad_count				&&
-		ops->spad_read				&&
-		ops->spad_write				&&
-		/* ops->peer_spad_addr			&& */
-		/* ops->peer_spad_read			&& */
-		ops->peer_spad_write			&&
+		/* !ops->spad_is_unsafe == !ops->spad_count	&& */
+		!ops->spad_read == !ops->spad_count		&&
+		!ops->spad_write == !ops->spad_count		&&
+		/* !ops->peer_spad_addr == !ops->spad_count	&& */
+		/* !ops->peer_spad_read == !ops->spad_count	&& */
+		!ops->peer_spad_write == !ops->spad_count	&&
 		1;
 }
 
@@ -1176,47 +1176,58 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb)
  * @ntb:	NTB device context.
  *
  * Hardware and topology may support a different number of scratchpads.
+ * Although it must be the same for all ports per NTB device.
  *
  * Return: the number of scratchpads.
  */
 static inline int ntb_spad_count(struct ntb_dev *ntb)
 {
+	if (!ntb->ops->spad_count)
+		return 0;
+
 	return ntb->ops->spad_count(ntb);
 }
 
 /**
  * ntb_spad_read() - read the local scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @sidx:	Scratchpad index.
  *
  * Read the local scratchpad register, and return the value.
  *
  * Return: The value of the local scratchpad register.
  */
-static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_spad_read(struct ntb_dev *ntb, int sidx)
 {
-	return ntb->ops->spad_read(ntb, idx);
+	if (!ntb->ops->spad_read)
+		return ~(u32)0;
+
+	return ntb->ops->spad_read(ntb, sidx);
 }
 
 /**
  * ntb_spad_write() - write the local scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @sidx:	Scratchpad index.
  * @val:	Scratchpad value.
  *
  * Write the value to the local scratchpad register.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
 {
-	return ntb->ops->spad_write(ntb, idx, val);
+	if (!ntb->ops->spad_write)
+		return -EINVAL;
+
+	return ntb->ops->spad_write(ntb, sidx, val);
 }
 
 /**
  * ntb_peer_spad_addr() - address of the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  * @spad_addr:	OUT - The address of the peer scratchpad register.
  *
  * Return the address of the peer doorbell register.  This may be used, for
@@ -1224,45 +1235,51 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
 				     phys_addr_t *spad_addr)
 {
 	if (!ntb->ops->peer_spad_addr)
 		return -EINVAL;
 
-	return ntb->ops->peer_spad_addr(ntb, idx, spad_addr);
+	return ntb->ops->peer_spad_addr(ntb, pidx, sidx, spad_addr);
 }
 
 /**
  * ntb_peer_spad_read() - read the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  *
  * Read the peer scratchpad register, and return the value.
  *
  * Return: The value of the local scratchpad register.
  */
-static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	if (!ntb->ops->peer_spad_read)
-		return 0;
+		return ~(u32)0;
 
-	return ntb->ops->peer_spad_read(ntb, idx);
+	return ntb->ops->peer_spad_read(ntb, pidx, sidx);
 }
 
 /**
  * ntb_peer_spad_write() - write the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  * @val:	Scratchpad value.
  *
  * Write the value to the peer scratchpad register.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
+				      u32 val)
 {
-	return ntb->ops->peer_spad_write(ntb, idx, val);
+	if (!ntb->ops->peer_spad_write)
+		return -EINVAL;
+
+	return ntb->ops->peer_spad_write(ntb, pidx, sidx, val);
 }
 
 #endif
-- 
cgit v1.2.3


From bc3e49adc279c5505d6df8dd8c7fca45d6d3d21a Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 20 Dec 2016 12:48:20 +0300
Subject: NTB: Add Messaging NTB API

Some IDT NTB-capable PCIe-switches have message registers to communicate with
peer devices. This patch adds new NTB API callback methods, which can be used
to utilize these registers functionality:
 ntb_msg_count(); - get number of message registers
 ntb_msg_inbits(); - get bitfield of inbound message registers status
 ntb_msg_outbits(); - get bitfield of outbound message registers status
 ntb_msg_read_sts(); - read the inbound and outbound message registers status
 ntb_msg_clear_sts(); - clear status bits of message registers
 ntb_msg_set_mask(); - mask interrupts raised by status bits of message
registers.
 ntb_msg_clear_mask(); - clear interrupts mask bits of message registers
 ntb_msg_read(midx, *pidx); - read message register with specified index,
additionally getting peer port index which data received from
 ntb_msg_write(midx, pidx); - write data to the specified message register
sending it to the passed peer device connected over a pidx port
 ntb_msg_event(); - notify driver context of a new message event

Of course there is hardware which doesn't support Message registers, so
this API is made optional.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/ntb.c   |  13 ++++
 include/linux/ntb.h | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 2551bb2ff4a4..03b80d89b980 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c
@@ -193,6 +193,19 @@ void ntb_db_event(struct ntb_dev *ntb, int vector)
 }
 EXPORT_SYMBOL(ntb_db_event);
 
+void ntb_msg_event(struct ntb_dev *ntb)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		if (ntb->ctx_ops && ntb->ctx_ops->msg_event)
+			ntb->ctx_ops->msg_event(ntb->ctx);
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_msg_event);
+
 int ntb_default_port_number(struct ntb_dev *ntb)
 {
 	switch (ntb->topo) {
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 4e3cd56af732..d59688f91618 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -178,10 +178,12 @@ static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops)
  * struct ntb_ctx_ops - ntb driver context operations
  * @link_event:		See ntb_link_event().
  * @db_event:		See ntb_db_event().
+ * @msg_event:		See ntb_msg_event().
  */
 struct ntb_ctx_ops {
 	void (*link_event)(void *ctx);
 	void (*db_event)(void *ctx, int db_vector);
+	void (*msg_event)(void *ctx);
 };
 
 static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
@@ -190,6 +192,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 	return
 		/* ops->link_event		&& */
 		/* ops->db_event		&& */
+		/* ops->msg_event		&& */
 		1;
 }
 
@@ -234,6 +237,15 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_addr:	See ntb_peer_spad_addr().
  * @peer_spad_read:	See ntb_peer_spad_read().
  * @peer_spad_write:	See ntb_peer_spad_write().
+ * @msg_count:		See ntb_msg_count().
+ * @msg_inbits:		See ntb_msg_inbits().
+ * @msg_outbits:	See ntb_msg_outbits().
+ * @msg_read_sts:	See ntb_msg_read_sts().
+ * @msg_clear_sts:	See ntb_msg_clear_sts().
+ * @msg_set_mask:	See ntb_msg_set_mask().
+ * @msg_clear_mask:	See ntb_msg_clear_mask().
+ * @msg_read:		See ntb_msg_read().
+ * @msg_write:		See ntb_msg_write().
  */
 struct ntb_dev_ops {
 	int (*port_number)(struct ntb_dev *ntb);
@@ -296,6 +308,16 @@ struct ntb_dev_ops {
 	u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx);
 	int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx,
 			       u32 val);
+
+	int (*msg_count)(struct ntb_dev *ntb);
+	u64 (*msg_inbits)(struct ntb_dev *ntb);
+	u64 (*msg_outbits)(struct ntb_dev *ntb);
+	u64 (*msg_read_sts)(struct ntb_dev *ntb);
+	int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits);
+	int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits);
+	int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits);
+	int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg);
+	int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg);
 };
 
 static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
@@ -342,6 +364,15 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* !ops->peer_spad_addr == !ops->spad_count	&& */
 		/* !ops->peer_spad_read == !ops->spad_count	&& */
 		!ops->peer_spad_write == !ops->spad_count	&&
+
+		!ops->msg_inbits == !ops->msg_count		&&
+		!ops->msg_outbits == !ops->msg_count		&&
+		!ops->msg_read_sts == !ops->msg_count		&&
+		!ops->msg_clear_sts == !ops->msg_count		&&
+		/* !ops->msg_set_mask == !ops->msg_count	&& */
+		/* !ops->msg_clear_mask == !ops->msg_count	&& */
+		!ops->msg_read == !ops->msg_count		&&
+		!ops->msg_write == !ops->msg_count		&&
 		1;
 }
 
@@ -484,6 +515,18 @@ void ntb_link_event(struct ntb_dev *ntb);
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
+/**
+ * ntb_msg_event() - notify driver context of a message event
+ * @ntb:	NTB device context.
+ *
+ * Notify the driver context of a message event.  If hardware supports
+ * message registers, this event indicates, that a new message arrived in
+ * some incoming message register or last sent message couldn't be delivered.
+ * The events can be masked/unmasked by the methods ntb_msg_set_mask() and
+ * ntb_msg_clear_mask().
+ */
+void ntb_msg_event(struct ntb_dev *ntb);
+
 /**
  * ntb_default_port_number() - get the default local port number
  * @ntb:	NTB device context.
@@ -1282,4 +1325,166 @@ static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
 	return ntb->ops->peer_spad_write(ntb, pidx, sidx, val);
 }
 
+/**
+ * ntb_msg_count() - get the number of message registers
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support a different number of message registers.
+ *
+ * Return: the number of message registers.
+ */
+static inline int ntb_msg_count(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_count)
+		return 0;
+
+	return ntb->ops->msg_count(ntb);
+}
+
+/**
+ * ntb_msg_inbits() - get a bitfield of inbound message registers status
+ * @ntb:	NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which related
+ * to inbound message registers.
+ *
+ * Return: bitfield of inbound message registers.
+ */
+static inline u64 ntb_msg_inbits(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_inbits)
+		return 0;
+
+	return ntb->ops->msg_inbits(ntb);
+}
+
+/**
+ * ntb_msg_outbits() - get a bitfield of outbound message registers status
+ * @ntb:	NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which related
+ * to outbound message registers.
+ *
+ * Return: bitfield of outbound message registers.
+ */
+static inline u64 ntb_msg_outbits(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_outbits)
+		return 0;
+
+	return ntb->ops->msg_outbits(ntb);
+}
+
+/**
+ * ntb_msg_read_sts() - read the message registers status
+ * @ntb:	NTB device context.
+ *
+ * Read the status of message register. Inbound and outbound message registers
+ * related bits can be filtered by masks retrieved from ntb_msg_inbits() and
+ * ntb_msg_outbits().
+ *
+ * Return: status bits of message registers
+ */
+static inline u64 ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_read_sts)
+		return 0;
+
+	return ntb->ops->msg_read_sts(ntb);
+}
+
+/**
+ * ntb_msg_clear_sts() - clear status bits of message registers
+ * @ntb:	NTB device context.
+ * @sts_bits:	Status bits to clear.
+ *
+ * Clear bits in the status register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+	if (!ntb->ops->msg_clear_sts)
+		return -EINVAL;
+
+	return ntb->ops->msg_clear_sts(ntb, sts_bits);
+}
+
+/**
+ * ntb_msg_set_mask() - set mask of message register status bits
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Mask the message registers status bits from raising the message event.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	if (!ntb->ops->msg_set_mask)
+		return -EINVAL;
+
+	return ntb->ops->msg_set_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_clear_mask() - clear message registers mask
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits to clear.
+ *
+ * Clear bits in the message events mask register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	if (!ntb->ops->msg_clear_mask)
+		return -EINVAL;
+
+	return ntb->ops->msg_clear_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_read() - read message register with specified index
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	OUT - Port index of peer device a message retrieved from
+ * @msg:	OUT - Data
+ *
+ * Read data from the specified message register. Source port index of a
+ * message is retrieved as well.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx,
+			       u32 *msg)
+{
+	if (!ntb->ops->msg_read)
+		return -EINVAL;
+
+	return ntb->ops->msg_read(ntb, midx, pidx, msg);
+}
+
+/**
+ * ntb_msg_write() - write data to the specified message register
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	Port index of peer device a message being sent to
+ * @msg:	Data to send
+ *
+ * Send data to a specified peer device using the defined message register.
+ * Message event can be raised if the midx registers isn't empty while
+ * calling this method and the corresponding interrupt isn't masked.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx,
+				u32 msg)
+{
+	if (!ntb->ops->msg_write)
+		return -EINVAL;
+
+	return ntb->ops->msg_write(ntb, midx, pidx, msg);
+}
+
 #endif
-- 
cgit v1.2.3


From 85dce3aaae98a8440f4a1a2404bcbab890574b46 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:20 +0300
Subject: NTB: Add PCIe Gen4 link speed

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index d59688f91618..a6f569727845 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -108,6 +108,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo)
  * @NTB_SPEED_GEN1:	Link is trained to gen1 speed.
  * @NTB_SPEED_GEN2:	Link is trained to gen2 speed.
  * @NTB_SPEED_GEN3:	Link is trained to gen3 speed.
+ * @NTB_SPEED_GEN4:	Link is trained to gen4 speed.
  */
 enum ntb_speed {
 	NTB_SPEED_AUTO = -1,
@@ -115,6 +116,7 @@ enum ntb_speed {
 	NTB_SPEED_GEN1 = 1,
 	NTB_SPEED_GEN2 = 2,
 	NTB_SPEED_GEN3 = 3,
+	NTB_SPEED_GEN4 = 4
 };
 
 /**
-- 
cgit v1.2.3


From 3c69f5d6731c43a5b6b9e78b385948e8d76460be Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 20 Dec 2016 12:50:09 +0300
Subject: NTB: Add ntb.h comments

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index a6f569727845..609e232c00da 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -326,12 +326,17 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
+		/* Port operations are required for multiport devices */
 		!ops->peer_port_count == !ops->port_number	&&
 		!ops->peer_port_number == !ops->port_number	&&
 		!ops->peer_port_idx == !ops->port_number	&&
+
+		/* Link operations are required */
 		ops->link_is_up				&&
 		ops->link_enable			&&
 		ops->link_disable			&&
+
+		/* One or both MW interfaces should be developed */
 		ops->mw_count				&&
 		ops->mw_get_align			&&
 		(ops->mw_set_trans			||
@@ -341,12 +346,11 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		ops->peer_mw_get_addr			&&
 		/* ops->peer_mw_clear_trans		&& */
 
+		/* Doorbell operations are mostly required */
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
-
 		/* both set, or both unset */
-		(!ops->db_vector_count == !ops->db_vector_mask) &&
-
+		(!ops->db_vector_count == !ops->db_vector_mask)	&&
 		ops->db_read				&&
 		/* ops->db_set				&& */
 		ops->db_clear				&&
@@ -360,6 +364,8 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* ops->peer_db_read_mask		&& */
 		/* ops->peer_db_set_mask		&& */
 		/* ops->peer_db_clear_mask		&& */
+
+		/* Scrachpads interface is optional */
 		/* !ops->spad_is_unsafe == !ops->spad_count	&& */
 		!ops->spad_read == !ops->spad_count		&&
 		!ops->spad_write == !ops->spad_count		&&
@@ -367,6 +373,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* !ops->peer_spad_read == !ops->spad_count	&& */
 		!ops->peer_spad_write == !ops->spad_count	&&
 
+		/* Messaging interface is optional */
 		!ops->msg_inbits == !ops->msg_count		&&
 		!ops->msg_outbits == !ops->msg_count		&&
 		!ops->msg_read_sts == !ops->msg_count		&&
@@ -387,13 +394,12 @@ struct ntb_client {
 	struct device_driver		drv;
 	const struct ntb_client_ops	ops;
 };
-
 #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
 
 /**
  * struct ntb_device - ntb device
  * @dev:		Linux device object.
- * @pdev:		Pci device entry of the ntb.
+ * @pdev:		PCI device entry of the ntb.
  * @topo:		Detected topology of the ntb.
  * @ops:		See &ntb_dev_ops.
  * @ctx:		See &ntb_ctx_ops.
@@ -414,7 +420,6 @@ struct ntb_dev {
 	/* block unregister until device is fully released */
 	struct completion		released;
 };
-
 #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
 
 /**
@@ -511,7 +516,7 @@ void ntb_link_event(struct ntb_dev *ntb);
  * multiple interrupt vectors for doorbells, the vector number indicates which
  * vector received the interrupt.  The vector number is relative to the first
  * vector used for doorbells, starting at zero, and must be less than
- ** ntb_db_vector_count().  The driver may call ntb_db_read() to check which
+ * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
  * doorbell bits need service, and ntb_db_vector_mask() to determine which of
  * those bits are associated with the vector number.
  */
-- 
cgit v1.2.3


From c80081b9209713e0fe86d3def395a9fc66503c58 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Thu, 6 Jul 2017 14:29:04 +0200
Subject: genirq: Allow to pass the IRQF_TIMER flag with percpu irq request

The irq timings infrastructure tracks when interrupts occur in order to
statistically predict te next interrupt event.

There is no point to track timer interrupts and try to predict them because
the next expiration time is already known. This can be avoided via the
IRQF_TIMER flag which is passed by timer drivers in request_irq(). It marks
the interrupt as timer based which alloes to ignore these interrupts in the
timings code.

Per CPU interrupts which are requested via request_percpu_+irq() have no
flag argument, so marking per cpu timer interrupts is not possible and they
get tracked pointlessly.

Add __request_percpu_irq() as a variant of request_percpu_irq() with a
flags argument and make request_percpu_irq() an inline wrapper passing
flags = 0.

The flag parameter is restricted to IRQF_TIMER as all other IRQF_ flags
make no sense for per cpu interrupts.

The next step is to convert all existing users of request_percpu_irq() and
then remove the wrapper and the underscores.

[ tglx: Massaged changelog ]

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: peterz@infradead.org
Cc: nicolas.pitre@linaro.org
Cc: vincent.guittot@linaro.org
Cc: rafael@kernel.org
Link: http://lkml.kernel.org/r/1499344144-3964-1-git-send-email-daniel.lezcano@linaro.org
---
 include/linux/interrupt.h | 11 ++++++++++-
 kernel/irq/manage.c       | 15 ++++++++++-----
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 37f8e354f564..5ac6e238555e 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -152,8 +152,17 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler,
 			unsigned long flags, const char *name, void *dev_id);
 
 extern int __must_check
+__request_percpu_irq(unsigned int irq, irq_handler_t handler,
+		     unsigned long flags, const char *devname,
+		     void __percpu *percpu_dev_id);
+
+static inline int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
-		   const char *devname, void __percpu *percpu_dev_id);
+		   const char *devname, void __percpu *percpu_dev_id)
+{
+	return __request_percpu_irq(irq, handler, 0,
+				    devname, percpu_dev_id);
+}
 
 extern const void *free_irq(unsigned int, void *);
 extern void free_percpu_irq(unsigned int, void __percpu *);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 91e1f2390752..5624b2dd6b58 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1950,9 +1950,10 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
 }
 
 /**
- *	request_percpu_irq - allocate a percpu interrupt line
+ *	__request_percpu_irq - allocate a percpu interrupt line
  *	@irq: Interrupt line to allocate
  *	@handler: Function to be called when the IRQ occurs.
+ *	@flags: Interrupt type flags (IRQF_TIMER only)
  *	@devname: An ascii name for the claiming device
  *	@dev_id: A percpu cookie passed back to the handler function
  *
@@ -1965,8 +1966,9 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
  *	the handler gets called with the interrupted CPU's instance of
  *	that variable.
  */
-int request_percpu_irq(unsigned int irq, irq_handler_t handler,
-		       const char *devname, void __percpu *dev_id)
+int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
+			 unsigned long flags, const char *devname,
+			 void __percpu *dev_id)
 {
 	struct irqaction *action;
 	struct irq_desc *desc;
@@ -1980,12 +1982,15 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
 	    !irq_settings_is_per_cpu_devid(desc))
 		return -EINVAL;
 
+	if (flags && flags != IRQF_TIMER)
+		return -EINVAL;
+
 	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
 	if (!action)
 		return -ENOMEM;
 
 	action->handler = handler;
-	action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND;
+	action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND;
 	action->name = devname;
 	action->percpu_dev_id = dev_id;
 
@@ -2004,7 +2009,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(request_percpu_irq);
+EXPORT_SYMBOL_GPL(__request_percpu_irq);
 
 /**
  *	irq_get_irqchip_state - returns the irqchip state of a interrupt.
-- 
cgit v1.2.3


From 9a04dbcfb33b4012d0ce8c0282f1e3ca694675b1 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 6 Jul 2017 15:35:24 -0700
Subject: compiler, clang: always inline when CONFIG_OPTIMIZE_INLINING is
 disabled

The motivation for commit abb2ea7dfd82 ("compiler, clang: suppress
warning for unused static inline functions") was to suppress clang's
warnings about unused static inline functions.

For configs without CONFIG_OPTIMIZE_INLINING enabled, such as any non-x86
architecture, `inline' in the kernel implies that
__attribute__((always_inline)) is used.

Some code depends on that behavior, see
  https://lkml.org/lkml/2017/6/13/918:

  net/built-in.o: In function `__xchg_mb':
  arch/arm64/include/asm/cmpxchg.h:99: undefined reference to `__compiletime_assert_99'
  arch/arm64/include/asm/cmpxchg.h:99: undefined reference to `__compiletime_assert_99

The full fix would be to identify these breakages and annotate the
functions with __always_inline instead of `inline'.  But since we are
late in the 4.12-rc cycle, simply carry forward the forced inlining
behavior and work toward moving arm64, and other architectures, toward
CONFIG_OPTIMIZE_INLINING behavior.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1706261552200.1075@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Sodagudi Prasad <psodagud@codeaurora.org>
Tested-by: Sodagudi Prasad <psodagud@codeaurora.org>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h |  8 --------
 include/linux/compiler-gcc.h   | 18 +++++++++++-------
 2 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d614c5ea1b5e..de179993e039 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -15,11 +15,3 @@
  * with any version that can compile the kernel
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-
-/*
- * GCC does not warn about unused static inline functions for
- * -Wunused-function.  This turns out to avoid the need for complex #ifdef
- * directives.  Suppress the warning in clang as well.
- */
-#undef inline
-#define inline inline __attribute__((unused)) notrace
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7deaae3dc87d..cd4bbe8242bd 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,18 +66,22 @@
 
 /*
  * Force always-inline if the user requests it so via the .config,
- * or if gcc is too old:
+ * or if gcc is too old.
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well by using "unused"
+ * function attribute, which is redundant but not harmful for gcc.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline		inline		__attribute__((always_inline)) notrace
-#define __inline__	__inline__	__attribute__((always_inline)) notrace
-#define __inline	__inline	__attribute__((always_inline)) notrace
+#define inline inline		__attribute__((always_inline,unused)) notrace
+#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
+#define __inline __inline	__attribute__((always_inline,unused)) notrace
 #else
 /* A lot of inline functions can cause havoc with function tracing */
-#define inline		inline		notrace
-#define __inline__	__inline__	notrace
-#define __inline	__inline	notrace
+#define inline inline		__attribute__((unused)) notrace
+#define __inline__ __inline__	__attribute__((unused)) notrace
+#define __inline __inline	__attribute__((unused)) notrace
 #endif
 
 #define __always_inline	inline __attribute__((always_inline))
-- 
cgit v1.2.3


From 938f846492d6682584cbe4f3f19c4ebffec46311 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 Jul 2017 15:35:52 -0700
Subject: provide linux/set_memory.h

Currently code that wants to use set_memory_ro() etc, needs to include
asm/set_memory.h, which doesn't exist on all arches.  Some code knows it
only builds on arches which have the header, other code guards the
inclusion with an #ifdef, neither is ideal.

So create linux/set_memory.h.  This always exists, so users don't need
an #ifdef just to include the header.

When CONFIG_ARCH_HAS_SET_MEMORY=y it includes asm/set_memory.h,
otherwise it provides empty non-failing implementations.

Link: http://lkml.kernel.org/r/1498717781-29151-1-git-send-email-mpe@ellerman.id.au
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/set_memory.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/linux/set_memory.h

(limited to 'include/linux')

diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
new file mode 100644
index 000000000000..e5140648f638
--- /dev/null
+++ b/include/linux/set_memory.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2017, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ */
+#ifndef _LINUX_SET_MEMORY_H_
+#define _LINUX_SET_MEMORY_H_
+
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+#include <asm/set_memory.h>
+#else
+static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
+static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+#endif
+
+#endif /* _LINUX_SET_MEMORY_H_ */
-- 
cgit v1.2.3


From 820a0b24b261c650cb07ea0f60aea9191f658f25 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 Jul 2017 15:36:01 -0700
Subject: include/linux/filter.h: use linux/set_memory.h

This header always exists, so doesn't require an ifdef around its
inclusion.  When CONFIG_ARCH_HAS_SET_MEMORY=y it includes the asm
header, otherwise it provides empty versions of the set_memory_xx()
routines.

Link: http://lkml.kernel.org/r/1498717781-29151-4-git-send-email-mpe@ellerman.id.au
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/filter.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index f1fc9baa3509..bfef1e5734f8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -16,13 +16,10 @@
 #include <linux/sched.h>
 #include <linux/capability.h>
 #include <linux/cryptohash.h>
+#include <linux/set_memory.h>
 
 #include <net/sch_generic.h>
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-#include <asm/set_memory.h>
-#endif
-
 #include <uapi/linux/filter.h>
 #include <uapi/linux/bpf.h>
 
-- 
cgit v1.2.3


From d3111e6cce6001e71ddc4737d0d412c2300043a2 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Thu, 6 Jul 2017 15:36:28 -0700
Subject: mm/slub.c: pack red_left_pad with another int to save a word

Patch series "try to save some memory for kmem_cache in some cases", v2.

kmem_cache is a frequently used data in kernel.  During the code
reading, I found maybe we could save some space in some cases.

1. On 64bit arch, type int will occupy a word if it doesn't sit well.

2. cpu_slab->partial is just used when CONFIG_SLUB_CPU_PARTIAL is set

3. cpu_partial is just used when CONFIG_SLUB_CPU_PARTIAL is set, while
   just save some space on 32bit arch.

This patch (of 3):

On 64bit arch, struct is 8-bytes aligned, so int will occupy a word if
it doesn't sit well.

This patch pack red_left_pad with reserved to save 8 bytes for struct
kmem_cache on a 64bit arch.

Link: http://lkml.kernel.org/r/20170502144533.10729-2-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 93315d6b21a8..070ff84240e7 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -79,9 +79,9 @@ struct kmem_cache {
 	int inuse;		/* Offset to metadata */
 	int align;		/* Alignment */
 	int reserved;		/* Reserved bytes at the end of slabs */
+	int red_left_pad;	/* Left redzone padding size */
 	const char *name;	/* Name (only for display!) */
 	struct list_head list;	/* List of slab caches */
-	int red_left_pad;	/* Left redzone padding size */
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
 	struct work_struct kobj_remove_work;
-- 
cgit v1.2.3


From a93cf07bc3fb4e7bc924d33c387dabc85086ea38 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Thu, 6 Jul 2017 15:36:31 -0700
Subject: mm/slub.c: wrap cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL

cpu_slab's field partial is used when CONFIG_SLUB_CPU_PARTIAL is set,
which means we can save a pointer's space on each cpu for every slub
item.

This patch wraps cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL and wraps
its sysfs use too.

[akpm@linux-foundation.org: avoid strange 80-col tricks]
Link: http://lkml.kernel.org/r/20170502144533.10729-3-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 19 +++++++++++++++++++
 mm/slub.c                | 18 +++++++++++-------
 2 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 070ff84240e7..a3e9492fed02 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -41,12 +41,31 @@ struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to next available object */
 	unsigned long tid;	/* Globally unique transaction id */
 	struct page *page;	/* The slab from which we are allocating */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct page *partial;	/* Partially allocated frozen slabs */
+#endif
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
 };
 
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+#define slub_percpu_partial(c)		((c)->partial)
+
+#define slub_set_percpu_partial(c, p)		\
+({						\
+	slub_percpu_partial(c) = (p)->next;	\
+})
+
+#define slub_percpu_partial_read_once(c)     READ_ONCE(slub_percpu_partial(c))
+#else
+#define slub_percpu_partial(c)			NULL
+
+#define slub_set_percpu_partial(c, p)
+
+#define slub_percpu_partial_read_once(c)	NULL
+#endif // CONFIG_SLUB_CPU_PARTIAL
+
 /*
  * Word size structure that can be atomically updated or read and that
  * contains both the order and the number of objects that a slab of the
diff --git a/mm/slub.c b/mm/slub.c
index 7234e0e03bdc..48071c541275 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2303,7 +2303,7 @@ static bool has_cpu_slab(int cpu, void *info)
 	struct kmem_cache *s = info;
 	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
-	return c->page || c->partial;
+	return c->page || slub_percpu_partial(c);
 }
 
 static void flush_all(struct kmem_cache *s)
@@ -2565,9 +2565,9 @@ load_freelist:
 
 new_slab:
 
-	if (c->partial) {
-		page = c->page = c->partial;
-		c->partial = page->next;
+	if (slub_percpu_partial(c)) {
+		page = c->page = slub_percpu_partial(c);
+		slub_set_percpu_partial(c, page);
 		stat(s, CPU_PARTIAL_ALLOC);
 		goto redo;
 	}
@@ -4754,7 +4754,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 			total += x;
 			nodes[node] += x;
 
-			page = READ_ONCE(c->partial);
+			page = slub_percpu_partial_read_once(c);
 			if (page) {
 				node = page_to_nid(page);
 				if (flags & SO_TOTAL)
@@ -4982,7 +4982,9 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 	int len;
 
 	for_each_online_cpu(cpu) {
-		struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
+		struct page *page;
+
+		page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
 
 		if (page) {
 			pages += page->pages;
@@ -4994,7 +4996,9 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 
 #ifdef CONFIG_SMP
 	for_each_online_cpu(cpu) {
-		struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
+		struct page *page;
+
+		page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
 
 		if (page && len < PAGE_SIZE - 20)
 			len += sprintf(buf + len, " C%d=%d(%d)", cpu,
-- 
cgit v1.2.3


From e6d0e1dcf5f07fb04704b87ffab749589d29cb02 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Thu, 6 Jul 2017 15:36:34 -0700
Subject: mm/slub.c: wrap kmem_cache->cpu_partial in config
 CONFIG_SLUB_CPU_PARTIAL

kmem_cache->cpu_partial is just used when CONFIG_SLUB_CPU_PARTIAL is
set, so wrap it with config CONFIG_SLUB_CPU_PARTIAL will save some space
on 32bit arch.

This patch wraps kmem_cache->cpu_partial in config CONFIG_SLUB_CPU_PARTIAL
and wraps its sysfs too.

Link: http://lkml.kernel.org/r/20170502144533.10729-4-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 13 +++++++++
 mm/slub.c                | 69 ++++++++++++++++++++++++++----------------------
 2 files changed, 51 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a3e9492fed02..cc0faf3a90be 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -86,7 +86,9 @@ struct kmem_cache {
 	int size;		/* The size of an object including meta data */
 	int object_size;	/* The size of an object without meta data */
 	int offset;		/* Free pointer offset. */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 	int cpu_partial;	/* Number of per cpu partial objects to keep around */
+#endif
 	struct kmem_cache_order_objects oo;
 
 	/* Allocation and freeing of slabs */
@@ -131,6 +133,17 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+#define slub_cpu_partial(s)		((s)->cpu_partial)
+#define slub_set_cpu_partial(s, n)		\
+({						\
+	slub_cpu_partial(s) = (n);		\
+})
+#else
+#define slub_cpu_partial(s)		(0)
+#define slub_set_cpu_partial(s, n)
+#endif // CONFIG_SLUB_CPU_PARTIAL
+
 #ifdef CONFIG_SYSFS
 #define SLAB_SUPPORTS_SYSFS
 void sysfs_slab_release(struct kmem_cache *);
diff --git a/mm/slub.c b/mm/slub.c
index 48071c541275..388f66d1da5e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1829,7 +1829,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 			stat(s, CPU_PARTIAL_NODE);
 		}
 		if (!kmem_cache_has_cpu_partial(s)
-			|| available > s->cpu_partial / 2)
+			|| available > slub_cpu_partial(s) / 2)
 			break;
 
 	}
@@ -3404,6 +3404,39 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
 	s->min_partial = min;
 }
 
+static void set_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+	/*
+	 * cpu_partial determined the maximum number of objects kept in the
+	 * per cpu partial lists of a processor.
+	 *
+	 * Per cpu partial lists mainly contain slabs that just have one
+	 * object freed. If they are used for allocation then they can be
+	 * filled up again with minimal effort. The slab will never hit the
+	 * per node partial lists and therefore no locking will be required.
+	 *
+	 * This setting also determines
+	 *
+	 * A) The number of objects from per cpu partial slabs dumped to the
+	 *    per node list when we reach the limit.
+	 * B) The number of objects in cpu partial slabs to extract from the
+	 *    per node list when we run out of per cpu objects. We only fetch
+	 *    50% to keep some capacity around for frees.
+	 */
+	if (!kmem_cache_has_cpu_partial(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
+		s->cpu_partial = 2;
+	else if (s->size >= 1024)
+		s->cpu_partial = 6;
+	else if (s->size >= 256)
+		s->cpu_partial = 13;
+	else
+		s->cpu_partial = 30;
+#endif
+}
+
 /*
  * calculate_sizes() determines the order and the distribution of data within
  * a slab object.
@@ -3562,33 +3595,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
 	 */
 	set_min_partial(s, ilog2(s->size) / 2);
 
-	/*
-	 * cpu_partial determined the maximum number of objects kept in the
-	 * per cpu partial lists of a processor.
-	 *
-	 * Per cpu partial lists mainly contain slabs that just have one
-	 * object freed. If they are used for allocation then they can be
-	 * filled up again with minimal effort. The slab will never hit the
-	 * per node partial lists and therefore no locking will be required.
-	 *
-	 * This setting also determines
-	 *
-	 * A) The number of objects from per cpu partial slabs dumped to the
-	 *    per node list when we reach the limit.
-	 * B) The number of objects in cpu partial slabs to extract from the
-	 *    per node list when we run out of per cpu objects. We only fetch
-	 *    50% to keep some capacity around for frees.
-	 */
-	if (!kmem_cache_has_cpu_partial(s))
-		s->cpu_partial = 0;
-	else if (s->size >= PAGE_SIZE)
-		s->cpu_partial = 2;
-	else if (s->size >= 1024)
-		s->cpu_partial = 6;
-	else if (s->size >= 256)
-		s->cpu_partial = 13;
-	else
-		s->cpu_partial = 30;
+	set_cpu_partial(s);
 
 #ifdef CONFIG_NUMA
 	s->remote_node_defrag_ratio = 1000;
@@ -3975,7 +3982,7 @@ void __kmemcg_cache_deactivate(struct kmem_cache *s)
 	 * Disable empty slabs caching. Used to avoid pinning offline
 	 * memory cgroups by kmem pages that can be freed.
 	 */
-	s->cpu_partial = 0;
+	slub_set_cpu_partial(s, 0);
 	s->min_partial = 0;
 
 	/*
@@ -4915,7 +4922,7 @@ SLAB_ATTR(min_partial);
 
 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%u\n", s->cpu_partial);
+	return sprintf(buf, "%u\n", slub_cpu_partial(s));
 }
 
 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
@@ -4930,7 +4937,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	if (objects && !kmem_cache_has_cpu_partial(s))
 		return -EINVAL;
 
-	s->cpu_partial = objects;
+	slub_set_cpu_partial(s, objects);
 	flush_all(s);
 	return length;
 }
-- 
cgit v1.2.3


From c4e1be9ec1130fff4d691cdc0e0f9d666009f9ae Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 6 Jul 2017 15:36:44 -0700
Subject: mm, sparsemem: break out of loops early

There are a number of times that we loop over NR_MEM_SECTIONS, looking
for section_present() on each section.  But, when we have very large
physical address spaces (large MAX_PHYSMEM_BITS), NR_MEM_SECTIONS
becomes very large, making the loops quite long.

With MAX_PHYSMEM_BITS=46 and a section size of 128MB, the current loops
are 512k iterations, which we barely notice on modern hardware.  But,
raising MAX_PHYSMEM_BITS higher (like we will see on systems that
support 5-level paging) makes this 64x longer and we start to notice,
especially on slower systems like simulators.  A 10-second delay for
512k iterations is annoying.  But, a 640- second delay is crippling.

This does not help if we have extremely sparse physical address spaces,
but those are quite rare.  We expect that most of the "slow" systems
where this matters will also be quite small and non-sparse.

To fix this, we track the highest section we've ever encountered.  This
lets us know when we will *never* see another section_present(), and
lets us break out of the loops earlier.

Doing the whole for_each_present_section_nr() macro is probably
overkill, but it will ensure that any future loop iterations that we
grow are more likely to be correct.

Kirrill said "It shaved almost 40 seconds from boot time in qemu with
5-level paging enabled for me".

Link: http://lkml.kernel.org/r/20170504174434.C45A4735@viggo.jf.intel.com
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c  |  4 ++++
 include/linux/mmzone.h |  2 ++
 mm/sparse.c            | 60 ++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 52 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index cc4f1d0cbffe..90225ffee501 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -820,6 +820,10 @@ int __init memory_dev_init(void)
 	 */
 	mutex_lock(&mem_sysfs_mutex);
 	for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) {
+		/* Don't iterate over sections we know are !present: */
+		if (i > __highest_present_section_nr)
+			break;
+
 		err = add_memory_block(i);
 		if (!ret)
 			ret = err;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ef6a13b7bd3e..fc39f85d273c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1180,6 +1180,8 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 	return __nr_to_section(pfn_to_section_nr(pfn));
 }
 
+extern int __highest_present_section_nr;
+
 #ifndef CONFIG_HAVE_ARCH_PFN_VALID
 static inline int pfn_valid(unsigned long pfn)
 {
diff --git a/mm/sparse.c b/mm/sparse.c
index 6903c8fc3085..5032c9a619de 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -168,6 +168,44 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
 	}
 }
 
+/*
+ * There are a number of times that we loop over NR_MEM_SECTIONS,
+ * looking for section_present() on each.  But, when we have very
+ * large physical address spaces, NR_MEM_SECTIONS can also be
+ * very large which makes the loops quite long.
+ *
+ * Keeping track of this gives us an easy way to break out of
+ * those loops early.
+ */
+int __highest_present_section_nr;
+static void section_mark_present(struct mem_section *ms)
+{
+	int section_nr = __section_nr(ms);
+
+	if (section_nr > __highest_present_section_nr)
+		__highest_present_section_nr = section_nr;
+
+	ms->section_mem_map |= SECTION_MARKED_PRESENT;
+}
+
+static inline int next_present_section_nr(int section_nr)
+{
+	do {
+		section_nr++;
+		if (present_section_nr(section_nr))
+			return section_nr;
+	} while ((section_nr < NR_MEM_SECTIONS) &&
+		 (section_nr <= __highest_present_section_nr));
+
+	return -1;
+}
+#define for_each_present_section_nr(start, section_nr)		\
+	for (section_nr = next_present_section_nr(start-1);	\
+	     ((section_nr >= 0) &&				\
+	      (section_nr < NR_MEM_SECTIONS) &&			\
+	      (section_nr <= __highest_present_section_nr));	\
+	     section_nr = next_present_section_nr(section_nr))
+
 /* Record a memory area against a node. */
 void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
@@ -183,9 +221,10 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 		set_section_nid(section, nid);
 
 		ms = __nr_to_section(section);
-		if (!ms->section_mem_map)
-			ms->section_mem_map = sparse_encode_early_nid(nid) |
-							SECTION_MARKED_PRESENT;
+		if (!ms->section_mem_map) {
+			ms->section_mem_map = sparse_encode_early_nid(nid);
+			section_mark_present(ms);
+		}
 	}
 }
 
@@ -476,23 +515,19 @@ static void __init alloc_usemap_and_memmap(void (*alloc_func)
 	int nodeid_begin = 0;
 	unsigned long pnum_begin = 0;
 
-	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+	for_each_present_section_nr(0, pnum) {
 		struct mem_section *ms;
 
-		if (!present_section_nr(pnum))
-			continue;
 		ms = __nr_to_section(pnum);
 		nodeid_begin = sparse_early_nid(ms);
 		pnum_begin = pnum;
 		break;
 	}
 	map_count = 1;
-	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+	for_each_present_section_nr(pnum_begin + 1, pnum) {
 		struct mem_section *ms;
 		int nodeid;
 
-		if (!present_section_nr(pnum))
-			continue;
 		ms = __nr_to_section(pnum);
 		nodeid = sparse_early_nid(ms);
 		if (nodeid == nodeid_begin) {
@@ -561,10 +596,7 @@ void __init sparse_init(void)
 							(void *)map_map);
 #endif
 
-	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-
+	for_each_present_section_nr(0, pnum) {
 		usemap = usemap_map[pnum];
 		if (!usemap)
 			continue;
@@ -722,7 +754,7 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
 
 	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
 
-	ms->section_mem_map |= SECTION_MARKED_PRESENT;
+	section_mark_present(ms);
 
 	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
 
-- 
cgit v1.2.3


From 38d8b4e6bdc872f07a3149309ab01719c96f3894 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 6 Jul 2017 15:37:18 -0700
Subject: mm, THP, swap: delay splitting THP during swap out

Patch series "THP swap: Delay splitting THP during swapping out", v11.

This patchset is to optimize the performance of Transparent Huge Page
(THP) swap.

Recently, the performance of the storage devices improved so fast that
we cannot saturate the disk bandwidth with single logical CPU when do
page swap out even on a high-end server machine.  Because the
performance of the storage device improved faster than that of single
logical CPU.  And it seems that the trend will not change in the near
future.  On the other hand, the THP becomes more and more popular
because of increased memory size.  So it becomes necessary to optimize
THP swap performance.

The advantages of the THP swap support include:

 - Batch the swap operations for the THP to reduce lock
   acquiring/releasing, including allocating/freeing the swap space,
   adding/deleting to/from the swap cache, and writing/reading the swap
   space, etc. This will help improve the performance of the THP swap.

 - The THP swap space read/write will be 2M sequential IO. It is
   particularly helpful for the swap read, which are usually 4k random
   IO. This will improve the performance of the THP swap too.

 - It will help the memory fragmentation, especially when the THP is
   heavily used by the applications. The 2M continuous pages will be
   free up after THP swapping out.

 - It will improve the THP utilization on the system with the swap
   turned on. Because the speed for khugepaged to collapse the normal
   pages into the THP is quite slow. After the THP is split during the
   swapping out, it will take quite long time for the normal pages to
   collapse back into the THP after being swapped in. The high THP
   utilization helps the efficiency of the page based memory management
   too.

There are some concerns regarding THP swap in, mainly because possible
enlarged read/write IO size (for swap in/out) may put more overhead on
the storage device.  To deal with that, the THP swap in should be turned
on only when necessary.  For example, it can be selected via
"always/never/madvise" logic, to be turned on globally, turned off
globally, or turned on only for VMA with MADV_HUGEPAGE, etc.

This patchset is the first step for the THP swap support.  The plan is
to delay splitting THP step by step, finally avoid splitting THP during
the THP swapping out and swap out/in the THP as a whole.

As the first step, in this patchset, the splitting huge page is delayed
from almost the first step of swapping out to after allocating the swap
space for the THP and adding the THP into the swap cache.  This will
reduce lock acquiring/releasing for the locks used for the swap cache
management.

With the patchset, the swap out throughput improves 15.5% (from about
3.73GB/s to about 4.31GB/s) in the vm-scalability swap-w-seq test case
with 8 processes.  The test is done on a Xeon E5 v3 system.  The swap
device used is a RAM simulated PMEM (persistent memory) device.  To test
the sequential swapping out, the test case creates 8 processes, which
sequentially allocate and write to the anonymous pages until the RAM and
part of the swap device is used up.

This patch (of 5):

In this patch, splitting huge page is delayed from almost the first step
of swapping out to after allocating the swap space for the THP
(Transparent Huge Page) and adding the THP into the swap cache.  This
will batch the corresponding operation, thus improve THP swap out
throughput.

This is the first step for the THP swap optimization.  The plan is to
delay splitting the THP step by step and avoid splitting the THP
finally.

In this patch, one swap cluster is used to hold the contents of each THP
swapped out.  So, the size of the swap cluster is changed to that of the
THP (Transparent Huge Page) on x86_64 architecture (512).  For other
architectures which want such THP swap optimization,
ARCH_USES_THP_SWAP_CLUSTER needs to be selected in the Kconfig file for
the architecture.  In effect, this will enlarge swap cluster size by 2
times on x86_64.  Which may make it harder to find a free cluster when
the swap space becomes fragmented.  So that, this may reduce the
continuous swap space allocation and sequential write in theory.  The
performance test in 0day shows no regressions caused by this.

In the future of THP swap optimization, some information of the swapped
out THP (such as compound map count) will be recorded in the
swap_cluster_info data structure.

The mem cgroup swap accounting functions are enhanced to support charge
or uncharge a swap cluster backing a THP as a whole.

The swap cluster allocate/free functions are added to allocate/free a
swap cluster for a THP.  A fair simple algorithm is used for swap
cluster allocation, that is, only the first swap device in priority list
will be tried to allocate the swap cluster.  The function will fail if
the trying is not successful, and the caller will fallback to allocate a
single swap slot instead.  This works good enough for normal cases.  If
the difference of the number of the free swap clusters among multiple
swap devices is significant, it is possible that some THPs are split
earlier than necessary.  For example, this could be caused by big size
difference among multiple swap devices.

The swap cache functions is enhanced to support add/delete THP to/from
the swap cache as a set of (HPAGE_PMD_NR) sub-pages.  This may be
enhanced in the future with multi-order radix tree.  But because we will
split the THP soon during swapping out, that optimization doesn't make
much sense for this first step.

The THP splitting functions are enhanced to support to split THP in swap
cache during swapping out.  The page lock will be held during allocating
the swap cluster, adding the THP into the swap cache and splitting the
THP.  So in the code path other than swapping out, if the THP need to be
split, the PageSwapCache(THP) will be always false.

The swap cluster is only available for SSD, so the THP swap optimization
in this patchset has no effect for HDD.

[ying.huang@intel.com: fix two issues in THP optimize patch]
  Link: http://lkml.kernel.org/r/87k25ed8zo.fsf@yhuang-dev.intel.com
[hannes@cmpxchg.org: extensive cleanups and simplifications, reduce code size]
Link: http://lkml.kernel.org/r/20170515112522.32457-2-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Andrew Morton <akpm@linux-foundation.org> [for config option]
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> [for changes in huge_memory.c and huge_mm.h]
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig            |   1 +
 include/linux/page-flags.h  |   7 +-
 include/linux/swap.h        |  19 +++-
 include/linux/swap_cgroup.h |   6 +-
 mm/Kconfig                  |  12 ++
 mm/huge_memory.c            |  11 +-
 mm/memcontrol.c             |  50 +++++----
 mm/shmem.c                  |   2 +-
 mm/swap_cgroup.c            |  40 +++++--
 mm/swap_slots.c             |  16 ++-
 mm/swap_state.c             | 114 +++++++++++--------
 mm/swapfile.c               | 259 ++++++++++++++++++++++++++++++++------------
 12 files changed, 373 insertions(+), 164 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e767ed24aeb4..1dbbe38f6ec0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_WANTS_THP_SWAP		if X86_64
 	select BUILDTIME_EXTABLE_SORT
 	select CLKEVT_I8253
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6b5818d6de32..d33e3280c8ad 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -326,11 +326,14 @@ PAGEFLAG_FALSE(HighMem)
 #ifdef CONFIG_SWAP
 static __always_inline int PageSwapCache(struct page *page)
 {
+#ifdef CONFIG_THP_SWAP
+	page = compound_head(page);
+#endif
 	return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
 
 }
-SETPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
-CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
+SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
+CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
 #else
 PAGEFLAG_FALSE(SwapCache)
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba5882419a7d..d18876384de0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -386,9 +386,9 @@ static inline long get_nr_swap_pages(void)
 }
 
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct page *page);
 extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
+extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
@@ -515,7 +515,7 @@ static inline int try_to_free_swap(struct page *page)
 	return 0;
 }
 
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct page *page)
 {
 	swp_entry_t entry;
 	entry.val = 0;
@@ -548,7 +548,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
-extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
+extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
 extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
 extern bool mem_cgroup_swap_full(struct page *page);
 #else
@@ -562,7 +562,8 @@ static inline int mem_cgroup_try_charge_swap(struct page *page,
 	return 0;
 }
 
-static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry,
+					    unsigned int nr_pages)
 {
 }
 
@@ -577,5 +578,13 @@ static inline bool mem_cgroup_swap_full(struct page *page)
 }
 #endif
 
+#ifdef CONFIG_THP_SWAP
+extern void swapcache_free_cluster(swp_entry_t entry);
+#else
+static inline void swapcache_free_cluster(swp_entry_t entry)
+{
+}
+#endif
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
index 145306bdc92f..b2b8ec7bda3f 100644
--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h
@@ -7,7 +7,8 @@
 
 extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new);
-extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
+extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
+					 unsigned int nr_ents);
 extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
 extern void swap_cgroup_swapoff(int type);
@@ -15,7 +16,8 @@ extern void swap_cgroup_swapoff(int type);
 #else
 
 static inline
-unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
+				  unsigned int nr_ents)
 {
 	return 0;
 }
diff --git a/mm/Kconfig b/mm/Kconfig
index 665cb370ad38..9870baafb096 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -446,6 +446,18 @@ choice
 	  benefit.
 endchoice
 
+config ARCH_WANTS_THP_SWAP
+       def_bool n
+
+config THP_SWAP
+	def_bool y
+	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP
+	help
+	  Swap transparent huge pages in one piece, without splitting.
+	  XXX: For now this only does clustered swap space allocation.
+
+	  For selection by architectures with reasonable THP sizes.
+
 config	TRANSPARENT_HUGE_PAGECACHE
 	def_bool y
 	depends on TRANSPARENT_HUGEPAGE
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f4d5f9d0f9b7..1a168e4bac4b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2203,7 +2203,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	 * atomic_set() here would be safe on all archs (and not only on x86),
 	 * it's safer to use atomic_inc()/atomic_add().
 	 */
-	if (PageAnon(head)) {
+	if (PageAnon(head) && !PageSwapCache(head)) {
 		page_ref_inc(page_tail);
 	} else {
 		/* Additional pin to radix tree */
@@ -2214,6 +2214,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	page_tail->flags |= (head->flags &
 			((1L << PG_referenced) |
 			 (1L << PG_swapbacked) |
+			 (1L << PG_swapcache) |
 			 (1L << PG_mlocked) |
 			 (1L << PG_uptodate) |
 			 (1L << PG_active) |
@@ -2276,7 +2277,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	ClearPageCompound(head);
 	/* See comment in __split_huge_page_tail() */
 	if (PageAnon(head)) {
-		page_ref_inc(head);
+		/* Additional pin to radix tree of swap cache */
+		if (PageSwapCache(head))
+			page_ref_add(head, 2);
+		else
+			page_ref_inc(head);
 	} else {
 		/* Additional pin to radix tree */
 		page_ref_add(head, 2);
@@ -2432,7 +2437,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			ret = -EBUSY;
 			goto out;
 		}
-		extra_pins = 0;
+		extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d75b38b66ef6..fc51a33ddcd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2376,10 +2376,9 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 
 #ifdef CONFIG_MEMCG_SWAP
 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
-					 bool charge)
+				       int nr_entries)
 {
-	int val = (charge) ? 1 : -1;
-	this_cpu_add(memcg->stat->count[MEMCG_SWAP], val);
+	this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries);
 }
 
 /**
@@ -2405,8 +2404,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
 	new_id = mem_cgroup_id(to);
 
 	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
-		mem_cgroup_swap_statistics(from, false);
-		mem_cgroup_swap_statistics(to, true);
+		mem_cgroup_swap_statistics(from, -1);
+		mem_cgroup_swap_statistics(to, 1);
 		return 0;
 	}
 	return -EINVAL;
@@ -5445,7 +5444,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 		 * let's not wait for it.  The page already received a
 		 * memory+swap charge, drop the swap entry duplicate.
 		 */
-		mem_cgroup_uncharge_swap(entry);
+		mem_cgroup_uncharge_swap(entry, nr_pages);
 	}
 }
 
@@ -5873,9 +5872,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	 * ancestor for the swap instead and transfer the memory+swap charge.
 	 */
 	swap_memcg = mem_cgroup_id_get_online(memcg);
-	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
+	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg), 1);
 	VM_BUG_ON_PAGE(oldid, page);
-	mem_cgroup_swap_statistics(swap_memcg, true);
+	mem_cgroup_swap_statistics(swap_memcg, 1);
 
 	page->mem_cgroup = NULL;
 
@@ -5902,19 +5901,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 		css_put(&memcg->css);
 }
 
-/*
- * mem_cgroup_try_charge_swap - try charging a swap entry
+/**
+ * mem_cgroup_try_charge_swap - try charging swap space for a page
  * @page: page being added to swap
  * @entry: swap entry to charge
  *
- * Try to charge @entry to the memcg that @page belongs to.
+ * Try to charge @page's memcg for the swap space at @entry.
  *
  * Returns 0 on success, -ENOMEM on failure.
  */
 int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
 {
-	struct mem_cgroup *memcg;
+	unsigned int nr_pages = hpage_nr_pages(page);
 	struct page_counter *counter;
+	struct mem_cgroup *memcg;
 	unsigned short oldid;
 
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account)
@@ -5929,25 +5929,27 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
 	memcg = mem_cgroup_id_get_online(memcg);
 
 	if (!mem_cgroup_is_root(memcg) &&
-	    !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+	    !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
 		mem_cgroup_id_put(memcg);
 		return -ENOMEM;
 	}
 
-	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+	/* Get references for the tail pages, too */
+	if (nr_pages > 1)
+		mem_cgroup_id_get_many(memcg, nr_pages - 1);
+	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
 	VM_BUG_ON_PAGE(oldid, page);
-	mem_cgroup_swap_statistics(memcg, true);
+	mem_cgroup_swap_statistics(memcg, nr_pages);
 
 	return 0;
 }
 
 /**
- * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * mem_cgroup_uncharge_swap - uncharge swap space
  * @entry: swap entry to uncharge
- *
- * Drop the swap charge associated with @entry.
+ * @nr_pages: the amount of swap space to uncharge
  */
-void mem_cgroup_uncharge_swap(swp_entry_t entry)
+void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
 {
 	struct mem_cgroup *memcg;
 	unsigned short id;
@@ -5955,18 +5957,18 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	if (!do_swap_account)
 		return;
 
-	id = swap_cgroup_record(entry, 0);
+	id = swap_cgroup_record(entry, 0, nr_pages);
 	rcu_read_lock();
 	memcg = mem_cgroup_from_id(id);
 	if (memcg) {
 		if (!mem_cgroup_is_root(memcg)) {
 			if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
-				page_counter_uncharge(&memcg->swap, 1);
+				page_counter_uncharge(&memcg->swap, nr_pages);
 			else
-				page_counter_uncharge(&memcg->memsw, 1);
+				page_counter_uncharge(&memcg->memsw, nr_pages);
 		}
-		mem_cgroup_swap_statistics(memcg, false);
-		mem_cgroup_id_put(memcg);
+		mem_cgroup_swap_statistics(memcg, -nr_pages);
+		mem_cgroup_id_put_many(memcg, nr_pages);
 	}
 	rcu_read_unlock();
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 9100c4952698..bbb987c58dad 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1291,7 +1291,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		SetPageUptodate(page);
 	}
 
-	swap = get_swap_page();
+	swap = get_swap_page(page);
 	if (!swap.val)
 		goto redirty;
 
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index 3405b4ee1757..fcd2740f4ed7 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -61,21 +61,27 @@ not_enough_page:
 	return -ENOMEM;
 }
 
+static struct swap_cgroup *__lookup_swap_cgroup(struct swap_cgroup_ctrl *ctrl,
+						pgoff_t offset)
+{
+	struct page *mappage;
+	struct swap_cgroup *sc;
+
+	mappage = ctrl->map[offset / SC_PER_PAGE];
+	sc = page_address(mappage);
+	return sc + offset % SC_PER_PAGE;
+}
+
 static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
 					struct swap_cgroup_ctrl **ctrlp)
 {
 	pgoff_t offset = swp_offset(ent);
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
-	struct swap_cgroup *sc;
 
 	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
 	if (ctrlp)
 		*ctrlp = ctrl;
-
-	mappage = ctrl->map[offset / SC_PER_PAGE];
-	sc = page_address(mappage);
-	return sc + offset % SC_PER_PAGE;
+	return __lookup_swap_cgroup(ctrl, offset);
 }
 
 /**
@@ -108,25 +114,39 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 }
 
 /**
- * swap_cgroup_record - record mem_cgroup for this swp_entry.
- * @ent: swap entry to be recorded into
+ * swap_cgroup_record - record mem_cgroup for a set of swap entries
+ * @ent: the first swap entry to be recorded into
  * @id: mem_cgroup to be recorded
+ * @nr_ents: number of swap entries to be recorded
  *
  * Returns old value at success, 0 at failure.
  * (Of course, old value can be 0.)
  */
-unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
+				  unsigned int nr_ents)
 {
 	struct swap_cgroup_ctrl *ctrl;
 	struct swap_cgroup *sc;
 	unsigned short old;
 	unsigned long flags;
+	pgoff_t offset = swp_offset(ent);
+	pgoff_t end = offset + nr_ents;
 
 	sc = lookup_swap_cgroup(ent, &ctrl);
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 	old = sc->id;
-	sc->id = id;
+	for (;;) {
+		VM_BUG_ON(sc->id != old);
+		sc->id = id;
+		offset++;
+		if (offset == end)
+			break;
+		if (offset % SC_PER_PAGE)
+			sc++;
+		else
+			sc = __lookup_swap_cgroup(ctrl, offset);
+	}
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
 	return old;
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 58f6c78f1dad..90c1032a8ac3 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -263,7 +263,8 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
 
 	cache->cur = 0;
 	if (swap_slot_cache_active)
-		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots);
+		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, false,
+					   cache->slots);
 
 	return cache->nr;
 }
@@ -301,11 +302,19 @@ direct_free:
 	return 0;
 }
 
-swp_entry_t get_swap_page(void)
+swp_entry_t get_swap_page(struct page *page)
 {
 	swp_entry_t entry, *pentry;
 	struct swap_slots_cache *cache;
 
+	entry.val = 0;
+
+	if (PageTransHuge(page)) {
+		if (IS_ENABLED(CONFIG_THP_SWAP))
+			get_swap_pages(1, true, &entry);
+		return entry;
+	}
+
 	/*
 	 * Preemption is allowed here, because we may sleep
 	 * in refill_swap_slots_cache().  But it is safe, because
@@ -317,7 +326,6 @@ swp_entry_t get_swap_page(void)
 	 */
 	cache = raw_cpu_ptr(&swp_slots);
 
-	entry.val = 0;
 	if (check_cache_active()) {
 		mutex_lock(&cache->alloc_lock);
 		if (cache->slots) {
@@ -337,7 +345,7 @@ repeat:
 			return entry;
 	}
 
-	get_swap_pages(1, &entry);
+	get_swap_pages(1, false, &entry);
 
 	return entry;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 539b8885e3d1..16ff89d058f4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -19,6 +19,7 @@
 #include <linux/migrate.h>
 #include <linux/vmalloc.h>
 #include <linux/swap_slots.h>
+#include <linux/huge_mm.h>
 
 #include <asm/pgtable.h>
 
@@ -38,6 +39,7 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
+#define ADD_CACHE_INFO(x, nr)	do { swap_cache_info.x += (nr); } while (0)
 
 static struct {
 	unsigned long add_total;
@@ -90,39 +92,46 @@ void show_swap_cache_info(void)
  */
 int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
-	int error;
+	int error, i, nr = hpage_nr_pages(page);
 	struct address_space *address_space;
+	pgoff_t idx = swp_offset(entry);
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
-	get_page(page);
+	page_ref_add(page, nr);
 	SetPageSwapCache(page);
-	set_page_private(page, entry.val);
 
 	address_space = swap_address_space(entry);
 	spin_lock_irq(&address_space->tree_lock);
-	error = radix_tree_insert(&address_space->page_tree,
-				  swp_offset(entry), page);
-	if (likely(!error)) {
-		address_space->nrpages++;
-		__inc_node_page_state(page, NR_FILE_PAGES);
-		INC_CACHE_INFO(add_total);
+	for (i = 0; i < nr; i++) {
+		set_page_private(page + i, entry.val + i);
+		error = radix_tree_insert(&address_space->page_tree,
+					  idx + i, page + i);
+		if (unlikely(error))
+			break;
 	}
-	spin_unlock_irq(&address_space->tree_lock);
-
-	if (unlikely(error)) {
+	if (likely(!error)) {
+		address_space->nrpages += nr;
+		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
+		ADD_CACHE_INFO(add_total, nr);
+	} else {
 		/*
 		 * Only the context which have set SWAP_HAS_CACHE flag
 		 * would call add_to_swap_cache().
 		 * So add_to_swap_cache() doesn't returns -EEXIST.
 		 */
 		VM_BUG_ON(error == -EEXIST);
-		set_page_private(page, 0UL);
+		set_page_private(page + i, 0UL);
+		while (i--) {
+			radix_tree_delete(&address_space->page_tree, idx + i);
+			set_page_private(page + i, 0UL);
+		}
 		ClearPageSwapCache(page);
-		put_page(page);
+		page_ref_sub(page, nr);
 	}
+	spin_unlock_irq(&address_space->tree_lock);
 
 	return error;
 }
@@ -132,7 +141,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
 	int error;
 
-	error = radix_tree_maybe_preload(gfp_mask);
+	error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
 	if (!error) {
 		error = __add_to_swap_cache(page, entry);
 		radix_tree_preload_end();
@@ -146,8 +155,10 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
-	swp_entry_t entry;
 	struct address_space *address_space;
+	int i, nr = hpage_nr_pages(page);
+	swp_entry_t entry;
+	pgoff_t idx;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
@@ -155,12 +166,15 @@ void __delete_from_swap_cache(struct page *page)
 
 	entry.val = page_private(page);
 	address_space = swap_address_space(entry);
-	radix_tree_delete(&address_space->page_tree, swp_offset(entry));
-	set_page_private(page, 0);
+	idx = swp_offset(entry);
+	for (i = 0; i < nr; i++) {
+		radix_tree_delete(&address_space->page_tree, idx + i);
+		set_page_private(page + i, 0);
+	}
 	ClearPageSwapCache(page);
-	address_space->nrpages--;
-	__dec_node_page_state(page, NR_FILE_PAGES);
-	INC_CACHE_INFO(del_total);
+	address_space->nrpages -= nr;
+	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
+	ADD_CACHE_INFO(del_total, nr);
 }
 
 /**
@@ -178,20 +192,12 @@ int add_to_swap(struct page *page, struct list_head *list)
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageUptodate(page), page);
 
-	entry = get_swap_page();
+retry:
+	entry = get_swap_page(page);
 	if (!entry.val)
-		return 0;
-
-	if (mem_cgroup_try_charge_swap(page, entry)) {
-		swapcache_free(entry);
-		return 0;
-	}
-
-	if (unlikely(PageTransHuge(page)))
-		if (unlikely(split_huge_page_to_list(page, list))) {
-			swapcache_free(entry);
-			return 0;
-		}
+		goto fail;
+	if (mem_cgroup_try_charge_swap(page, entry))
+		goto fail_free;
 
 	/*
 	 * Radix-tree node allocations from PF_MEMALLOC contexts could
@@ -206,17 +212,33 @@ int add_to_swap(struct page *page, struct list_head *list)
 	 */
 	err = add_to_swap_cache(page, entry,
 			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
-
-	if (!err) {
-		return 1;
-	} else {	/* -ENOMEM radix-tree allocation failure */
+	/* -ENOMEM radix-tree allocation failure */
+	if (err)
 		/*
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry);
-		return 0;
+		goto fail_free;
+
+	if (PageTransHuge(page)) {
+		err = split_huge_page_to_list(page, list);
+		if (err) {
+			delete_from_swap_cache(page);
+			return 0;
+		}
 	}
+
+	return 1;
+
+fail_free:
+	if (PageTransHuge(page))
+		swapcache_free_cluster(entry);
+	else
+		swapcache_free(entry);
+fail:
+	if (PageTransHuge(page) && !split_huge_page_to_list(page, list))
+		goto retry;
+	return 0;
 }
 
 /*
@@ -237,8 +259,12 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&address_space->tree_lock);
 
-	swapcache_free(entry);
-	put_page(page);
+	if (PageTransHuge(page))
+		swapcache_free_cluster(entry);
+	else
+		swapcache_free(entry);
+
+	page_ref_sub(page, hpage_nr_pages(page));
 }
 
 /* 
@@ -295,7 +321,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 
 	page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
-	if (page) {
+	if (page && likely(!PageTransCompound(page))) {
 		INC_CACHE_INFO(find_success);
 		if (TestClearPageReadahead(page))
 			atomic_inc(&swapin_readahead_hits);
@@ -506,7 +532,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 						gfp_mask, vma, addr);
 		if (!page)
 			continue;
-		if (offset != entry_offset)
+		if (offset != entry_offset && likely(!PageTransCompound(page)))
 			SetPageReadahead(page);
 		put_page(page);
 	}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4f6cba1b6632..984f0dd94948 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -199,7 +199,11 @@ static void discard_swap_cluster(struct swap_info_struct *si,
 	}
 }
 
+#ifdef CONFIG_THP_SWAP
+#define SWAPFILE_CLUSTER	HPAGE_PMD_NR
+#else
 #define SWAPFILE_CLUSTER	256
+#endif
 #define LATENCY_LIMIT		256
 
 static inline void cluster_set_flag(struct swap_cluster_info *info,
@@ -374,6 +378,14 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
 	schedule_work(&si->discard_work);
 }
 
+static void __free_cluster(struct swap_info_struct *si, unsigned long idx)
+{
+	struct swap_cluster_info *ci = si->cluster_info;
+
+	cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE);
+	cluster_list_add_tail(&si->free_clusters, ci, idx);
+}
+
 /*
  * Doing discard actually. After a cluster discard is finished, the cluster
  * will be added to free cluster list. caller should hold si->lock.
@@ -394,10 +406,7 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
 
 		spin_lock(&si->lock);
 		ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
-		cluster_set_flag(ci, CLUSTER_FLAG_FREE);
-		unlock_cluster(ci);
-		cluster_list_add_tail(&si->free_clusters, info, idx);
-		ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
+		__free_cluster(si, idx);
 		memset(si->swap_map + idx * SWAPFILE_CLUSTER,
 				0, SWAPFILE_CLUSTER);
 		unlock_cluster(ci);
@@ -415,6 +424,34 @@ static void swap_discard_work(struct work_struct *work)
 	spin_unlock(&si->lock);
 }
 
+static void alloc_cluster(struct swap_info_struct *si, unsigned long idx)
+{
+	struct swap_cluster_info *ci = si->cluster_info;
+
+	VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx);
+	cluster_list_del_first(&si->free_clusters, ci);
+	cluster_set_count_flag(ci + idx, 0, 0);
+}
+
+static void free_cluster(struct swap_info_struct *si, unsigned long idx)
+{
+	struct swap_cluster_info *ci = si->cluster_info + idx;
+
+	VM_BUG_ON(cluster_count(ci) != 0);
+	/*
+	 * If the swap is discardable, prepare discard the cluster
+	 * instead of free it immediately. The cluster will be freed
+	 * after discard.
+	 */
+	if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
+	    (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
+		swap_cluster_schedule_discard(si, idx);
+		return;
+	}
+
+	__free_cluster(si, idx);
+}
+
 /*
  * The cluster corresponding to page_nr will be used. The cluster will be
  * removed from free cluster list and its usage counter will be increased.
@@ -426,11 +463,8 @@ static void inc_cluster_info_page(struct swap_info_struct *p,
 
 	if (!cluster_info)
 		return;
-	if (cluster_is_free(&cluster_info[idx])) {
-		VM_BUG_ON(cluster_list_first(&p->free_clusters) != idx);
-		cluster_list_del_first(&p->free_clusters, cluster_info);
-		cluster_set_count_flag(&cluster_info[idx], 0, 0);
-	}
+	if (cluster_is_free(&cluster_info[idx]))
+		alloc_cluster(p, idx);
 
 	VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
 	cluster_set_count(&cluster_info[idx],
@@ -454,21 +488,8 @@ static void dec_cluster_info_page(struct swap_info_struct *p,
 	cluster_set_count(&cluster_info[idx],
 		cluster_count(&cluster_info[idx]) - 1);
 
-	if (cluster_count(&cluster_info[idx]) == 0) {
-		/*
-		 * If the swap is discardable, prepare discard the cluster
-		 * instead of free it immediately. The cluster will be freed
-		 * after discard.
-		 */
-		if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
-				 (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
-			swap_cluster_schedule_discard(p, idx);
-			return;
-		}
-
-		cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
-		cluster_list_add_tail(&p->free_clusters, cluster_info, idx);
-	}
+	if (cluster_count(&cluster_info[idx]) == 0)
+		free_cluster(p, idx);
 }
 
 /*
@@ -558,6 +579,60 @@ new_cluster:
 	return found_free;
 }
 
+static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
+			     unsigned int nr_entries)
+{
+	unsigned int end = offset + nr_entries - 1;
+
+	if (offset == si->lowest_bit)
+		si->lowest_bit += nr_entries;
+	if (end == si->highest_bit)
+		si->highest_bit -= nr_entries;
+	si->inuse_pages += nr_entries;
+	if (si->inuse_pages == si->pages) {
+		si->lowest_bit = si->max;
+		si->highest_bit = 0;
+		spin_lock(&swap_avail_lock);
+		plist_del(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
+	}
+}
+
+static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
+			    unsigned int nr_entries)
+{
+	unsigned long end = offset + nr_entries - 1;
+	void (*swap_slot_free_notify)(struct block_device *, unsigned long);
+
+	if (offset < si->lowest_bit)
+		si->lowest_bit = offset;
+	if (end > si->highest_bit) {
+		bool was_full = !si->highest_bit;
+
+		si->highest_bit = end;
+		if (was_full && (si->flags & SWP_WRITEOK)) {
+			spin_lock(&swap_avail_lock);
+			WARN_ON(!plist_node_empty(&si->avail_list));
+			if (plist_node_empty(&si->avail_list))
+				plist_add(&si->avail_list, &swap_avail_head);
+			spin_unlock(&swap_avail_lock);
+		}
+	}
+	atomic_long_add(nr_entries, &nr_swap_pages);
+	si->inuse_pages -= nr_entries;
+	if (si->flags & SWP_BLKDEV)
+		swap_slot_free_notify =
+			si->bdev->bd_disk->fops->swap_slot_free_notify;
+	else
+		swap_slot_free_notify = NULL;
+	while (offset <= end) {
+		frontswap_invalidate_page(si->type, offset);
+		if (swap_slot_free_notify)
+			swap_slot_free_notify(si->bdev, offset);
+		offset++;
+	}
+}
+
 static int scan_swap_map_slots(struct swap_info_struct *si,
 			       unsigned char usage, int nr,
 			       swp_entry_t slots[])
@@ -676,18 +751,7 @@ checks:
 	inc_cluster_info_page(si, si->cluster_info, offset);
 	unlock_cluster(ci);
 
-	if (offset == si->lowest_bit)
-		si->lowest_bit++;
-	if (offset == si->highest_bit)
-		si->highest_bit--;
-	si->inuse_pages++;
-	if (si->inuse_pages == si->pages) {
-		si->lowest_bit = si->max;
-		si->highest_bit = 0;
-		spin_lock(&swap_avail_lock);
-		plist_del(&si->avail_list, &swap_avail_head);
-		spin_unlock(&swap_avail_lock);
-	}
+	swap_range_alloc(si, offset, 1);
 	si->cluster_next = offset + 1;
 	slots[n_ret++] = swp_entry(si->type, offset);
 
@@ -766,6 +830,52 @@ no_page:
 	return n_ret;
 }
 
+#ifdef CONFIG_THP_SWAP
+static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
+{
+	unsigned long idx;
+	struct swap_cluster_info *ci;
+	unsigned long offset, i;
+	unsigned char *map;
+
+	if (cluster_list_empty(&si->free_clusters))
+		return 0;
+
+	idx = cluster_list_first(&si->free_clusters);
+	offset = idx * SWAPFILE_CLUSTER;
+	ci = lock_cluster(si, offset);
+	alloc_cluster(si, idx);
+	cluster_set_count_flag(ci, SWAPFILE_CLUSTER, 0);
+
+	map = si->swap_map + offset;
+	for (i = 0; i < SWAPFILE_CLUSTER; i++)
+		map[i] = SWAP_HAS_CACHE;
+	unlock_cluster(ci);
+	swap_range_alloc(si, offset, SWAPFILE_CLUSTER);
+	*slot = swp_entry(si->type, offset);
+
+	return 1;
+}
+
+static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
+{
+	unsigned long offset = idx * SWAPFILE_CLUSTER;
+	struct swap_cluster_info *ci;
+
+	ci = lock_cluster(si, offset);
+	cluster_set_count_flag(ci, 0, 0);
+	free_cluster(si, idx);
+	unlock_cluster(ci);
+	swap_range_free(si, offset, SWAPFILE_CLUSTER);
+}
+#else
+static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
+{
+	VM_WARN_ON_ONCE(1);
+	return 0;
+}
+#endif /* CONFIG_THP_SWAP */
+
 static unsigned long scan_swap_map(struct swap_info_struct *si,
 				   unsigned char usage)
 {
@@ -781,13 +891,17 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 }
 
-int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
+int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
 {
+	unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1;
 	struct swap_info_struct *si, *next;
 	long avail_pgs;
 	int n_ret = 0;
 
-	avail_pgs = atomic_long_read(&nr_swap_pages);
+	/* Only single cluster request supported */
+	WARN_ON_ONCE(n_goal > 1 && cluster);
+
+	avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages;
 	if (avail_pgs <= 0)
 		goto noswap;
 
@@ -797,7 +911,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
 	if (n_goal > avail_pgs)
 		n_goal = avail_pgs;
 
-	atomic_long_sub(n_goal, &nr_swap_pages);
+	atomic_long_sub(n_goal * nr_pages, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -823,10 +937,13 @@ start_over:
 			spin_unlock(&si->lock);
 			goto nextsi;
 		}
-		n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
-					    n_goal, swp_entries);
+		if (cluster)
+			n_ret = swap_alloc_cluster(si, swp_entries);
+		else
+			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+						    n_goal, swp_entries);
 		spin_unlock(&si->lock);
-		if (n_ret)
+		if (n_ret || cluster)
 			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 			si->type);
@@ -852,7 +969,8 @@ nextsi:
 
 check_out:
 	if (n_ret < n_goal)
-		atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
+		atomic_long_add((long)(n_goal - n_ret) * nr_pages,
+				&nr_swap_pages);
 noswap:
 	return n_ret;
 }
@@ -1008,32 +1126,8 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
 	dec_cluster_info_page(p, p->cluster_info, offset);
 	unlock_cluster(ci);
 
-	mem_cgroup_uncharge_swap(entry);
-	if (offset < p->lowest_bit)
-		p->lowest_bit = offset;
-	if (offset > p->highest_bit) {
-		bool was_full = !p->highest_bit;
-
-		p->highest_bit = offset;
-		if (was_full && (p->flags & SWP_WRITEOK)) {
-			spin_lock(&swap_avail_lock);
-			WARN_ON(!plist_node_empty(&p->avail_list));
-			if (plist_node_empty(&p->avail_list))
-				plist_add(&p->avail_list,
-					  &swap_avail_head);
-			spin_unlock(&swap_avail_lock);
-		}
-	}
-	atomic_long_inc(&nr_swap_pages);
-	p->inuse_pages--;
-	frontswap_invalidate_page(p->type, offset);
-	if (p->flags & SWP_BLKDEV) {
-		struct gendisk *disk = p->bdev->bd_disk;
-
-		if (disk->fops->swap_slot_free_notify)
-			disk->fops->swap_slot_free_notify(p->bdev,
-							  offset);
-	}
+	mem_cgroup_uncharge_swap(entry, 1);
+	swap_range_free(p, offset, 1);
 }
 
 /*
@@ -1065,6 +1159,33 @@ void swapcache_free(swp_entry_t entry)
 	}
 }
 
+#ifdef CONFIG_THP_SWAP
+void swapcache_free_cluster(swp_entry_t entry)
+{
+	unsigned long offset = swp_offset(entry);
+	unsigned long idx = offset / SWAPFILE_CLUSTER;
+	struct swap_cluster_info *ci;
+	struct swap_info_struct *si;
+	unsigned char *map;
+	unsigned int i;
+
+	si = swap_info_get(entry);
+	if (!si)
+		return;
+
+	ci = lock_cluster(si, offset);
+	map = si->swap_map + offset;
+	for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+		VM_BUG_ON(map[i] != SWAP_HAS_CACHE);
+		map[i] = 0;
+	}
+	unlock_cluster(ci);
+	mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
+	swap_free_cluster(si, idx);
+	spin_unlock(&si->lock);
+}
+#endif /* CONFIG_THP_SWAP */
+
 void swapcache_free_entries(swp_entry_t *entries, int n)
 {
 	struct swap_info_struct *p, *prev;
-- 
cgit v1.2.3


From 75f6d6d29a40b5541f0f107201cf7dec134ad210 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 6 Jul 2017 15:37:21 -0700
Subject: mm, THP, swap: unify swap slot free functions to put_swap_page

Now, get_swap_page takes struct page and allocates swap space according
to page size(ie, normal or THP) so it would be more cleaner to introduce
put_swap_page which is a counter function of get_swap_page.  Then, it
calls right swap slot free function depending on page's size.

[ying.huang@intel.com: minor cleanup and fix]
Link: http://lkml.kernel.org/r/20170515112522.32457-3-ying.huang@intel.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 12 ++----------
 mm/shmem.c           |  2 +-
 mm/swap_state.c      | 13 +++----------
 mm/swapfile.c        | 16 ++++++++++++++--
 mm/vmscan.c          |  2 +-
 5 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d18876384de0..ead6fd7966b4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -387,6 +387,7 @@ static inline long get_nr_swap_pages(void)
 
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(struct page *page);
+extern void put_swap_page(struct page *page, swp_entry_t entry);
 extern swp_entry_t get_swap_page_of_type(int);
 extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
@@ -394,7 +395,6 @@ extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
@@ -453,7 +453,7 @@ static inline void swap_free(swp_entry_t swp)
 {
 }
 
-static inline void swapcache_free(swp_entry_t swp)
+static inline void put_swap_page(struct page *page, swp_entry_t swp)
 {
 }
 
@@ -578,13 +578,5 @@ static inline bool mem_cgroup_swap_full(struct page *page)
 }
 #endif
 
-#ifdef CONFIG_THP_SWAP
-extern void swapcache_free_cluster(swp_entry_t entry);
-#else
-static inline void swapcache_free_cluster(swp_entry_t entry)
-{
-}
-#endif
-
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index bbb987c58dad..a06f23731d3f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1327,7 +1327,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 
 	mutex_unlock(&shmem_swaplist_mutex);
 free_swap:
-	swapcache_free(swap);
+	put_swap_page(page, swap);
 redirty:
 	set_page_dirty(page);
 	if (wbc->for_reclaim)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 16ff89d058f4..0ad214d7a7ad 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -231,10 +231,7 @@ retry:
 	return 1;
 
 fail_free:
-	if (PageTransHuge(page))
-		swapcache_free_cluster(entry);
-	else
-		swapcache_free(entry);
+	put_swap_page(page, entry);
 fail:
 	if (PageTransHuge(page) && !split_huge_page_to_list(page, list))
 		goto retry;
@@ -259,11 +256,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&address_space->tree_lock);
 
-	if (PageTransHuge(page))
-		swapcache_free_cluster(entry);
-	else
-		swapcache_free(entry);
-
+	put_swap_page(page, entry);
 	page_ref_sub(page, hpage_nr_pages(page));
 }
 
@@ -415,7 +408,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry);
+		put_swap_page(new_page, entry);
 	} while (err != -ENOMEM);
 
 	if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 984f0dd94948..8a6cdf9e55f9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1148,7 +1148,7 @@ void swap_free(swp_entry_t entry)
 /*
  * Called after dropping swapcache to decrease refcnt to swap entries.
  */
-void swapcache_free(swp_entry_t entry)
+static void swapcache_free(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
 
@@ -1160,7 +1160,7 @@ void swapcache_free(swp_entry_t entry)
 }
 
 #ifdef CONFIG_THP_SWAP
-void swapcache_free_cluster(swp_entry_t entry)
+static void swapcache_free_cluster(swp_entry_t entry)
 {
 	unsigned long offset = swp_offset(entry);
 	unsigned long idx = offset / SWAPFILE_CLUSTER;
@@ -1184,8 +1184,20 @@ void swapcache_free_cluster(swp_entry_t entry)
 	swap_free_cluster(si, idx);
 	spin_unlock(&si->lock);
 }
+#else
+static inline void swapcache_free_cluster(swp_entry_t entry)
+{
+}
 #endif /* CONFIG_THP_SWAP */
 
+void put_swap_page(struct page *page, swp_entry_t entry)
+{
+	if (!PageTransHuge(page))
+		swapcache_free(entry);
+	else
+		swapcache_free_cluster(entry);
+}
+
 void swapcache_free_entries(swp_entry_t *entries, int n)
 {
 	struct swap_info_struct *p, *prev;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a10e05870835..cb7c154a4a9d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -708,7 +708,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		mem_cgroup_swapout(page, swap);
 		__delete_from_swap_cache(page);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
-		swapcache_free(swap);
+		put_swap_page(page, swap);
 	} else {
 		void (*freepage)(struct page *);
 		void *shadow = NULL;
-- 
cgit v1.2.3


From 0f0746589e4be071a8f890b2035c97c30c7a4e16 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 6 Jul 2017 15:37:24 -0700
Subject: mm, THP, swap: move anonymous THP split logic to vmscan

The add_to_swap aims to allocate swap_space(ie, swap slot and swapcache)
so if it fails due to lack of space in case of THP or something(hdd swap
but tries THP swapout) *caller* rather than add_to_swap itself should
split the THP page and retry it with base page which is more natural.

Link: http://lkml.kernel.org/r/20170515112522.32457-4-ying.huang@intel.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  4 ++--
 mm/swap_state.c      | 23 ++++++-----------------
 mm/vmscan.c          | 17 ++++++++++++++++-
 3 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ead6fd7966b4..5ab1c98c7d27 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -353,7 +353,7 @@ extern struct address_space *swapper_spaces[];
 		>> SWAP_ADDRESS_SPACE_SHIFT])
 extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *, struct list_head *list);
+extern int add_to_swap(struct page *page);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
 extern void __delete_from_swap_cache(struct page *);
@@ -473,7 +473,7 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
 	return NULL;
 }
 
-static inline int add_to_swap(struct page *page, struct list_head *list)
+static inline int add_to_swap(struct page *page)
 {
 	return 0;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 0ad214d7a7ad..9c71b6b2562f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -184,7 +184,7 @@ void __delete_from_swap_cache(struct page *page)
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page *page, struct list_head *list)
+int add_to_swap(struct page *page)
 {
 	swp_entry_t entry;
 	int err;
@@ -192,12 +192,12 @@ int add_to_swap(struct page *page, struct list_head *list)
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageUptodate(page), page);
 
-retry:
 	entry = get_swap_page(page);
 	if (!entry.val)
-		goto fail;
+		return 0;
+
 	if (mem_cgroup_try_charge_swap(page, entry))
-		goto fail_free;
+		goto fail;
 
 	/*
 	 * Radix-tree node allocations from PF_MEMALLOC contexts could
@@ -218,23 +218,12 @@ retry:
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		goto fail_free;
-
-	if (PageTransHuge(page)) {
-		err = split_huge_page_to_list(page, list);
-		if (err) {
-			delete_from_swap_cache(page);
-			return 0;
-		}
-	}
+		goto fail;
 
 	return 1;
 
-fail_free:
-	put_swap_page(page, entry);
 fail:
-	if (PageTransHuge(page) && !split_huge_page_to_list(page, list))
-		goto retry;
+	put_swap_page(page, entry);
 	return 0;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cb7c154a4a9d..729e37f02de6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1125,8 +1125,23 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		    !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			if (!add_to_swap(page, page_list))
+			if (!add_to_swap(page)) {
+				if (!PageTransHuge(page))
+					goto activate_locked;
+				/* Split THP and swap individual base pages */
+				if (split_huge_page_to_list(page, page_list))
+					goto activate_locked;
+				if (!add_to_swap(page))
+					goto activate_locked;
+			}
+
+			/* XXX: We don't support THP writes */
+			if (PageTransHuge(page) &&
+				  split_huge_page_to_list(page, page_list)) {
+				delete_from_swap_cache(page);
 				goto activate_locked;
+			}
+
 			may_enter_fs = 1;
 
 			/* Adding to swap updated mapping */
-- 
cgit v1.2.3


From b8f593cd0896b8b14c2b494a9776531b5cd54d98 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 6 Jul 2017 15:37:28 -0700
Subject: mm, THP, swap: check whether THP can be split firstly

To swap out THP (Transparent Huage Page), before splitting the THP, the
swap cluster will be allocated and the THP will be added into the swap
cache.  But it is possible that the THP cannot be split, so that we must
delete the THP from the swap cache and free the swap cluster.  To avoid
that, in this patch, whether the THP can be split is checked firstly.
The check can only be done racy, but it is good enough for most cases.

With the patch, the swap out throughput improves 3.6% (from about
4.16GB/s to about 4.31GB/s) in the vm-scalability swap-w-seq test case
with 8 processes.  The test is done on a Xeon E5 v3 system.  The swap
device used is a RAM simulated PMEM (persistent memory) device.  To test
the sequential swapping out, the test case creates 8 processes, which
sequentially allocate and write to the anonymous pages until the RAM and
part of the swap device is used up.

Link: http://lkml.kernel.org/r/20170515112522.32457-5-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> [for can_split_huge_page()]
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h |  7 +++++++
 mm/huge_memory.c        | 20 ++++++++++++++++----
 mm/vmscan.c             |  4 ++++
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a3762d49ba39..d3b3e8fcc717 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -113,6 +113,7 @@ extern unsigned long thp_get_unmapped_area(struct file *filp,
 extern void prep_transhuge_page(struct page *page);
 extern void free_transhuge_page(struct page *page);
 
+bool can_split_huge_page(struct page *page, int *pextra_pins);
 int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
@@ -231,6 +232,12 @@ static inline void prep_transhuge_page(struct page *page) {}
 
 #define thp_get_unmapped_area	NULL
 
+static inline bool
+can_split_huge_page(struct page *page, int *pextra_pins)
+{
+	BUILD_BUG();
+	return false;
+}
 static inline int
 split_huge_page_to_list(struct page *page, struct list_head *list)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1a168e4bac4b..86975dec0ba1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2390,6 +2390,21 @@ int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
 	return ret;
 }
 
+/* Racy check whether the huge page can be split */
+bool can_split_huge_page(struct page *page, int *pextra_pins)
+{
+	int extra_pins;
+
+	/* Additional pins from radix tree */
+	if (PageAnon(page))
+		extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
+	else
+		extra_pins = HPAGE_PMD_NR;
+	if (pextra_pins)
+		*pextra_pins = extra_pins;
+	return total_mapcount(page) == page_count(page) - extra_pins - 1;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
@@ -2437,7 +2452,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			ret = -EBUSY;
 			goto out;
 		}
-		extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
@@ -2449,8 +2463,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			goto out;
 		}
 
-		/* Addidional pins from radix tree */
-		extra_pins = HPAGE_PMD_NR;
 		anon_vma = NULL;
 		i_mmap_lock_read(mapping);
 	}
@@ -2459,7 +2471,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	 * Racy check if we can split the page, before freeze_page() will
 	 * split PMDs
 	 */
-	if (total_mapcount(head) != page_count(head) - extra_pins - 1) {
+	if (!can_split_huge_page(head, &extra_pins)) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 729e37f02de6..abaf0215a352 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1125,6 +1125,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		    !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
+			/* cannot split THP, skip it */
+			if (PageTransHuge(page) &&
+			    !can_split_huge_page(page, NULL))
+				goto activate_locked;
 			if (!add_to_swap(page)) {
 				if (!PageTransHuge(page))
 					goto activate_locked;
-- 
cgit v1.2.3


From dc0bbf3b7fb9ed2246f62bba4379070589e2135c Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:37:35 -0700
Subject: mm: remove return value from init_currently_empty_zone

Patch series "mm: make movable onlining suck less", v4.

Movable onlining is a real hack with many downsides - mainly
reintroduction of lowmem/highmem issues we used to have on 32b systems -
but it is the only way to make the memory hotremove more reliable which
is something that people are asking for.

The current semantic of memory movable onlinening is really cumbersome,
however.  The main reason for this is that the udev driven approach is
basically unusable because udev races with the memory probing while only
the last memory block or the one adjacent to the existing zone_movable
are allowed to be onlined movable.  In short the criterion for the
successful online_movable changes under udev's feet.  A reliable udev
approach would require a 2 phase approach where the first successful
movable online would have to check all the previous blocks and online
them in descending order.  This is hard to be considered sane.

This patchset aims at making the onlining semantic more usable.  First
of all it allows to online memory movable as long as it doesn't clash
with the existing ZONE_NORMAL.  That means that ZONE_NORMAL and
ZONE_MOVABLE cannot overlap.  Currently I preserve the original ordering
semantic so the zone always precedes the movable zone but I have plans
to remove this restriction in future because it is not really necessary.

First 3 patches are cleanups which should be ready to be merged right
away (unless I have missed something subtle of course).

Patch 4 deals with ZONE_DEVICE dependencies down the __add_pages path.

Patch 5 deals with implicit assumptions of register_one_node on pgdat
initialization.

Patches 6-10 deal with offline holes in the zone for pfn walkers.  I
hope I got all of them right but people familiar with compaction should
double check this.

Patch 11 is the core of the change.  In order to make it easier to
review I have tried it to be as minimalistic as possible and the large
code removal is moved to patch 14.

Patch 12 is a trivial follow up cleanup.  Patch 13 fixes sparse warnings
and finally patch 14 removes the unused code.

I have tested the patches in kvm:
  # qemu-system-x86_64 -enable-kvm -monitor pty -m 2G,slots=4,maxmem=4G -numa node,mem=1G -numa node,mem=1G ...

and then probed the additional memory by
  (qemu) object_add memory-backend-ram,id=mem1,size=1G
  (qemu) device_add pc-dimm,id=dimm1,memdev=mem1

Then I have used this simple script to probe the memory block by hand
  # cat probe_memblock.sh
  #!/bin/sh

  BLOCK_NR=$1

  # echo $((0x100000000+$BLOCK_NR*(128<<20))) > /sys/devices/system/memory/probe

  # for i in $(seq 10); do sh probe_memblock.sh $i; done
  # grep . /sys/devices/system/memory/memory3?/valid_zones 2>/dev/null
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Normal Movable
  /sys/devices/system/memory/memory35/valid_zones:Normal Movable
  /sys/devices/system/memory/memory36/valid_zones:Normal Movable
  /sys/devices/system/memory/memory37/valid_zones:Normal Movable
  /sys/devices/system/memory/memory38/valid_zones:Normal Movable
  /sys/devices/system/memory/memory39/valid_zones:Normal Movable

The main difference to the original implementation is that all new
memblocks can be both online_kernel and online_movable initially because
there is no clash obviously.  For the comparison the original
implementation would have

  /sys/devices/system/memory/memory33/valid_zones:Normal
  /sys/devices/system/memory/memory34/valid_zones:Normal
  /sys/devices/system/memory/memory35/valid_zones:Normal
  /sys/devices/system/memory/memory36/valid_zones:Normal
  /sys/devices/system/memory/memory37/valid_zones:Normal
  /sys/devices/system/memory/memory38/valid_zones:Normal
  /sys/devices/system/memory/memory39/valid_zones:Normal Movable

Now
  # echo online_movable > /sys/devices/system/memory/memory34/state
  # grep . /sys/devices/system/memory/memory3?/valid_zones 2>/dev/null
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Movable
  /sys/devices/system/memory/memory35/valid_zones:Movable
  /sys/devices/system/memory/memory36/valid_zones:Movable
  /sys/devices/system/memory/memory37/valid_zones:Movable
  /sys/devices/system/memory/memory38/valid_zones:Movable
  /sys/devices/system/memory/memory39/valid_zones:Movable

Block 33 can still be online both kernel and movable while all
the remaining can be only movable.

/proc/zonelist says
  Node 0, zone   Normal
    pages free     0
          min      0
          low      0
          high     0
          spanned  0
          present  0
  --
  Node 0, zone  Movable
    pages free     32753
          min      85
          low      117
          high     149
          spanned  32768
          present  32768

A new memblock at a lower address will result in a new memblock (32)
which will still allow both Normal and Movable.

  # sh probe_memblock.sh 0
  # grep . /sys/devices/system/memory/memory3[2-5]/valid_zones 2>/dev/null
  /sys/devices/system/memory/memory32/valid_zones:Normal Movable
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Movable
  /sys/devices/system/memory/memory35/valid_zones:Movable

and online_kernel will convert it to the zone normal properly
while 33 can be still onlined both ways.

  # echo online_kernel > /sys/devices/system/memory/memory32/state
  # grep . /sys/devices/system/memory/memory3[2-5]/valid_zones 2>/dev/null
  /sys/devices/system/memory/memory32/valid_zones:Normal
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Movable
  /sys/devices/system/memory/memory35/valid_zones:Movable

/proc/zoneinfo will now tell
  Node 0, zone   Normal
    pages free     65441
          min      165
          low      230
          high     295
          spanned  65536
          present  65536
  --
  Node 0, zone  Movable
    pages free     32740
          min      82
          low      114
          high     146
          spanned  32768
          present  32768

so both zones have one memblock spanned and present.

Onlining 39 should associate this block to the movable zone

  # echo online > /sys/devices/system/memory/memory39/state

/proc/zoneinfo will now tell
  Node 0, zone   Normal
    pages free     32765
          min      80
          low      112
          high     144
          spanned  32768
          present  32768
  --
  Node 0, zone  Movable
    pages free     65501
          min      160
          low      225
          high     290
          spanned  196608
          present  65536

so we will have a movable zone which spans 6 memblocks, 2 present and 4
representing a hole.

Offlining both movable blocks will lead to the zone with no present
pages which is the expected behavior I believe.

  # echo offline > /sys/devices/system/memory/memory39/state
  # echo offline > /sys/devices/system/memory/memory34/state
  # grep -A6 "Movable\|Normal" /proc/zoneinfo
  Node 0, zone   Normal
    pages free     32735
          min      90
          low      122
          high     154
          spanned  32768
          present  32768
  --
  Node 0, zone  Movable
    pages free     0
          min      0
          low      0
          high     0
          spanned  196608
          present  0

As a bonus we will get a nice cleanup in the memory hotplug codebase.

This patch (of 16):

init_currently_empty_zone doesn't have any error to return yet it is
still an int and callers try to be defensive and try to handle potential
error.  Remove this nonsense and simplify all callers.

This patch shouldn't have any visible effect

Link: http://lkml.kernel.org/r/20170515085827.16474-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  2 +-
 mm/memory_hotplug.c    | 23 +++++------------------
 mm/page_alloc.c        |  8 ++------
 3 files changed, 8 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc39f85d273c..976a1202bec1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -772,7 +772,7 @@ enum memmap_context {
 	MEMMAP_EARLY,
 	MEMMAP_HOTPLUG,
 };
-extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
+extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
 				     unsigned long size);
 
 extern void lruvec_init(struct lruvec *lruvec);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b63d7d1239df..b93c88125766 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -348,27 +348,20 @@ static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
 		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
 }
 
-/* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
- * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
-static int __ref ensure_zone_is_initialized(struct zone *zone,
+static void __ref ensure_zone_is_initialized(struct zone *zone,
 			unsigned long start_pfn, unsigned long num_pages)
 {
 	if (!zone_is_initialized(zone))
-		return init_currently_empty_zone(zone, start_pfn, num_pages);
-
-	return 0;
+		init_currently_empty_zone(zone, start_pfn, num_pages);
 }
 
 static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
 		unsigned long start_pfn, unsigned long end_pfn)
 {
-	int ret;
 	unsigned long flags;
 	unsigned long z1_start_pfn;
 
-	ret = ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
-	if (ret)
-		return ret;
+	ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
 
 	pgdat_resize_lock(z1->zone_pgdat, &flags);
 
@@ -404,13 +397,10 @@ out_fail:
 static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
 		unsigned long start_pfn, unsigned long end_pfn)
 {
-	int ret;
 	unsigned long flags;
 	unsigned long z2_end_pfn;
 
-	ret = ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
-	if (ret)
-		return ret;
+	ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
 
 	pgdat_resize_lock(z1->zone_pgdat, &flags);
 
@@ -481,12 +471,9 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	int nid = pgdat->node_id;
 	int zone_type;
 	unsigned long flags, pfn;
-	int ret;
 
 	zone_type = zone - pgdat->node_zones;
-	ret = ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
-	if (ret)
-		return ret;
+	ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
 
 	pgdat_resize_lock(zone->zone_pgdat, &flags);
 	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2302f250d6b1..73f854344735 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5528,7 +5528,7 @@ static __meminit void zone_pcp_init(struct zone *zone)
 					 zone_batchsize(zone));
 }
 
-int __meminit init_currently_empty_zone(struct zone *zone,
+void __meminit init_currently_empty_zone(struct zone *zone,
 					unsigned long zone_start_pfn,
 					unsigned long size)
 {
@@ -5546,8 +5546,6 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 
 	zone_init_free_lists(zone);
 	zone->initialized = 1;
-
-	return 0;
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
@@ -6005,7 +6003,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 {
 	enum zone_type j;
 	int nid = pgdat->node_id;
-	int ret;
 
 	pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
@@ -6087,8 +6084,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 
 		set_pageblock_order();
 		setup_usemap(pgdat, zone, zone_start_pfn, size);
-		ret = init_currently_empty_zone(zone, zone_start_pfn, size);
-		BUG_ON(ret);
+		init_currently_empty_zone(zone, zone_start_pfn, size);
 		memmap_init(size, nid, j, zone_start_pfn);
 	}
 }
-- 
cgit v1.2.3


From 1b862aecfbd419cdc4553645bf86d07554279bed Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:37:45 -0700
Subject: mm, memory_hotplug: get rid of is_zone_device_section

Device memory hotplug hooks into regular memory hotplug only half way.
It needs memory sections to track struct pages but there is no
need/desire to associate those sections with memory blocks and export
them to the userspace via sysfs because they cannot be onlined anyway.

This is currently expressed by for_device argument to arch_add_memory
which then makes sure to associate the given memory range with
ZONE_DEVICE.  register_new_memory then relies on is_zone_device_section
to distinguish special memory hotplug from the regular one.  While this
works now, later patches in this series want to move __add_zone outside
of arch_add_memory path so we have to come up with something else.

Add want_memblock down the __add_pages path and use it to control
whether the section->memblock association should be done.
arch_add_memory then just trivially want memblock for everything but
for_device hotplug.

remove_memory_section doesn't need is_zone_device_section either.  We
can simply skip all the memblock specific cleanup if there is no
memblock for the given section.

This shouldn't introduce any functional change.

Link: http://lkml.kernel.org/r/20170515085827.16474-5-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c            |  2 +-
 arch/powerpc/mm/mem.c          |  2 +-
 arch/s390/mm/init.c            |  2 +-
 arch/sh/mm/init.c              |  2 +-
 arch/x86/mm/init_32.c          |  2 +-
 arch/x86/mm/init_64.c          |  2 +-
 drivers/base/memory.c          | 23 +++++++++--------------
 include/linux/memory_hotplug.h |  2 +-
 mm/memory_hotplug.c            |  9 ++++++---
 9 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 8f3efa682ee8..39e2aeb4669d 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -658,7 +658,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 
 	zone = pgdat->node_zones +
 		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
-	ret = __add_pages(nid, zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
 
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9ee536ec0739..e6b2e6618b6c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -151,7 +151,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	zone = pgdata->node_zones +
 		zone_for_memory(nid, start, size, 0, for_device);
 
-	return __add_pages(nid, zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 3348e60dd8ad..a3d549966b6a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -195,7 +195,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 			continue;
 		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
 			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages);
+		rc = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 75491862d900..a9d57f75ae8c 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -498,7 +498,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	ret = __add_pages(nid, pgdat->node_zones +
 			zone_for_memory(nid, start, size, ZONE_NORMAL,
 			for_device),
-			start_pfn, nr_pages);
+			start_pfn, nr_pages, !for_device);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 99fb83819a5f..94594b889144 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -831,7 +831,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(nid, zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dae6a5e5ad4a..9d64291459b6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -787,7 +787,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 
 	init_memory_mapping(start, start + size);
 
-	ret = __add_pages(nid, zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
 	WARN_ON_ONCE(ret);
 
 	/* update max_pfn, max_low_pfn and high_memory */
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 90225ffee501..f8fd562c3f18 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -685,14 +685,6 @@ static int add_memory_block(int base_section_nr)
 	return 0;
 }
 
-static bool is_zone_device_section(struct mem_section *ms)
-{
-	struct page *page;
-
-	page = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms));
-	return is_zone_device_page(page);
-}
-
 /*
  * need an interface for the VM to add new memory regions,
  * but without onlining it.
@@ -702,9 +694,6 @@ int register_new_memory(int nid, struct mem_section *section)
 	int ret = 0;
 	struct memory_block *mem;
 
-	if (is_zone_device_section(section))
-		return 0;
-
 	mutex_lock(&mem_sysfs_mutex);
 
 	mem = find_memory_block(section);
@@ -741,11 +730,16 @@ static int remove_memory_section(unsigned long node_id,
 {
 	struct memory_block *mem;
 
-	if (is_zone_device_section(section))
-		return 0;
-
 	mutex_lock(&mem_sysfs_mutex);
+
+	/*
+	 * Some users of the memory hotplug do not want/need memblock to
+	 * track all sections. Skip over those.
+	 */
 	mem = find_memory_block(section);
+	if (!mem)
+		goto out_unlock;
+
 	unregister_mem_sect_under_nodes(mem, __section_nr(section));
 
 	mem->section_count--;
@@ -754,6 +748,7 @@ static int remove_memory_section(unsigned long node_id,
 	else
 		put_device(&mem->dev);
 
+out_unlock:
 	mutex_unlock(&mem_sysfs_mutex);
 	return 0;
 }
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 134a2f69c21a..3c8cf86201c3 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -111,7 +111,7 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
-	unsigned long nr_pages);
+	unsigned long nr_pages, bool want_memblock);
 
 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6b6362819be2..c0147d3024eb 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -494,7 +494,7 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 }
 
 static int __meminit __add_section(int nid, struct zone *zone,
-					unsigned long phys_start_pfn)
+		unsigned long phys_start_pfn, bool want_memblock)
 {
 	int ret;
 
@@ -511,6 +511,9 @@ static int __meminit __add_section(int nid, struct zone *zone,
 	if (ret < 0)
 		return ret;
 
+	if (!want_memblock)
+		return 0;
+
 	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
@@ -521,7 +524,7 @@ static int __meminit __add_section(int nid, struct zone *zone,
  * add the new pages.
  */
 int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
-			unsigned long nr_pages)
+			unsigned long nr_pages, bool want_memblock)
 {
 	unsigned long i;
 	int err = 0;
@@ -549,7 +552,7 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	}
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(nid, zone, section_nr_to_pfn(i));
+		err = __add_section(nid, zone, section_nr_to_pfn(i), want_memblock);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
-- 
cgit v1.2.3


From 9037a9934349b0e180896fc8cacaf1819418ba03 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:37:49 -0700
Subject: mm, memory_hotplug: split up register_one_node()

Memory hotplug (add_memory_resource) has to reinitialize node
infrastructure if the node is offline (one which went through the
complete add_memory(); remove_memory() cycle).  That involves node
registration to the kobj infrastructure (register_node), the proper
association with cpus (register_cpu_under_node) and finally creation of
node<->memblock symlinks (link_mem_sections).

The last part requires to know node_start_pfn and node_spanned_pages
which we currently have but a leter patch will postpone this
initialization to the onlining phase which happens later.  In fact we do
not need to rely on the early pgdat initialization even now because the
currently hot added pfn range is currently known.

Split register_one_node into core which does all the common work for the
boot time NUMA initialization and the hotplug (__register_one_node).
register_one_node keeps the full initialization while hotplug calls
__register_one_node and manually calls link_mem_sections for the proper
range.

This shouldn't introduce any functional change.

Link: http://lkml.kernel.org/r/20170515085827.16474-6-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  | 51 ++++++++++++++++++++-------------------------------
 include/linux/node.h | 35 ++++++++++++++++++++++++++++++++++-
 mm/memory_hotplug.c  | 17 ++++++++++++++++-
 3 files changed, 70 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index db769d3148b7..1da0005341a1 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -461,10 +461,9 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 	return 0;
 }
 
-static int link_mem_sections(int nid)
+int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
 {
-	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
-	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
+	unsigned long end_pfn = start_pfn + nr_pages;
 	unsigned long pfn;
 	struct memory_block *mem_blk = NULL;
 	int err = 0;
@@ -552,10 +551,7 @@ static int node_memory_callback(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 #endif	/* CONFIG_HUGETLBFS */
-#else	/* !CONFIG_MEMORY_HOTPLUG_SPARSE */
-
-static int link_mem_sections(int nid) { return 0; }
-#endif	/* CONFIG_MEMORY_HOTPLUG_SPARSE */
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
 #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
     !defined(CONFIG_HUGETLBFS)
@@ -569,39 +565,32 @@ static void init_node_hugetlb_work(int nid) { }
 
 #endif
 
-int register_one_node(int nid)
+int __register_one_node(int nid)
 {
-	int error = 0;
+	int p_node = parent_node(nid);
+	struct node *parent = NULL;
+	int error;
 	int cpu;
 
-	if (node_online(nid)) {
-		int p_node = parent_node(nid);
-		struct node *parent = NULL;
-
-		if (p_node != nid)
-			parent = node_devices[p_node];
-
-		node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
-		if (!node_devices[nid])
-			return -ENOMEM;
-
-		error = register_node(node_devices[nid], nid, parent);
+	if (p_node != nid)
+		parent = node_devices[p_node];
 
-		/* link cpu under this node */
-		for_each_present_cpu(cpu) {
-			if (cpu_to_node(cpu) == nid)
-				register_cpu_under_node(cpu, nid);
-		}
+	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
+	if (!node_devices[nid])
+		return -ENOMEM;
 
-		/* link memory sections under this node */
-		error = link_mem_sections(nid);
+	error = register_node(node_devices[nid], nid, parent);
 
-		/* initialize work queue for memory hot plug */
-		init_node_hugetlb_work(nid);
+	/* link cpu under this node */
+	for_each_present_cpu(cpu) {
+		if (cpu_to_node(cpu) == nid)
+			register_cpu_under_node(cpu, nid);
 	}
 
-	return error;
+	/* initialize work queue for memory hot plug */
+	init_node_hugetlb_work(nid);
 
+	return error;
 }
 
 void unregister_one_node(int nid)
diff --git a/include/linux/node.h b/include/linux/node.h
index 2115ad5d6f19..d1751beb462c 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -30,9 +30,38 @@ struct memory_block;
 extern struct node *node_devices[];
 typedef  void (*node_registration_func_t)(struct node *);
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
+extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages);
+#else
+static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
+{
+	return 0;
+}
+#endif
+
 extern void unregister_node(struct node *node);
 #ifdef CONFIG_NUMA
-extern int register_one_node(int nid);
+/* Core of the node registration - only memory hotplug should use this */
+extern int __register_one_node(int nid);
+
+/* Registers an online node */
+static inline int register_one_node(int nid)
+{
+	int error = 0;
+
+	if (node_online(nid)) {
+		struct pglist_data *pgdat = NODE_DATA(nid);
+
+		error = __register_one_node(nid);
+		if (error)
+			return error;
+		/* link memory sections under this node */
+		error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages);
+	}
+
+	return error;
+}
+
 extern void unregister_one_node(int nid);
 extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
@@ -46,6 +75,10 @@ extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
 					 node_registration_func_t unregister);
 #endif
 #else
+static inline int __register_one_node(int nid)
+{
+	return 0;
+}
 static inline int register_one_node(int nid)
 {
 	return 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c0147d3024eb..caa58338d121 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1388,7 +1388,22 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	node_set_online(nid);
 
 	if (new_node) {
-		ret = register_one_node(nid);
+		unsigned long start_pfn = start >> PAGE_SHIFT;
+		unsigned long nr_pages = size >> PAGE_SHIFT;
+
+		ret = __register_one_node(nid);
+		if (ret)
+			goto register_fail;
+
+		/*
+		 * link memory sections under this node. This is already
+		 * done when creatig memory section in register_new_memory
+		 * but that depends to have the node registered so offline
+		 * nodes have to go through register_node.
+		 * TODO clean up this mess.
+		 */
+		ret = link_mem_sections(nid, start_pfn, nr_pages);
+register_fail:
 		/*
 		 * If sysfs file of new node can't create, cpu on the node
 		 * can't be hot-added. There is no rollback way now.
-- 
cgit v1.2.3


From 2d070eab2e8270c8a84d480bb91e4f739315f03d Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:37:56 -0700
Subject: mm: consider zone which is not fully populated to have holes

__pageblock_pfn_to_page has two users currently, set_zone_contiguous
which checks whether the given zone contains holes and
pageblock_pfn_to_page which then carefully returns a first valid page
from the given pfn range for the given zone.  This doesn't handle zones
which are not fully populated though.  Memory pageblocks can be offlined
or might not have been onlined yet.  In such a case the zone should be
considered to have holes otherwise pfn walkers can touch and play with
offline pages.

Current callers of pageblock_pfn_to_page in compaction seem to work
properly right now because they only isolate PageBuddy
(isolate_freepages_block) or PageLRU resp.  __PageMovable
(isolate_migratepages_block) which will be always false for these pages.
It would be safer to skip these pages altogether, though.

In order to do this patch adds a new memory section state
(SECTION_IS_ONLINE) which is set in memory_present (during boot time) or
in online_pages_range during the memory hotplug.  Similarly
offline_mem_sections clears the bit and it is called when the memory
range is offlined.

pfn_to_online_page helper is then added which check the mem section and
only returns a page if it is onlined already.

Use the new helper in __pageblock_pfn_to_page and skip the whole page
block in such a case.

[mhocko@suse.com: check valid section number in pfn_to_online_page (Vlastimil),
 mark sections online after all struct pages are initialized in
 online_pages_range (Vlastimil)]
  Link: http://lkml.kernel.org/r/20170518164210.GD18333@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/20170515085827.16474-8-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 22 +++++++++++++++++++++
 include/linux/mmzone.h         | 35 ++++++++++++++++++++++++++------
 mm/memory_hotplug.c            |  4 ++++
 mm/page_alloc.c                |  5 ++++-
 mm/sparse.c                    | 45 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 103 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 3c8cf86201c3..a61aede1b391 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -14,6 +14,20 @@ struct memory_block;
 struct resource;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Return page for the valid pfn only if the page is online. All pfn
+ * walkers which rely on the fully initialized page->flags and others
+ * should use this rather than pfn_valid && pfn_to_page
+ */
+#define pfn_to_online_page(pfn)				\
+({							\
+	struct page *___page = NULL;			\
+	unsigned long ___nr = pfn_to_section_nr(pfn);	\
+							\
+	if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
+		___page = pfn_to_page(pfn);		\
+	___page;					\
+})
 
 /*
  * Types for free bootmem stored in page->lru.next. These have to be in
@@ -203,6 +217,14 @@ extern void set_zone_contiguous(struct zone *zone);
 extern void clear_zone_contiguous(struct zone *zone);
 
 #else /* ! CONFIG_MEMORY_HOTPLUG */
+#define pfn_to_online_page(pfn)			\
+({						\
+	struct page *___page = NULL;		\
+	if (pfn_valid(pfn))			\
+		___page = pfn_to_page(pfn);	\
+	___page;				\
+ })
+
 /*
  * Stub functions for when hotplug is off
  */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 976a1202bec1..2aaf7e08c5a8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1144,9 +1144,10 @@ extern unsigned long usemap_size(void);
  */
 #define	SECTION_MARKED_PRESENT	(1UL<<0)
 #define SECTION_HAS_MEM_MAP	(1UL<<1)
-#define SECTION_MAP_LAST_BIT	(1UL<<2)
+#define SECTION_IS_ONLINE	(1UL<<2)
+#define SECTION_MAP_LAST_BIT	(1UL<<3)
 #define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT	2
+#define SECTION_NID_SHIFT	3
 
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
@@ -1175,6 +1176,23 @@ static inline int valid_section_nr(unsigned long nr)
 	return valid_section(__nr_to_section(nr));
 }
 
+static inline int online_section(struct mem_section *section)
+{
+	return (section && (section->section_mem_map & SECTION_IS_ONLINE));
+}
+
+static inline int online_section_nr(unsigned long nr)
+{
+	return online_section(__nr_to_section(nr));
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
+#ifdef CONFIG_MEMORY_HOTREMOVE
+void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
+#endif
+#endif
+
 static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 {
 	return __nr_to_section(pfn_to_section_nr(pfn));
@@ -1253,10 +1271,15 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
 #ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
 /*
  * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
- * associated with it or not. In FLATMEM, it is expected that holes always
- * have valid memmap as long as there is valid PFNs either side of the hole.
- * In SPARSEMEM, it is assumed that a valid section has a memmap for the
- * entire section.
+ * associated with it or not. This means that a struct page exists for this
+ * pfn. The caller cannot assume the page is fully initialized in general.
+ * Hotplugable pages might not have been onlined yet. pfn_to_online_page()
+ * will ensure the struct page is fully online and initialized. Special pages
+ * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
+ *
+ * In FLATMEM, it is expected that holes always have valid memmap as long as
+ * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
+ * that a valid section has a memmap for the entire section.
  *
  * However, an ARM, and maybe other embedded architectures in the future
  * free memmap backing holes to save memory on the assumption the memmap is
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index caa58338d121..b2ebe9ad7f6c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -929,12 +929,16 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 	unsigned long i;
 	unsigned long onlined_pages = *(unsigned long *)arg;
 	struct page *page;
+
 	if (PageReserved(pfn_to_page(start_pfn)))
 		for (i = 0; i < nr_pages; i++) {
 			page = pfn_to_page(start_pfn + i);
 			(*online_page_callback)(page);
 			onlined_pages++;
 		}
+
+	online_mem_sections(start_pfn, start_pfn + nr_pages);
+
 	*(unsigned long *)arg = onlined_pages;
 	return 0;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73f854344735..387f20db217c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1365,7 +1365,9 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
 	if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
 		return NULL;
 
-	start_page = pfn_to_page(start_pfn);
+	start_page = pfn_to_online_page(start_pfn);
+	if (!start_page)
+		return NULL;
 
 	if (page_zone(start_page) != zone)
 		return NULL;
@@ -7656,6 +7658,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 			break;
 	if (pfn == end_pfn)
 		return;
+	offline_mem_sections(pfn, end_pfn);
 	zone = page_zone(pfn_to_page(pfn));
 	spin_lock_irqsave(&zone->lock, flags);
 	pfn = start_pfn;
diff --git a/mm/sparse.c b/mm/sparse.c
index 5032c9a619de..9d7fd666015e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -222,7 +222,8 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 
 		ms = __nr_to_section(section);
 		if (!ms->section_mem_map) {
-			ms->section_mem_map = sparse_encode_early_nid(nid);
+			ms->section_mem_map = sparse_encode_early_nid(nid) |
+							SECTION_IS_ONLINE;
 			section_mark_present(ms);
 		}
 	}
@@ -622,6 +623,48 @@ void __init sparse_init(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+
+/* Mark all memory sections within the pfn range as online */
+void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(start_pfn);
+		struct mem_section *ms;
+
+		/* onlining code should never touch invalid ranges */
+		if (WARN_ON(!valid_section_nr(section_nr)))
+			continue;
+
+		ms = __nr_to_section(section_nr);
+		ms->section_mem_map |= SECTION_IS_ONLINE;
+	}
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/* Mark all memory sections within the pfn range as online */
+void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(start_pfn);
+		struct mem_section *ms;
+
+		/*
+		 * TODO this needs some double checking. Offlining code makes
+		 * sure to check pfn_valid but those checks might be just bogus
+		 */
+		if (WARN_ON(!valid_section_nr(section_nr)))
+			continue;
+
+		ms = __nr_to_section(section_nr);
+		ms->section_mem_map &= ~SECTION_IS_ONLINE;
+	}
+}
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
-- 
cgit v1.2.3


From f1dd2cd13c4bbbc9a7c4617b3b034fa643de98fe Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:38:11 -0700
Subject: mm, memory_hotplug: do not associate hotadded memory to zones until
 online

The current memory hotplug implementation relies on having all the
struct pages associate with a zone/node during the physical hotplug
phase (arch_add_memory->__add_pages->__add_section->__add_zone).  In the
vast majority of cases this means that they are added to ZONE_NORMAL.
This has been so since 9d99aaa31f59 ("[PATCH] x86_64: Support memory
hotadd without sparsemem") and it wasn't a big deal back then because
movable onlining didn't exist yet.

Much later memory hotplug wanted to (ab)use ZONE_MOVABLE for movable
onlining 511c2aba8f07 ("mm, memory-hotplug: dynamic configure movable
memory and portion memory") and then things got more complicated.
Rather than reconsidering the zone association which was no longer
needed (because the memory hotplug already depended on SPARSEMEM) a
convoluted semantic of zone shifting has been developed.  Only the
currently last memblock or the one adjacent to the zone_movable can be
onlined movable.  This essentially means that the online type changes as
the new memblocks are added.

Let's simulate memory hot online manually
  $ echo 0x100000000 > /sys/devices/system/memory/probe
  $ grep . /sys/devices/system/memory/memory32/valid_zones
  Normal Movable

  $ echo $((0x100000000+(128<<20))) > /sys/devices/system/memory/probe
  $ grep . /sys/devices/system/memory/memory3?/valid_zones
  /sys/devices/system/memory/memory32/valid_zones:Normal
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable

  $ echo $((0x100000000+2*(128<<20))) > /sys/devices/system/memory/probe
  $ grep . /sys/devices/system/memory/memory3?/valid_zones
  /sys/devices/system/memory/memory32/valid_zones:Normal
  /sys/devices/system/memory/memory33/valid_zones:Normal
  /sys/devices/system/memory/memory34/valid_zones:Normal Movable

  $ echo online_movable > /sys/devices/system/memory/memory34/state
  $ grep . /sys/devices/system/memory/memory3?/valid_zones
  /sys/devices/system/memory/memory32/valid_zones:Normal
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Movable Normal

This is an awkward semantic because an udev event is sent as soon as the
block is onlined and an udev handler might want to online it based on
some policy (e.g.  association with a node) but it will inherently race
with new blocks showing up.

This patch changes the physical online phase to not associate pages with
any zone at all.  All the pages are just marked reserved and wait for
the onlining phase to be associated with the zone as per the online
request.  There are only two requirements

	- existing ZONE_NORMAL and ZONE_MOVABLE cannot overlap

	- ZONE_NORMAL precedes ZONE_MOVABLE in physical addresses

the latter one is not an inherent requirement and can be changed in the
future.  It preserves the current behavior and made the code slightly
simpler.  This is subject to change in future.

This means that the same physical online steps as above will lead to the
following state: Normal Movable

  /sys/devices/system/memory/memory32/valid_zones:Normal Movable
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable

  /sys/devices/system/memory/memory32/valid_zones:Normal Movable
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Normal Movable

  /sys/devices/system/memory/memory32/valid_zones:Normal Movable
  /sys/devices/system/memory/memory33/valid_zones:Normal Movable
  /sys/devices/system/memory/memory34/valid_zones:Movable

Implementation:
The current move_pfn_range is reimplemented to check the above
requirements (allow_online_pfn_range) and then updates the respective
zone (move_pfn_range_to_zone), the pgdat and links all the pages in the
pfn range with the zone/node.  __add_pages is updated to not require the
zone and only initializes sections in the range.  This allowed to
simplify the arch_add_memory code (s390 could get rid of quite some of
code).

devm_memremap_pages is the only user of arch_add_memory which relies on
the zone association because it only hooks into the memory hotplug only
half way.  It uses it to associate the new memory with ZONE_DEVICE but
doesn't allow it to be {on,off}lined via sysfs.  This means that this
particular code path has to call move_pfn_range_to_zone explicitly.

The original zone shifting code is kept in place and will be removed in
the follow up patch for an easier review.

Please note that this patch also changes the original behavior when
offlining a memory block adjacent to another zone (Normal vs.  Movable)
used to allow to change its movable type.  This will be handled later.

[richard.weiyang@gmail.com: simplify zone_intersects()]
  Link: http://lkml.kernel.org/r/20170616092335.5177-1-richard.weiyang@gmail.com
[richard.weiyang@gmail.com: remove duplicate call for set_page_links]
  Link: http://lkml.kernel.org/r/20170616092335.5177-2-richard.weiyang@gmail.com
[akpm@linux-foundation.org: remove unused local `i']
Link: http://lkml.kernel.org/r/20170515085827.16474-12-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> # For s390 bits
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c            |   9 +-
 arch/powerpc/mm/mem.c          |  10 +-
 arch/s390/mm/init.c            |  30 +-----
 arch/sh/mm/init.c              |   8 +-
 arch/x86/mm/init_32.c          |   5 +-
 arch/x86/mm/init_64.c          |   9 +-
 drivers/base/memory.c          |  52 ++++++-----
 include/linux/memory_hotplug.h |  13 +--
 include/linux/mmzone.h         |  16 ++++
 kernel/memremap.c              |   4 +
 mm/memory_hotplug.c            | 201 +++++++++++++++++++++++++----------------
 mm/sparse.c                    |   3 +-
 12 files changed, 185 insertions(+), 175 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 39e2aeb4669d..80db57d063d0 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -648,18 +648,11 @@ mem_init (void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	pg_data_t *pgdat;
-	struct zone *zone;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	pgdat = NODE_DATA(nid);
-
-	zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
-	ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
-
+	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
 		       __func__,  ret);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e6b2e6618b6c..72c46eb53215 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,16 +128,12 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	struct pglist_data *pgdata;
-	struct zone *zone;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int rc;
 
 	resize_hpt_for_hotplug(memblock_phys_mem_size());
 
-	pgdata = NODE_DATA(nid);
-
 	start = (unsigned long)__va(start);
 	rc = create_section_mapping(start, start + size);
 	if (rc) {
@@ -147,11 +143,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 		return -EFAULT;
 	}
 
-	/* this should work for most non-highmem platforms */
-	zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, 0, for_device);
-
-	return __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
+	return __add_pages(nid, start_pfn, nr_pages, !for_device);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a3d549966b6a..bfa918e3592b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -168,41 +168,15 @@ unsigned long memory_block_size_bytes(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	pg_data_t *pgdat = NODE_DATA(nid);
-	struct zone *zone;
-	int rc, i;
+	int rc;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zone = pgdat->node_zones + i;
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits, if possible */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
-		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
-		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
-		if (rc)
-			break;
-		start_pfn += nr_pages;
-		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
-	}
+	rc = __add_pages(nid, start_pfn, size_pages, !for_device);
 	if (rc)
 		vmem_remove_mapping(start, size);
 	return rc;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index a9d57f75ae8c..3813a610a2bb 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -487,18 +487,12 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	pg_data_t *pgdat;
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	pgdat = NODE_DATA(nid);
-
 	/* We only have ZONE_NORMAL, so this is easy.. */
-	ret = __add_pages(nid, pgdat->node_zones +
-			zone_for_memory(nid, start, size, ZONE_NORMAL,
-			for_device),
-			start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 94594b889144..a424066d0552 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,13 +825,10 @@ void __init mem_init(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	struct pglist_data *pgdata = NODE_DATA(nid);
-	struct zone *zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
+	return __add_pages(nid, start_pfn, nr_pages, !for_device);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9d64291459b6..06afa84ac0a0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,22 +772,15 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
 	}
 }
 
-/*
- * Memory is added always to NORMAL zone. This means you will never get
- * additional DMA/DMA32 memory.
- */
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	struct pglist_data *pgdat = NODE_DATA(nid);
-	struct zone *zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
 	init_memory_mapping(start, start + size);
 
-	ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
 	WARN_ON_ONCE(ret);
 
 	/* update max_pfn, max_low_pfn and high_memory */
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 1e884d82af6f..b86fda30ce62 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -392,39 +392,43 @@ static ssize_t show_valid_zones(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	struct memory_block *mem = to_memory_block(dev);
-	unsigned long start_pfn, end_pfn;
-	unsigned long valid_start, valid_end, valid_pages;
+	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	struct zone *zone;
-	int zone_shift = 0;
+	unsigned long valid_start_pfn, valid_end_pfn;
+	bool append = false;
+	int nid;
 
-	start_pfn = section_nr_to_pfn(mem->start_section_nr);
-	end_pfn = start_pfn + nr_pages;
-
-	/* The block contains more than one zone can not be offlined. */
-	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
+	/*
+	 * The block contains more than one zone can not be offlined.
+	 * This can happen e.g. for ZONE_DMA and ZONE_DMA32
+	 */
+	if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn))
 		return sprintf(buf, "none\n");
 
-	zone = page_zone(pfn_to_page(valid_start));
-	valid_pages = valid_end - valid_start;
-
-	/* MMOP_ONLINE_KEEP */
-	sprintf(buf, "%s", zone->name);
+	start_pfn = valid_start_pfn;
+	nr_pages = valid_end_pfn - start_pfn;
 
-	/* MMOP_ONLINE_KERNEL */
-	zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift);
-	if (zone_shift) {
-		strcat(buf, " ");
-		strcat(buf, (zone + zone_shift)->name);
+	/*
+	 * Check the existing zone. Make sure that we do that only on the
+	 * online nodes otherwise the page_zone is not reliable
+	 */
+	if (mem->state == MEM_ONLINE) {
+		strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
+		goto out;
 	}
 
-	/* MMOP_ONLINE_MOVABLE */
-	zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift);
-	if (zone_shift) {
-		strcat(buf, " ");
-		strcat(buf, (zone + zone_shift)->name);
+	nid = pfn_to_nid(start_pfn);
+	if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) {
+		strcat(buf, NODE_DATA(nid)->node_zones[ZONE_NORMAL].name);
+		append = true;
 	}
 
+	if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE)) {
+		if (append)
+			strcat(buf, " ");
+		strcat(buf, NODE_DATA(nid)->node_zones[ZONE_MOVABLE].name);
+	}
+out:
 	strcat(buf, "\n");
 
 	return strlen(buf);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index a61aede1b391..8a07a49fd8dc 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -123,8 +123,8 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-/* reasonably generic interface to expand the physical pages in a zone  */
-extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
+/* reasonably generic interface to expand the physical pages */
+extern int __add_pages(int nid, unsigned long start_pfn,
 	unsigned long nr_pages, bool want_memblock);
 
 #ifdef CONFIG_NUMA
@@ -299,15 +299,16 @@ extern int add_memory_resource(int nid, struct resource *resource, bool online);
 extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
 		bool for_device);
 extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
+extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+		unsigned long nr_pages);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
+extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 		unsigned long map_offset);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
-extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-			  enum zone_type target, int *zone_shift);
-
+extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
+		int online_type);
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2aaf7e08c5a8..abc1641011f2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -532,6 +532,22 @@ static inline bool zone_is_empty(struct zone *zone)
 	return zone->spanned_pages == 0;
 }
 
+/*
+ * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
+ * intersection with the given zone
+ */
+static inline bool zone_intersects(struct zone *zone,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	if (zone_is_empty(zone))
+		return false;
+	if (start_pfn >= zone_end_pfn(zone) ||
+	    start_pfn + nr_pages <= zone->zone_start_pfn)
+		return false;
+
+	return true;
+}
+
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 23a6483c3666..281eb478856a 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -359,6 +359,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
 	mem_hotplug_begin();
 	error = arch_add_memory(nid, align_start, align_size, true);
+	if (!error)
+		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+					align_start >> PAGE_SHIFT,
+					align_size >> PAGE_SHIFT);
 	mem_hotplug_done();
 	if (error)
 		goto err_add_memory;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b2ebe9ad7f6c..9438ffe24cb2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -433,25 +433,6 @@ out_fail:
 	return -1;
 }
 
-static struct zone * __meminit move_pfn_range(int zone_shift,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	struct zone *zone = page_zone(pfn_to_page(start_pfn));
-	int ret = 0;
-
-	if (zone_shift < 0)
-		ret = move_pfn_range_left(zone + zone_shift, zone,
-					  start_pfn, end_pfn);
-	else if (zone_shift)
-		ret = move_pfn_range_right(zone, zone + zone_shift,
-					   start_pfn, end_pfn);
-
-	if (ret)
-		return NULL;
-
-	return zone + zone_shift;
-}
-
 static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
 				      unsigned long end_pfn)
 {
@@ -493,23 +474,35 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	return 0;
 }
 
-static int __meminit __add_section(int nid, struct zone *zone,
-		unsigned long phys_start_pfn, bool want_memblock)
+static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
+		bool want_memblock)
 {
 	int ret;
+	int i;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(zone, phys_start_pfn);
-
+	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
 	if (ret < 0)
 		return ret;
 
-	ret = __add_zone(zone, phys_start_pfn);
+	/*
+	 * Make all the pages reserved so that nobody will stumble over half
+	 * initialized state.
+	 * FIXME: We also have to associate it with a node because pfn_to_node
+	 * relies on having page with the proper node.
+	 */
+	for (i = 0; i < PAGES_PER_SECTION; i++) {
+		unsigned long pfn = phys_start_pfn + i;
+		struct page *page;
+		if (!pfn_valid(pfn))
+			continue;
 
-	if (ret < 0)
-		return ret;
+		page = pfn_to_page(pfn);
+		set_page_node(page, nid);
+		SetPageReserved(page);
+	}
 
 	if (!want_memblock)
 		return 0;
@@ -523,7 +516,7 @@ static int __meminit __add_section(int nid, struct zone *zone,
  * call this function after deciding the zone to which to
  * add the new pages.
  */
-int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
+int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 			unsigned long nr_pages, bool want_memblock)
 {
 	unsigned long i;
@@ -531,8 +524,6 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	int start_sec, end_sec;
 	struct vmem_altmap *altmap;
 
-	clear_zone_contiguous(zone);
-
 	/* during initialize mem_map, align hot-added range to section */
 	start_sec = pfn_to_section_nr(phys_start_pfn);
 	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
@@ -552,7 +543,7 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	}
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(nid, zone, section_nr_to_pfn(i), want_memblock);
+		err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
@@ -565,7 +556,6 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	}
 	vmemmap_populate_print_last();
 out:
-	set_zone_contiguous(zone);
 	return err;
 }
 EXPORT_SYMBOL_GPL(__add_pages);
@@ -1034,39 +1024,109 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 	node_set_state(node, N_MEMORY);
 }
 
-bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-		   enum zone_type target, int *zone_shift)
+bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type)
 {
-	struct zone *zone = page_zone(pfn_to_page(pfn));
-	enum zone_type idx = zone_idx(zone);
-	int i;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
+	struct zone *normal_zone =  &pgdat->node_zones[ZONE_NORMAL];
 
-	*zone_shift = 0;
+	/*
+	 * TODO there shouldn't be any inherent reason to have ZONE_NORMAL
+	 * physically before ZONE_MOVABLE. All we need is they do not
+	 * overlap. Historically we didn't allow ZONE_NORMAL after ZONE_MOVABLE
+	 * though so let's stick with it for simplicity for now.
+	 * TODO make sure we do not overlap with ZONE_DEVICE
+	 */
+	if (online_type == MMOP_ONLINE_KERNEL) {
+		if (zone_is_empty(movable_zone))
+			return true;
+		return movable_zone->zone_start_pfn >= pfn + nr_pages;
+	} else if (online_type == MMOP_ONLINE_MOVABLE) {
+		return zone_end_pfn(normal_zone) <= pfn;
+	}
 
-	if (idx < target) {
-		/* pages must be at end of current zone */
-		if (pfn + nr_pages != zone_end_pfn(zone))
-			return false;
+	/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
+	return online_type == MMOP_ONLINE_KEEP;
+}
 
-		/* no zones in use between current zone and target */
-		for (i = idx + 1; i < target; i++)
-			if (zone_is_initialized(zone - idx + i))
-				return false;
-	}
+static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
+		unsigned long nr_pages)
+{
+	unsigned long old_end_pfn = zone_end_pfn(zone);
 
-	if (target < idx) {
-		/* pages must be at beginning of current zone */
-		if (pfn != zone->zone_start_pfn)
-			return false;
+	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
+		zone->zone_start_pfn = start_pfn;
+
+	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
+}
+
+static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
+                                     unsigned long nr_pages)
+{
+	unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
 
-		/* no zones in use between current zone and target */
-		for (i = target + 1; i < idx; i++)
-			if (zone_is_initialized(zone - idx + i))
-				return false;
+	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
+		pgdat->node_start_pfn = start_pfn;
+
+	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
+}
+
+void move_pfn_range_to_zone(struct zone *zone,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nid = pgdat->node_id;
+	unsigned long flags;
+
+	if (zone_is_empty(zone))
+		init_currently_empty_zone(zone, start_pfn, nr_pages);
+
+	clear_zone_contiguous(zone);
+
+	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
+	pgdat_resize_lock(pgdat, &flags);
+	zone_span_writelock(zone);
+	resize_zone_range(zone, start_pfn, nr_pages);
+	zone_span_writeunlock(zone);
+	resize_pgdat_range(pgdat, start_pfn, nr_pages);
+	pgdat_resize_unlock(pgdat, &flags);
+
+	/*
+	 * TODO now we have a visible range of pages which are not associated
+	 * with their zone properly. Not nice but set_pfnblock_flags_mask
+	 * expects the zone spans the pfn range. All the pages in the range
+	 * are reserved so nobody should be touching them so we should be safe
+	 */
+	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
+
+	set_zone_contiguous(zone);
+}
+
+/*
+ * Associates the given pfn range with the given node and the zone appropriate
+ * for the given online type.
+ */
+static struct zone * __meminit move_pfn_range(int online_type, int nid,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+
+	if (online_type == MMOP_ONLINE_KEEP) {
+		struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
+		/*
+		 * MMOP_ONLINE_KEEP inherits the current zone which is
+		 * ZONE_NORMAL by default but we might be within ZONE_MOVABLE
+		 * already.
+		 */
+		if (zone_intersects(movable_zone, start_pfn, nr_pages))
+			zone = movable_zone;
+	} else if (online_type == MMOP_ONLINE_MOVABLE) {
+		zone = &pgdat->node_zones[ZONE_MOVABLE];
 	}
 
-	*zone_shift = target - idx;
-	return true;
+	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
+	return zone;
 }
 
 /* Must be protected by mem_hotplug_begin() */
@@ -1079,38 +1139,21 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	int nid;
 	int ret;
 	struct memory_notify arg;
-	int zone_shift = 0;
 
-	/*
-	 * This doesn't need a lock to do pfn_to_page().
-	 * The section can't be removed here because of the
-	 * memory_block->state_mutex.
-	 */
-	zone = page_zone(pfn_to_page(pfn));
-
-	if ((zone_idx(zone) > ZONE_NORMAL ||
-	    online_type == MMOP_ONLINE_MOVABLE) &&
-	    !can_online_high_movable(pfn_to_nid(pfn)))
+	nid = pfn_to_nid(pfn);
+	if (!allow_online_pfn_range(nid, pfn, nr_pages, online_type))
 		return -EINVAL;
 
-	if (online_type == MMOP_ONLINE_KERNEL) {
-		if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
-			return -EINVAL;
-	} else if (online_type == MMOP_ONLINE_MOVABLE) {
-		if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
-			return -EINVAL;
-	}
-
-	zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
-	if (!zone)
+	if (online_type == MMOP_ONLINE_MOVABLE && !can_online_high_movable(nid))
 		return -EINVAL;
 
+	/* associate pfn range with the zone */
+	zone = move_pfn_range(online_type, nid, pfn, nr_pages);
+
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	node_states_check_changes_online(nr_pages, zone, &arg);
 
-	nid = zone_to_nid(zone);
-
 	ret = memory_notify(MEM_GOING_ONLINE, &arg);
 	ret = notifier_to_errno(ret);
 	if (ret)
diff --git a/mm/sparse.c b/mm/sparse.c
index 9d7fd666015e..7b4be3fd5cac 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -761,10 +761,9 @@ static void free_map_bootmem(struct page *memmap)
  * set.  If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
+int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
 {
 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
-	struct pglist_data *pgdat = zone->zone_pgdat;
 	struct mem_section *ms;
 	struct page *memmap;
 	unsigned long *usemap;
-- 
cgit v1.2.3


From c246a213f5bad687c6c2cea27d7265eaf8f6f5d7 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:38:18 -0700
Subject: mm, memory_hotplug: do not assume ZONE_NORMAL is default kernel zone

Heiko Carstens has noticed that he can generate overlapping zones for
ZONE_DMA and ZONE_NORMAL:

  DMA      [mem 0x0000000000000000-0x000000007fffffff]
  Normal   [mem 0x0000000080000000-0x000000017fffffff]

  $ cat /sys/devices/system/memory/block_size_bytes
  10000000
  $ cat /sys/devices/system/memory/memory5/valid_zones
  DMA
  $ echo 0 > /sys/devices/system/memory/memory5/online
  $ cat /sys/devices/system/memory/memory5/valid_zones
  Normal
  $ echo 1 > /sys/devices/system/memory/memory5/online
  Normal

  $ cat /proc/zoneinfo
  Node 0, zone      DMA
  spanned  524288        <-----
  present  458752
  managed  455078
  start_pfn:           0 <-----

  Node 0, zone   Normal
  spanned  720896
  present  589824
  managed  571648
  start_pfn:           327680 <-----

The reason is that we assume that the default zone for kernel onlining
is ZONE_NORMAL.  This was a simplification introduced by the memory
hotplug rework and it is easily fixable by checking the range overlap in
the zone order and considering the first matching zone as the default
one.  If there is no such zone then assume ZONE_NORMAL as we have been
doing so far.

Fixes: "mm, memory_hotplug: do not associate hotadded memory to zones until online"
Link: http://lkml.kernel.org/r/20170601083746.4924-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c          |  2 +-
 include/linux/memory_hotplug.h |  2 ++
 mm/memory_hotplug.c            | 27 ++++++++++++++++++++++++---
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b86fda30ce62..c7c4e0325cdb 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -419,7 +419,7 @@ static ssize_t show_valid_zones(struct device *dev,
 
 	nid = pfn_to_nid(start_pfn);
 	if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) {
-		strcat(buf, NODE_DATA(nid)->node_zones[ZONE_NORMAL].name);
+		strcat(buf, default_zone_for_pfn(nid, start_pfn, nr_pages)->name);
 		append = true;
 	}
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8a07a49fd8dc..4d65a2fcac15 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -311,4 +311,6 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
 extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
 		int online_type);
+extern struct zone *default_zone_for_pfn(int nid, unsigned long pfn,
+		unsigned long nr_pages);
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1a20e44635d3..4263fa6f2ab4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1028,7 +1028,7 @@ bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
-	struct zone *normal_zone =  &pgdat->node_zones[ZONE_NORMAL];
+	struct zone *default_zone = default_zone_for_pfn(nid, pfn, nr_pages);
 
 	/*
 	 * TODO there shouldn't be any inherent reason to have ZONE_NORMAL
@@ -1042,7 +1042,7 @@ bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
 			return true;
 		return movable_zone->zone_start_pfn >= pfn + nr_pages;
 	} else if (online_type == MMOP_ONLINE_MOVABLE) {
-		return zone_end_pfn(normal_zone) <= pfn;
+		return zone_end_pfn(default_zone) <= pfn;
 	}
 
 	/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
@@ -1102,6 +1102,27 @@ void move_pfn_range_to_zone(struct zone *zone,
 	set_zone_contiguous(zone);
 }
 
+/*
+ * Returns a default kernel memory zone for the given pfn range.
+ * If no kernel zone covers this pfn range it will automatically go
+ * to the ZONE_NORMAL.
+ */
+struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
+		unsigned long nr_pages)
+{
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	int zid;
+
+	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+
+		if (zone_intersects(zone, start_pfn, nr_pages))
+			return zone;
+	}
+
+	return &pgdat->node_zones[ZONE_NORMAL];
+}
+
 /*
  * Associates the given pfn range with the given node and the zone appropriate
  * for the given online type.
@@ -1110,7 +1131,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
 		unsigned long start_pfn, unsigned long nr_pages)
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
-	struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+	struct zone *zone = default_zone_for_pfn(nid, start_pfn, nr_pages);
 
 	if (online_type == MMOP_ONLINE_KEEP) {
 		struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
-- 
cgit v1.2.3


From 3d79a728f9b2e6ddcce4e02c91c4de1076548a4c Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:38:21 -0700
Subject: mm, memory_hotplug: replace for_device by want_memblock in
 arch_add_memory

arch_add_memory gets for_device argument which then controls whether we
want to create memblocks for created memory sections.  Simplify the
logic by telling whether we want memblocks directly rather than going
through pointless negation.  This also makes the api easier to
understand because it is clear what we want rather than nothing telling
for_device which can mean anything.

This shouldn't introduce any functional change.

Link: http://lkml.kernel.org/r/20170515085827.16474-13-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c            | 4 ++--
 arch/powerpc/mm/mem.c          | 4 ++--
 arch/s390/mm/init.c            | 4 ++--
 arch/sh/mm/init.c              | 4 ++--
 arch/x86/mm/init_32.c          | 4 ++--
 arch/x86/mm/init_64.c          | 4 ++--
 include/linux/memory_hotplug.h | 2 +-
 kernel/memremap.c              | 2 +-
 mm/memory_hotplug.c            | 2 +-
 9 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 80db57d063d0..a4e8d6bd9cfa 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -646,13 +646,13 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
 		       __func__,  ret);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 72c46eb53215..de5a90e1ceaa 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -126,7 +126,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -143,7 +143,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 		return -EFAULT;
 	}
 
-	return __add_pages(nid, start_pfn, nr_pages, !for_device);
+	return __add_pages(nid, start_pfn, nr_pages, want_memblock);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index bfa918e3592b..8111694ce55a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -166,7 +166,7 @@ unsigned long memory_block_size_bytes(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
@@ -176,7 +176,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	if (rc)
 		return rc;
 
-	rc = __add_pages(nid, start_pfn, size_pages, !for_device);
+	rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
 	if (rc)
 		vmem_remove_mapping(start, size);
 	return rc;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 3813a610a2bb..bf726af5f1a5 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -485,14 +485,14 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
 	/* We only have ZONE_NORMAL, so this is easy.. */
-	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index a424066d0552..8a64a6f2848d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -823,12 +823,12 @@ void __init mem_init(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(nid, start_pfn, nr_pages, !for_device);
+	return __add_pages(nid, start_pfn, nr_pages, want_memblock);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 06afa84ac0a0..136422d7d539 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,7 +772,7 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
 	}
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -780,7 +780,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 
 	init_memory_mapping(start, start + size);
 
-	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
 	WARN_ON_ONCE(ret);
 
 	/* update max_pfn, max_low_pfn and high_memory */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4d65a2fcac15..780c806e17d3 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -298,7 +298,7 @@ extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
 extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
 		bool for_device);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
+extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 281eb478856a..124bed776532 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -358,7 +358,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 		goto err_pfn_remap;
 
 	mem_hotplug_begin();
-	error = arch_add_memory(nid, align_start, align_size, true);
+	error = arch_add_memory(nid, align_start, align_size, false);
 	if (!error)
 		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 					align_start >> PAGE_SHIFT,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4263fa6f2ab4..9b04cf5ea813 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1448,7 +1448,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	}
 
 	/* call arch's memory hotadd */
-	ret = arch_add_memory(nid, start, size, false);
+	ret = arch_add_memory(nid, start, size, true);
 
 	if (ret < 0)
 		goto error;
-- 
cgit v1.2.3


From 559bfc7d1beff814a8e9999d102bf1157ef1f010 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:38:28 -0700
Subject: mm, memory_hotplug: remove unused cruft after memory hotplug rework

zone_for_memory doesn't have any user anymore as well as the whole zone
shifting infrastructure so drop them all.

This shouldn't introduce any functional changes.

Link: http://lkml.kernel.org/r/20170515085827.16474-15-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Tobias Regnery <tobias.regnery@gmail.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |   2 -
 mm/memory_hotplug.c            | 207 -----------------------------------------
 2 files changed, 209 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 780c806e17d3..ed167541e4fc 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -296,8 +296,6 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
-		bool for_device);
 extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7fbb32b0b041..e4fdb97b6ef2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -300,180 +300,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 }
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
-static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
-				     unsigned long end_pfn)
-{
-	unsigned long old_zone_end_pfn;
-
-	zone_span_writelock(zone);
-
-	old_zone_end_pfn = zone_end_pfn(zone);
-	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
-		zone->zone_start_pfn = start_pfn;
-
-	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
-				zone->zone_start_pfn;
-
-	zone_span_writeunlock(zone);
-}
-
-static void resize_zone(struct zone *zone, unsigned long start_pfn,
-		unsigned long end_pfn)
-{
-	zone_span_writelock(zone);
-
-	if (end_pfn - start_pfn) {
-		zone->zone_start_pfn = start_pfn;
-		zone->spanned_pages = end_pfn - start_pfn;
-	} else {
-		/*
-		 * make it consist as free_area_init_core(),
-		 * if spanned_pages = 0, then keep start_pfn = 0
-		 */
-		zone->zone_start_pfn = 0;
-		zone->spanned_pages = 0;
-	}
-
-	zone_span_writeunlock(zone);
-}
-
-static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
-		unsigned long end_pfn)
-{
-	enum zone_type zid = zone_idx(zone);
-	int nid = zone->zone_pgdat->node_id;
-	unsigned long pfn;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn++)
-		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
-}
-
-static void __ref ensure_zone_is_initialized(struct zone *zone,
-			unsigned long start_pfn, unsigned long num_pages)
-{
-	if (!zone_is_initialized(zone))
-		init_currently_empty_zone(zone, start_pfn, num_pages);
-}
-
-static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long flags;
-	unsigned long z1_start_pfn;
-
-	ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
-
-	pgdat_resize_lock(z1->zone_pgdat, &flags);
-
-	/* can't move pfns which are higher than @z2 */
-	if (end_pfn > zone_end_pfn(z2))
-		goto out_fail;
-	/* the move out part must be at the left most of @z2 */
-	if (start_pfn > z2->zone_start_pfn)
-		goto out_fail;
-	/* must included/overlap */
-	if (end_pfn <= z2->zone_start_pfn)
-		goto out_fail;
-
-	/* use start_pfn for z1's start_pfn if z1 is empty */
-	if (!zone_is_empty(z1))
-		z1_start_pfn = z1->zone_start_pfn;
-	else
-		z1_start_pfn = start_pfn;
-
-	resize_zone(z1, z1_start_pfn, end_pfn);
-	resize_zone(z2, end_pfn, zone_end_pfn(z2));
-
-	pgdat_resize_unlock(z1->zone_pgdat, &flags);
-
-	fix_zone_id(z1, start_pfn, end_pfn);
-
-	return 0;
-out_fail:
-	pgdat_resize_unlock(z1->zone_pgdat, &flags);
-	return -1;
-}
-
-static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long flags;
-	unsigned long z2_end_pfn;
-
-	ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
-
-	pgdat_resize_lock(z1->zone_pgdat, &flags);
-
-	/* can't move pfns which are lower than @z1 */
-	if (z1->zone_start_pfn > start_pfn)
-		goto out_fail;
-	/* the move out part mast at the right most of @z1 */
-	if (zone_end_pfn(z1) >  end_pfn)
-		goto out_fail;
-	/* must included/overlap */
-	if (start_pfn >= zone_end_pfn(z1))
-		goto out_fail;
-
-	/* use end_pfn for z2's end_pfn if z2 is empty */
-	if (!zone_is_empty(z2))
-		z2_end_pfn = zone_end_pfn(z2);
-	else
-		z2_end_pfn = end_pfn;
-
-	resize_zone(z1, z1->zone_start_pfn, start_pfn);
-	resize_zone(z2, start_pfn, z2_end_pfn);
-
-	pgdat_resize_unlock(z1->zone_pgdat, &flags);
-
-	fix_zone_id(z2, start_pfn, end_pfn);
-
-	return 0;
-out_fail:
-	pgdat_resize_unlock(z1->zone_pgdat, &flags);
-	return -1;
-}
-
-static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
-				      unsigned long end_pfn)
-{
-	unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
-
-	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
-		pgdat->node_start_pfn = start_pfn;
-
-	pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
-					pgdat->node_start_pfn;
-}
-
-static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
-{
-	struct pglist_data *pgdat = zone->zone_pgdat;
-	int nr_pages = PAGES_PER_SECTION;
-	int nid = pgdat->node_id;
-	int zone_type;
-	unsigned long flags, pfn;
-
-	zone_type = zone - pgdat->node_zones;
-	ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
-
-	pgdat_resize_lock(zone->zone_pgdat, &flags);
-	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
-	grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
-			phys_start_pfn + nr_pages);
-	pgdat_resize_unlock(zone->zone_pgdat, &flags);
-	memmap_init_zone(nr_pages, nid, zone_type,
-			 phys_start_pfn, MEMMAP_HOTPLUG);
-
-	/* online_page_range is called later and expects pages reserved */
-	for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
-		if (!pfn_valid(pfn))
-			continue;
-
-		SetPageReserved(pfn_to_page(pfn));
-	}
-	return 0;
-}
-
 static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
 		bool want_memblock)
 {
@@ -1370,39 +1196,6 @@ static int check_hotplug_memory_range(u64 start, u64 size)
 	return 0;
 }
 
-/*
- * If movable zone has already been setup, newly added memory should be check.
- * If its address is higher than movable zone, it should be added as movable.
- * Without this check, movable zone may overlap with other zone.
- */
-static int should_add_memory_movable(int nid, u64 start, u64 size)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	pg_data_t *pgdat = NODE_DATA(nid);
-	struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
-
-	if (zone_is_empty(movable_zone))
-		return 0;
-
-	if (movable_zone->zone_start_pfn <= start_pfn)
-		return 1;
-
-	return 0;
-}
-
-int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
-		bool for_device)
-{
-#ifdef CONFIG_ZONE_DEVICE
-	if (for_device)
-		return ZONE_DEVICE;
-#endif
-	if (should_add_memory_movable(nid, start, size))
-		return ZONE_MOVABLE;
-
-	return zone_default;
-}
-
 static int online_memory_block(struct memory_block *mem, void *arg)
 {
 	return device_online(&mem->dev);
-- 
cgit v1.2.3


From 94310cbcaa3c2bc1b790ba997270f28dc173d8ce Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 15:38:38 -0700
Subject: mm/madvise: enable (soft|hard) offline of HugeTLB pages at PGD level

Though migrating gigantic HugeTLB pages does not sound much like real
world use case, they can be affected by memory errors.  Hence migration
at the PGD level HugeTLB pages should be supported just to enable soft
and hard offline use cases.

While allocating the new gigantic HugeTLB page, it should not matter
whether new page comes from the same node or not.  There would be very
few gigantic pages on the system afterall, we should not be bothered
about node locality when trying to save a big page from crashing.

This change renames dequeu_huge_page_node() function as dequeue_huge
_page_node_exact() preserving it's original functionality.  Now the new
dequeue_huge_page_node() function scans through all available online nodes
to allocate a huge page for the NUMA_NO_NODE case and just falls back
calling dequeu_huge_page_node_exact() for all other cases.

[arnd@arndb.de: make hstate_is_gigantic() inline]
  Link: http://lkml.kernel.org/r/20170522124748.3911296-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170516100509.20122-1-khandual@linux.vnet.ibm.com
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 11 ++++++++++-
 mm/hugetlb.c            | 18 +++++++++++++++++-
 mm/memory-failure.c     | 13 +++++++++----
 3 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b857fc8cc2ec..5f539f985e2a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -466,7 +466,11 @@ extern int dissolve_free_huge_pages(unsigned long start_pfn,
 static inline bool hugepage_migration_supported(struct hstate *h)
 {
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
-	return huge_page_shift(h) == PMD_SHIFT;
+	if ((huge_page_shift(h) == PMD_SHIFT) ||
+		(huge_page_shift(h) == PGDIR_SHIFT))
+		return true;
+	else
+		return false;
 #else
 	return false;
 #endif
@@ -518,6 +522,11 @@ struct hstate {};
 #define vma_mmu_pagesize(v) PAGE_SIZE
 #define huge_page_order(h) 0
 #define huge_page_shift(h) PAGE_SHIFT
+static inline bool hstate_is_gigantic(struct hstate *h)
+{
+	return false;
+}
+
 static inline unsigned int pages_per_huge_page(struct hstate *h)
 {
 	return 1;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3eedb187e549..040d53ac1f8d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -867,7 +867,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 {
 	struct page *page;
 
@@ -887,6 +887,22 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
+static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+	int node;
+
+	if (nid != NUMA_NO_NODE)
+		return dequeue_huge_page_node_exact(h, nid);
+
+	for_each_online_node(node) {
+		page = dequeue_huge_page_node_exact(h, node);
+		if (page)
+			return page;
+	}
+	return NULL;
+}
+
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ecc183fd94f3..a74c8311db95 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1492,11 +1492,16 @@ EXPORT_SYMBOL(unpoison_memory);
 static struct page *new_page(struct page *p, unsigned long private, int **x)
 {
 	int nid = page_to_nid(p);
-	if (PageHuge(p))
-		return alloc_huge_page_node(page_hstate(compound_head(p)),
-						   nid);
-	else
+	if (PageHuge(p)) {
+		struct hstate *hstate = page_hstate(compound_head(p));
+
+		if (hstate_is_gigantic(hstate))
+			return alloc_huge_page_node(hstate, NUMA_NO_NODE);
+
+		return alloc_huge_page_node(hstate, nid);
+	} else {
 		return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
+	}
 }
 
 /*
-- 
cgit v1.2.3


From d5ed7444dafb94b6877410d1b66a846eb7184a09 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 15:38:47 -0700
Subject: mm/hugetlb: export hugetlb_entry_migration helper

We will be using this later from the ppc64 code.  Change the return type
to bool.

Link: http://lkml.kernel.org/r/1494926612-23928-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 1 +
 mm/hugetlb.c            | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5f539f985e2a..aa1df49b9a14 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -126,6 +126,7 @@ int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
+bool is_hugetlb_entry_migration(pte_t pte);
 #else /* !CONFIG_HUGETLB_PAGE */
 
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 040d53ac1f8d..65c84414a6b7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3201,17 +3201,17 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 		update_mmu_cache(vma, address, ptep);
 }
 
-static int is_hugetlb_entry_migration(pte_t pte)
+bool is_hugetlb_entry_migration(pte_t pte)
 {
 	swp_entry_t swp;
 
 	if (huge_pte_none(pte) || pte_present(pte))
-		return 0;
+		return false;
 	swp = pte_to_swp_entry(pte);
 	if (non_swap_entry(swp) && is_migration_entry(swp))
-		return 1;
+		return true;
 	else
-		return 0;
+		return false;
 }
 
 static int is_hugetlb_entry_hwpoisoned(pte_t pte)
-- 
cgit v1.2.3


From faaa5b62d3f7907e217b179556038f9f8e157ee0 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 15:38:50 -0700
Subject: mm/follow_page_mask: add support for hugetlb pgd entries

ppc64 supports pgd hugetlb entries.  Add code to handle hugetlb pgd
entries to follow_page_mask so that ppc64 can switch to it to handle
hugetlbe entries.

Link: http://lkml.kernel.org/r/1494926612-23928-5-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 4 ++++
 mm/gup.c                | 7 +++++++
 mm/hugetlb.c            | 9 +++++++++
 3 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index aa1df49b9a14..3656ce605dc9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -121,6 +121,9 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 				pmd_t *pmd, int flags);
 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int flags);
+struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
+			     pgd_t *pgd, int flags);
+
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
@@ -150,6 +153,7 @@ static inline void hugetlb_show_meminfo(void)
 }
 #define follow_huge_pmd(mm, addr, pmd, flags)	NULL
 #define follow_huge_pud(mm, addr, pud, flags)	NULL
+#define follow_huge_pgd(mm, addr, pgd, flags)	NULL
 #define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
 #define pud_huge(x)	0
diff --git a/mm/gup.c b/mm/gup.c
index bf68e21d7a3a..fe95a37a4172 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -357,6 +357,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return no_page_table(vma, flags);
 
+	if (pgd_huge(*pgd)) {
+		page = follow_huge_pgd(mm, address, pgd, flags);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
+	}
+
 	return follow_p4d_mask(vma, address, pgd, flags, page_mask);
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65c84414a6b7..a446869aa7f1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4715,6 +4715,15 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
 }
 
+struct page * __weak
+follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
+{
+	if (flags & FOLL_GET)
+		return NULL;
+
+	return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
+}
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /*
-- 
cgit v1.2.3


From e22992923f741c951b830121655b58342fce202e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 15:38:53 -0700
Subject: mm/hugetlb: move default definition of hugepd_t earlier in the header

This enable to use the hugepd_t type early.  No functional change in
this patch.

Link: http://lkml.kernel.org/r/1494926612-23928-6-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3656ce605dc9..f01427c79947 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,30 @@ struct ctl_table;
 struct user_struct;
 struct mmu_gather;
 
+#ifndef is_hugepd
+/*
+ * Some architectures requires a hugepage directory format that is
+ * required to support multiple hugepage sizes. For example
+ * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables"
+ * introduced the same on powerpc. This allows for a more flexible hugepage
+ * pagetable layout.
+ */
+typedef struct { unsigned long pd; } hugepd_t;
+#define is_hugepd(hugepd) (0)
+#define __hugepd(x) ((hugepd_t) { (x) })
+static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+			      unsigned pdshift, unsigned long end,
+			      int write, struct page **pages, int *nr)
+{
+	return 0;
+}
+#else
+extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+		       unsigned pdshift, unsigned long end,
+		       int write, struct page **pages, int *nr);
+#endif
+
+
 #ifdef CONFIG_HUGETLB_PAGE
 
 #include <linux/mempolicy.h>
@@ -222,29 +246,6 @@ static inline int pud_write(pud_t pud)
 }
 #endif
 
-#ifndef is_hugepd
-/*
- * Some architectures requires a hugepage directory format that is
- * required to support multiple hugepage sizes. For example
- * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables"
- * introduced the same on powerpc. This allows for a more flexible hugepage
- * pagetable layout.
- */
-typedef struct { unsigned long pd; } hugepd_t;
-#define is_hugepd(hugepd) (0)
-#define __hugepd(x) ((hugepd_t) { (x) })
-static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
-			      unsigned pdshift, unsigned long end,
-			      int write, struct page **pages, int *nr)
-{
-	return 0;
-}
-#else
-extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
-		       unsigned pdshift, unsigned long end,
-		       int write, struct page **pages, int *nr);
-#endif
-
 #define HUGETLB_ANON_FILE "anon_hugepage"
 
 enum {
-- 
cgit v1.2.3


From 4dc71451a2078efcad2f66bd6ef130d2296827b1 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 15:38:56 -0700
Subject: mm/follow_page_mask: add support for hugepage directory entry

Architectures like ppc64 supports hugepage size that is not mapped to
any of of the page table levels.  Instead they add an alternate page
table entry format called hugepage directory (hugepd).  hugepd indicates
that the page table entry maps to a set of hugetlb pages.  Add support
for this in generic follow_page_mask code.  We already support this
format in the generic gup code.

The default implementation prints warning and returns NULL.  We will add
ppc64 support in later patches

Link: http://lkml.kernel.org/r/1494926612-23928-7-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  4 ++++
 mm/gup.c                | 33 +++++++++++++++++++++++++++++++++
 mm/hugetlb.c            |  8 ++++++++
 3 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f01427c79947..c92a1f0c7240 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -141,6 +141,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
+struct page *follow_huge_pd(struct vm_area_struct *vma,
+			    unsigned long address, hugepd_t hpd,
+			    int flags, int pdshift);
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 				pmd_t *pmd, int flags);
 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
@@ -175,6 +178,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 static inline void hugetlb_show_meminfo(void)
 {
 }
+#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL
 #define follow_huge_pmd(mm, addr, pmd, flags)	NULL
 #define follow_huge_pud(mm, addr, pud, flags)	NULL
 #define follow_huge_pgd(mm, addr, pgd, flags)	NULL
diff --git a/mm/gup.c b/mm/gup.c
index fe95a37a4172..9e472cb835b5 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -226,6 +226,14 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 			return page;
 		return no_page_table(vma, flags);
 	}
+	if (is_hugepd(__hugepd(pmd_val(*pmd)))) {
+		page = follow_huge_pd(vma, address,
+				      __hugepd(pmd_val(*pmd)), flags,
+				      PMD_SHIFT);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
+	}
 	if (pmd_devmap(*pmd)) {
 		ptl = pmd_lock(mm, pmd);
 		page = follow_devmap_pmd(vma, address, pmd, flags);
@@ -292,6 +300,14 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 			return page;
 		return no_page_table(vma, flags);
 	}
+	if (is_hugepd(__hugepd(pud_val(*pud)))) {
+		page = follow_huge_pd(vma, address,
+				      __hugepd(pud_val(*pud)), flags,
+				      PUD_SHIFT);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
+	}
 	if (pud_devmap(*pud)) {
 		ptl = pud_lock(mm, pud);
 		page = follow_devmap_pud(vma, address, pud, flags);
@@ -311,6 +327,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 				    unsigned int flags, unsigned int *page_mask)
 {
 	p4d_t *p4d;
+	struct page *page;
 
 	p4d = p4d_offset(pgdp, address);
 	if (p4d_none(*p4d))
@@ -319,6 +336,14 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 	if (unlikely(p4d_bad(*p4d)))
 		return no_page_table(vma, flags);
 
+	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
+		page = follow_huge_pd(vma, address,
+				      __hugepd(p4d_val(*p4d)), flags,
+				      P4D_SHIFT);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
+	}
 	return follow_pud_mask(vma, address, p4d, flags, page_mask);
 }
 
@@ -363,6 +388,14 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 			return page;
 		return no_page_table(vma, flags);
 	}
+	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
+		page = follow_huge_pd(vma, address,
+				      __hugepd(pgd_val(*pgd)), flags,
+				      PGDIR_SHIFT);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
+	}
 
 	return follow_p4d_mask(vma, address, pgd, flags, page_mask);
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a446869aa7f1..332637342555 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4668,6 +4668,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address,
 	return ERR_PTR(-EINVAL);
 }
 
+struct page * __weak
+follow_huge_pd(struct vm_area_struct *vma,
+	       unsigned long address, hugepd_t hpd, int flags, int pdshift)
+{
+	WARN(1, "hugepd follow called with no support for hugepage directory format\n");
+	return NULL;
+}
+
 struct page * __weak
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int flags)
-- 
cgit v1.2.3


From 3749a8f008eac3355a9e50b366ba08317a7e9cf8 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Thu, 6 Jul 2017 15:39:08 -0700
Subject: mm: zero hash tables in allocator

Add a new flag HASH_ZERO which when provided grantees that the hash
table that is returned by alloc_large_system_hash() is zeroed.  In most
cases that is what is needed by the caller.  Use page level allocator's
__GFP_ZERO flags to zero the memory.  It is using memset() which is
efficient method to zero memory and is optimized for most platforms.

Link: http://lkml.kernel.org/r/1488432825-92126-3-git-send-email-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Cc: David Miller <davem@davemloft.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  1 +
 mm/page_alloc.c         | 12 +++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 962164d36506..e223d91b6439 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -358,6 +358,7 @@ extern void *alloc_large_system_hash(const char *tablename,
 #define HASH_EARLY	0x00000001	/* Allocating during early boot? */
 #define HASH_SMALL	0x00000002	/* sub-page allocation allowed, min
 					 * shift passed via *_hash_shift */
+#define HASH_ZERO	0x00000004	/* Zero allocated hash table */
 
 /* Only NUMA needs hash distribution. 64bit NUMA architectures have
  * sufficient vmalloc space.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 387f20db217c..34240e2a0583 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7198,6 +7198,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	unsigned long long max = high_limit;
 	unsigned long log2qty, size;
 	void *table = NULL;
+	gfp_t gfp_flags;
 
 	/* allow the kernel cmdline to have a say */
 	if (!numentries) {
@@ -7242,12 +7243,17 @@ void *__init alloc_large_system_hash(const char *tablename,
 
 	log2qty = ilog2(numentries);
 
+	/*
+	 * memblock allocator returns zeroed memory already, so HASH_ZERO is
+	 * currently not used when HASH_EARLY is specified.
+	 */
+	gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
 	do {
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY)
 			table = memblock_virt_alloc_nopanic(size, 0);
 		else if (hashdist)
-			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
+			table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
 		else {
 			/*
 			 * If bucketsize is not a power-of-two, we may free
@@ -7255,8 +7261,8 @@ void *__init alloc_large_system_hash(const char *tablename,
 			 * alloc_pages_exact() automatically does
 			 */
 			if (get_order(size) < MAX_ORDER) {
-				table = alloc_pages_exact(size, GFP_ATOMIC);
-				kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+				table = alloc_pages_exact(size, gfp_flags);
+				kmemleak_alloc(table, size, 1, gfp_flags);
 			}
 		}
 	} while (!table && size > PAGE_SIZE && --log2qty);
-- 
cgit v1.2.3


From 7868a2087ec13ec4a5df0c5e00999863be132ba8 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Thu, 6 Jul 2017 15:39:42 -0700
Subject: mm/hugetlb: add size parameter to huge_pte_offset()

A poisoned or migrated hugepage is stored as a swap entry in the page
tables.  On architectures that support hugepages consisting of
contiguous page table entries (such as on arm64) this leads to ambiguity
in determining the page table entry to return in huge_pte_offset() when
a poisoned entry is encountered.

Let's remove the ambiguity by adding a size parameter to convey
additional information about the requested address.  Also fixup the
definition/usage of huge_pte_offset() throughout the tree.

Link: http://lkml.kernel.org/r/20170522133604.11392-4-punit.agrawal@arm.com
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: James Hogan <james.hogan@imgtec.com> (odd fixer:METAG ARCHITECTURE)
Cc: Ralf Baechle <ralf@linux-mips.org> (supporter:MIPS)
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/hugetlbpage.c   |  3 ++-
 arch/ia64/mm/hugetlbpage.c    |  4 ++--
 arch/metag/mm/hugetlbpage.c   |  3 ++-
 arch/mips/mm/hugetlbpage.c    |  3 ++-
 arch/parisc/mm/hugetlbpage.c  |  3 ++-
 arch/powerpc/mm/hugetlbpage.c |  2 +-
 arch/s390/mm/hugetlbpage.c    |  3 ++-
 arch/sh/mm/hugetlbpage.c      |  3 ++-
 arch/sparc/mm/hugetlbpage.c   |  3 ++-
 arch/tile/mm/hugetlbpage.c    |  3 ++-
 arch/x86/mm/hugetlbpage.c     |  2 +-
 fs/userfaultfd.c              |  7 +++++--
 include/linux/hugetlb.h       |  5 +++--
 mm/hugetlb.c                  | 23 ++++++++++++++---------
 mm/page_vma_mapped.c          |  3 ++-
 mm/pagewalk.c                 |  3 ++-
 16 files changed, 46 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index f89aa8fa5855..656e0ece2289 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -131,7 +131,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 85de86d36fdf..ae35140332f7 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -44,7 +44,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 }
 
 pte_t *
-huge_pte_offset (struct mm_struct *mm, unsigned long addr)
+huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
 	pgd_t *pgd;
@@ -92,7 +92,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ
 	if (REGION_NUMBER(addr) != RGN_HPAGE)
 		return ERR_PTR(-EINVAL);
 
-	ptep = huge_pte_offset(mm, addr);
+	ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
 	if (!ptep || pte_none(*ptep))
 		return NULL;
 	page = pte_page(*ptep);
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
index db1b7da91e4f..67fd53e2935a 100644
--- a/arch/metag/mm/hugetlbpage.c
+++ b/arch/metag/mm/hugetlbpage.c
@@ -74,7 +74,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index 74aa6f62468f..cef152234312 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -36,7 +36,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
+		       unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index aa50ac090e9b..5eb8f633b282 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -69,7 +69,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1816b965a142..c41dc44472c5 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -57,7 +57,7 @@ static unsigned nr_gpages;
 
 #define hugepd_none(hpd)	(hpd_val(hpd) == 0)
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	/* Only called for hugetlbfs pages, hence can ignore THP */
 	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index d3a5e39756f6..44a8e6f0391e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -180,7 +180,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return (pte_t *) pmdp;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgdp;
 	p4d_t *p4dp;
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index cc948db74878..d2412d2d6462 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -42,7 +42,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 88855e383b34..28ee8d8ffa07 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -277,7 +277,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 03e5cc4e76e4..0986d426a413 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -102,7 +102,8 @@ static pte_t *get_pte(pte_t *base, int index, int level)
 	return ptep;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index adad702b39cd..2824607df108 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -33,7 +33,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 	if (!vma || !is_vm_hugetlb_page(vma))
 		return ERR_PTR(-EINVAL);
 
-	pte = huge_pte_offset(mm, address);
+	pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
 
 	/* hugetlb should be locked, and hence, prefaulted */
 	WARN_ON(!pte || pte_none(*pte));
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 3d0dd082337a..cadcd12a3d35 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -214,6 +214,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
  * hugepmd ranges.
  */
 static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+					 struct vm_area_struct *vma,
 					 unsigned long address,
 					 unsigned long flags,
 					 unsigned long reason)
@@ -224,7 +225,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
 
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	pte = huge_pte_offset(mm, address);
+	pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
 	if (!pte)
 		goto out;
 
@@ -243,6 +244,7 @@ out:
 }
 #else
 static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+					 struct vm_area_struct *vma,
 					 unsigned long address,
 					 unsigned long flags,
 					 unsigned long reason)
@@ -448,7 +450,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
 						  reason);
 	else
-		must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
+		must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+						       vmf->address,
 						       vmf->flags, reason);
 	up_read(&mm->mmap_sem);
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c92a1f0c7240..31e665fbcf76 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -137,7 +137,8 @@ extern struct list_head huge_boot_pages;
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz);
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
@@ -190,7 +191,7 @@ static inline void hugetlb_show_meminfo(void)
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
 				src_addr, pagep)	({ BUG(); 0; })
-#define huge_pte_offset(mm, address)	0
+#define huge_pte_offset(mm, address, sz)	0
 static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 {
 	return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c73828e43100..075345532396 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3246,7 +3246,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
 		spinlock_t *src_ptl, *dst_ptl;
-		src_pte = huge_pte_offset(src, addr);
+		src_pte = huge_pte_offset(src, addr, sz);
 		if (!src_pte)
 			continue;
 		dst_pte = huge_pte_alloc(dst, addr, sz);
@@ -3330,7 +3330,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	address = start;
 	for (; address < end; address += sz) {
-		ptep = huge_pte_offset(mm, address);
+		ptep = huge_pte_offset(mm, address, sz);
 		if (!ptep)
 			continue;
 
@@ -3548,7 +3548,8 @@ retry_avoidcopy:
 			unmap_ref_private(mm, vma, old_page, address);
 			BUG_ON(huge_pte_none(pte));
 			spin_lock(ptl);
-			ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+			ptep = huge_pte_offset(mm, address & huge_page_mask(h),
+					       huge_page_size(h));
 			if (likely(ptep &&
 				   pte_same(huge_ptep_get(ptep), pte)))
 				goto retry_avoidcopy;
@@ -3587,7 +3588,8 @@ retry_avoidcopy:
 	 * before the page tables are altered
 	 */
 	spin_lock(ptl);
-	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+	ptep = huge_pte_offset(mm, address & huge_page_mask(h),
+			       huge_page_size(h));
 	if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
 		ClearPagePrivate(new_page);
 
@@ -3874,7 +3876,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	address &= huge_page_mask(h);
 
-	ptep = huge_pte_offset(mm, address);
+	ptep = huge_pte_offset(mm, address, huge_page_size(h));
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
@@ -4131,7 +4133,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 *
 		 * Note that page table lock is not held when pte is null.
 		 */
-		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h),
+				      huge_page_size(h));
 		if (pte)
 			ptl = huge_pte_lock(h, mm, pte);
 		absent = !pte || huge_pte_none(huge_ptep_get(pte));
@@ -4270,7 +4273,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	i_mmap_lock_write(vma->vm_file->f_mapping);
 	for (; address < end; address += huge_page_size(h)) {
 		spinlock_t *ptl;
-		ptep = huge_pte_offset(mm, address);
+		ptep = huge_pte_offset(mm, address, huge_page_size(h));
 		if (!ptep)
 			continue;
 		ptl = huge_pte_lock(h, mm, ptep);
@@ -4534,7 +4537,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 
 		saddr = page_table_shareable(svma, vma, addr, idx);
 		if (saddr) {
-			spte = huge_pte_offset(svma->vm_mm, saddr);
+			spte = huge_pte_offset(svma->vm_mm, saddr,
+					       vma_mmu_pagesize(svma));
 			if (spte) {
 				get_page(virt_to_page(spte));
 				break;
@@ -4630,7 +4634,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index de9c40d7304a..8ec6ba230bb9 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -116,7 +116,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
 	if (unlikely(PageHuge(pvmw->page))) {
 		/* when pud is not present, pte will be NULL */
-		pvmw->pte = huge_pte_offset(mm, pvmw->address);
+		pvmw->pte = huge_pte_offset(mm, pvmw->address,
+					    PAGE_SIZE << compound_order(page));
 		if (!pvmw->pte)
 			return false;
 
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 60f7856e508f..1a4197965415 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -180,12 +180,13 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 	struct hstate *h = hstate_vma(vma);
 	unsigned long next;
 	unsigned long hmask = huge_page_mask(h);
+	unsigned long sz = huge_page_size(h);
 	pte_t *pte;
 	int err = 0;
 
 	do {
 		next = hugetlb_entry_end(h, addr, end);
-		pte = huge_pte_offset(walk->mm, addr & hmask);
+		pte = huge_pte_offset(walk->mm, addr & hmask, sz);
 		if (pte && walk->hugetlb_entry)
 			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
 		if (err)
-- 
cgit v1.2.3


From e5251fd43007f9e1155331f0fa30685604a8e3a1 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Thu, 6 Jul 2017 15:39:50 -0700
Subject: mm/hugetlb: introduce set_huge_swap_pte_at() helper

set_huge_pte_at(), an architecture callback to populate hugepage ptes,
does not provide the range of virtual memory that is targeted.  This
leads to ambiguity when dealing with swap entries on architectures that
support hugepages consisting of contiguous ptes.

Fix the problem by introducing an overridable helper that is called when
populating the page tables with swap entries.  The size of the targeted
region is provided to the helper to help determine the number of entries
to be updated.

Provide a default implementation that maintains the current behaviour.

[punit.agrawal@arm.com: v4]
  Link: http://lkml.kernel.org/r/20170524115409.31309-8-punit.agrawal@arm.com
[punit.agrawal@arm.com: add an empty definition for set_huge_swap_pte_at()]
  Link: http://lkml.kernel.org/r/20170525171331.31469-1-punit.agrawal@arm.com
Link: http://lkml.kernel.org/r/20170522133604.11392-6-punit.agrawal@arm.com
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 13 +++++++++++++
 mm/hugetlb.c            |  8 +++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 31e665fbcf76..46bfb702e7d6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -516,6 +516,14 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
 {
 	atomic_long_sub(l, &mm->hugetlb_usage);
 }
+
+#ifndef set_huge_swap_pte_at
+static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+					pte_t *ptep, pte_t pte, unsigned long sz)
+{
+	set_huge_pte_at(mm, addr, ptep, pte);
+}
+#endif
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
@@ -565,6 +573,11 @@ static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
 static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
 {
 }
+
+static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+					pte_t *ptep, pte_t pte, unsigned long sz)
+{
+}
 #endif	/* CONFIG_HUGETLB_PAGE */
 
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3dbe3e257975..51d31352a5bf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3276,9 +3276,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				 */
 				make_migration_entry_read(&swp_entry);
 				entry = swp_entry_to_pte(swp_entry);
-				set_huge_pte_at(src, addr, src_pte, entry);
+				set_huge_swap_pte_at(src, addr, src_pte,
+						     entry, sz);
 			}
-			set_huge_pte_at(dst, addr, dst_pte, entry);
+			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 		} else {
 			if (cow) {
 				huge_ptep_set_wrprotect(src, addr, src_pte);
@@ -4295,7 +4296,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 
 				make_migration_entry_read(&entry);
 				newpte = swp_entry_to_pte(entry);
-				set_huge_pte_at(mm, address, ptep, newpte);
+				set_huge_swap_pte_at(mm, address, ptep,
+						     newpte, huge_page_size(h));
 				pages++;
 			}
 			spin_unlock(ptl);
-- 
cgit v1.2.3


From 45816682b2cd6771cf63cb7dc7dbebdd827a0132 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 6 Jul 2017 15:39:59 -0700
Subject: mm, mempolicy: stop adjusting current->il_next in
 mpol_rebind_nodemask()

The task->il_next variable stores the next allocation node id for task's
MPOL_INTERLEAVE policy.  mpol_rebind_nodemask() updates interleave and
bind mempolicies due to changing cpuset mems.  Currently it also tries
to make sure that current->il_next is valid within the updated nodemask.
This is bogus, because 1) we are updating potentially any task's
mempolicy, not just current, and 2) we might be updating a per-vma
mempolicy, not task one.

The interleave_nodes() function that uses il_next can cope fine with the
value not being within the currently allowed nodes, so this hasn't
manifested as an actual issue.

We can remove the need for updating il_next completely by changing it to
il_prev and store the node id of the previous interleave allocation
instead of the next id.  Then interleave_nodes() can calculate the next
id using the current nodemask and also store it as il_prev, except when
querying the next node via do_get_mempolicy().

Link: http://lkml.kernel.org/r/20170517081140.30654-3-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Christoph Lameter <cl@linux.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 +-
 mm/mempolicy.c        | 22 +++++++---------------
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c4ca7433d9d..5e8759b1b428 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -904,7 +904,7 @@ struct task_struct {
 #ifdef CONFIG_NUMA
 	/* Protected by alloc_lock: */
 	struct mempolicy		*mempolicy;
-	short				il_next;
+	short				il_prev;
 	short				pref_node_fork;
 #endif
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 37d0b334bfe9..d77177c7283b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -349,12 +349,6 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes,
 		pol->v.nodes = tmp;
 	else
 		BUG();
-
-	if (!node_isset(current->il_next, tmp)) {
-		current->il_next = next_node_in(current->il_next, tmp);
-		if (current->il_next >= MAX_NUMNODES)
-			current->il_next = numa_node_id();
-	}
 }
 
 static void mpol_rebind_preferred(struct mempolicy *pol,
@@ -812,9 +806,8 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 	}
 	old = current->mempolicy;
 	current->mempolicy = new;
-	if (new && new->mode == MPOL_INTERLEAVE &&
-	    nodes_weight(new->v.nodes))
-		current->il_next = first_node(new->v.nodes);
+	if (new && new->mode == MPOL_INTERLEAVE)
+		current->il_prev = MAX_NUMNODES-1;
 	task_unlock(current);
 	mpol_put(old);
 	ret = 0;
@@ -916,7 +909,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 			*policy = err;
 		} else if (pol == current->mempolicy &&
 				pol->mode == MPOL_INTERLEAVE) {
-			*policy = current->il_next;
+			*policy = next_node_in(current->il_prev, pol->v.nodes);
 		} else {
 			err = -EINVAL;
 			goto out;
@@ -1697,14 +1690,13 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 /* Do dynamic interleaving for a process */
 static unsigned interleave_nodes(struct mempolicy *policy)
 {
-	unsigned nid, next;
+	unsigned next;
 	struct task_struct *me = current;
 
-	nid = me->il_next;
-	next = next_node_in(nid, policy->v.nodes);
+	next = next_node_in(me->il_prev, policy->v.nodes);
 	if (next < MAX_NUMNODES)
-		me->il_next = next;
-	return nid;
+		me->il_prev = next;
+	return next;
 }
 
 /*
-- 
cgit v1.2.3


From 04ec6264f28793e56114d0a367bb4d3af667ab6a Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 6 Jul 2017 15:40:03 -0700
Subject: mm, page_alloc: pass preferred nid instead of zonelist to allocator

The main allocator function __alloc_pages_nodemask() takes a zonelist
pointer as one of its parameters.  All of its callers directly or
indirectly obtain the zonelist via node_zonelist() using a preferred
node id and gfp_mask.  We can make the code a bit simpler by doing the
zonelist lookup in __alloc_pages_nodemask(), passing it a preferred node
id instead (gfp_mask is already another parameter).

There are some code size benefits thanks to removal of inlined
node_zonelist():

  bloat-o-meter add/remove: 2/2 grow/shrink: 4/36 up/down: 399/-1351 (-952)

This will also make things simpler if we proceed with converting cpusets
to zonelists.

Link: http://lkml.kernel.org/r/20170517081140.30654-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Christoph Lameter <cl@linux.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h       | 11 +++++------
 include/linux/mempolicy.h |  6 +++---
 mm/hugetlb.c              | 15 +++++++++------
 mm/memory_hotplug.c       |  6 ++----
 mm/mempolicy.c            | 41 +++++++++++++++++++----------------------
 mm/page_alloc.c           | 10 +++++-----
 6 files changed, 43 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a89d37e8b387..4c6656f1fee7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
 struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-		       struct zonelist *zonelist, nodemask_t *nodemask);
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
+							nodemask_t *nodemask);
 
 static inline struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist)
+__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
 {
-	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
+	return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
 }
 
 /*
@@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
 	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
 	VM_WARN_ON(!node_online(nid));
 
-	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
+	return __alloc_pages(gfp_mask, order, nid);
 }
 
 /*
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5f4d8281832b..ecb6cbeede5a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -146,7 +146,7 @@ extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
 				enum mpol_rebind_step step);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 
-extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+extern int huge_node(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask);
 extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
@@ -269,13 +269,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
-static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+static inline int huge_node(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask)
 {
 	*mpol = NULL;
 	*nodemask = NULL;
-	return node_zonelist(0, gfp_flags);
+	return 0;
 }
 
 static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 51d31352a5bf..1a88006ec634 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -920,6 +920,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 	struct page *page = NULL;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
+	gfp_t gfp_mask;
+	int nid;
 	struct zonelist *zonelist;
 	struct zone *zone;
 	struct zoneref *z;
@@ -940,12 +942,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 retry_cpuset:
 	cpuset_mems_cookie = read_mems_allowed_begin();
-	zonelist = huge_zonelist(vma, address,
-					htlb_alloc_mask(h), &mpol, &nodemask);
+	gfp_mask = htlb_alloc_mask(h);
+	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
+	zonelist = node_zonelist(nid, gfp_mask);
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
+		if (cpuset_zone_allowed(zone, gfp_mask)) {
 			page = dequeue_huge_page_node(h, zone_to_nid(zone));
 			if (page) {
 				if (avoid_reserve)
@@ -1558,13 +1561,13 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
 	do {
 		struct page *page;
 		struct mempolicy *mpol;
-		struct zonelist *zl;
+		int nid;
 		nodemask_t *nodemask;
 
 		cpuset_mems_cookie = read_mems_allowed_begin();
-		zl = huge_zonelist(vma, addr, gfp, &mpol, &nodemask);
+		nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
 		mpol_cond_put(mpol);
-		page = __alloc_pages_nodemask(gfp, order, zl, nodemask);
+		page = __alloc_pages_nodemask(gfp, order, nid, nodemask);
 		if (page)
 			return page;
 	} while (read_mems_allowed_retry(cpuset_mems_cookie));
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e4fdb97b6ef2..9ac997b8f2a6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1459,11 +1459,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 		gfp_mask |= __GFP_HIGHMEM;
 
 	if (!nodes_empty(nmask))
-		new_page = __alloc_pages_nodemask(gfp_mask, 0,
-					node_zonelist(nid, gfp_mask), &nmask);
+		new_page = __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
 	if (!new_page)
-		new_page = __alloc_pages(gfp_mask, 0,
-					node_zonelist(nid, gfp_mask));
+		new_page = __alloc_pages(gfp_mask, 0, nid);
 
 	return new_page;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d77177c7283b..c60807625fd5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1669,9 +1669,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
 	return NULL;
 }
 
-/* Return a zonelist indicated by gfp for node representing a mempolicy */
-static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
-	int nd)
+/* Return the node id preferred by the given mempolicy, or the given id */
+static int policy_node(gfp_t gfp, struct mempolicy *policy,
+								int nd)
 {
 	if (policy->mode == MPOL_PREFERRED && !(policy->flags & MPOL_F_LOCAL))
 		nd = policy->v.preferred_node;
@@ -1684,7 +1684,7 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 		WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE));
 	}
 
-	return node_zonelist(nd, gfp);
+	return nd;
 }
 
 /* Do dynamic interleaving for a process */
@@ -1791,38 +1791,37 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 
 #ifdef CONFIG_HUGETLBFS
 /*
- * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
+ * huge_node(@vma, @addr, @gfp_flags, @mpol)
  * @vma: virtual memory area whose policy is sought
  * @addr: address in @vma for shared policy lookup and interleave policy
  * @gfp_flags: for requested zone
  * @mpol: pointer to mempolicy pointer for reference counted mempolicy
  * @nodemask: pointer to nodemask pointer for MPOL_BIND nodemask
  *
- * Returns a zonelist suitable for a huge page allocation and a pointer
+ * Returns a nid suitable for a huge page allocation and a pointer
  * to the struct mempolicy for conditional unref after allocation.
  * If the effective policy is 'BIND, returns a pointer to the mempolicy's
  * @nodemask for filtering the zonelist.
  *
  * Must be protected by read_mems_allowed_begin()
  */
-struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
-				gfp_t gfp_flags, struct mempolicy **mpol,
-				nodemask_t **nodemask)
+int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
+				struct mempolicy **mpol, nodemask_t **nodemask)
 {
-	struct zonelist *zl;
+	int nid;
 
 	*mpol = get_vma_policy(vma, addr);
 	*nodemask = NULL;	/* assume !MPOL_BIND */
 
 	if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
-		zl = node_zonelist(interleave_nid(*mpol, vma, addr,
-				huge_page_shift(hstate_vma(vma))), gfp_flags);
+		nid = interleave_nid(*mpol, vma, addr,
+					huge_page_shift(hstate_vma(vma)));
 	} else {
-		zl = policy_zonelist(gfp_flags, *mpol, numa_node_id());
+		nid = policy_node(gfp_flags, *mpol, numa_node_id());
 		if ((*mpol)->mode == MPOL_BIND)
 			*nodemask = &(*mpol)->v.nodes;
 	}
-	return zl;
+	return nid;
 }
 
 /*
@@ -1924,12 +1923,10 @@ out:
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 					unsigned nid)
 {
-	struct zonelist *zl;
 	struct page *page;
 
-	zl = node_zonelist(nid, gfp);
-	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
+	page = __alloc_pages(gfp, order, nid);
+	if (page && page_to_nid(page) == nid)
 		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
 	return page;
 }
@@ -1963,8 +1960,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 {
 	struct mempolicy *pol;
 	struct page *page;
+	int preferred_nid;
 	unsigned int cpuset_mems_cookie;
-	struct zonelist *zl;
 	nodemask_t *nmask;
 
 retry_cpuset:
@@ -2007,8 +2004,8 @@ retry_cpuset:
 	}
 
 	nmask = policy_nodemask(gfp, pol);
-	zl = policy_zonelist(gfp, pol, node);
-	page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+	preferred_nid = policy_node(gfp, pol, node);
+	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
 	mpol_cond_put(pol);
 out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
@@ -2055,7 +2052,7 @@ retry_cpuset:
 		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
 	else
 		page = __alloc_pages_nodemask(gfp, order,
-				policy_zonelist(gfp, pol, numa_node_id()),
+				policy_node(gfp, pol, numa_node_id()),
 				policy_nodemask(gfp, pol));
 
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 019af778ec55..8aa860017d66 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3980,12 +3980,12 @@ got_pg:
 }
 
 static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist, nodemask_t *nodemask,
+		int preferred_nid, nodemask_t *nodemask,
 		struct alloc_context *ac, gfp_t *alloc_mask,
 		unsigned int *alloc_flags)
 {
 	ac->high_zoneidx = gfp_zone(gfp_mask);
-	ac->zonelist = zonelist;
+	ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
 	ac->nodemask = nodemask;
 	ac->migratetype = gfpflags_to_migratetype(gfp_mask);
 
@@ -4030,8 +4030,8 @@ static inline void finalise_ac(gfp_t gfp_mask,
  * This is the 'heart' of the zoned buddy allocator.
  */
 struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-			struct zonelist *zonelist, nodemask_t *nodemask)
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
+							nodemask_t *nodemask)
 {
 	struct page *page;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
@@ -4039,7 +4039,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct alloc_context ac = { };
 
 	gfp_mask &= gfp_allowed_mask;
-	if (!prepare_alloc_pages(gfp_mask, order, zonelist, nodemask, &ac, &alloc_mask, &alloc_flags))
+	if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
 		return NULL;
 
 	finalise_ac(gfp_mask, order, &ac);
-- 
cgit v1.2.3


From 213980c0f23b6c4932fd5516da7e8443b2a615ea Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 6 Jul 2017 15:40:06 -0700
Subject: mm, mempolicy: simplify rebinding mempolicies when updating cpusets

Commit c0ff7453bb5c ("cpuset,mm: fix no node to alloc memory when
changing cpuset's mems") has introduced a two-step protocol when
rebinding task's mempolicy due to cpuset update, in order to avoid a
parallel allocation seeing an empty effective nodemask and failing.

Later, commit cc9a6c877661 ("cpuset: mm: reduce large amounts of memory
barrier related damage v3") introduced a seqlock protection and removed
the synchronization point between the two update steps.  At that point
(or perhaps later), the two-step rebinding became unnecessary.

Currently it only makes sure that the update first adds new nodes in
step 1 and then removes nodes in step 2.  Without memory barriers the
effects are questionable, and even then this cannot prevent a parallel
zonelist iteration checking the nodemask at each step to observe all
nodes as unusable for allocation.  We now fully rely on the seqlock to
prevent premature OOMs and allocation failures.

We can thus remove the two-step update parts and simplify the code.

Link: http://lkml.kernel.org/r/20170517081140.30654-5-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h      |   6 +--
 include/uapi/linux/mempolicy.h |   8 ----
 kernel/cgroup/cpuset.c         |   4 +-
 mm/mempolicy.c                 | 102 ++++++++---------------------------------
 4 files changed, 21 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ecb6cbeede5a..3a58b4be1b0c 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -142,8 +142,7 @@ bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
-extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
-				enum mpol_rebind_step step);
+extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 
 extern int huge_node(struct vm_area_struct *vma,
@@ -260,8 +259,7 @@ static inline void numa_default_policy(void)
 }
 
 static inline void mpol_rebind_task(struct task_struct *tsk,
-				const nodemask_t *new,
-				enum mpol_rebind_step step)
+				const nodemask_t *new)
 {
 }
 
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 9cd8b21dddbe..2a4d89508fec 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -24,13 +24,6 @@ enum {
 	MPOL_MAX,	/* always last member of enum */
 };
 
-enum mpol_rebind_step {
-	MPOL_REBIND_ONCE,	/* do rebind work at once(not by two step) */
-	MPOL_REBIND_STEP1,	/* first step(set all the newly nodes) */
-	MPOL_REBIND_STEP2,	/* second step(clean all the disallowed nodes)*/
-	MPOL_REBIND_NSTEP,
-};
-
 /* Flags for set_mempolicy */
 #define MPOL_F_STATIC_NODES	(1 << 15)
 #define MPOL_F_RELATIVE_NODES	(1 << 14)
@@ -65,7 +58,6 @@ enum mpol_rebind_step {
  */
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
-#define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
 #define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
 #define MPOL_F_MORON	(1 << 4) /* Migrate On protnone Reference On Node */
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ae643412948a..5fd1bdbaa381 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1063,9 +1063,7 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	}
 
 	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
-
-	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
+	mpol_rebind_task(tsk, newmems);
 	tsk->mems_allowed = *newmems;
 
 	if (need_loop) {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c60807625fd5..047181452040 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -146,22 +146,7 @@ struct mempolicy *get_task_policy(struct task_struct *p)
 
 static const struct mempolicy_operations {
 	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
-	/*
-	 * If read-side task has no lock to protect task->mempolicy, write-side
-	 * task will rebind the task->mempolicy by two step. The first step is
-	 * setting all the newly nodes, and the second step is cleaning all the
-	 * disallowed nodes. In this way, we can avoid finding no node to alloc
-	 * page.
-	 * If we have a lock to protect task->mempolicy in read-side, we do
-	 * rebind directly.
-	 *
-	 * step:
-	 * 	MPOL_REBIND_ONCE - do rebind work at once
-	 * 	MPOL_REBIND_STEP1 - set all the newly nodes
-	 * 	MPOL_REBIND_STEP2 - clean all the disallowed nodes
-	 */
-	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes,
-			enum mpol_rebind_step step);
+	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
 } mpol_ops[MPOL_MAX];
 
 static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
@@ -304,19 +289,11 @@ void __mpol_put(struct mempolicy *p)
 	kmem_cache_free(policy_cache, p);
 }
 
-static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes,
-				enum mpol_rebind_step step)
+static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
 {
 }
 
-/*
- * step:
- * 	MPOL_REBIND_ONCE  - do rebind work at once
- * 	MPOL_REBIND_STEP1 - set all the newly nodes
- * 	MPOL_REBIND_STEP2 - clean all the disallowed nodes
- */
-static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes,
-				 enum mpol_rebind_step step)
+static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
 {
 	nodemask_t tmp;
 
@@ -325,35 +302,19 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes,
 	else if (pol->flags & MPOL_F_RELATIVE_NODES)
 		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
 	else {
-		/*
-		 * if step == 1, we use ->w.cpuset_mems_allowed to cache the
-		 * result
-		 */
-		if (step == MPOL_REBIND_ONCE || step == MPOL_REBIND_STEP1) {
-			nodes_remap(tmp, pol->v.nodes,
-					pol->w.cpuset_mems_allowed, *nodes);
-			pol->w.cpuset_mems_allowed = step ? tmp : *nodes;
-		} else if (step == MPOL_REBIND_STEP2) {
-			tmp = pol->w.cpuset_mems_allowed;
-			pol->w.cpuset_mems_allowed = *nodes;
-		} else
-			BUG();
+		nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
+								*nodes);
+		pol->w.cpuset_mems_allowed = tmp;
 	}
 
 	if (nodes_empty(tmp))
 		tmp = *nodes;
 
-	if (step == MPOL_REBIND_STEP1)
-		nodes_or(pol->v.nodes, pol->v.nodes, tmp);
-	else if (step == MPOL_REBIND_ONCE || step == MPOL_REBIND_STEP2)
-		pol->v.nodes = tmp;
-	else
-		BUG();
+	pol->v.nodes = tmp;
 }
 
 static void mpol_rebind_preferred(struct mempolicy *pol,
-				  const nodemask_t *nodes,
-				  enum mpol_rebind_step step)
+						const nodemask_t *nodes)
 {
 	nodemask_t tmp;
 
@@ -379,42 +340,19 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
 /*
  * mpol_rebind_policy - Migrate a policy to a different set of nodes
  *
- * If read-side task has no lock to protect task->mempolicy, write-side
- * task will rebind the task->mempolicy by two step. The first step is
- * setting all the newly nodes, and the second step is cleaning all the
- * disallowed nodes. In this way, we can avoid finding no node to alloc
- * page.
- * If we have a lock to protect task->mempolicy in read-side, we do
- * rebind directly.
- *
- * step:
- * 	MPOL_REBIND_ONCE  - do rebind work at once
- * 	MPOL_REBIND_STEP1 - set all the newly nodes
- * 	MPOL_REBIND_STEP2 - clean all the disallowed nodes
+ * Per-vma policies are protected by mmap_sem. Allocations using per-task
+ * policies are protected by task->mems_allowed_seq to prevent a premature
+ * OOM/allocation failure due to parallel nodemask modification.
  */
-static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
-				enum mpol_rebind_step step)
+static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 {
 	if (!pol)
 		return;
-	if (!mpol_store_user_nodemask(pol) && step == MPOL_REBIND_ONCE &&
+	if (!mpol_store_user_nodemask(pol) &&
 	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
 
-	if (step == MPOL_REBIND_STEP1 && (pol->flags & MPOL_F_REBINDING))
-		return;
-
-	if (step == MPOL_REBIND_STEP2 && !(pol->flags & MPOL_F_REBINDING))
-		BUG();
-
-	if (step == MPOL_REBIND_STEP1)
-		pol->flags |= MPOL_F_REBINDING;
-	else if (step == MPOL_REBIND_STEP2)
-		pol->flags &= ~MPOL_F_REBINDING;
-	else if (step >= MPOL_REBIND_NSTEP)
-		BUG();
-
-	mpol_ops[pol->mode].rebind(pol, newmask, step);
+	mpol_ops[pol->mode].rebind(pol, newmask);
 }
 
 /*
@@ -424,10 +362,9 @@ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
  * Called with task's alloc_lock held.
  */
 
-void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
-			enum mpol_rebind_step step)
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
 {
-	mpol_rebind_policy(tsk->mempolicy, new, step);
+	mpol_rebind_policy(tsk->mempolicy, new);
 }
 
 /*
@@ -442,7 +379,7 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 
 	down_write(&mm->mmap_sem);
 	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		mpol_rebind_policy(vma->vm_policy, new, MPOL_REBIND_ONCE);
+		mpol_rebind_policy(vma->vm_policy, new);
 	up_write(&mm->mmap_sem);
 }
 
@@ -2101,10 +2038,7 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 
 	if (current_cpuset_is_being_rebound()) {
 		nodemask_t mems = cpuset_mems_allowed(current);
-		if (new->flags & MPOL_F_REBINDING)
-			mpol_rebind_policy(new, &mems, MPOL_REBIND_STEP2);
-		else
-			mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE);
+		mpol_rebind_policy(new, &mems);
 	}
 	atomic_set(&new->refcnt, 1);
 	return new;
-- 
cgit v1.2.3


From 94f4a1618b4c2b268f9e70bd1516932927782293 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Jul 2017 15:40:22 -0700
Subject: mm: kmemleak: treat vm_struct as alternative reference to vmalloc'ed
 objects

Kmemleak requires that vmalloc'ed objects have a minimum reference count
of 2: one in the corresponding vm_struct object and the other owned by
the vmalloc() caller.  There are cases, however, where the original
vmalloc() returned pointer is lost and, instead, a pointer to vm_struct
is stored (see free_thread_stack()).  Kmemleak currently reports such
objects as leaks.

This patch adds support for treating any surplus references to an object
as additional references to a specified object.  It introduces the
kmemleak_vmalloc() API function which takes a vm_struct pointer and sets
its surplus reference passing to the actual vmalloc() returned pointer.
The __vmalloc_node_range() calling site has been modified accordingly.

Link: http://lkml.kernel.org/r/1495726937-23557-4-git-send-email-catalin.marinas@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: "Luis R. Rodriguez" <mcgrof@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: "Luis R. Rodriguez" <mcgrof@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kmemleak.rst |  1 +
 include/linux/kmemleak.h             |  7 +++
 mm/kmemleak.c                        | 93 ++++++++++++++++++++++++++++++++++--
 mm/vmalloc.c                         |  7 +--
 4 files changed, 98 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index b2391b829169..cb8862659178 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -150,6 +150,7 @@ See the include/linux/kmemleak.h header for the functions prototype.
 - ``kmemleak_init``		 - initialize kmemleak
 - ``kmemleak_alloc``		 - notify of a memory block allocation
 - ``kmemleak_alloc_percpu``	 - notify of a percpu memory block allocation
+- ``kmemleak_vmalloc``		 - notify of a vmalloc() memory allocation
 - ``kmemleak_free``		 - notify of a memory block freeing
 - ``kmemleak_free_part``	 - notify of a partial memory block freeing
 - ``kmemleak_free_percpu``	 - notify of a percpu memory block freeing
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 1c2a32829620..590343f6c1b1 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -22,6 +22,7 @@
 #define __KMEMLEAK_H
 
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #ifdef CONFIG_DEBUG_KMEMLEAK
 
@@ -30,6 +31,8 @@ extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			   gfp_t gfp) __ref;
 extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
 				  gfp_t gfp) __ref;
+extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+			     gfp_t gfp) __ref;
 extern void kmemleak_free(const void *ptr) __ref;
 extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
@@ -81,6 +84,10 @@ static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
 					 gfp_t gfp)
 {
 }
+static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+				    gfp_t gfp)
+{
+}
 static inline void kmemleak_free(const void *ptr)
 {
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 266482f460c2..7780cd83a495 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -159,6 +159,8 @@ struct kmemleak_object {
 	atomic_t use_count;
 	unsigned long pointer;
 	size_t size;
+	/* pass surplus references to this pointer */
+	unsigned long excess_ref;
 	/* minimum number of a pointers found before it is considered leak */
 	int min_count;
 	/* the total number of pointers found pointing to this object */
@@ -253,7 +255,8 @@ enum {
 	KMEMLEAK_NOT_LEAK,
 	KMEMLEAK_IGNORE,
 	KMEMLEAK_SCAN_AREA,
-	KMEMLEAK_NO_SCAN
+	KMEMLEAK_NO_SCAN,
+	KMEMLEAK_SET_EXCESS_REF
 };
 
 /*
@@ -264,7 +267,10 @@ struct early_log {
 	int op_type;			/* kmemleak operation type */
 	int min_count;			/* minimum reference count */
 	const void *ptr;		/* allocated/freed memory block */
-	size_t size;			/* memory block size */
+	union {
+		size_t size;		/* memory block size */
+		unsigned long excess_ref; /* surplus reference passing */
+	};
 	unsigned long trace[MAX_TRACE];	/* stack trace */
 	unsigned int trace_len;		/* stack trace length */
 };
@@ -562,6 +568,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	object->flags = OBJECT_ALLOCATED;
 	object->pointer = ptr;
 	object->size = size;
+	object->excess_ref = 0;
 	object->min_count = min_count;
 	object->count = 0;			/* white color initially */
 	object->jiffies = jiffies;
@@ -794,6 +801,30 @@ out:
 	put_object(object);
 }
 
+/*
+ * Any surplus references (object already gray) to 'ptr' are passed to
+ * 'excess_ref'. This is used in the vmalloc() case where a pointer to
+ * vm_struct may be used as an alternative reference to the vmalloc'ed object
+ * (see free_thread_stack()).
+ */
+static void object_set_excess_ref(unsigned long ptr, unsigned long excess_ref)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("Setting excess_ref on unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->excess_ref = excess_ref;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
 /*
  * Set the OBJECT_NO_SCAN flag for the object corresponding to the give
  * pointer. Such object will not be scanned by kmemleak but references to it
@@ -908,7 +939,7 @@ static void early_alloc_percpu(struct early_log *log)
  * @gfp:	kmalloc() flags used for kmemleak internal memory allocations
  *
  * This function is called from the kernel allocators when a new object
- * (memory block) is allocated (kmem_cache_alloc, kmalloc, vmalloc etc.).
+ * (memory block) is allocated (kmem_cache_alloc, kmalloc etc.).
  */
 void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			  gfp_t gfp)
@@ -951,6 +982,36 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
 
+/**
+ * kmemleak_vmalloc - register a newly vmalloc'ed object
+ * @area:	pointer to vm_struct
+ * @size:	size of the object
+ * @gfp:	__vmalloc() flags used for kmemleak internal memory allocations
+ *
+ * This function is called from the vmalloc() kernel allocator when a new
+ * object (memory block) is allocated.
+ */
+void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp)
+{
+	pr_debug("%s(0x%p, %zu)\n", __func__, area, size);
+
+	/*
+	 * A min_count = 2 is needed because vm_struct contains a reference to
+	 * the virtual address of the vmalloc'ed block.
+	 */
+	if (kmemleak_enabled) {
+		create_object((unsigned long)area->addr, size, 2, gfp);
+		object_set_excess_ref((unsigned long)area,
+				      (unsigned long)area->addr);
+	} else if (kmemleak_early_log) {
+		log_early(KMEMLEAK_ALLOC, area->addr, size, 2);
+		/* reusing early_log.size for storing area->addr */
+		log_early(KMEMLEAK_SET_EXCESS_REF,
+			  area, (unsigned long)area->addr, 0);
+	}
+}
+EXPORT_SYMBOL_GPL(kmemleak_vmalloc);
+
 /**
  * kmemleak_free - unregister a previously registered object
  * @ptr:	pointer to beginning of the object
@@ -1248,6 +1309,7 @@ static void scan_block(void *_start, void *_end,
 	for (ptr = start; ptr < end; ptr++) {
 		struct kmemleak_object *object;
 		unsigned long pointer;
+		unsigned long excess_ref;
 
 		if (scan_should_stop())
 			break;
@@ -1283,8 +1345,27 @@ static void scan_block(void *_start, void *_end,
 		 * enclosed by scan_mutex.
 		 */
 		spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-		update_refs(object);
+		/* only pass surplus references (object already gray) */
+		if (color_gray(object)) {
+			excess_ref = object->excess_ref;
+			/* no need for update_refs() if object already gray */
+		} else {
+			excess_ref = 0;
+			update_refs(object);
+		}
 		spin_unlock(&object->lock);
+
+		if (excess_ref) {
+			object = lookup_object(excess_ref, 0);
+			if (!object)
+				continue;
+			if (object == scanned)
+				/* circular reference, ignore */
+				continue;
+			spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+			update_refs(object);
+			spin_unlock(&object->lock);
+		}
 	}
 	read_unlock_irqrestore(&kmemleak_lock, flags);
 }
@@ -1987,6 +2068,10 @@ void __init kmemleak_init(void)
 		case KMEMLEAK_NO_SCAN:
 			kmemleak_no_scan(log->ptr);
 			break;
+		case KMEMLEAK_SET_EXCESS_REF:
+			object_set_excess_ref((unsigned long)log->ptr,
+					      log->excess_ref);
+			break;
 		default:
 			kmemleak_warn("Unknown early log operation: %d\n",
 				      log->op_type);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ecc97f74ab18..6211a807cb31 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1770,12 +1770,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	 */
 	clear_vm_uninitialized_flag(area);
 
-	/*
-	 * A ref_count = 2 is needed because vm_struct allocated in
-	 * __get_vm_area_node() contains a reference to the virtual address of
-	 * the vmalloc'ed block.
-	 */
-	kmemleak_alloc(addr, real_size, 2, gfp_mask);
+	kmemleak_vmalloc(area, size, gfp_mask);
 
 	return addr;
 
-- 
cgit v1.2.3


From 2262185c5b287f2758afda79c149b7cf6bee165c Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 6 Jul 2017 15:40:25 -0700
Subject: mm: per-cgroup memory reclaim stats

Track the following reclaim counters for every memory cgroup: PGREFILL,
PGSCAN, PGSTEAL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE and PGLAZYFREED.

These values are exposed using the memory.stats interface of cgroup v2.

The meaning of each value is the same as for global counters, available
using /proc/vmstat.

Also, for consistency, rename mem_cgroup_count_vm_event() to
count_memcg_event_mm().

Link: http://lkml.kernel.org/r/1494530183-30808-1-git-send-email-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroup-v2.txt | 28 ++++++++++++++++++++++++++
 fs/dax.c                    |  2 +-
 fs/ncpfs/mmap.c             |  2 +-
 include/linux/memcontrol.h  | 48 ++++++++++++++++++++++++++++++++++++++++++---
 mm/filemap.c                |  2 +-
 mm/memcontrol.c             | 10 ++++++++++
 mm/memory.c                 |  4 ++--
 mm/shmem.c                  |  3 +--
 mm/swap.c                   |  1 +
 mm/vmscan.c                 | 30 +++++++++++++++++++++-------
 10 files changed, 113 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 558c3a739baf..5ac2fbde97e6 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -956,6 +956,34 @@ PAGE_SIZE multiple when read back.
 
 		Number of times a shadow node has been reclaimed
 
+	  pgrefill
+
+		Amount of scanned pages (in an active LRU list)
+
+	  pgscan
+
+		Amount of scanned pages (in an inactive LRU list)
+
+	  pgsteal
+
+		Amount of reclaimed pages
+
+	  pgactivate
+
+		Amount of pages moved to the active LRU list
+
+	  pgdeactivate
+
+		Amount of pages moved to the inactive LRU lis
+
+	  pglazyfree
+
+		Amount of pages postponed to be freed under memory pressure
+
+	  pglazyfreed
+
+		Amount of reclaimed lazyfree pages
+
   memory.swap.current
 
 	A read-only single value file which exists on non-root
diff --git a/fs/dax.c b/fs/dax.c
index 33d05aa02aad..cd8fced592d0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1213,7 +1213,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	case IOMAP_MAPPED:
 		if (iomap.flags & IOMAP_F_NEW) {
 			count_vm_event(PGMAJFAULT);
-			mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+			count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 			major = VM_FAULT_MAJOR;
 		}
 		error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 0c3905e0542e..6719c0be674d 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf)
 	 * -- nyc
 	 */
 	count_vm_event(PGMAJFAULT);
-	mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+	count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 	return VM_FAULT_MAJOR;
 }
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 899949bbb2f9..b2a5b1cd4e55 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -357,6 +357,17 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 }
 struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
 
+static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
+{
+	struct mem_cgroup_per_node *mz;
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
+	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	return mz->memcg;
+}
+
 /**
  * parent_mem_cgroup - find the accounting parent of a memcg
  * @memcg: memcg whose parent to find
@@ -546,8 +557,23 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
-static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
-					     enum vm_event_item idx)
+static inline void count_memcg_events(struct mem_cgroup *memcg,
+				      enum vm_event_item idx,
+				      unsigned long count)
+{
+	if (!mem_cgroup_disabled())
+		this_cpu_add(memcg->stat->events[idx], count);
+}
+
+static inline void count_memcg_page_event(struct page *page,
+					  enum memcg_stat_item idx)
+{
+	if (page->mem_cgroup)
+		count_memcg_events(page->mem_cgroup, idx, 1);
+}
+
+static inline void count_memcg_event_mm(struct mm_struct *mm,
+					enum vm_event_item idx)
 {
 	struct mem_cgroup *memcg;
 
@@ -675,6 +701,11 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 	return NULL;
 }
 
+static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
+{
+	return NULL;
+}
+
 static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 {
 	return true;
@@ -789,8 +820,19 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head)
 {
 }
 
+static inline void count_memcg_events(struct mem_cgroup *memcg,
+				      enum vm_event_item idx,
+				      unsigned long count)
+{
+}
+
+static inline void count_memcg_page_event(struct page *page,
+					  enum memcg_stat_item idx)
+{
+}
+
 static inline
-void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
+void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
 #endif /* CONFIG_MEMCG */
diff --git a/mm/filemap.c b/mm/filemap.c
index aea58e983a73..2e906ef52143 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2265,7 +2265,7 @@ int filemap_fault(struct vm_fault *vmf)
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vmf->vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
 retry_find:
 		page = find_get_page(mapping, offset);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc51a33ddcd1..3e2f8cf85b4c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5230,6 +5230,16 @@ static int memory_stat_show(struct seq_file *m, void *v)
 	seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
 	seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
 
+	seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
+	seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
+		   events[PGSCAN_DIRECT]);
+	seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
+		   events[PGSTEAL_DIRECT]);
+	seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
+	seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
+	seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
+	seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
+
 	seq_printf(m, "workingset_refault %lu\n",
 		   stat[WORKINGSET_REFAULT]);
 	seq_printf(m, "workingset_activate %lu\n",
diff --git a/mm/memory.c b/mm/memory.c
index bf3aab1684e9..e31dd97e6114 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2719,7 +2719,7 @@ int do_swap_page(struct vm_fault *vmf)
 		/* Had to read the page from swap area: Major fault */
 		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+		count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
 	} else if (PageHWPoison(page)) {
 		/*
 		 * hwpoisoned dirty swapcache pages are kept for killing
@@ -3837,7 +3837,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	__set_current_state(TASK_RUNNING);
 
 	count_vm_event(PGFAULT);
-	mem_cgroup_count_vm_event(vma->vm_mm, PGFAULT);
+	count_memcg_event_mm(vma->vm_mm, PGFAULT);
 
 	/* do counter updates before entering really critical section. */
 	check_sync_rss_stat(current);
diff --git a/mm/shmem.c b/mm/shmem.c
index a06f23731d3f..9418f5a9bc46 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1646,8 +1646,7 @@ repeat:
 			if (fault_type) {
 				*fault_type |= VM_FAULT_MAJOR;
 				count_vm_event(PGMAJFAULT);
-				mem_cgroup_count_vm_event(charge_mm,
-							  PGMAJFAULT);
+				count_memcg_event_mm(charge_mm, PGMAJFAULT);
 			}
 			/* Here we actually start the io */
 			page = shmem_swapin(swap, gfp, info, index);
diff --git a/mm/swap.c b/mm/swap.c
index 98d08b4579fa..4f44dbd7f780 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -591,6 +591,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
 		add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
 
 		__count_vm_events(PGLAZYFREE, hpage_nr_pages(page));
+		count_memcg_page_event(page, PGLAZYFREE);
 		update_page_reclaim_stat(lruvec, 1, 0);
 	}
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aebb157258f2..7d3c6c59897c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1294,6 +1294,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 
 			count_vm_event(PGLAZYFREED);
+			count_memcg_page_event(page, PGLAZYFREED);
 		} else if (!mapping || !__remove_mapping(mapping, page, true))
 			goto keep_locked;
 		/*
@@ -1323,6 +1324,7 @@ activate_locked:
 		if (!PageMlocked(page)) {
 			SetPageActive(page);
 			pgactivate++;
+			count_memcg_page_event(page, PGACTIVATE);
 		}
 keep_locked:
 		unlock_page(page);
@@ -1762,11 +1764,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
 	reclaim_stat->recent_scanned[file] += nr_taken;
 
-	if (global_reclaim(sc)) {
-		if (current_is_kswapd())
+	if (current_is_kswapd()) {
+		if (global_reclaim(sc))
 			__count_vm_events(PGSCAN_KSWAPD, nr_scanned);
-		else
+		count_memcg_events(lruvec_memcg(lruvec), PGSCAN_KSWAPD,
+				   nr_scanned);
+	} else {
+		if (global_reclaim(sc))
 			__count_vm_events(PGSCAN_DIRECT, nr_scanned);
+		count_memcg_events(lruvec_memcg(lruvec), PGSCAN_DIRECT,
+				   nr_scanned);
 	}
 	spin_unlock_irq(&pgdat->lru_lock);
 
@@ -1778,11 +1785,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	spin_lock_irq(&pgdat->lru_lock);
 
-	if (global_reclaim(sc)) {
-		if (current_is_kswapd())
+	if (current_is_kswapd()) {
+		if (global_reclaim(sc))
 			__count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed);
-		else
+		count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_KSWAPD,
+				   nr_reclaimed);
+	} else {
+		if (global_reclaim(sc))
 			__count_vm_events(PGSTEAL_DIRECT, nr_reclaimed);
+		count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_DIRECT,
+				   nr_reclaimed);
 	}
 
 	putback_inactive_pages(lruvec, &page_list);
@@ -1927,8 +1939,11 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
 		}
 	}
 
-	if (!is_active_lru(lru))
+	if (!is_active_lru(lru)) {
 		__count_vm_events(PGDEACTIVATE, nr_moved);
+		count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
+				   nr_moved);
+	}
 
 	return nr_moved;
 }
@@ -1966,6 +1981,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	reclaim_stat->recent_scanned[file] += nr_taken;
 
 	__count_vm_events(PGREFILL, nr_scanned);
+	count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
 
 	spin_unlock_irq(&pgdat->lru_lock);
 
-- 
cgit v1.2.3


From 8e675f7af50747e1e9e96538e8706767e4f80e2c Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 6 Jul 2017 15:40:28 -0700
Subject: mm/oom_kill: count global and memory cgroup oom kills

Show count of oom killer invocations in /proc/vmstat and count of
processes killed in memory cgroup in knob "memory.events" (in
memory.oom_control for v1 cgroup).

Also describe difference between "oom" and "oom_kill" in memory cgroup
documentation.  Currently oom in memory cgroup kills tasks iff shortage
has happened inside page fault.

These counters helps in monitoring oom kills - for now the only way is
grepping for magic words in kernel log.

[akpm@linux-foundation.org: fix for mem_cgroup_count_vm_event() rename]
[akpm@linux-foundation.org: fix comment, per Konstantin]
Link: http://lkml.kernel.org/r/149570810989.203600.9492483715840752937.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Roman Guschin <guroan@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroup-v2.txt   | 20 ++++++++++++++++----
 include/linux/memcontrol.h    |  5 ++++-
 include/linux/vm_event_item.h |  1 +
 mm/memcontrol.c               |  2 ++
 mm/oom_kill.c                 |  5 +++++
 mm/vmstat.c                   |  1 +
 6 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 5ac2fbde97e6..e6101976e0f1 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -852,13 +852,25 @@ PAGE_SIZE multiple when read back.
 
 		The number of times the cgroup's memory usage was
 		about to go over the max boundary.  If direct reclaim
-		fails to bring it down, the OOM killer is invoked.
+		fails to bring it down, the cgroup goes to OOM state.
 
 	  oom
 
-		The number of times the OOM killer has been invoked in
-		the cgroup.  This may not exactly match the number of
-		processes killed but should generally be close.
+		The number of time the cgroup's memory usage was
+		reached the limit and allocation was about to fail.
+
+		Depending on context result could be invocation of OOM
+		killer and retrying allocation or failing alloction.
+
+		Failed allocation in its turn could be returned into
+		userspace as -ENOMEM or siletly ignored in cases like
+		disk readahead.	 For now OOM in memory cgroup kills
+		tasks iff shortage has happened inside page fault.
+
+	  oom_kill
+
+		The number of processes belonging to this cgroup
+		killed by any kind of OOM killer.
 
   memory.stat
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b2a5b1cd4e55..72d0853beb31 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -582,8 +582,11 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	if (likely(memcg))
+	if (likely(memcg)) {
 		this_cpu_inc(memcg->stat->events[idx]);
+		if (idx == OOM_KILL)
+			cgroup_file_notify(&memcg->events_file);
+	}
 	rcu_read_unlock();
 }
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index be3ab2d13adf..37e8d31a4632 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		PAGEOUTRUN, PGROTATED,
 		DROP_PAGECACHE, DROP_SLAB,
+		OOM_KILL,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
 		NUMA_HUGE_PTE_UPDATES,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3e2f8cf85b4c..4f686fc1c5fa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3573,6 +3573,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
 
 	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
 	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
+	seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 	return 0;
 }
 
@@ -5164,6 +5165,7 @@ static int memory_events_show(struct seq_file *m, void *v)
 	seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
 	seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
 	seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+	seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
 	return 0;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04c9143a8625..0e2c925e7826 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -876,6 +876,11 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	/* Get a reference to safely compare mm after task_unlock(victim) */
 	mm = victim->mm;
 	mmgrab(mm);
+
+	/* Raise event before sending signal: task reaper must see this */
+	count_vm_event(OOM_KILL);
+	count_memcg_event_mm(mm, OOM_KILL);
+
 	/*
 	 * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
 	 * the OOM victim from depleting the memory reserves from the user
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6dae6b240b21..46281825c710 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1018,6 +1018,7 @@ const char * const vmstat_text[] = {
 
 	"drop_pagecache",
 	"drop_slab",
+	"oom_kill",
 
 #ifdef CONFIG_NUMA_BALANCING
 	"numa_pte_updates",
-- 
cgit v1.2.3


From 385386cff4c6f047907655e05791d88198c4c523 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 6 Jul 2017 15:40:43 -0700
Subject: mm: vmstat: move slab statistics from zone to node counters

Patch series "mm: per-lruvec slab stats"

Josef is working on a new approach to balancing slab caches and the page
cache.  For this to work, he needs slab cache statistics on the lruvec
level.  These patches implement that by adding infrastructure that
allows updating and reading generic VM stat items per lruvec, then
switches some existing VM accounting sites, including the slab
accounting ones, to this new cgroup-aware API.

I'll follow up with more patches on this, because there is actually
substantial simplification that can be done to the memory controller
when we replace private memcg accounting with making the existing VM
accounting sites cgroup-aware.  But this is enough for Josef to base his
slab reclaim work on, so here goes.

This patch (of 5):

To re-implement slab cache vs.  page cache balancing, we'll need the
slab counters at the lruvec level, which, ever since lru reclaim was
moved from the zone to the node, is the intersection of the node, not
the zone, and the memcg.

We could retain the per-zone counters for when the page allocator dumps
its memory information on failures, and have counters on both levels -
which on all but NUMA node 0 is usually redundant.  But let's keep it
simple for now and just move them.  If anybody complains we can restore
the per-zone counters.

[hannes@cmpxchg.org: fix oops]
  Link: http://lkml.kernel.org/r/20170605183511.GA8915@cmpxchg.org
Link: http://lkml.kernel.org/r/20170530181724.27197-3-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c    | 10 +++++-----
 include/linux/mmzone.h |  4 ++--
 mm/page_alloc.c        |  7 +++----
 mm/slab.c              |  8 ++++----
 mm/slub.c              |  4 ++--
 mm/vmscan.c            |  2 +-
 mm/vmstat.c            |  4 ++--
 7 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 1da0005341a1..6b1ee371ee97 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -129,11 +129,11 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
 		       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
 		       nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
-		       nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE) +
-				sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
-		       nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE)),
+		       nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
+			      node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
+		       nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		       nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
+		       nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
 		       nid, K(node_page_state(pgdat, NR_ANON_THPS) *
 				       HPAGE_PMD_NR),
 		       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
@@ -141,7 +141,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
 				       HPAGE_PMD_NR));
 #else
-		       nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
+		       nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
 #endif
 	n += hugetlb_report_node_meminfo(nid, buf + n);
 	return n;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index abc1641011f2..7e8f100cb56d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -125,8 +125,6 @@ enum zone_stat_item {
 	NR_ZONE_UNEVICTABLE,
 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
-	NR_SLAB_RECLAIMABLE,
-	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
 	NR_KERNEL_STACK_KB,	/* measured in KiB */
 	/* Second 128 byte cacheline */
@@ -152,6 +150,8 @@ enum node_stat_item {
 	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
 	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
 	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
+	NR_SLAB_RECLAIMABLE,
+	NR_SLAB_UNRECLAIMABLE,
 	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
 	WORKINGSET_REFAULT,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8aa860017d66..a35add8d7c0b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4643,8 +4643,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			" present:%lukB"
 			" managed:%lukB"
 			" mlocked:%lukB"
-			" slab_reclaimable:%lukB"
-			" slab_unreclaimable:%lukB"
 			" kernel_stack:%lukB"
 			" pagetables:%lukB"
 			" bounce:%lukB"
@@ -4666,8 +4664,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(zone->present_pages),
 			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
-			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
-			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
 			zone_page_state(zone, NR_KERNEL_STACK_KB),
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_BOUNCE)),
@@ -5153,6 +5149,7 @@ static void build_zonelists(pg_data_t *pgdat)
  */
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
 static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
 static void setup_zone_pageset(struct zone *zone);
 
 /*
@@ -6053,6 +6050,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
 
+	pgdat->per_cpu_nodestats = &boot_nodestats;
+
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, freesize, memmap_pages;
diff --git a/mm/slab.c b/mm/slab.c
index 503317188926..a38634ed478e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1425,10 +1425,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		add_zone_page_state(page_zone(page),
+		add_node_page_state(page_pgdat(page),
 			NR_SLAB_RECLAIMABLE, nr_pages);
 	else
-		add_zone_page_state(page_zone(page),
+		add_node_page_state(page_pgdat(page),
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
 
 	__SetPageSlab(page);
@@ -1459,10 +1459,10 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 	kmemcheck_free_shadow(page, order);
 
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		sub_zone_page_state(page_zone(page),
+		sub_node_page_state(page_pgdat(page),
 				NR_SLAB_RECLAIMABLE, nr_freed);
 	else
-		sub_zone_page_state(page_zone(page),
+		sub_node_page_state(page_pgdat(page),
 				NR_SLAB_UNRECLAIMABLE, nr_freed);
 
 	BUG_ON(!PageSlab(page));
diff --git a/mm/slub.c b/mm/slub.c
index 388f66d1da5e..aa5aa6bfb35e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1615,7 +1615,7 @@ out:
 	if (!page)
 		return NULL;
 
-	mod_zone_page_state(page_zone(page),
+	mod_node_page_state(page_pgdat(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		1 << oo_order(oo));
@@ -1655,7 +1655,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
 	kmemcheck_free_shadow(page, compound_order(page));
 
-	mod_zone_page_state(page_zone(page),
+	mod_node_page_state(page_pgdat(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7d3c6c59897c..9e95fafc026b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3874,7 +3874,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
 	 * unmapped file backed pages.
 	 */
 	if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
-	    sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
+	    node_page_state(pgdat, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
 		return NODE_RECLAIM_FULL;
 
 	/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 46281825c710..744ceaeb42a0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -928,8 +928,6 @@ const char * const vmstat_text[] = {
 	"nr_zone_unevictable",
 	"nr_zone_write_pending",
 	"nr_mlock",
-	"nr_slab_reclaimable",
-	"nr_slab_unreclaimable",
 	"nr_page_table_pages",
 	"nr_kernel_stack",
 	"nr_bounce",
@@ -952,6 +950,8 @@ const char * const vmstat_text[] = {
 	"nr_inactive_file",
 	"nr_active_file",
 	"nr_unevictable",
+	"nr_slab_reclaimable",
+	"nr_slab_unreclaimable",
 	"nr_isolated_anon",
 	"nr_isolated_file",
 	"workingset_refault",
-- 
cgit v1.2.3


From 320492961c1cf21da5547b00c23e525851c1d16f Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 6 Jul 2017 15:40:46 -0700
Subject: mm: memcontrol: use the node-native slab memory counters

Now that the slab counters are moved from the zone to the node level we
can drop the private memcg node stats and use the official ones.

Link: http://lkml.kernel.org/r/20170530181724.27197-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 --
 mm/memcontrol.c            | 8 ++++----
 mm/slab.h                  | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 72d0853beb31..fa506ae61d66 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -44,8 +44,6 @@ enum memcg_stat_item {
 	MEMCG_SOCK,
 	/* XXX: why are these zone and not node counters? */
 	MEMCG_KERNEL_STACK_KB,
-	MEMCG_SLAB_RECLAIMABLE,
-	MEMCG_SLAB_UNRECLAIMABLE,
 	MEMCG_NR_STAT,
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4f686fc1c5fa..dceb0deb8d5e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5198,8 +5198,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
 	seq_printf(m, "kernel_stack %llu\n",
 		   (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
 	seq_printf(m, "slab %llu\n",
-		   (u64)(stat[MEMCG_SLAB_RECLAIMABLE] +
-			 stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
+		   (u64)(stat[NR_SLAB_RECLAIMABLE] +
+			 stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
 	seq_printf(m, "sock %llu\n",
 		   (u64)stat[MEMCG_SOCK] * PAGE_SIZE);
 
@@ -5223,9 +5223,9 @@ static int memory_stat_show(struct seq_file *m, void *v)
 	}
 
 	seq_printf(m, "slab_reclaimable %llu\n",
-		   (u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE);
+		   (u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
 	seq_printf(m, "slab_unreclaimable %llu\n",
-		   (u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
+		   (u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
 
 	/* Accumulated memory events */
 
diff --git a/mm/slab.h b/mm/slab.h
index 9cfcf099709c..69f0579cb5aa 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -287,7 +287,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
 
 	memcg_kmem_update_page_stat(page,
 			(s->flags & SLAB_RECLAIM_ACCOUNT) ?
-			MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE,
+			NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 			1 << order);
 	return 0;
 }
@@ -300,7 +300,7 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 
 	memcg_kmem_update_page_stat(page,
 			(s->flags & SLAB_RECLAIM_ACCOUNT) ?
-			MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE,
+			NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 			-(1 << order));
 	memcg_kmem_uncharge(page, order);
 }
-- 
cgit v1.2.3


From ed52be7bfd45533b194b429f43361493d24599a7 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 6 Jul 2017 15:40:49 -0700
Subject: mm: memcontrol: use generic mod_memcg_page_state for kmem pages

The kmem-specific functions do the same thing.  Switch and drop.

Link: http://lkml.kernel.org/r/20170530181724.27197-5-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 17 -----------------
 kernel/fork.c              |  8 ++++----
 mm/slab.h                  | 16 ++++++++--------
 3 files changed, 12 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fa506ae61d66..5a72d8377942 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -929,19 +929,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-/**
- * memcg_kmem_update_page_stat - update kmem page state statistics
- * @page: the page
- * @idx: page state item to account
- * @val: number of pages (positive or negative)
- */
-static inline void memcg_kmem_update_page_stat(struct page *page,
-				enum memcg_stat_item idx, int val)
-{
-	if (memcg_kmem_enabled() && page->mem_cgroup)
-		this_cpu_add(page->mem_cgroup->stat->count[idx], val);
-}
-
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
@@ -964,10 +951,6 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
-static inline void memcg_kmem_update_page_stat(struct page *page,
-				enum memcg_stat_item idx, int val)
-{
-}
 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index e53770d2bf95..aa01b810c0bd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -326,8 +326,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 		}
 
 		/* All stack pages belong to the same memcg. */
-		memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
-					    account * (THREAD_SIZE / 1024));
+		mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+				     account * (THREAD_SIZE / 1024));
 	} else {
 		/*
 		 * All stack pages are in the same zone and belong to the
@@ -338,8 +338,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
 				    THREAD_SIZE / 1024 * account);
 
-		memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
-					    account * (THREAD_SIZE / 1024));
+		mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
+				     account * (THREAD_SIZE / 1024));
 	}
 }
 
diff --git a/mm/slab.h b/mm/slab.h
index 69f0579cb5aa..7b84e3839dfe 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -285,10 +285,10 @@ static __always_inline int memcg_charge_slab(struct page *page,
 	if (ret)
 		return ret;
 
-	memcg_kmem_update_page_stat(page,
-			(s->flags & SLAB_RECLAIM_ACCOUNT) ?
-			NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-			1 << order);
+	mod_memcg_page_state(page,
+			     (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+			     NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
+			     1 << order);
 	return 0;
 }
 
@@ -298,10 +298,10 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 	if (!memcg_kmem_enabled())
 		return;
 
-	memcg_kmem_update_page_stat(page,
-			(s->flags & SLAB_RECLAIM_ACCOUNT) ?
-			NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-			-(1 << order));
+	mod_memcg_page_state(page,
+			     (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+			     NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
+			     -(1 << order));
 	memcg_kmem_uncharge(page, order);
 }
 
-- 
cgit v1.2.3


From 00f3ca2c2d6635d85108571c4dd9a29088668662 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 6 Jul 2017 15:40:52 -0700
Subject: mm: memcontrol: per-lruvec stats infrastructure

lruvecs are at the intersection of the NUMA node and memcg, which is the
scope for most paging activity.

Introduce a convenient accounting infrastructure that maintains
statistics per node, per memcg, and the lruvec itself.

Then convert over accounting sites for statistics that are already
tracked in both nodes and memcgs and can be easily switched.

[hannes@cmpxchg.org: fix crash in the new cgroup stat keeping code]
  Link: http://lkml.kernel.org/r/20170531171450.GA10481@cmpxchg.org
[hannes@cmpxchg.org: don't track uncharged pages at all
  Link: http://lkml.kernel.org/r/20170605175254.GA8547@cmpxchg.org
[hannes@cmpxchg.org: add missing free_percpu()]
  Link: http://lkml.kernel.org/r/20170605175354.GB8547@cmpxchg.org
[linux@roeck-us.net: hexagon: fix build error caused by include file order]
  Link: http://lkml.kernel.org/r/20170617153721.GA4382@roeck-us.net
Link: http://lkml.kernel.org/r/20170530181724.27197-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/hexagon/include/asm/pgtable.h |   1 -
 arch/hexagon/kernel/asm-offsets.c  |   1 -
 arch/hexagon/mm/vm_tlb.c           |   1 +
 include/linux/memcontrol.h         | 246 ++++++++++++++++++++++++++++++++-----
 include/linux/vmstat.h             |   1 -
 mm/memcontrol.c                    |  11 +-
 mm/page-writeback.c                |  15 +--
 mm/rmap.c                          |   8 +-
 mm/workingset.c                    |   9 +-
 9 files changed, 238 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 24a9177fb897..aef02f7ca8aa 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -24,7 +24,6 @@
 /*
  * Page table definitions for Qualcomm Hexagon processor.
  */
-#include <linux/swap.h>
 #include <asm/page.h>
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
index 308be68d4fb3..3980c0407aa1 100644
--- a/arch/hexagon/kernel/asm-offsets.c
+++ b/arch/hexagon/kernel/asm-offsets.c
@@ -25,7 +25,6 @@
 #include <linux/compat.h>
 #include <linux/types.h>
 #include <linux/sched.h>
-#include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/kbuild.h>
 #include <asm/ptrace.h>
diff --git a/arch/hexagon/mm/vm_tlb.c b/arch/hexagon/mm/vm_tlb.c
index 9647d00cb761..b474065533ce 100644
--- a/arch/hexagon/mm/vm_tlb.c
+++ b/arch/hexagon/mm/vm_tlb.c
@@ -24,6 +24,7 @@
  * be instantiated for it, differently from a native build.
  */
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <asm/page.h>
 #include <asm/hexagon_vm.h>
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5a72d8377942..3914e3dd6168 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -26,7 +26,8 @@
 #include <linux/page_counter.h>
 #include <linux/vmpressure.h>
 #include <linux/eventfd.h>
-#include <linux/mmzone.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
 #include <linux/writeback.h>
 #include <linux/page-flags.h>
 
@@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter {
 	unsigned int generation;
 };
 
+struct lruvec_stat {
+	long count[NR_VM_NODE_STAT_ITEMS];
+};
+
 /*
  * per-zone information in memory controller.
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
+	struct lruvec_stat __percpu *lruvec_stat;
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
@@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 	return val;
 }
 
-static inline void mod_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx, int val)
+static inline void __mod_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx, int val)
 {
 	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->count[idx], val);
-}
-
-static inline void inc_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
-{
-	mod_memcg_state(memcg, idx, 1);
+		__this_cpu_add(memcg->stat->count[idx], val);
 }
 
-static inline void dec_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
+static inline void mod_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx, int val)
 {
-	mod_memcg_state(memcg, idx, -1);
+	if (!mem_cgroup_disabled())
+		this_cpu_add(memcg->stat->count[idx], val);
 }
 
 /**
@@ -532,6 +533,13 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg,
  *
  * Kernel pages are an exception to this, since they'll never move.
  */
+static inline void __mod_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx, int val)
+{
+	if (page->mem_cgroup)
+		__mod_memcg_state(page->mem_cgroup, idx, val);
+}
+
 static inline void mod_memcg_page_state(struct page *page,
 					enum memcg_stat_item idx, int val)
 {
@@ -539,16 +547,76 @@ static inline void mod_memcg_page_state(struct page *page,
 		mod_memcg_state(page->mem_cgroup, idx, val);
 }
 
-static inline void inc_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+					      enum node_stat_item idx)
 {
-	mod_memcg_page_state(page, idx, 1);
+	struct mem_cgroup_per_node *pn;
+	long val = 0;
+	int cpu;
+
+	if (mem_cgroup_disabled())
+		return node_page_state(lruvec_pgdat(lruvec), idx);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	for_each_possible_cpu(cpu)
+		val += per_cpu(pn->lruvec_stat->count[idx], cpu);
+
+	if (val < 0)
+		val = 0;
+
+	return val;
 }
 
-static inline void dec_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx, int val)
 {
-	mod_memcg_page_state(page, idx, -1);
+	struct mem_cgroup_per_node *pn;
+
+	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+	if (mem_cgroup_disabled())
+		return;
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	__mod_memcg_state(pn->memcg, idx, val);
+	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx, int val)
+{
+	struct mem_cgroup_per_node *pn;
+
+	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+	if (mem_cgroup_disabled())
+		return;
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	mod_memcg_state(pn->memcg, idx, val);
+	this_cpu_add(pn->lruvec_stat->count[idx], val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx, int val)
+{
+	struct mem_cgroup_per_node *pn;
+
+	__mod_node_page_state(page_pgdat(page), idx, val);
+	if (mem_cgroup_disabled() || !page->mem_cgroup)
+		return;
+	__mod_memcg_state(page->mem_cgroup, idx, val);
+	pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx, int val)
+{
+	struct mem_cgroup_per_node *pn;
+
+	mod_node_page_state(page_pgdat(page), idx, val);
+	if (mem_cgroup_disabled() || !page->mem_cgroup)
+		return;
+	mod_memcg_state(page->mem_cgroup, idx, val);
+	pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+	this_cpu_add(pn->lruvec_stat->count[idx], val);
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -777,19 +845,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 	return 0;
 }
 
-static inline void mod_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx,
-				   int nr)
+static inline void __mod_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx,
+				     int nr)
 {
 }
 
-static inline void inc_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
+static inline void mod_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx,
+				   int nr)
 {
 }
 
-static inline void dec_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
+static inline void __mod_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx,
+					  int nr)
 {
 }
 
@@ -799,14 +869,34 @@ static inline void mod_memcg_page_state(struct page *page,
 {
 }
 
-static inline void inc_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+					      enum node_stat_item idx)
 {
+	return node_page_state(lruvec_pgdat(lruvec), idx);
 }
 
-static inline void dec_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx, int val)
 {
+	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx, int val)
+{
+	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx, int val)
+{
+	__mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx, int val)
+{
+	mod_node_page_state(page_pgdat(page), idx, val);
 }
 
 static inline
@@ -838,6 +928,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 }
 #endif /* CONFIG_MEMCG */
 
+static inline void __inc_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx)
+{
+	__mod_memcg_state(memcg, idx, 1);
+}
+
+static inline void __dec_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx)
+{
+	__mod_memcg_state(memcg, idx, -1);
+}
+
+static inline void __inc_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx)
+{
+	__mod_memcg_page_state(page, idx, 1);
+}
+
+static inline void __dec_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx)
+{
+	__mod_memcg_page_state(page, idx, -1);
+}
+
+static inline void __inc_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	__mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void __dec_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	__mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void __inc_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx)
+{
+	__mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void __dec_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx)
+{
+	__mod_lruvec_page_state(page, idx, -1);
+}
+
+static inline void inc_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx)
+{
+	mod_memcg_state(memcg, idx, 1);
+}
+
+static inline void dec_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx)
+{
+	mod_memcg_state(memcg, idx, -1);
+}
+
+static inline void inc_memcg_page_state(struct page *page,
+					enum memcg_stat_item idx)
+{
+	mod_memcg_page_state(page, idx, 1);
+}
+
+static inline void dec_memcg_page_state(struct page *page,
+					enum memcg_stat_item idx)
+{
+	mod_memcg_page_state(page, idx, -1);
+}
+
+static inline void inc_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx)
+{
+	mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void dec_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx)
+{
+	mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void inc_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx)
+{
+	mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void dec_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx)
+{
+	mod_lruvec_page_state(page, idx, -1);
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 613771909b6e..b3d85f30d424 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 #include <linux/percpu.h>
-#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/vm_event_item.h>
 #include <linux/atomic.h>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dceb0deb8d5e..425aa0caa712 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
+	pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
+	if (!pn->lruvec_stat) {
+		kfree(pn);
+		return 1;
+	}
+
 	lruvec_init(&pn->lruvec);
 	pn->usage_in_excess = 0;
 	pn->on_tree = false;
@@ -4133,7 +4139,10 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 
 static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
-	kfree(memcg->nodeinfo[node]);
+	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+
+	free_percpu(pn->lruvec_stat);
+	kfree(pn);
 }
 
 static void __mem_cgroup_free(struct mem_cgroup *memcg)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 143c1c25d680..8989eada0ef7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2433,8 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		inode_attach_wb(inode, page);
 		wb = inode_to_wb(inode);
 
-		inc_memcg_page_state(page, NR_FILE_DIRTY);
-		__inc_node_page_state(page, NR_FILE_DIRTY);
+		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
 		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		__inc_node_page_state(page, NR_DIRTIED);
 		__inc_wb_stat(wb, WB_RECLAIMABLE);
@@ -2455,8 +2454,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 			  struct bdi_writeback *wb)
 {
 	if (mapping_cap_account_dirty(mapping)) {
-		dec_memcg_page_state(page, NR_FILE_DIRTY);
-		dec_node_page_state(page, NR_FILE_DIRTY);
+		dec_lruvec_page_state(page, NR_FILE_DIRTY);
 		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		dec_wb_stat(wb, WB_RECLAIMABLE);
 		task_io_account_cancelled_write(PAGE_SIZE);
@@ -2712,8 +2710,7 @@ int clear_page_dirty_for_io(struct page *page)
 		 */
 		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		if (TestClearPageDirty(page)) {
-			dec_memcg_page_state(page, NR_FILE_DIRTY);
-			dec_node_page_state(page, NR_FILE_DIRTY);
+			dec_lruvec_page_state(page, NR_FILE_DIRTY);
 			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 			dec_wb_stat(wb, WB_RECLAIMABLE);
 			ret = 1;
@@ -2759,8 +2756,7 @@ int test_clear_page_writeback(struct page *page)
 		ret = TestClearPageWriteback(page);
 	}
 	if (ret) {
-		dec_memcg_page_state(page, NR_WRITEBACK);
-		dec_node_page_state(page, NR_WRITEBACK);
+		dec_lruvec_page_state(page, NR_WRITEBACK);
 		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		inc_node_page_state(page, NR_WRITTEN);
 	}
@@ -2814,8 +2810,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		ret = TestSetPageWriteback(page);
 	}
 	if (!ret) {
-		inc_memcg_page_state(page, NR_WRITEBACK);
-		inc_node_page_state(page, NR_WRITEBACK);
+		inc_lruvec_page_state(page, NR_WRITEBACK);
 		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 	}
 	unlock_page_memcg(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index b255743351e5..ced14f1af6dc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1145,8 +1145,7 @@ void page_add_file_rmap(struct page *page, bool compound)
 		if (!atomic_inc_and_test(&page->_mapcount))
 			goto out;
 	}
-	__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
-	mod_memcg_page_state(page, NR_FILE_MAPPED, nr);
+	__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
 out:
 	unlock_page_memcg(page);
 }
@@ -1181,12 +1180,11 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 	}
 
 	/*
-	 * We use the irq-unsafe __{inc|mod}_zone_page_state because
+	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
 	 * these counters are not modified in interrupt context, and
 	 * pte lock(a spinlock) is held, which implies preemption disabled.
 	 */
-	__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
-	mod_memcg_page_state(page, NR_FILE_MAPPED, -nr);
+	__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
 
 	if (unlikely(PageMlocked(page)))
 		clear_page_mlock(page);
diff --git a/mm/workingset.c b/mm/workingset.c
index b8c9ab678479..7119cd745ace 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -288,12 +288,10 @@ bool workingset_refault(void *shadow)
 	 */
 	refault_distance = (refault - eviction) & EVICTION_MASK;
 
-	inc_node_state(pgdat, WORKINGSET_REFAULT);
-	inc_memcg_state(memcg, WORKINGSET_REFAULT);
+	inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
 
 	if (refault_distance <= active_file) {
-		inc_node_state(pgdat, WORKINGSET_ACTIVATE);
-		inc_memcg_state(memcg, WORKINGSET_ACTIVATE);
+		inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
 		rcu_read_unlock();
 		return true;
 	}
@@ -474,8 +472,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	}
 	if (WARN_ON_ONCE(node->exceptional))
 		goto out_invalid;
-	inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
-	inc_memcg_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
+	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
 	__radix_tree_delete_node(&mapping->page_tree, node,
 				 workingset_update_node, mapping);
 
-- 
cgit v1.2.3


From f70029bbaacbfa8f082d2b4988717cba4e269f17 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:41:02 -0700
Subject: mm, memory_hotplug: drop CONFIG_MOVABLE_NODE

Commit 20b2f52b73fe ("numa: add CONFIG_MOVABLE_NODE for
movable-dedicated node") has introduced CONFIG_MOVABLE_NODE without a
good explanation on why it is actually useful.

It makes a lot of sense to make movable node semantic opt in but we
already have that because the feature has to be explicitly enabled on
the kernel command line.  A config option on top only makes the
configuration space larger without a good reason.  It also adds an
additional ifdefery that pollutes the code.

Just drop the config option and make it de-facto always enabled.  This
shouldn't introduce any change to the semantic.

Link: http://lkml.kernel.org/r/20170529114141.536-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Kani Toshimitsu <toshi.kani@hpe.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  7 +++++--
 drivers/base/node.c                             |  4 ----
 include/linux/memblock.h                        | 18 -----------------
 include/linux/nodemask.h                        |  4 ----
 mm/Kconfig                                      | 26 -------------------------
 mm/memblock.c                                   |  2 --
 mm/memory_hotplug.c                             |  4 ----
 mm/page_alloc.c                                 |  2 --
 8 files changed, 5 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 34ae9663aefd..dd7abbea8188 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2303,8 +2303,11 @@
 			that the amount of memory usable for all allocations
 			is not too small.
 
-	movable_node	[KNL] Boot-time switch to enable the effects
-			of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details.
+	movable_node	[KNL] Boot-time switch to make hotplugable memory
+			NUMA nodes to be movable. This means that the memory
+			of such nodes will be usable only for movable
+			allocations which rules out almost all kernel
+			allocations. Use with caution!
 
 	MTD_Partition=	[MTD]
 			Format: <name>,<region-number>,<size>,<offset>
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 6b1ee371ee97..73d39bc58c42 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -639,9 +639,7 @@ static struct node_attr node_state_attr[] = {
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
 #endif
-#ifdef CONFIG_MOVABLE_NODE
 	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
-#endif
 	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
 };
 
@@ -652,9 +650,7 @@ static struct attribute *node_state_attrs[] = {
 #ifdef CONFIG_HIGHMEM
 	&node_state_attr[N_HIGH_MEMORY].attr.attr,
 #endif
-#ifdef CONFIG_MOVABLE_NODE
 	&node_state_attr[N_MEMORY].attr.attr,
-#endif
 	&node_state_attr[N_CPU].attr.attr,
 	NULL
 };
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8098695e5d8d..1199e605d676 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -57,10 +57,8 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
-#ifdef CONFIG_MOVABLE_NODE
 /* If movable_node boot option specified */
 extern bool movable_node_enabled;
-#endif /* CONFIG_MOVABLE_NODE */
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
@@ -169,7 +167,6 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 	     i != (u64)ULLONG_MAX;					\
 	     __next_reserved_mem_region(&i, p_start, p_end))
 
-#ifdef CONFIG_MOVABLE_NODE
 static inline bool memblock_is_hotpluggable(struct memblock_region *m)
 {
 	return m->flags & MEMBLOCK_HOTPLUG;
@@ -179,16 +176,6 @@ static inline bool __init_memblock movable_node_is_enabled(void)
 {
 	return movable_node_enabled;
 }
-#else
-static inline bool memblock_is_hotpluggable(struct memblock_region *m)
-{
-	return false;
-}
-static inline bool movable_node_is_enabled(void)
-{
-	return false;
-}
-#endif
 
 static inline bool memblock_is_mirror(struct memblock_region *m)
 {
@@ -296,7 +283,6 @@ phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 
 phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 
-#ifdef CONFIG_MOVABLE_NODE
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
@@ -314,10 +300,6 @@ static inline bool memblock_bottom_up(void)
 {
 	return memblock.bottom_up;
 }
-#else
-static inline void __init memblock_set_bottom_up(bool enable) {}
-static inline bool memblock_bottom_up(void) { return false; }
-#endif
 
 /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index f746e44d4046..cf0b91c3ec12 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -387,11 +387,7 @@ enum node_states {
 #else
 	N_HIGH_MEMORY = N_NORMAL_MEMORY,
 #endif
-#ifdef CONFIG_MOVABLE_NODE
 	N_MEMORY,		/* The node has memory(regular, high, movable) */
-#else
-	N_MEMORY = N_HIGH_MEMORY,
-#endif
 	N_CPU,		/* The node has one or more cpus */
 	NR_NODE_STATES
 };
diff --git a/mm/Kconfig b/mm/Kconfig
index 9870baafb096..857f6ef368d4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -149,32 +149,6 @@ config NO_BOOTMEM
 config MEMORY_ISOLATION
 	bool
 
-config MOVABLE_NODE
-	bool "Enable to assign a node which has only movable memory"
-	depends on HAVE_MEMBLOCK
-	depends on NO_BOOTMEM
-	depends on X86_64 || OF_EARLY_FLATTREE || MEMORY_HOTPLUG
-	depends on NUMA
-	default n
-	help
-	  Allow a node to have only movable memory.  Pages used by the kernel,
-	  such as direct mapping pages cannot be migrated.  So the corresponding
-	  memory device cannot be hotplugged.  This option allows the following
-	  two things:
-	  - When the system is booting, node full of hotpluggable memory can
-	  be arranged to have only movable memory so that the whole node can
-	  be hot-removed. (need movable_node boot option specified).
-	  - After the system is up, the option allows users to online all the
-	  memory of a node as movable memory so that the whole node can be
-	  hot-removed.
-
-	  Users who don't use the memory hotplug feature are fine with this
-	  option on since they don't specify movable_node boot option or they
-	  don't online memory as movable.
-
-	  Say Y here if you want to hotplug a whole node.
-	  Say N here if you want kernel to use memory on all nodes evenly.
-
 #
 # Only be set on architectures that have completely implemented memory hotplug
 # feature. If you are not sure, don't touch it.
diff --git a/mm/memblock.c b/mm/memblock.c
index 7b8a5db76a2f..41eaeebb03dc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -54,9 +54,7 @@ struct memblock memblock __initdata_memblock = {
 };
 
 int memblock_debug __initdata_memblock;
-#ifdef CONFIG_MOVABLE_NODE
 bool movable_node_enabled __initdata_memblock = false;
-#endif
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 937319899e61..0dc8cf0a59d7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1572,11 +1572,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 
 static int __init cmdline_parse_movable_node(char *p)
 {
-#ifdef CONFIG_MOVABLE_NODE
 	movable_node_enabled = true;
-#else
-	pr_warn("movable_node option not supported\n");
-#endif
 	return 0;
 }
 early_param("movable_node", cmdline_parse_movable_node);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a35add8d7c0b..bd65b60939b6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -113,9 +113,7 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = { { [0] = 1UL } },
 #endif
-#ifdef CONFIG_MOVABLE_NODE
 	[N_MEMORY] = { { [0] = 1UL } },
-#endif
 	[N_CPU] = { { [0] = 1UL } },
 #endif	/* NUMA */
 };
-- 
cgit v1.2.3


From 4932381ee2a77a21641009149722e1bb92bd99e2 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 6 Jul 2017 15:41:05 -0700
Subject: mm, memory_hotplug: move movable_node to the hotplug proper

movable_node_is_enabled is defined in memblock proper while it is
initialized from the memory hotplug proper.  This is quite messy and it
makes a dependency between the two so move movable_node along with the
helper functions to memory_hotplug.

To make it more entertaining the kernel parameter is ignored unless
CONFIG_HAVE_MEMBLOCK_NODE_MAP=y because we do not have the node
information for each memblock otherwise.  So let's warn when the option
is disabled.

Link: http://lkml.kernel.org/r/20170529114141.536-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Kani Toshimitsu <toshi.kani@hpe.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h       |  7 -------
 include/linux/memory_hotplug.h | 10 ++++++++++
 mm/memblock.c                  |  1 -
 mm/memory_hotplug.c            |  6 ++++++
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 1199e605d676..77d427974f57 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -57,8 +57,6 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
-/* If movable_node boot option specified */
-extern bool movable_node_enabled;
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
@@ -172,11 +170,6 @@ static inline bool memblock_is_hotpluggable(struct memblock_region *m)
 	return m->flags & MEMBLOCK_HOTPLUG;
 }
 
-static inline bool __init_memblock movable_node_is_enabled(void)
-{
-	return movable_node_enabled;
-}
-
 static inline bool memblock_is_mirror(struct memblock_region *m)
 {
 	return m->flags & MEMBLOCK_MIRROR;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ed167541e4fc..c8a5056a5ae0 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -115,6 +115,12 @@ extern void __online_page_free(struct page *page);
 extern int try_online_node(int nid);
 
 extern bool memhp_auto_online;
+/* If movable_node boot option specified */
+extern bool movable_node_enabled;
+static inline bool movable_node_is_enabled(void)
+{
+	return movable_node_enabled;
+}
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
@@ -266,6 +272,10 @@ static inline void put_online_mems(void) {}
 static inline void mem_hotplug_begin(void) {}
 static inline void mem_hotplug_done(void) {}
 
+static inline bool movable_node_is_enabled(void)
+{
+	return false;
+}
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/mm/memblock.c b/mm/memblock.c
index 41eaeebb03dc..2cb25fe4452c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -54,7 +54,6 @@ struct memblock memblock __initdata_memblock = {
 };
 
 int memblock_debug __initdata_memblock;
-bool movable_node_enabled __initdata_memblock = false;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0dc8cf0a59d7..f79aac7a12b5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -79,6 +79,8 @@ static struct {
 #define memhp_lock_acquire()      lock_map_acquire(&mem_hotplug.dep_map)
 #define memhp_lock_release()      lock_map_release(&mem_hotplug.dep_map)
 
+bool movable_node_enabled = false;
+
 #ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
 bool memhp_auto_online;
 #else
@@ -1572,7 +1574,11 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 
 static int __init cmdline_parse_movable_node(char *p)
 {
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	movable_node_enabled = true;
+#else
+	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
+#endif
 	return 0;
 }
 early_param("movable_node", cmdline_parse_movable_node);
-- 
cgit v1.2.3


From f77af15165847406b15d8f70c382c4cb15846b2a Mon Sep 17 00:00:00 2001
From: Josh Zimmerman <joshz@google.com>
Date: Sun, 25 Jun 2017 14:53:23 -0700
Subject: Add "shutdown" to "struct class".

The TPM class has some common shutdown code that must be executed for
all drivers. This adds some needed functionality for that.

Signed-off-by: Josh Zimmerman <joshz@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable@vger.kernel.org
Fixes: 74d6b3ceaa17 ("tpm: fix suspend/resume paths for TPM 2.0")
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/base/core.c    | 6 +++++-
 include/linux/device.h | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 8dde934f8d15..755451f684bc 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2664,7 +2664,11 @@ void device_shutdown(void)
 		pm_runtime_get_noresume(dev);
 		pm_runtime_barrier(dev);
 
-		if (dev->bus && dev->bus->shutdown) {
+		if (dev->class && dev->class->shutdown) {
+			if (initcall_debug)
+				dev_info(dev, "shutdown\n");
+			dev->class->shutdown(dev);
+		} else if (dev->bus && dev->bus->shutdown) {
 			if (initcall_debug)
 				dev_info(dev, "shutdown\n");
 			dev->bus->shutdown(dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index 6baa1238f158..723cd54b94da 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -375,6 +375,7 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @suspend:	Used to put the device to sleep mode, usually to a low power
  *		state.
  * @resume:	Used to bring the device from the sleep mode.
+ * @shutdown:	Called at shut-down time to quiesce the device.
  * @ns_type:	Callbacks so sysfs can detemine namespaces.
  * @namespace:	Namespace of the device belongs to this class.
  * @pm:		The default device power management operations of this class.
@@ -403,6 +404,7 @@ struct class {
 
 	int (*suspend)(struct device *dev, pm_message_t state);
 	int (*resume)(struct device *dev);
+	int (*shutdown)(struct device *dev);
 
 	const struct kobj_ns_type_operations *ns_type;
 	const void *(*namespace)(struct device *dev);
-- 
cgit v1.2.3


From 0e4524a5d341e719e8ee9ee7db5d58e2c5a4c10e Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 14:44:28 +0200
Subject: KVM: mark vcpu->pid pointer as rcu protected

We do use rcu to protect the pid pointer. Mark it as such and
adopt all code to use the proper access methods.

This was detected by sparse.
"virt/kvm/kvm_main.c:2248:15: error: incompatible types in comparison
expression (different address spaces)"

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0b50e7b35ed4..bcd37b855c66 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -234,7 +234,7 @@ struct kvm_vcpu {
 
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	struct swait_queue_head wq;
-	struct pid *pid;
+	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
 	struct kvm_vcpu_stat stat;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 19f0ecb9b93e..fc2d58312fd5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -293,7 +293,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-	put_pid(vcpu->pid);
+	/*
+	 * no need for rcu_read_lock as VCPU_RUN is the only place that
+	 * will change the vcpu->pid pointer and on uninit all file
+	 * descriptors are already gone.
+	 */
+	put_pid(rcu_dereference_protected(vcpu->pid, 1));
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
 }
@@ -2551,13 +2556,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (r)
 		return r;
 	switch (ioctl) {
-	case KVM_RUN:
+	case KVM_RUN: {
+		struct pid *oldpid;
 		r = -EINVAL;
 		if (arg)
 			goto out;
-		if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+		oldpid = rcu_access_pointer(vcpu->pid);
+		if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
 			/* The thread running this VCPU changed. */
-			struct pid *oldpid = vcpu->pid;
 			struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 
 			rcu_assign_pointer(vcpu->pid, newpid);
@@ -2568,6 +2574,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
 		break;
+	}
 	case KVM_GET_REGS: {
 		struct kvm_regs *kvm_regs;
 
-- 
cgit v1.2.3


From 3068a254d5519cd5116f61297462da6d1aa84c20 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:00 +0200
Subject: rtc: introduce new registration method

Introduce rtc_register_device() to register an already allocated and
initialized struct rtc_device. It automatically sets up the owner and the
two steps allocation/registration will allow to remove race conditions in
the IRQ handling of some driver. It also allows to properly extend the core
without adding more arguments to rtc_device_register().

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/class.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtc.h |  7 +++++
 2 files changed, 91 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 93ce88e1b1cb..58e2a05765bb 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -150,6 +150,7 @@ static SIMPLE_DEV_PM_OPS(rtc_class_dev_pm_ops, rtc_suspend, rtc_resume);
 #define RTC_CLASS_DEV_PM_OPS	NULL
 #endif
 
+/* Ensure the caller will set the id before releasing the device */
 static struct rtc_device *rtc_allocate_device(void)
 {
 	struct rtc_device *rtc;
@@ -372,6 +373,89 @@ void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc)
 }
 EXPORT_SYMBOL_GPL(devm_rtc_device_unregister);
 
+static void devm_rtc_release_device(struct device *dev, void *res)
+{
+	struct rtc_device *rtc = *(struct rtc_device **)res;
+
+	if (rtc->registered)
+		rtc_device_unregister(rtc);
+	else
+		put_device(&rtc->dev);
+}
+
+struct rtc_device *devm_rtc_allocate_device(struct device *dev)
+{
+	struct rtc_device **ptr, *rtc;
+	int id, err;
+
+	id = rtc_device_get_id(dev);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	ptr = devres_alloc(devm_rtc_release_device, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_ida;
+	}
+
+	rtc = rtc_allocate_device();
+	if (!rtc) {
+		err = -ENOMEM;
+		goto exit_devres;
+	}
+
+	*ptr = rtc;
+	devres_add(dev, ptr);
+
+	rtc->id = id;
+	rtc->dev.parent = dev;
+	dev_set_name(&rtc->dev, "rtc%d", id);
+
+	return rtc;
+
+exit_devres:
+	devres_free(ptr);
+exit_ida:
+	ida_simple_remove(&rtc_ida, id);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devm_rtc_allocate_device);
+
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
+{
+	struct rtc_wkalrm alrm;
+	int err;
+
+	if (!rtc->ops)
+		return -EINVAL;
+
+	rtc->owner = owner;
+
+	/* Check to see if there is an ALARM already set in hw */
+	err = __rtc_read_alarm(rtc, &alrm);
+	if (!err && !rtc_valid_tm(&alrm.time))
+		rtc_initialize_alarm(rtc, &alrm);
+
+	rtc_dev_prepare(rtc);
+
+	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
+	if (err)
+		dev_warn(rtc->dev.parent, "failed to add char device %d:%d\n",
+			 MAJOR(rtc->dev.devt), rtc->id);
+	else
+		dev_dbg(rtc->dev.parent, "char device (%d:%d)\n",
+			MAJOR(rtc->dev.devt), rtc->id);
+
+	rtc_proc_add_device(rtc);
+
+	rtc->registered = true;
+	dev_info(rtc->dev.parent, "registered as %s\n",
+		 dev_name(&rtc->dev));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtc_register_device);
+
 static int __init rtc_init(void)
 {
 	rtc_class = class_create(THIS_MODULE, "rtc");
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index d354f56e0cf5..8e4a5f44f59e 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -142,6 +142,8 @@ struct rtc_device {
 	/* Some hardware can't support UIE mode */
 	int uie_unsupported;
 
+	bool registered;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
@@ -163,6 +165,8 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev,
 					const char *name,
 					const struct rtc_class_ops *ops,
 					struct module *owner);
+struct rtc_device *devm_rtc_allocate_device(struct device *dev);
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
 extern void rtc_device_unregister(struct rtc_device *rtc);
 extern void devm_rtc_device_unregister(struct device *dev,
 					struct rtc_device *rtc);
@@ -218,6 +222,9 @@ static inline bool is_leap_year(unsigned int year)
 	return (!(year % 4) && (year % 100)) || !(year % 400);
 }
 
+#define rtc_register_device(device) \
+	__rtc_register_device(THIS_MODULE, device)
+
 #ifdef CONFIG_RTC_HCTOSYS_DEVICE
 extern int rtc_hctosys_ret;
 #else
-- 
cgit v1.2.3


From 697e5a47aa12cdab6f2a8b284cc923cdf704eafc Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:02 +0200
Subject: rtc: add generic nvmem support

Many RTCs have an on board non volatile storage. It can be battery backed
RAM or an EEPROM. Use the nvmem subsystem to export it to both userspace
and in-kernel consumers.

This stays compatible with the previous (non documented) ABI that was using
/sys/class/rtc/rtcx/device/nvram to export that memory. But will warn about
the deprecation.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 Documentation/rtc.txt  |   2 +
 drivers/rtc/Kconfig    |   8 ++++
 drivers/rtc/Makefile   |   1 +
 drivers/rtc/class.c    |   4 ++
 drivers/rtc/nvmem.c    | 113 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/rtc/rtc-core.h |   8 ++++
 include/linux/rtc.h    |   7 +++
 7 files changed, 143 insertions(+)
 create mode 100644 drivers/rtc/nvmem.c

(limited to 'include/linux')

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 47feb4414b7e..c0c977445fb9 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -164,6 +164,8 @@ offset		 The amount which the rtc clock has been adjusted in firmware.
 		 which are added to or removed from the rtc's base clock per
 		 billion ticks. A positive value makes a day pass more slowly,
 		 longer, and a negative value makes a day pass more quickly.
+*/nvmem		 The non volatile storage exported as a raw file, as described
+		 in Documentation/nvmem/nvmem.txt
 ================ ==============================================================
 
 IOCTL interface
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index d0e4b4a1c2a1..72419ac2c52a 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -77,6 +77,14 @@ config RTC_DEBUG
 	  Say yes here to enable debugging support in the RTC framework
 	  and individual RTC drivers.
 
+config RTC_NVMEM
+	bool "RTC non volatile storage support"
+	select NVMEM
+	default RTC_CLASS
+	help
+	  Say yes here to add support for the non volatile (often battery
+	  backed) storage present on RTCs.
+
 comment "RTC interfaces"
 
 config RTC_INTF_SYSFS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 4050fc8b9271..acd366b41c85 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,6 +15,7 @@ ifdef CONFIG_RTC_DRV_EFI
 rtc-core-y			+= rtc-efi-platform.o
 endif
 
+rtc-core-$(CONFIG_RTC_NVMEM)		+= nvmem.o
 rtc-core-$(CONFIG_RTC_INTF_DEV)		+= rtc-dev.o
 rtc-core-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
 rtc-core-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 58e2a05765bb..2ed970d61da1 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -290,6 +290,8 @@ EXPORT_SYMBOL_GPL(rtc_device_register);
  */
 void rtc_device_unregister(struct rtc_device *rtc)
 {
+	rtc_nvmem_unregister(rtc);
+
 	mutex_lock(&rtc->ops_lock);
 	/*
 	 * Remove innards of this RTC, then disable it, before
@@ -448,6 +450,8 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 
 	rtc_proc_add_device(rtc);
 
+	rtc_nvmem_register(rtc);
+
 	rtc->registered = true;
 	dev_info(rtc->dev.parent, "registered as %s\n",
 		 dev_name(&rtc->dev));
diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c
new file mode 100644
index 000000000000..8567b4ed9ac6
--- /dev/null
+++ b/drivers/rtc/nvmem.c
@@ -0,0 +1,113 @@
+/*
+ * RTC subsystem, nvmem interface
+ *
+ * Copyright (C) 2017 Alexandre Belloni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/rtc.h>
+#include <linux/sysfs.h>
+
+#include "rtc-core.h"
+
+/*
+ * Deprecated ABI compatibility, this should be removed at some point
+ */
+
+static const char nvram_warning[] = "Deprecated ABI, please use nvmem";
+
+static ssize_t
+rtc_nvram_read(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *attr,
+	       char *buf, loff_t off, size_t count)
+{
+	struct rtc_device *rtc = attr->private;
+
+	dev_warn_once(kobj_to_dev(kobj), nvram_warning);
+
+	return nvmem_device_read(rtc->nvmem, off, count, buf);
+}
+
+static ssize_t
+rtc_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct rtc_device *rtc = attr->private;
+
+	dev_warn_once(kobj_to_dev(kobj), nvram_warning);
+
+	return nvmem_device_write(rtc->nvmem, off, count, buf);
+}
+
+static int rtc_nvram_register(struct rtc_device *rtc)
+{
+	int err;
+
+	rtc->nvram = devm_kzalloc(rtc->dev.parent,
+				sizeof(struct bin_attribute),
+				GFP_KERNEL);
+	if (!rtc->nvram)
+		return -ENOMEM;
+
+	rtc->nvram->attr.name = "nvram";
+	rtc->nvram->attr.mode = 0644;
+	rtc->nvram->private = rtc;
+
+	sysfs_bin_attr_init(rtc->nvram);
+
+	rtc->nvram->read = rtc_nvram_read;
+	rtc->nvram->write = rtc_nvram_write;
+	rtc->nvram->size = rtc->nvmem_config->size;
+
+	err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
+				    rtc->nvram);
+	if (err) {
+		devm_kfree(rtc->dev.parent, rtc->nvram);
+		rtc->nvram = NULL;
+	}
+
+	return err;
+}
+
+static void rtc_nvram_unregister(struct rtc_device *rtc)
+{
+	sysfs_remove_bin_file(&rtc->dev.parent->kobj, rtc->nvram);
+}
+
+/*
+ * New ABI, uses nvmem
+ */
+void rtc_nvmem_register(struct rtc_device *rtc)
+{
+	if (!rtc->nvmem_config)
+		return;
+
+	rtc->nvmem_config->dev = &rtc->dev;
+	rtc->nvmem_config->owner = rtc->owner;
+	rtc->nvmem = nvmem_register(rtc->nvmem_config);
+	if (IS_ERR_OR_NULL(rtc->nvmem))
+		return;
+
+	/* Register the old ABI */
+	if (rtc->nvram_old_abi)
+		rtc_nvram_register(rtc);
+}
+
+void rtc_nvmem_unregister(struct rtc_device *rtc)
+{
+	if (IS_ERR_OR_NULL(rtc->nvmem))
+		return;
+
+	/* unregister the old ABI */
+	if (rtc->nvram)
+		rtc_nvram_unregister(rtc);
+
+	nvmem_unregister(rtc->nvmem);
+}
diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index 7a4ed2f7c7d7..ecab76a3207c 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h
@@ -45,3 +45,11 @@ static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 	return NULL;
 }
 #endif
+
+#ifdef CONFIG_RTC_NVMEM
+void rtc_nvmem_register(struct rtc_device *rtc);
+void rtc_nvmem_unregister(struct rtc_device *rtc);
+#else
+static inline void rtc_nvmem_register(struct rtc_device *rtc) {}
+static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
+#endif
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8e4a5f44f59e..d53ecdc060cf 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/nvmem-provider.h>
 #include <uapi/linux/rtc.h>
 
 extern int rtc_month_days(unsigned int month, unsigned int year);
@@ -144,6 +145,12 @@ struct rtc_device {
 
 	bool registered;
 
+	struct nvmem_config *nvmem_config;
+	struct nvmem_device *nvmem;
+	/* Old ABI support */
+	bool nvram_old_abi;
+	struct bin_attribute *nvram;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
-- 
cgit v1.2.3


From 4a12f95177280a660bda99e81838919b1cc6a91a Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 7 Jul 2017 10:51:38 +0200
Subject: KVM: mark kvm->busses as rcu protected

mark kvm->busses as rcu protected and use the correct access
function everywhere.

found by sparse
virt/kvm/kvm_main.c:3490:15: error: incompatible types in comparison expression (different address spaces)
virt/kvm/kvm_main.c:3509:15: error: incompatible types in comparison expression (different address spaces)
virt/kvm/kvm_main.c:3561:15: error: incompatible types in comparison expression (different address spaces)
virt/kvm/kvm_main.c:3644:15: error: incompatible types in comparison expression (different address spaces)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/kvm_host.h |  8 +++++++-
 virt/kvm/eventfd.c       |  8 +++++---
 virt/kvm/kvm_main.c      | 17 ++++++++++-------
 3 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bcd37b855c66..6a164f9eb02c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -404,7 +404,7 @@ struct kvm {
 	int last_boosted_vcpu;
 	struct list_head vm_list;
 	struct mutex lock;
-	struct kvm_io_bus *buses[KVM_NR_BUSES];
+	struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 	struct {
 		spinlock_t        lock;
@@ -473,6 +473,12 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
+{
+	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
+				      lockdep_is_held(&kvm->slots_lock));
+}
+
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
 	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a8d540398bbd..d016aadd5fbb 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
 	if (ret < 0)
 		goto unlock_fail;
 
-	kvm->buses[bus_idx]->ioeventfd_count++;
+	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
 	mutex_unlock(&kvm->slots_lock);
@@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 {
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
+	struct kvm_io_bus	 *bus;
 	int                       ret = -ENOENT;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
@@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-		if (kvm->buses[bus_idx])
-			kvm->buses[bus_idx]->ioeventfd_count--;
+		bus = kvm_get_bus(kvm, bus_idx);
+		if (bus)
+			bus->ioeventfd_count--;
 		ioeventfd_release(p);
 		ret = 0;
 		break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc2d58312fd5..d76e822f8929 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -679,8 +679,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (init_srcu_struct(&kvm->irq_srcu))
 		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
-					GFP_KERNEL);
+		rcu_assign_pointer(kvm->buses[i],
+			kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
 		if (!kvm->buses[i])
 			goto out_err;
 	}
@@ -705,7 +705,7 @@ out_err_no_srcu:
 	hardware_disable_all();
 out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
-		kfree(kvm->buses[i]);
+		kfree(rcu_access_pointer(kvm->buses[i]));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 		kvm_free_memslots(kvm, kvm->memslots[i]);
 	kvm_arch_free_vm(kvm);
@@ -740,8 +740,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		if (kvm->buses[i])
-			kvm_io_bus_destroy(kvm->buses[i]);
+		struct kvm_io_bus *bus;
+
+		bus = rcu_dereference_protected(kvm->buses[i], 1);
+		if (bus)
+			kvm_io_bus_destroy(bus);
 		kvm->buses[i] = NULL;
 	}
 	kvm_coalesced_mmio_free(kvm);
@@ -3570,7 +3573,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 {
 	struct kvm_io_bus *new_bus, *bus;
 
-	bus = kvm->buses[bus_idx];
+	bus = kvm_get_bus(kvm, bus_idx);
 	if (!bus)
 		return -ENOMEM;
 
@@ -3599,7 +3602,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	int i;
 	struct kvm_io_bus *new_bus, *bus;
 
-	bus = kvm->buses[bus_idx];
+	bus = kvm_get_bus(kvm, bus_idx);
 	if (!bus)
 		return;
 
-- 
cgit v1.2.3


From a80cf7b5f4149753d5f19c872a47e66195b167d4 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 16:17:14 +0200
Subject: KVM: mark memory slots as rcu

we access the memslots array via srcu. Mark it as such and
use the right access functions also for the freeing of
memory slots.

Found by sparse:
./include/linux/kvm_host.h:565:16: error: incompatible types in
comparison expression (different address spaces)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 virt/kvm/kvm_main.c      | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6a164f9eb02c..b3ca77a96b2d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -390,7 +390,7 @@ struct kvm {
 	spinlock_t mmu_lock;
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
-	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
+	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
 	/*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d76e822f8929..6e6d4edf0e92 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -707,7 +707,8 @@ out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(rcu_access_pointer(kvm->buses[i]));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm, kvm->memslots[i]);
+		kvm_free_memslots(kvm,
+			rcu_dereference_protected(kvm->memslots[i], 1));
 	kvm_arch_free_vm(kvm);
 	mmdrop(current->mm);
 	return ERR_PTR(r);
@@ -756,7 +757,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm, kvm->memslots[i]);
+		kvm_free_memslots(kvm,
+			rcu_dereference_protected(kvm->memslots[i], 1));
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
-- 
cgit v1.2.3


From dcbbd97ccb9c6f4dad39875c1404d2643eaf110b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:44:59 +0200
Subject: libceph: remove ceph_sanitize_features() workaround

Reflects ceph.git commit ff1959282826ae6acd7134e1b1ede74ffd1cc04a.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 21 ---------------------
 net/ceph/messenger.c               |  3 +--
 2 files changed, 1 insertion(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index fd8b2953c78f..4962708841b5 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -77,29 +77,8 @@
 #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 #define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
 
-/*
- * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
- * vector to evaluate to 64 bit ~0.  To cope, we designate 1ULL << 63
- * to mean 33 bit ~0, and introduce a helper below to do the
- * translation.
- *
- * This was introduced by ceph.git commit
- *   9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
- * and fixed by ceph.git commit
- *   4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
- */
 #define CEPH_FEATURE_RESERVED (1ULL<<63)
 
-static inline u64 ceph_sanitize_features(u64 features)
-{
-	if (features & CEPH_FEATURE_RESERVED) {
-		/* everything through OSD_SNAPMAPPER */
-		return 0x1ffffffffull;
-	} else {
-		return features;
-	}
-}
-
 /*
  * Features supported.
  */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 588a91930051..9daed2540639 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2033,8 +2033,7 @@ static int process_connect(struct ceph_connection *con)
 {
 	u64 sup_feat = from_msgr(con->msgr)->supported_features;
 	u64 req_feat = from_msgr(con->msgr)->required_features;
-	u64 server_feat = ceph_sanitize_features(
-				le64_to_cpu(con->in_reply.features));
+	u64 server_feat = le64_to_cpu(con->in_reply.features);
 	int ret;
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
-- 
cgit v1.2.3


From f179d3ba8cb9073c2d96315b79ff7bc658a1feee Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:44:59 +0200
Subject: libceph: new features macros

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 242 +++++++++++++++++++++++++------------
 1 file changed, 167 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 4962708841b5..7fce9ed44af0 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -2,82 +2,174 @@
 #define __CEPH_FEATURES
 
 /*
- * feature bits
+ * Each time we reclaim bits for reuse we need to specify another bit
+ * that, if present, indicates we have the new incarnation of that
+ * feature.  Base case is 1 (first use).
  */
-#define CEPH_FEATURE_UID            (1ULL<<0)
-#define CEPH_FEATURE_NOSRCADDR      (1ULL<<1)
-#define CEPH_FEATURE_MONCLOCKCHECK  (1ULL<<2)
-#define CEPH_FEATURE_FLOCK          (1ULL<<3)
-#define CEPH_FEATURE_SUBSCRIBE2     (1ULL<<4)
-#define CEPH_FEATURE_MONNAMES       (1ULL<<5)
-#define CEPH_FEATURE_RECONNECT_SEQ  (1ULL<<6)
-#define CEPH_FEATURE_DIRLAYOUTHASH  (1ULL<<7)
-#define CEPH_FEATURE_OBJECTLOCATOR  (1ULL<<8)
-#define CEPH_FEATURE_PGID64         (1ULL<<9)
-#define CEPH_FEATURE_INCSUBOSDMAP   (1ULL<<10)
-#define CEPH_FEATURE_PGPOOL3        (1ULL<<11)
-#define CEPH_FEATURE_OSDREPLYMUX    (1ULL<<12)
-#define CEPH_FEATURE_OSDENC         (1ULL<<13)
-#define CEPH_FEATURE_OMAP           (1ULL<<14)
-#define CEPH_FEATURE_MONENC         (1ULL<<15)
-#define CEPH_FEATURE_QUERY_T        (1ULL<<16)
-#define CEPH_FEATURE_INDEP_PG_MAP   (1ULL<<17)
-#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
-#define CEPH_FEATURE_CHUNKY_SCRUB   (1ULL<<19)
-#define CEPH_FEATURE_MON_NULLROUTE  (1ULL<<20)
-#define CEPH_FEATURE_MON_GV         (1ULL<<21)
-#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
-#define CEPH_FEATURE_MSG_AUTH	    (1ULL<<23)
-#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
-#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
-#define CEPH_FEATURE_CREATEPOOLID   (1ULL<<26)
-#define CEPH_FEATURE_REPLY_CREATE_INODE   (1ULL<<27)
-#define CEPH_FEATURE_OSD_HBMSGS     (1ULL<<28)
-#define CEPH_FEATURE_MDSENC         (1ULL<<29)
-#define CEPH_FEATURE_OSDHASHPSPOOL  (1ULL<<30)
-#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
-#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
-#define CEPH_FEATURE_MON_SCRUB      (1ULL<<33)
-#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
-#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
-#define CEPH_FEATURE_CRUSH_V2      (1ULL<<36)  /* new indep; SET_* steps */
-#define CEPH_FEATURE_EXPORT_PEER   (1ULL<<37)
-#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
-#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38)   /* overlap with EC */
-/* The process supports new-style OSDMap encoding. Monitors also use
-   this bit to determine if peers support NAK messages. */
-#define CEPH_FEATURE_OSDMAP_ENC    (1ULL<<39)
-#define CEPH_FEATURE_MDS_INLINE_DATA     (1ULL<<40)
-#define CEPH_FEATURE_CRUSH_TUNABLES3     (1ULL<<41)
-#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41)  /* overlap w/ tunables3 */
-#define CEPH_FEATURE_MSGR_KEEPALIVE2   (1ULL<<42)
-#define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
-#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
-#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
-#define CEPH_FEATURE_OSD_REPOP         (1ULL<<46)   /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_OBJECT_DIGEST  (1ULL<<46)  /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
-#define CEPH_FEATURE_MDS_QUOTA      (1ULL<<47)
-#define CEPH_FEATURE_CRUSH_V4      (1ULL<<48)  /* straw2 buckets */
-#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
-// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
-#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
-#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
-#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
-#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
-#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
-#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
-#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
-#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
-#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
-#define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
-// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
-#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
-#define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
-
-#define CEPH_FEATURE_RESERVED (1ULL<<63)
+#define CEPH_FEATURE_INCARNATION_1 (0ull)
+#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
+
+#define DEFINE_CEPH_FEATURE(bit, incarnation, name)			\
+	const static uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
+	const static uint64_t CEPH_FEATUREMASK_##name =			\
+		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
+
+/* this bit is ignored but still advertised by release *when* */
+#define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
+	const static uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
+	const static uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
+		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
+
+/*
+ * this bit is ignored by release *unused* and not advertised by
+ * release *unadvertised*
+ */
+#define DEFINE_CEPH_FEATURE_RETIRED(bit, inc, name, unused, unadvertised)
+
+
+/*
+ * test for a feature.  this test is safer than a typical mask against
+ * the bit because it ensures that we have the bit AND the marker for the
+ * bit's incarnation.  this must be used in any case where the features
+ * bits may include an old meaning of the bit.
+ */
+#define CEPH_HAVE_FEATURE(x, name)			\
+	(((x) & (CEPH_FEATUREMASK_##name)) == (CEPH_FEATUREMASK_##name))
+
+
+/*
+ * Notes on deprecation:
+ *
+ * A *major* release is a release through which all upgrades must pass
+ * (e.g., jewel).  For example, no pre-jewel server will ever talk to
+ * a post-jewel server (mon, osd, etc).
+ *
+ * For feature bits used *only* on the server-side:
+ *
+ *  - In the first phase we indicate that a feature is DEPRECATED as of
+ *    a particular release.  This is the first major release X (say,
+ *    jewel) that does not depend on its peers advertising the feature.
+ *    That is, it safely assumes its peers all have the feature.  We
+ *    indicate this with the DEPRECATED macro.  For example,
+ *
+ *      DEFINE_CEPH_FEATURE_DEPRECATED( 2, 1, MONCLOCKCHECK, JEWEL)
+ *
+ *    because 10.2.z (jewel) did not care if its peers advertised this
+ *    feature bit.
+ *
+ *  - In the second phase we stop advertising the the bit and call it
+ *    RETIRED.  This can normally be done in the *next* major release
+ *    following the one in which we marked the feature DEPRECATED.  In
+ *    the above example, for 12.0.z (luminous) we can say:
+ *
+ *      DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+ *
+ *  - The bit can be reused in the first post-luminous release, 13.0.z
+ *    (m).
+ *
+ * This ensures that no two versions who have different meanings for
+ * the bit ever speak to each other.
+ */
+
+DEFINE_CEPH_FEATURE( 0, 1, UID)
+DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
+DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
+DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
+DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
+DEFINE_CEPH_FEATURE( 6, 1, RECONNECT_SEQ)
+DEFINE_CEPH_FEATURE( 7, 1, DIRLAYOUTHASH)
+DEFINE_CEPH_FEATURE( 8, 1, OBJECTLOCATOR)
+DEFINE_CEPH_FEATURE( 9, 1, PGID64)
+DEFINE_CEPH_FEATURE(10, 1, INCSUBOSDMAP)
+DEFINE_CEPH_FEATURE(11, 1, PGPOOL3)
+DEFINE_CEPH_FEATURE(12, 1, OSDREPLYMUX)
+DEFINE_CEPH_FEATURE(13, 1, OSDENC)
+DEFINE_CEPH_FEATURE_RETIRED(14, 1, OMAP, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(14, 2, SERVER_KRAKEN)
+DEFINE_CEPH_FEATURE(15, 1, MONENC)
+DEFINE_CEPH_FEATURE_RETIRED(16, 1, QUERY_T, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(17, 1, INDEP_PG_MAP, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(18, 1, CRUSH_TUNABLES)
+DEFINE_CEPH_FEATURE_RETIRED(19, 1, CHUNKY_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(20, 1, MON_NULLROUTE, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(21, 1, MON_GV, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(21, 2, SERVER_LUMINOUS)
+DEFINE_CEPH_FEATURE(21, 2, RESEND_ON_SPLIT)  // overlap
+DEFINE_CEPH_FEATURE(21, 2, RADOS_BACKOFF)    // overlap
+DEFINE_CEPH_FEATURE(21, 2, OSDMAP_PG_UPMAP)  // overlap
+DEFINE_CEPH_FEATURE(21, 2, CRUSH_CHOOSE_ARGS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(22, 1, BACKFILL_RESERVATION, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(23, 1, MSG_AUTH)
+DEFINE_CEPH_FEATURE_RETIRED(24, 1, RECOVERY_RESERVATION, JEWEL, LUNINOUS)
+
+DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
+DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
+DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
+DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
+DEFINE_CEPH_FEATURE(29, 1, MDSENC)
+DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
+DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS)  // deprecate me
+DEFINE_CEPH_FEATURE_RETIRED(32, 1, OSD_SNAPMAPPER, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(33, 1, MON_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(34, 1, OSD_PACKED_RECOVERY, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(35, 1, OSD_CACHEPOOL)
+DEFINE_CEPH_FEATURE(36, 1, CRUSH_V2)
+DEFINE_CEPH_FEATURE(37, 1, EXPORT_PEER)
+DEFINE_CEPH_FEATURE(38, 1, OSD_ERASURE_CODES)
+DEFINE_CEPH_FEATURE(38, 1, OSD_OSD_TMAP2OMAP) // overlap
+DEFINE_CEPH_FEATURE(39, 1, OSDMAP_ENC)
+DEFINE_CEPH_FEATURE(40, 1, MDS_INLINE_DATA)
+DEFINE_CEPH_FEATURE(41, 1, CRUSH_TUNABLES3)
+DEFINE_CEPH_FEATURE(41, 1, OSD_PRIMARY_AFFINITY) // overlap
+DEFINE_CEPH_FEATURE(42, 1, MSGR_KEEPALIVE2)
+DEFINE_CEPH_FEATURE(43, 1, OSD_POOLRESEND)
+DEFINE_CEPH_FEATURE(44, 1, ERASURE_CODE_PLUGINS_V2)
+DEFINE_CEPH_FEATURE_RETIRED(45, 1, OSD_SET_ALLOC_HINT, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(46, 1, OSD_FADVISE_FLAGS)
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_REPOP, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_OBJECT_DIGEST, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_TRANSACTION_MAY_LAYOUT, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(47, 1, MDS_QUOTA)
+DEFINE_CEPH_FEATURE(48, 1, CRUSH_V4)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_MIN_SIZE_RECOVERY, JEWEL, LUMINOUS)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(50, 1, MON_METADATA)
+DEFINE_CEPH_FEATURE(51, 1, OSD_BITWISE_HOBJ_SORT)
+DEFINE_CEPH_FEATURE(52, 1, OSD_PROXY_WRITE_FEATURES)
+DEFINE_CEPH_FEATURE(53, 1, ERASURE_CODE_PLUGINS_V3)
+DEFINE_CEPH_FEATURE(54, 1, OSD_HITSET_GMT)
+DEFINE_CEPH_FEATURE(55, 1, HAMMER_0_94_4)
+DEFINE_CEPH_FEATURE(56, 1, NEW_OSDOP_ENCODING)
+DEFINE_CEPH_FEATURE(57, 1, MON_STATEFUL_SUB)
+DEFINE_CEPH_FEATURE(57, 1, MON_ROUTE_OSDMAP) // overlap
+DEFINE_CEPH_FEATURE(57, 1, OSDSUBOP_NO_SNAPCONTEXT) // overlap
+DEFINE_CEPH_FEATURE(57, 1, SERVER_JEWEL) // overlap
+DEFINE_CEPH_FEATURE(58, 1, CRUSH_TUNABLES5)
+DEFINE_CEPH_FEATURE(58, 1, NEW_OSDOPREPLY_ENCODING) // overlap
+DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
+DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
+DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
+DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
+DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)  // *do not share this bit*
+
+DEFINE_CEPH_FEATURE(61, 1, RESERVED2)          // unused, but slow down!
+DEFINE_CEPH_FEATURE(62, 1, RESERVED)           // do not use; used as a sentinal
+DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
+
 
 /*
  * Features supported.
-- 
cgit v1.2.3


From 2d7522e0bda17eb3c7af0ffe74f03dd6c3cca443 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: libceph: advertise support for OSD_POOLRESEND

The code has been in place since commit 63244fa123a7 ("libceph:
introduce ceph_osd_request_target, calc_target()"), and, with the
ceph_{oloc,oid}_copy() issue fixed in the previous commit, is now
in working order.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 7fce9ed44af0..89c68af48539 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -197,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
+	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
-- 
cgit v1.2.3


From 220abf5aa7ba5f544f1b589bde33761c60bbf9a0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: libceph: support SERVER_JEWEL feature bits

Only MON_STATEFUL_SUB, really.  MON_ROUTE_OSDMAP and
OSDSUBOP_NO_SNAPCONTEXT are irrelevant.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 2 ++
 net/ceph/mon_client.c              | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 89c68af48539..78a58770e6e9 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -199,6 +199,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_SERVER_JEWEL |		\
+	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 250f11f78609..875675765531 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -6,6 +6,7 @@
 #include <linux/random.h>
 #include <linux/sched.h>
 
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/debugfs.h>
@@ -297,6 +298,10 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
 
 	mutex_lock(&monc->mutex);
 	if (monc->sub_renew_sent) {
+		/*
+		 * This is only needed for legacy (infernalis or older)
+		 * MONs -- see delayed_work().
+		 */
 		monc->sub_renew_after = monc->sub_renew_sent +
 					    (seconds >> 1) * HZ - 1;
 		dout("%s sent %lu duration %d renew after %lu\n", __func__,
@@ -955,7 +960,8 @@ static void delayed_work(struct work_struct *work)
 			__validate_auth(monc);
 		}
 
-		if (is_auth) {
+		if (is_auth &&
+		    !(monc->con.peer_features & CEPH_FEATURE_MON_STATEFUL_SUB)) {
 			unsigned long now = jiffies;
 
 			dout("%s renew subs? now %lu renew after %lu\n",
-- 
cgit v1.2.3


From dc93e0e2831de2f80817b89aae4864b88332fcce Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: libceph: fold [l]req->last_force_resend into ceph_osd_request_target

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  4 ++--
 net/ceph/osd_client.c           | 21 ++++++++++-----------
 2 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 85650b415e73..ef630ebd1169 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -148,6 +148,8 @@ struct ceph_osd_request_target {
 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
 	bool paused;
 
+	u32 last_force_resend;
+
 	int osd;
 };
 
@@ -193,7 +195,6 @@ struct ceph_osd_request {
 	unsigned long r_stamp;                /* jiffies, send or check time */
 	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
-	u32 r_last_force_resend;
 	u32 r_map_dne_bound;
 
 	struct ceph_osd_req_op r_ops[];
@@ -221,7 +222,6 @@ struct ceph_osd_linger_request {
 	struct list_head pending_lworks;
 
 	struct ceph_osd_request_target t;
-	u32 last_force_resend;
 	u32 map_dne_bound;
 
 	struct timespec mtime;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 924f07c36ddb..aca6319b99b6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -384,6 +384,8 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	dest->flags = src->flags;
 	dest->paused = src->paused;
 
+	dest->last_force_resend = src->last_force_resend;
+
 	dest->osd = src->osd;
 }
 
@@ -1311,7 +1313,6 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 					   struct ceph_osd_request_target *t,
-					   u32 *last_force_resend,
 					   bool any_change)
 {
 	struct ceph_pg_pool_info *pi;
@@ -1332,11 +1333,10 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	}
 
 	if (osdc->osdmap->epoch == pi->last_force_request_resend) {
-		if (last_force_resend &&
-		    *last_force_resend < pi->last_force_request_resend) {
-			*last_force_resend = pi->last_force_request_resend;
+		if (t->last_force_resend < pi->last_force_request_resend) {
+			t->last_force_resend = pi->last_force_request_resend;
 			force_resend = true;
-		} else if (!last_force_resend) {
+		} else if (t->last_force_resend == 0) {
 			force_resend = true;
 		}
 	}
@@ -1645,7 +1645,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-	ct_res = calc_target(osdc, &req->r_t, &req->r_last_force_resend, false);
+	ct_res = calc_target(osdc, &req->r_t, false);
 	if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
 		goto promote;
 
@@ -2441,7 +2441,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	struct ceph_osd *osd;
 
-	calc_target(osdc, &lreq->t, &lreq->last_force_resend, false);
+	calc_target(osdc, &lreq->t, false);
 	osd = lookup_create_osd(osdc, lreq->t.osd, true);
 	link_linger(osd, lreq);
 
@@ -3059,7 +3059,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	enum calc_target_result ct_res;
 
-	ct_res = calc_target(osdc, &lreq->t, &lreq->last_force_resend, true);
+	ct_res = calc_target(osdc, &lreq->t, true);
 	if (ct_res == CALC_TARGET_NEED_RESEND) {
 		struct ceph_osd *osd;
 
@@ -3130,8 +3130,7 @@ static void scan_requests(struct ceph_osd *osd,
 		n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
 		dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-		ct_res = calc_target(osdc, &req->r_t,
-				     &req->r_last_force_resend, false);
+		ct_res = calc_target(osdc, &req->r_t, false);
 		switch (ct_res) {
 		case CALC_TARGET_NO_ACTION:
 			force_resend_writes = cleared_full ||
@@ -3240,7 +3239,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		erase_request(need_resend, req); /* before link_request() */
 
 		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, NULL, false);
+		calc_target(osdc, &req->r_t, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
-- 
cgit v1.2.3


From dc98ff7230e5ccf11c621dff0d590e83574a7184 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:53 +0200
Subject: libceph: introduce ceph_spg, ceph_pg_to_primary_shard()

Store both raw pgid and actual spgid in ceph_osd_request_target.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  3 ++-
 include/linux/ceph/osdmap.h     | 10 ++++++++++
 net/ceph/debugfs.c              | 11 ++++++++++-
 net/ceph/osd_client.c           |  7 +++++--
 net/ceph/osdmap.c               | 33 +++++++++++++++++++++++++++++++++
 5 files changed, 60 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index ef630ebd1169..6114f7b02135 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -136,7 +136,8 @@ struct ceph_osd_request_target {
 	struct ceph_object_id target_oid;
 	struct ceph_object_locator target_oloc;
 
-	struct ceph_pg pgid;
+	struct ceph_pg pgid;               /* last raw pg we mapped to */
+	struct ceph_spg spgid;             /* last actual spg we mapped to */
 	u32 pg_num;
 	u32 pg_num_mask;
 	struct ceph_osds acting;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 938656f70807..7ae5b416b4b6 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -24,6 +24,13 @@ struct ceph_pg {
 	uint32_t seed;
 };
 
+#define CEPH_SPG_NOSHARD	-1
+
+struct ceph_spg {
+	struct ceph_pg pgid;
+	s8 shard;
+};
+
 int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
 
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
@@ -271,6 +278,9 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting);
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      const struct ceph_pg *raw_pgid,
+			      struct ceph_spg *spgid);
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
 			      const struct ceph_pg *raw_pgid);
 
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 71ba13927b3d..50ab1bdb16e2 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -147,11 +147,20 @@ static int monc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+static void dump_spgid(struct seq_file *s, const struct ceph_spg *spgid)
+{
+	seq_printf(s, "%llu.%x", spgid->pgid.pool, spgid->pgid.seed);
+	if (spgid->shard != CEPH_SPG_NOSHARD)
+		seq_printf(s, "s%d", spgid->shard);
+}
+
 static void dump_target(struct seq_file *s, struct ceph_osd_request_target *t)
 {
 	int i;
 
-	seq_printf(s, "osd%d\t%llu.%x\t[", t->osd, t->pgid.pool, t->pgid.seed);
+	seq_printf(s, "osd%d\t%llu.%x\t", t->osd, t->pgid.pool, t->pgid.seed);
+	dump_spgid(s, &t->spgid);
+	seq_puts(s, "\t[");
 	for (i = 0; i < t->up.size; i++)
 		seq_printf(s, "%s%d", (!i ? "" : ","), t->up.osds[i]);
 	seq_printf(s, "]/%d\t[", t->up.primary);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index aca6319b99b6..66509414f4c8 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -373,6 +373,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	ceph_oloc_copy(&dest->target_oloc, &src->target_oloc);
 
 	dest->pgid = src->pgid; /* struct */
+	dest->spgid = src->spgid; /* struct */
 	dest->pg_num = src->pg_num;
 	dest->pg_num_mask = src->pg_num_mask;
 	ceph_osds_copy(&dest->acting, &src->acting);
@@ -1394,6 +1395,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	    ceph_osds_changed(&t->acting, &acting, any_change) ||
 	    force_resend) {
 		t->pgid = pgid; /* struct */
+		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
 		ceph_osds_copy(&t->up, &up);
 		t->size = pi->size;
@@ -1595,9 +1597,10 @@ static void send_request(struct ceph_osd_request *req)
 
 	encode_request(req, req->r_request);
 
-	dout("%s req %p tid %llu to pg %llu.%x osd%d flags 0x%x attempt %d\n",
+	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
-	     req->r_t.osd, req->r_flags, req->r_attempts);
+	     req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed,
+	     req->r_t.spgid.shard, osd->o_osd, req->r_flags, req->r_attempts);
 
 	req->r_t.paused = false;
 	req->r_stamp = jiffies;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 710ee3dc01b9..a4155620eace 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2265,6 +2265,39 @@ out:
 	WARN_ON(!osds_valid(up) || !osds_valid(acting));
 }
 
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      const struct ceph_pg *raw_pgid,
+			      struct ceph_spg *spgid)
+{
+	struct ceph_pg_pool_info *pi;
+	struct ceph_pg pgid;
+	struct ceph_osds up, acting;
+	int i;
+
+	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
+	if (!pi)
+		return false;
+
+	raw_pg_to_pg(pi, raw_pgid, &pgid);
+
+	if (ceph_can_shift_osds(pi)) {
+		spgid->pgid = pgid; /* struct */
+		spgid->shard = CEPH_SPG_NOSHARD;
+		return true;
+	}
+
+	ceph_pg_to_up_acting_osds(osdmap, &pgid, &up, &acting);
+	for (i = 0; i < acting.size; i++) {
+		if (acting.osds[i] == acting.primary) {
+			spgid->pgid = pgid; /* struct */
+			spgid->shard = i;
+			return true;
+		}
+	}
+
+	return false;
+}
+
 /*
  * Return acting primary for given PG, or -1 if none.
  */
-- 
cgit v1.2.3


From 98ad5ebd1505eb903ae8bc27e94c1ab0d1c3e651 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: libceph: ceph_connection_operations::reencode_message() method

Give upper layers a chance to reencode the message after the connection
is negotiated and ->peer_features is set.  OSD client will use this to
support both luminous and pre-luminous OSDs (in a single cluster): the
former need MOSDOp v8; the latter will continue to be sent MOSDOp v4.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/messenger.h | 2 ++
 net/ceph/messenger.c           | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c5c4c713e00f..fbd94d9fa5dd 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -44,6 +44,8 @@ struct ceph_connection_operations {
 					struct ceph_msg_header *hdr,
 					int *skip);
 
+	void (*reencode_message) (struct ceph_msg *msg);
+
 	int (*sign_message) (struct ceph_msg *msg);
 	int (*check_message_signature) (struct ceph_msg *msg);
 };
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9daed2540639..0c31035bbfee 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1288,13 +1288,16 @@ static void prepare_write_message(struct ceph_connection *con)
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
 	}
-	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
+
+	if (con->ops->reencode_message)
+		con->ops->reencode_message(m);
 
 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
 	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
 	     m->data_length);
-	BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+	WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len));
+	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
 
 	/* tag + hdr + front + middle */
 	con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
-- 
cgit v1.2.3


From 8cb441c0545dfd4dafeedc1e2d7157e1072413ac Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: libceph: MOSDOp v8 encoding (actual spgid + full hash)

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  17 +++++
 include/linux/ceph/osdmap.h     |   4 +-
 net/ceph/osd_client.c           | 153 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 154 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 6114f7b02135..bca2718ac253 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -205,6 +205,23 @@ struct ceph_request_redirect {
 	struct ceph_object_locator oloc;
 };
 
+/*
+ * osd request identifier
+ *
+ * caller name + incarnation# + tid to unique identify this request
+ */
+struct ceph_osd_reqid {
+	struct ceph_entity_name name;
+	__le64 tid;
+	__le32 inc;
+} __packed;
+
+struct ceph_blkin_trace_info {
+	__le64 trace_id;
+	__le64 span_id;
+	__le64 parent_span_id;
+} __packed;
+
 typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
 				 u64 notifier_id, void *data, size_t data_len);
 typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 7ae5b416b4b6..66447fc7f334 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -205,11 +205,13 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
 	return &map->osd_addr[osd];
 }
 
+#define CEPH_PGID_ENCODING_LEN		(1 + 8 + 4 + 4)
+
 static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
 {
 	__u8 version;
 
-	if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+	if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) {
 		pr_warn("incomplete pg encoding\n");
 		return -EINVAL;
 	}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b0298c3681b6..eaaf17e7c37b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -12,6 +12,7 @@
 #include <linux/bio.h>
 #endif
 
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/messenger.h>
@@ -555,17 +556,21 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
 	WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
 
 	/* create request message */
-	msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
-	msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
+	msg_size = CEPH_ENCODING_START_BLK_LEN +
+			CEPH_PGID_ENCODING_LEN + 1; /* spgid */
+	msg_size += 4 + 4 + 4; /* hash, osdmap_epoch, flags */
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			sizeof(struct ceph_osd_reqid); /* reqid */
+	msg_size += sizeof(struct ceph_blkin_trace_info); /* trace */
+	msg_size += 4 + sizeof(struct ceph_timespec); /* client_inc, mtime */
 	msg_size += CEPH_ENCODING_START_BLK_LEN +
 			ceph_oloc_encoding_size(&req->r_base_oloc); /* oloc */
-	msg_size += 1 + 8 + 4 + 4; /* pgid */
 	msg_size += 4 + req->r_base_oid.name_len; /* oid */
 	msg_size += 2 + req->r_num_ops * sizeof(struct ceph_osd_op);
 	msg_size += 8; /* snapid */
 	msg_size += 8; /* snap_seq */
 	msg_size += 4 + 8 * (req->r_snapc ? req->r_snapc->num_snaps : 0);
-	msg_size += 4; /* retry_attempt */
+	msg_size += 4 + 8; /* retry_attempt, features */
 
 	if (req->r_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -1493,6 +1498,13 @@ static void encode_pgid(void **p, const struct ceph_pg *pgid)
 	ceph_encode_32(p, -1); /* preferred */
 }
 
+static void encode_spgid(void **p, const struct ceph_spg *spgid)
+{
+	ceph_start_encoding(p, 1, 1, CEPH_PGID_ENCODING_LEN + 1);
+	encode_pgid(p, &spgid->pgid);
+	ceph_encode_8(p, spgid->shard);
+}
+
 static void encode_oloc(void **p, void *end,
 			const struct ceph_object_locator *oloc)
 {
@@ -1507,7 +1519,8 @@ static void encode_oloc(void **p, void *end,
 		ceph_encode_32(p, 0);
 }
 
-static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
+static void encode_request_partial(struct ceph_osd_request *req,
+				   struct ceph_msg *msg)
 {
 	void *p = msg->front.iov_base;
 	void *const end = p + msg->front_alloc_len;
@@ -1524,18 +1537,25 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 
 	setup_request_data(req, msg);
 
-	ceph_encode_32(&p, 1); /* client_inc, always 1 */
+	encode_spgid(&p, &req->r_t.spgid); /* actual spg */
+	ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */
 	ceph_encode_32(&p, req->r_osdc->osdmap->epoch);
 	ceph_encode_32(&p, req->r_flags);
+
+	/* reqid */
+	ceph_start_encoding(&p, 2, 2, sizeof(struct ceph_osd_reqid));
+	memset(p, 0, sizeof(struct ceph_osd_reqid));
+	p += sizeof(struct ceph_osd_reqid);
+
+	/* trace */
+	memset(p, 0, sizeof(struct ceph_blkin_trace_info));
+	p += sizeof(struct ceph_blkin_trace_info);
+
+	ceph_encode_32(&p, 0); /* client_inc, always 0 */
 	ceph_encode_timespec(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
 
-	/* reassert_version */
-	memset(p, 0, sizeof(struct ceph_eversion));
-	p += sizeof(struct ceph_eversion);
-
 	encode_oloc(&p, end, &req->r_t.target_oloc);
-	encode_pgid(&p, &req->r_t.pgid);
 	ceph_encode_string(&p, end, req->r_t.target_oid.name,
 			   req->r_t.target_oid.name_len);
 
@@ -1558,11 +1578,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	}
 
 	ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
+	BUG_ON(p != end - 8); /* space for features */
 
-	BUG_ON(p > end);
-	msg->front.iov_len = p - msg->front.iov_base;
-	msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */
-	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
+	/* front_len is finalized in encode_request_finish() */
 	msg->hdr.data_len = cpu_to_le32(data_len);
 	/*
 	 * The header "data_off" is a hint to the receiver allowing it
@@ -1571,9 +1590,99 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	 */
 	msg->hdr.data_off = cpu_to_le16(req->r_data_offset);
 
-	dout("%s req %p oid %s oid_len %d front %zu data %u\n", __func__,
-	     req, req->r_t.target_oid.name, req->r_t.target_oid.name_len,
-	     msg->front.iov_len, data_len);
+	dout("%s req %p msg %p oid %s oid_len %d\n", __func__, req, msg,
+	     req->r_t.target_oid.name, req->r_t.target_oid.name_len);
+}
+
+static void encode_request_finish(struct ceph_msg *msg)
+{
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front_alloc_len;
+
+	if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
+		/* luminous OSD -- encode features and be done */
+		p = end - 8;
+		ceph_encode_64(&p, msg->con->peer_features);
+	} else {
+		struct {
+			char spgid[CEPH_ENCODING_START_BLK_LEN +
+				   CEPH_PGID_ENCODING_LEN + 1];
+			__le32 hash;
+			__le32 epoch;
+			__le32 flags;
+			char reqid[CEPH_ENCODING_START_BLK_LEN +
+				   sizeof(struct ceph_osd_reqid)];
+			char trace[sizeof(struct ceph_blkin_trace_info)];
+			__le32 client_inc;
+			struct ceph_timespec mtime;
+		} __packed head;
+		struct ceph_pg pgid;
+		void *oloc, *oid, *tail;
+		int oloc_len, oid_len, tail_len;
+		int len;
+
+		/*
+		 * Pre-luminous OSD -- reencode v8 into v4 using @head
+		 * as a temporary buffer.  Encode the raw PG; the rest
+		 * is just a matter of moving oloc, oid and tail blobs
+		 * around.
+		 */
+		memcpy(&head, p, sizeof(head));
+		p += sizeof(head);
+
+		oloc = p;
+		p += CEPH_ENCODING_START_BLK_LEN;
+		pgid.pool = ceph_decode_64(&p);
+		p += 4 + 4; /* preferred, key len */
+		len = ceph_decode_32(&p);
+		p += len;   /* nspace */
+		oloc_len = p - oloc;
+
+		oid = p;
+		len = ceph_decode_32(&p);
+		p += len;
+		oid_len = p - oid;
+
+		tail = p;
+		tail_len = (end - p) - 8;
+
+		p = msg->front.iov_base;
+		ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
+		ceph_encode_copy(&p, &head.epoch, sizeof(head.epoch));
+		ceph_encode_copy(&p, &head.flags, sizeof(head.flags));
+		ceph_encode_copy(&p, &head.mtime, sizeof(head.mtime));
+
+		/* reassert_version */
+		memset(p, 0, sizeof(struct ceph_eversion));
+		p += sizeof(struct ceph_eversion);
+
+		BUG_ON(p >= oloc);
+		memmove(p, oloc, oloc_len);
+		p += oloc_len;
+
+		pgid.seed = le32_to_cpu(head.hash);
+		encode_pgid(&p, &pgid); /* raw pg */
+
+		BUG_ON(p >= oid);
+		memmove(p, oid, oid_len);
+		p += oid_len;
+
+		/* tail -- ops, snapid, snapc, retry_attempt */
+		BUG_ON(p >= tail);
+		memmove(p, tail, tail_len);
+		p += tail_len;
+
+		msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */
+	}
+
+	BUG_ON(p > end);
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	dout("%s msg %p tid %llu %u+%u+%u v%d\n", __func__, msg,
+	     le64_to_cpu(msg->hdr.tid), le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len),
+	     le16_to_cpu(msg->hdr.version));
 }
 
 /*
@@ -1599,7 +1708,7 @@ static void send_request(struct ceph_osd_request *req)
 	else
 		WARN_ON(req->r_flags & CEPH_OSD_FLAG_RETRY);
 
-	encode_request(req, req->r_request);
+	encode_request_partial(req, req->r_request);
 
 	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
@@ -4577,6 +4686,11 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
+static void osd_reencode_message(struct ceph_msg *msg)
+{
+	encode_request_finish(msg);
+}
+
 static int osd_sign_message(struct ceph_msg *msg)
 {
 	struct ceph_osd *o = msg->con->private;
@@ -4601,6 +4715,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
+	.reencode_message = osd_reencode_message,
 	.sign_message = osd_sign_message,
 	.check_message_signature = osd_check_message_signature,
 	.fault = osd_fault,
-- 
cgit v1.2.3


From 7de030d6b10a56e991312a978ace6be3c090097c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: libceph: resend on PG splits if OSD has RESEND_ON_SPLIT

Note that ceph_osd_request_target fields are updated regardless of
RESEND_ON_SPLIT.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h |  2 ++
 net/ceph/osd_client.c       | 21 ++++++++++++++-------
 net/ceph/osdmap.c           |  7 +++----
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 66447fc7f334..63fb073a3355 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -249,6 +249,8 @@ static inline void ceph_osds_init(struct ceph_osds *set)
 
 void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src);
 
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+		      u32 new_pg_num);
 bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 			  const struct ceph_osds *new_acting,
 			  const struct ceph_osds *old_up,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4143f73590f3..518dbac599d0 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1319,6 +1319,7 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 					   struct ceph_osd_request_target *t,
+					   struct ceph_connection *con,
 					   bool any_change)
 {
 	struct ceph_pg_pool_info *pi;
@@ -1327,6 +1328,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	bool force_resend = false;
 	bool unpaused = false;
 	bool legacy_change;
+	bool split = false;
 	bool need_check_tiering = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
 	enum calc_target_result ct_res;
@@ -1398,8 +1400,10 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	}
 	legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
 			ceph_osds_changed(&t->acting, &acting, any_change);
+	if (t->pg_num)
+		split = ceph_pg_is_split(&last_pgid, t->pg_num, pi->pg_num);
 
-	if (legacy_change || force_resend) {
+	if (legacy_change || force_resend || split) {
 		t->pgid = pgid; /* struct */
 		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
@@ -1413,7 +1417,9 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		t->osd = acting.primary;
 	}
 
-	if (unpaused || legacy_change || force_resend)
+	if (unpaused || legacy_change || force_resend ||
+	    (split && con && CEPH_HAVE_FEATURE(con->peer_features,
+					       RESEND_ON_SPLIT)))
 		ct_res = CALC_TARGET_NEED_RESEND;
 	else
 		ct_res = CALC_TARGET_NO_ACTION;
@@ -1765,7 +1771,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-	ct_res = calc_target(osdc, &req->r_t, false);
+	ct_res = calc_target(osdc, &req->r_t, NULL, false);
 	if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
 		goto promote;
 
@@ -2561,7 +2567,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	struct ceph_osd *osd;
 
-	calc_target(osdc, &lreq->t, false);
+	calc_target(osdc, &lreq->t, NULL, false);
 	osd = lookup_create_osd(osdc, lreq->t.osd, true);
 	link_linger(osd, lreq);
 
@@ -3179,7 +3185,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	enum calc_target_result ct_res;
 
-	ct_res = calc_target(osdc, &lreq->t, true);
+	ct_res = calc_target(osdc, &lreq->t, NULL, true);
 	if (ct_res == CALC_TARGET_NEED_RESEND) {
 		struct ceph_osd *osd;
 
@@ -3250,7 +3256,8 @@ static void scan_requests(struct ceph_osd *osd,
 		n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
 		dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-		ct_res = calc_target(osdc, &req->r_t, false);
+		ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con,
+				     false);
 		switch (ct_res) {
 		case CALC_TARGET_NO_ACTION:
 			force_resend_writes = cleared_full ||
@@ -3359,7 +3366,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		erase_request(need_resend, req); /* before link_request() */
 
 		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, false);
+		calc_target(osdc, &req->r_t, NULL, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index a4155620eace..367879afed58 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1753,9 +1753,8 @@ void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
 	dest->primary = src->primary;
 }
 
-static bool is_split(const struct ceph_pg *pgid,
-		     u32 old_pg_num,
-		     u32 new_pg_num)
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+		      u32 new_pg_num)
 {
 	int old_bits = calc_bits_of(old_pg_num);
 	int old_mask = (1 << old_bits) - 1;
@@ -1800,7 +1799,7 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 	       !osds_equal(old_up, new_up) ||
 	       old_size != new_size ||
 	       old_min_size != new_min_size ||
-	       is_split(pgid, old_pg_num, new_pg_num) ||
+	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
 	       old_sort_bitwise != new_sort_bitwise;
 }
 
-- 
cgit v1.2.3


From 04c7d789e269c2b82bbd08106049a5a979cdb3fd Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:55 +0200
Subject: libceph: make sure need_resend targets reflect latest map

Otherwise we may miss events like PG splits, pool deletions, etc when
we get multiple incremental maps at once.  Because check_pool_dne() can
now be fed an unlinked request, finish_request() needed to be taught to
handle unlinked requests.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  1 +
 net/ceph/debugfs.c              |  2 +-
 net/ceph/osd_client.c           | 33 +++++++++++++++++++++++++--------
 3 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index bca2718ac253..62c672bcbb31 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -149,6 +149,7 @@ struct ceph_osd_request_target {
 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
 	bool paused;
 
+	u32 epoch;
 	u32 last_force_resend;
 
 	int osd;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 50ab1bdb16e2..c0089f8ccaeb 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -166,7 +166,7 @@ static void dump_target(struct seq_file *s, struct ceph_osd_request_target *t)
 	seq_printf(s, "]/%d\t[", t->up.primary);
 	for (i = 0; i < t->acting.size; i++)
 		seq_printf(s, "%s%d", (!i ? "" : ","), t->acting.osds[i]);
-	seq_printf(s, "]/%d\t", t->acting.primary);
+	seq_printf(s, "]/%d\te%u\t", t->acting.primary, t->epoch);
 	if (t->target_oloc.pool_ns) {
 		seq_printf(s, "%*pE/%*pE\t0x%x",
 			(int)t->target_oloc.pool_ns->len,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 576101b635ef..173ab9c68eb6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -386,6 +386,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	dest->flags = src->flags;
 	dest->paused = src->paused;
 
+	dest->epoch = src->epoch;
 	dest->last_force_resend = src->last_force_resend;
 
 	dest->osd = src->osd;
@@ -1334,6 +1335,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	enum calc_target_result ct_res;
 	int ret;
 
+	t->epoch = osdc->osdmap->epoch;
 	pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
 	if (!pi) {
 		t->osd = CEPH_HOMELESS_OSD;
@@ -1720,10 +1722,11 @@ static void send_request(struct ceph_osd_request *req)
 
 	encode_request_partial(req, req->r_request);
 
-	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
+	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d e%u flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
 	     req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed,
-	     req->r_t.spgid.shard, osd->o_osd, req->r_flags, req->r_attempts);
+	     req->r_t.spgid.shard, osd->o_osd, req->r_t.epoch, req->r_flags,
+	     req->r_attempts);
 
 	req->r_t.paused = false;
 	req->r_stamp = jiffies;
@@ -1863,13 +1866,12 @@ static void submit_request(struct ceph_osd_request *req, bool wrlocked)
 static void finish_request(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
-	struct ceph_osd *osd = req->r_osd;
 
-	verify_osd_locked(osd);
+	WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
 	dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
-	WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
-	unlink_request(osd, req);
+	if (req->r_osd)
+		unlink_request(req->r_osd, req);
 	atomic_dec(&osdc->num_requests);
 
 	/*
@@ -3356,8 +3358,25 @@ static void kick_requests(struct ceph_osd_client *osdc,
 			  struct list_head *need_resend_linger)
 {
 	struct ceph_osd_linger_request *lreq, *nlreq;
+	enum calc_target_result ct_res;
 	struct rb_node *n;
 
+	/* make sure need_resend targets reflect latest map */
+	for (n = rb_first(need_resend); n; ) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		n = rb_next(n);
+
+		if (req->r_t.epoch < osdc->osdmap->epoch) {
+			ct_res = calc_target(osdc, &req->r_t, NULL, false);
+			if (ct_res == CALC_TARGET_POOL_DNE) {
+				erase_request(need_resend, req);
+				check_pool_dne(req);
+			}
+		}
+	}
+
 	for (n = rb_first(need_resend); n; ) {
 		struct ceph_osd_request *req =
 		    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3366,8 +3385,6 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		n = rb_next(n);
 		erase_request(need_resend, req); /* before link_request() */
 
-		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, NULL, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
-- 
cgit v1.2.3


From df28152d53b449a72258000f592472215fc9371e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:56 +0200
Subject: libceph: avoid unnecessary pi lookups in calc_target()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h | 10 +++++++--
 net/ceph/osd_client.c       |  8 +++----
 net/ceph/osdmap.c           | 54 +++++++++++++++++++++++++--------------------
 3 files changed, 42 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 63fb073a3355..060d059acbf8 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -273,16 +273,22 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 					 u64 off, u64 len,
 					 u64 *bno, u64 *oxoff, u64 *oxlen);
 
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+				const struct ceph_object_id *oid,
+				const struct ceph_object_locator *oloc,
+				struct ceph_pg *raw_pgid);
 int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
-			      struct ceph_object_id *oid,
-			      struct ceph_object_locator *oloc,
+			      const struct ceph_object_id *oid,
+			      const struct ceph_object_locator *oloc,
 			      struct ceph_pg *raw_pgid);
 
 void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_pool_info *pi,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting);
 bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      struct ceph_pg_pool_info *pi,
 			      const struct ceph_pg *raw_pgid,
 			      struct ceph_spg *spgid);
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ce0055dc7544..620aa43a6a0a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1368,8 +1368,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		}
 	}
 
-	ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid,
-					&t->target_oloc, &pgid);
+	ret = __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc,
+					  &pgid);
 	if (ret) {
 		WARN_ON(ret != -ENOENT);
 		t->osd = CEPH_HOMELESS_OSD;
@@ -1379,7 +1379,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	last_pgid.pool = pgid.pool;
 	last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
 
-	ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting);
+	ceph_pg_to_up_acting_osds(osdc->osdmap, pi, &pgid, &up, &acting);
 	if (any_change &&
 	    ceph_is_new_interval(&t->acting,
 				 &acting,
@@ -1407,7 +1407,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 
 	if (legacy_change || force_resend || split) {
 		t->pgid = pgid; /* struct */
-		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
+		ceph_pg_to_primary_shard(osdc->osdmap, pi, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
 		ceph_osds_copy(&t->up, &up);
 		t->size = pi->size;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 367879afed58..1e2e190a4c2a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1923,16 +1923,12 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
  * Should only be called with target_oid and target_oloc (as opposed to
  * base_oid and base_oloc), since tiering isn't taken into account.
  */
-int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
-			      struct ceph_object_id *oid,
-			      struct ceph_object_locator *oloc,
-			      struct ceph_pg *raw_pgid)
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+				const struct ceph_object_id *oid,
+				const struct ceph_object_locator *oloc,
+				struct ceph_pg *raw_pgid)
 {
-	struct ceph_pg_pool_info *pi;
-
-	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
-	if (!pi)
-		return -ENOENT;
+	WARN_ON(pi->id != oloc->pool);
 
 	if (!oloc->pool_ns) {
 		raw_pgid->pool = oloc->pool;
@@ -1964,6 +1960,20 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
 	}
 	return 0;
 }
+
+int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
+			      const struct ceph_object_id *oid,
+			      const struct ceph_object_locator *oloc,
+			      struct ceph_pg *raw_pgid)
+{
+	struct ceph_pg_pool_info *pi;
+
+	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
+	if (!pi)
+		return -ENOENT;
+
+	return __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
+}
 EXPORT_SYMBOL(ceph_object_locator_to_pg);
 
 /*
@@ -2236,19 +2246,14 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
  * resend a request.
  */
 void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_pool_info *pi,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting)
 {
-	struct ceph_pg_pool_info *pi;
 	u32 pps;
 
-	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
-	if (!pi) {
-		ceph_osds_init(up);
-		ceph_osds_init(acting);
-		goto out;
-	}
+	WARN_ON(pi->id != raw_pgid->pool);
 
 	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
 	raw_to_up_osds(osdmap, pi, up);
@@ -2260,23 +2265,19 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 		if (acting->primary == -1)
 			acting->primary = up->primary;
 	}
-out:
 	WARN_ON(!osds_valid(up) || !osds_valid(acting));
 }
 
 bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      struct ceph_pg_pool_info *pi,
 			      const struct ceph_pg *raw_pgid,
 			      struct ceph_spg *spgid)
 {
-	struct ceph_pg_pool_info *pi;
 	struct ceph_pg pgid;
 	struct ceph_osds up, acting;
 	int i;
 
-	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
-	if (!pi)
-		return false;
-
+	WARN_ON(pi->id != raw_pgid->pool);
 	raw_pg_to_pg(pi, raw_pgid, &pgid);
 
 	if (ceph_can_shift_osds(pi)) {
@@ -2285,7 +2286,7 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
 		return true;
 	}
 
-	ceph_pg_to_up_acting_osds(osdmap, &pgid, &up, &acting);
+	ceph_pg_to_up_acting_osds(osdmap, pi, &pgid, &up, &acting);
 	for (i = 0; i < acting.size; i++) {
 		if (acting.osds[i] == acting.primary) {
 			spgid->pgid = pgid; /* struct */
@@ -2303,9 +2304,14 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
 			      const struct ceph_pg *raw_pgid)
 {
+	struct ceph_pg_pool_info *pi;
 	struct ceph_osds up, acting;
 
-	ceph_pg_to_up_acting_osds(osdmap, raw_pgid, &up, &acting);
+	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
+	if (!pi)
+		return -1;
+
+	ceph_pg_to_up_acting_osds(osdmap, pi, raw_pgid, &up, &acting);
 	return acting.primary;
 }
 EXPORT_SYMBOL(ceph_pg_to_acting_primary);
-- 
cgit v1.2.3


From 76f827a7b1faaaebc53f89d184e95ea3a0b8dd71 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 19 Jun 2017 12:18:05 +0200
Subject: libceph: make DEFINE_RB_* helpers more general

Initially for ceph_pg_mapping, ceph_spg_mapping and ceph_hobject_id,
compared with ceph_pg_compare(), ceph_spg_compare() and hoid_compare()
respectively.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h | 49 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 3229ae6c7846..8a79587e1317 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -184,10 +184,11 @@ static inline int calc_pages_for(u64 off, u64 len)
 		(off >> PAGE_SHIFT);
 }
 
-/*
- * These are not meant to be generic - an integer key is assumed.
- */
-#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
+#define RB_BYVAL(a)      (a)
+#define RB_BYPTR(a)      (&(a))
+#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
 static void insert_##name(struct rb_root *root, type *t)		\
 {									\
 	struct rb_node **n = &root->rb_node;				\
@@ -197,11 +198,13 @@ static void insert_##name(struct rb_root *root, type *t)		\
 									\
 	while (*n) {							\
 		type *cur = rb_entry(*n, type, nodefld);		\
+		int cmp;						\
 									\
 		parent = *n;						\
-		if (t->keyfld < cur->keyfld)				\
+		cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld));	\
+		if (cmp < 0)						\
 			n = &(*n)->rb_left;				\
-		else if (t->keyfld > cur->keyfld)			\
+		else if (cmp > 0)					\
 			n = &(*n)->rb_right;				\
 		else							\
 			BUG();						\
@@ -217,19 +220,24 @@ static void erase_##name(struct rb_root *root, type *t)			\
 	RB_CLEAR_NODE(&t->nodefld);					\
 }
 
-#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
-extern type __lookup_##name##_key;					\
-static type *lookup_##name(struct rb_root *root,			\
-			   typeof(__lookup_##name##_key.keyfld) key)	\
+/*
+ * @lookup_param_type is a parameter and not constructed from (@type,
+ * @keyfld) with typeof() because adding const is too unwieldy.
+ */
+#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,	\
+			       lookup_param_type, nodefld)		\
+static type *lookup_##name(struct rb_root *root, lookup_param_type key)	\
 {									\
 	struct rb_node *n = root->rb_node;				\
 									\
 	while (n) {							\
 		type *cur = rb_entry(n, type, nodefld);			\
+		int cmp;						\
 									\
-		if (key < cur->keyfld)					\
+		cmp = cmpexp(key, keyexp(cur->keyfld));			\
+		if (cmp < 0)						\
 			n = n->rb_left;					\
-		else if (key > cur->keyfld)				\
+		else if (cmp > 0)					\
 			n = n->rb_right;				\
 		else							\
 			return cur;					\
@@ -238,6 +246,23 @@ static type *lookup_##name(struct rb_root *root,			\
 	return NULL;							\
 }
 
+#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp,		\
+			 lookup_param_type, nodefld)			\
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld)	\
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,		\
+		       lookup_param_type, nodefld)
+
+/*
+ * Shorthands for integer keys.
+ */
+#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld)
+
+#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
+extern type __lookup_##name##_key;					\
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL,	\
+		       typeof(__lookup_##name##_key.keyfld), nodefld)
+
 #define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
 DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)			\
 DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
-- 
cgit v1.2.3


From a02a946dfe9633d7e0202359836f6b5217a62824 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 19 Jun 2017 12:18:05 +0200
Subject: libceph: respect RADOS_BACKOFF backoffs

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_fs.h    |   1 +
 include/linux/ceph/osd_client.h |  45 +++
 include/linux/ceph/osdmap.h     |   1 +
 include/linux/ceph/rados.h      |   6 +
 net/ceph/ceph_common.c          |   1 +
 net/ceph/debugfs.c              |  74 +++++
 net/ceph/osd_client.c           | 593 ++++++++++++++++++++++++++++++++++++++++
 net/ceph/osdmap.c               |  16 ++
 8 files changed, 737 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ad078ebe25d6..edf5b04b918a 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -147,6 +147,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_OSD_OP                 42
 #define CEPH_MSG_OSD_OPREPLY            43
 #define CEPH_MSG_WATCH_NOTIFY           44
+#define CEPH_MSG_OSD_BACKOFF            61
 
 
 /* watch-notify operations */
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 62c672bcbb31..c6d96a5f46fd 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -1,6 +1,7 @@
 #ifndef _FS_CEPH_OSD_CLIENT_H
 #define _FS_CEPH_OSD_CLIENT_H
 
+#include <linux/bitrev.h>
 #include <linux/completion.h>
 #include <linux/kref.h>
 #include <linux/mempool.h>
@@ -36,6 +37,8 @@ struct ceph_osd {
 	struct ceph_connection o_con;
 	struct rb_root o_requests;
 	struct rb_root o_linger_requests;
+	struct rb_root o_backoff_mappings;
+	struct rb_root o_backoffs_by_id;
 	struct list_head o_osd_lru;
 	struct ceph_auth_handshake o_auth;
 	unsigned long lru_ttl;
@@ -275,6 +278,48 @@ struct ceph_watch_item {
 	struct ceph_entity_addr addr;
 };
 
+struct ceph_spg_mapping {
+	struct rb_node node;
+	struct ceph_spg spgid;
+
+	struct rb_root backoffs;
+};
+
+struct ceph_hobject_id {
+	void *key;
+	size_t key_len;
+	void *oid;
+	size_t oid_len;
+	u64 snapid;
+	u32 hash;
+	u8 is_max;
+	void *nspace;
+	size_t nspace_len;
+	s64 pool;
+
+	/* cache */
+	u32 hash_reverse_bits;
+};
+
+static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
+{
+	hoid->hash_reverse_bits = bitrev32(hoid->hash);
+}
+
+/*
+ * PG-wide backoff: [begin, end)
+ * per-object backoff: begin == end
+ */
+struct ceph_osd_backoff {
+	struct rb_node spg_node;
+	struct rb_node id_node;
+
+	struct ceph_spg spgid;
+	u64 id;
+	struct ceph_hobject_id *begin;
+	struct ceph_hobject_id *end;
+};
+
 #define CEPH_LINGER_ID_START	0xffff000000000000ULL
 
 struct ceph_osd_client {
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 060d059acbf8..fe6d189bdd30 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -32,6 +32,7 @@ struct ceph_spg {
 };
 
 int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
 
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
 						       together */
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 5d0018782d50..385db08bb8b2 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -439,6 +439,12 @@ enum {
 
 const char *ceph_osd_watch_op_name(int o);
 
+enum {
+	CEPH_OSD_BACKOFF_OP_BLOCK = 1,
+	CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
+	CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
+};
+
 /*
  * an individual object operation.  each may be accompanied by some data
  * payload
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 47e94b560ba0..3d265c5cb6d0 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -85,6 +85,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
 	case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
+	case CEPH_MSG_OSD_BACKOFF: return "osd_backoff";
 	default: return "unknown";
 	}
 }
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index c0089f8ccaeb..017f15c575f8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -243,6 +243,73 @@ static void dump_linger_requests(struct seq_file *s, struct ceph_osd *osd)
 	mutex_unlock(&osd->lock);
 }
 
+static void dump_snapid(struct seq_file *s, u64 snapid)
+{
+	if (snapid == CEPH_NOSNAP)
+		seq_puts(s, "head");
+	else if (snapid == CEPH_SNAPDIR)
+		seq_puts(s, "snapdir");
+	else
+		seq_printf(s, "%llx", snapid);
+}
+
+static void dump_name_escaped(struct seq_file *s, unsigned char *name,
+			      size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (name[i] == '%' || name[i] == ':' || name[i] == '/' ||
+		    name[i] < 32 || name[i] >= 127) {
+			seq_printf(s, "%%%02x", name[i]);
+		} else {
+			seq_putc(s, name[i]);
+		}
+	}
+}
+
+static void dump_hoid(struct seq_file *s, const struct ceph_hobject_id *hoid)
+{
+	if (hoid->snapid == 0 && hoid->hash == 0 && !hoid->is_max &&
+	    hoid->pool == S64_MIN) {
+		seq_puts(s, "MIN");
+		return;
+	}
+	if (hoid->is_max) {
+		seq_puts(s, "MAX");
+		return;
+	}
+	seq_printf(s, "%lld:%08x:", hoid->pool, hoid->hash_reverse_bits);
+	dump_name_escaped(s, hoid->nspace, hoid->nspace_len);
+	seq_putc(s, ':');
+	dump_name_escaped(s, hoid->key, hoid->key_len);
+	seq_putc(s, ':');
+	dump_name_escaped(s, hoid->oid, hoid->oid_len);
+	seq_putc(s, ':');
+	dump_snapid(s, hoid->snapid);
+}
+
+static void dump_backoffs(struct seq_file *s, struct ceph_osd *osd)
+{
+	struct rb_node *n;
+
+	mutex_lock(&osd->lock);
+	for (n = rb_first(&osd->o_backoffs_by_id); n; n = rb_next(n)) {
+		struct ceph_osd_backoff *backoff =
+		    rb_entry(n, struct ceph_osd_backoff, id_node);
+
+		seq_printf(s, "osd%d\t", osd->o_osd);
+		dump_spgid(s, &backoff->spgid);
+		seq_printf(s, "\t%llu\t", backoff->id);
+		dump_hoid(s, backoff->begin);
+		seq_putc(s, '\t');
+		dump_hoid(s, backoff->end);
+		seq_putc(s, '\n');
+	}
+
+	mutex_unlock(&osd->lock);
+}
+
 static int osdc_show(struct seq_file *s, void *pp)
 {
 	struct ceph_client *client = s->private;
@@ -268,6 +335,13 @@ static int osdc_show(struct seq_file *s, void *pp)
 	}
 	dump_linger_requests(s, &osdc->homeless_osd);
 
+	seq_puts(s, "BACKOFFS\n");
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+		dump_backoffs(s, osd);
+	}
+
 	up_read(&osdc->lock);
 	return 0;
 }
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 620aa43a6a0a..86a9737d8e3f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -50,6 +50,7 @@ static void link_linger(struct ceph_osd *osd,
 			struct ceph_osd_linger_request *lreq);
 static void unlink_linger(struct ceph_osd *osd,
 			  struct ceph_osd_linger_request *lreq);
+static void clear_backoffs(struct ceph_osd *osd);
 
 #if 1
 static inline bool rwsem_is_wrlocked(struct rw_semaphore *sem)
@@ -1019,6 +1020,8 @@ static void osd_init(struct ceph_osd *osd)
 	RB_CLEAR_NODE(&osd->o_node);
 	osd->o_requests = RB_ROOT;
 	osd->o_linger_requests = RB_ROOT;
+	osd->o_backoff_mappings = RB_ROOT;
+	osd->o_backoffs_by_id = RB_ROOT;
 	INIT_LIST_HEAD(&osd->o_osd_lru);
 	INIT_LIST_HEAD(&osd->o_keepalive_item);
 	osd->o_incarnation = 1;
@@ -1030,6 +1033,8 @@ static void osd_cleanup(struct ceph_osd *osd)
 	WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
 	WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests));
 	WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests));
+	WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoff_mappings));
+	WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoffs_by_id));
 	WARN_ON(!list_empty(&osd->o_osd_lru));
 	WARN_ON(!list_empty(&osd->o_keepalive_item));
 
@@ -1150,6 +1155,7 @@ static void close_osd(struct ceph_osd *osd)
 		unlink_linger(osd, lreq);
 		link_linger(&osdc->homeless_osd, lreq);
 	}
+	clear_backoffs(osd);
 
 	__remove_osd_from_lru(osd);
 	erase_osd(&osdc->osds, osd);
@@ -1431,6 +1437,328 @@ out:
 	return ct_res;
 }
 
+static struct ceph_spg_mapping *alloc_spg_mapping(void)
+{
+	struct ceph_spg_mapping *spg;
+
+	spg = kmalloc(sizeof(*spg), GFP_NOIO);
+	if (!spg)
+		return NULL;
+
+	RB_CLEAR_NODE(&spg->node);
+	spg->backoffs = RB_ROOT;
+	return spg;
+}
+
+static void free_spg_mapping(struct ceph_spg_mapping *spg)
+{
+	WARN_ON(!RB_EMPTY_NODE(&spg->node));
+	WARN_ON(!RB_EMPTY_ROOT(&spg->backoffs));
+
+	kfree(spg);
+}
+
+/*
+ * rbtree of ceph_spg_mapping for handling map<spg_t, ...>, similar to
+ * ceph_pg_mapping.  Used to track OSD backoffs -- a backoff [range] is
+ * defined only within a specific spgid; it does not pass anything to
+ * children on split, or to another primary.
+ */
+DEFINE_RB_FUNCS2(spg_mapping, struct ceph_spg_mapping, spgid, ceph_spg_compare,
+		 RB_BYPTR, const struct ceph_spg *, node)
+
+static u64 hoid_get_bitwise_key(const struct ceph_hobject_id *hoid)
+{
+	return hoid->is_max ? 0x100000000ull : hoid->hash_reverse_bits;
+}
+
+static void hoid_get_effective_key(const struct ceph_hobject_id *hoid,
+				   void **pkey, size_t *pkey_len)
+{
+	if (hoid->key_len) {
+		*pkey = hoid->key;
+		*pkey_len = hoid->key_len;
+	} else {
+		*pkey = hoid->oid;
+		*pkey_len = hoid->oid_len;
+	}
+}
+
+static int compare_names(const void *name1, size_t name1_len,
+			 const void *name2, size_t name2_len)
+{
+	int ret;
+
+	ret = memcmp(name1, name2, min(name1_len, name2_len));
+	if (!ret) {
+		if (name1_len < name2_len)
+			ret = -1;
+		else if (name1_len > name2_len)
+			ret = 1;
+	}
+	return ret;
+}
+
+static int hoid_compare(const struct ceph_hobject_id *lhs,
+			const struct ceph_hobject_id *rhs)
+{
+	void *effective_key1, *effective_key2;
+	size_t effective_key1_len, effective_key2_len;
+	int ret;
+
+	if (lhs->is_max < rhs->is_max)
+		return -1;
+	if (lhs->is_max > rhs->is_max)
+		return 1;
+
+	if (lhs->pool < rhs->pool)
+		return -1;
+	if (lhs->pool > rhs->pool)
+		return 1;
+
+	if (hoid_get_bitwise_key(lhs) < hoid_get_bitwise_key(rhs))
+		return -1;
+	if (hoid_get_bitwise_key(lhs) > hoid_get_bitwise_key(rhs))
+		return 1;
+
+	ret = compare_names(lhs->nspace, lhs->nspace_len,
+			    rhs->nspace, rhs->nspace_len);
+	if (ret)
+		return ret;
+
+	hoid_get_effective_key(lhs, &effective_key1, &effective_key1_len);
+	hoid_get_effective_key(rhs, &effective_key2, &effective_key2_len);
+	ret = compare_names(effective_key1, effective_key1_len,
+			    effective_key2, effective_key2_len);
+	if (ret)
+		return ret;
+
+	ret = compare_names(lhs->oid, lhs->oid_len, rhs->oid, rhs->oid_len);
+	if (ret)
+		return ret;
+
+	if (lhs->snapid < rhs->snapid)
+		return -1;
+	if (lhs->snapid > rhs->snapid)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * For decoding ->begin and ->end of MOSDBackoff only -- no MIN/MAX
+ * compat stuff here.
+ *
+ * Assumes @hoid is zero-initialized.
+ */
+static int decode_hoid(void **p, void *end, struct ceph_hobject_id *hoid)
+{
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(p, end, 4, "hobject_t", &struct_v,
+				  &struct_len);
+	if (ret)
+		return ret;
+
+	if (struct_v < 4) {
+		pr_err("got struct_v %d < 4 of hobject_t\n", struct_v);
+		goto e_inval;
+	}
+
+	hoid->key = ceph_extract_encoded_string(p, end, &hoid->key_len,
+						GFP_NOIO);
+	if (IS_ERR(hoid->key)) {
+		ret = PTR_ERR(hoid->key);
+		hoid->key = NULL;
+		return ret;
+	}
+
+	hoid->oid = ceph_extract_encoded_string(p, end, &hoid->oid_len,
+						GFP_NOIO);
+	if (IS_ERR(hoid->oid)) {
+		ret = PTR_ERR(hoid->oid);
+		hoid->oid = NULL;
+		return ret;
+	}
+
+	ceph_decode_64_safe(p, end, hoid->snapid, e_inval);
+	ceph_decode_32_safe(p, end, hoid->hash, e_inval);
+	ceph_decode_8_safe(p, end, hoid->is_max, e_inval);
+
+	hoid->nspace = ceph_extract_encoded_string(p, end, &hoid->nspace_len,
+						   GFP_NOIO);
+	if (IS_ERR(hoid->nspace)) {
+		ret = PTR_ERR(hoid->nspace);
+		hoid->nspace = NULL;
+		return ret;
+	}
+
+	ceph_decode_64_safe(p, end, hoid->pool, e_inval);
+
+	ceph_hoid_build_hash_cache(hoid);
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int hoid_encoding_size(const struct ceph_hobject_id *hoid)
+{
+	return 8 + 4 + 1 + 8 + /* snapid, hash, is_max, pool */
+	       4 + hoid->key_len + 4 + hoid->oid_len + 4 + hoid->nspace_len;
+}
+
+static void encode_hoid(void **p, void *end, const struct ceph_hobject_id *hoid)
+{
+	ceph_start_encoding(p, 4, 3, hoid_encoding_size(hoid));
+	ceph_encode_string(p, end, hoid->key, hoid->key_len);
+	ceph_encode_string(p, end, hoid->oid, hoid->oid_len);
+	ceph_encode_64(p, hoid->snapid);
+	ceph_encode_32(p, hoid->hash);
+	ceph_encode_8(p, hoid->is_max);
+	ceph_encode_string(p, end, hoid->nspace, hoid->nspace_len);
+	ceph_encode_64(p, hoid->pool);
+}
+
+static void free_hoid(struct ceph_hobject_id *hoid)
+{
+	if (hoid) {
+		kfree(hoid->key);
+		kfree(hoid->oid);
+		kfree(hoid->nspace);
+		kfree(hoid);
+	}
+}
+
+static struct ceph_osd_backoff *alloc_backoff(void)
+{
+	struct ceph_osd_backoff *backoff;
+
+	backoff = kzalloc(sizeof(*backoff), GFP_NOIO);
+	if (!backoff)
+		return NULL;
+
+	RB_CLEAR_NODE(&backoff->spg_node);
+	RB_CLEAR_NODE(&backoff->id_node);
+	return backoff;
+}
+
+static void free_backoff(struct ceph_osd_backoff *backoff)
+{
+	WARN_ON(!RB_EMPTY_NODE(&backoff->spg_node));
+	WARN_ON(!RB_EMPTY_NODE(&backoff->id_node));
+
+	free_hoid(backoff->begin);
+	free_hoid(backoff->end);
+	kfree(backoff);
+}
+
+/*
+ * Within a specific spgid, backoffs are managed by ->begin hoid.
+ */
+DEFINE_RB_INSDEL_FUNCS2(backoff, struct ceph_osd_backoff, begin, hoid_compare,
+			RB_BYVAL, spg_node);
+
+static struct ceph_osd_backoff *lookup_containing_backoff(struct rb_root *root,
+					    const struct ceph_hobject_id *hoid)
+{
+	struct rb_node *n = root->rb_node;
+
+	while (n) {
+		struct ceph_osd_backoff *cur =
+		    rb_entry(n, struct ceph_osd_backoff, spg_node);
+		int cmp;
+
+		cmp = hoid_compare(hoid, cur->begin);
+		if (cmp < 0) {
+			n = n->rb_left;
+		} else if (cmp > 0) {
+			if (hoid_compare(hoid, cur->end) < 0)
+				return cur;
+
+			n = n->rb_right;
+		} else {
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Each backoff has a unique id within its OSD session.
+ */
+DEFINE_RB_FUNCS(backoff_by_id, struct ceph_osd_backoff, id, id_node)
+
+static void clear_backoffs(struct ceph_osd *osd)
+{
+	while (!RB_EMPTY_ROOT(&osd->o_backoff_mappings)) {
+		struct ceph_spg_mapping *spg =
+		    rb_entry(rb_first(&osd->o_backoff_mappings),
+			     struct ceph_spg_mapping, node);
+
+		while (!RB_EMPTY_ROOT(&spg->backoffs)) {
+			struct ceph_osd_backoff *backoff =
+			    rb_entry(rb_first(&spg->backoffs),
+				     struct ceph_osd_backoff, spg_node);
+
+			erase_backoff(&spg->backoffs, backoff);
+			erase_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+			free_backoff(backoff);
+		}
+		erase_spg_mapping(&osd->o_backoff_mappings, spg);
+		free_spg_mapping(spg);
+	}
+}
+
+/*
+ * Set up a temporary, non-owning view into @t.
+ */
+static void hoid_fill_from_target(struct ceph_hobject_id *hoid,
+				  const struct ceph_osd_request_target *t)
+{
+	hoid->key = NULL;
+	hoid->key_len = 0;
+	hoid->oid = t->target_oid.name;
+	hoid->oid_len = t->target_oid.name_len;
+	hoid->snapid = CEPH_NOSNAP;
+	hoid->hash = t->pgid.seed;
+	hoid->is_max = false;
+	if (t->target_oloc.pool_ns) {
+		hoid->nspace = t->target_oloc.pool_ns->str;
+		hoid->nspace_len = t->target_oloc.pool_ns->len;
+	} else {
+		hoid->nspace = NULL;
+		hoid->nspace_len = 0;
+	}
+	hoid->pool = t->target_oloc.pool;
+	ceph_hoid_build_hash_cache(hoid);
+}
+
+static bool should_plug_request(struct ceph_osd_request *req)
+{
+	struct ceph_osd *osd = req->r_osd;
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct ceph_hobject_id hoid;
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &req->r_t.spgid);
+	if (!spg)
+		return false;
+
+	hoid_fill_from_target(&hoid, &req->r_t);
+	backoff = lookup_containing_backoff(&spg->backoffs, &hoid);
+	if (!backoff)
+		return false;
+
+	dout("%s req %p tid %llu backoff osd%d spgid %llu.%xs%d id %llu\n",
+	     __func__, req, req->r_tid, osd->o_osd, backoff->spgid.pgid.pool,
+	     backoff->spgid.pgid.seed, backoff->spgid.shard, backoff->id);
+	return true;
+}
+
 static void setup_request_data(struct ceph_osd_request *req,
 			       struct ceph_msg *msg)
 {
@@ -1707,6 +2035,10 @@ static void send_request(struct ceph_osd_request *req)
 	verify_osd_locked(osd);
 	WARN_ON(osd->o_osd != req->r_t.osd);
 
+	/* backoff? */
+	if (should_plug_request(req))
+		return;
+
 	/*
 	 * We may have a previously queued request message hanging
 	 * around.  Cancel it to avoid corrupting the msgr.
@@ -3527,6 +3859,8 @@ static void kick_osd_requests(struct ceph_osd *osd)
 {
 	struct rb_node *n;
 
+	clear_backoffs(osd);
+
 	for (n = rb_first(&osd->o_requests); n; ) {
 		struct ceph_osd_request *req =
 		    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3572,6 +3906,261 @@ out_unlock:
 	up_write(&osdc->lock);
 }
 
+struct MOSDBackoff {
+	struct ceph_spg spgid;
+	u32 map_epoch;
+	u8 op;
+	u64 id;
+	struct ceph_hobject_id *begin;
+	struct ceph_hobject_id *end;
+};
+
+static int decode_MOSDBackoff(const struct ceph_msg *msg, struct MOSDBackoff *m)
+{
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front.iov_len;
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(&p, end, 1, "spg_t", &struct_v, &struct_len);
+	if (ret)
+		return ret;
+
+	ret = ceph_decode_pgid(&p, end, &m->spgid.pgid);
+	if (ret)
+		return ret;
+
+	ceph_decode_8_safe(&p, end, m->spgid.shard, e_inval);
+	ceph_decode_32_safe(&p, end, m->map_epoch, e_inval);
+	ceph_decode_8_safe(&p, end, m->op, e_inval);
+	ceph_decode_64_safe(&p, end, m->id, e_inval);
+
+	m->begin = kzalloc(sizeof(*m->begin), GFP_NOIO);
+	if (!m->begin)
+		return -ENOMEM;
+
+	ret = decode_hoid(&p, end, m->begin);
+	if (ret) {
+		free_hoid(m->begin);
+		return ret;
+	}
+
+	m->end = kzalloc(sizeof(*m->end), GFP_NOIO);
+	if (!m->end) {
+		free_hoid(m->begin);
+		return -ENOMEM;
+	}
+
+	ret = decode_hoid(&p, end, m->end);
+	if (ret) {
+		free_hoid(m->begin);
+		free_hoid(m->end);
+		return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static struct ceph_msg *create_backoff_message(
+				const struct ceph_osd_backoff *backoff,
+				u32 map_epoch)
+{
+	struct ceph_msg *msg;
+	void *p, *end;
+	int msg_size;
+
+	msg_size = CEPH_ENCODING_START_BLK_LEN +
+			CEPH_PGID_ENCODING_LEN + 1; /* spgid */
+	msg_size += 4 + 1 + 8; /* map_epoch, op, id */
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			hoid_encoding_size(backoff->begin);
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			hoid_encoding_size(backoff->end);
+
+	msg = ceph_msg_new(CEPH_MSG_OSD_BACKOFF, msg_size, GFP_NOIO, true);
+	if (!msg)
+		return NULL;
+
+	p = msg->front.iov_base;
+	end = p + msg->front_alloc_len;
+
+	encode_spgid(&p, &backoff->spgid);
+	ceph_encode_32(&p, map_epoch);
+	ceph_encode_8(&p, CEPH_OSD_BACKOFF_OP_ACK_BLOCK);
+	ceph_encode_64(&p, backoff->id);
+	encode_hoid(&p, end, backoff->begin);
+	encode_hoid(&p, end, backoff->end);
+	BUG_ON(p != end);
+
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.version = cpu_to_le16(1); /* MOSDBackoff v1 */
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	return msg;
+}
+
+static void handle_backoff_block(struct ceph_osd *osd, struct MOSDBackoff *m)
+{
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct ceph_msg *msg;
+
+	dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd,
+	     m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id);
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &m->spgid);
+	if (!spg) {
+		spg = alloc_spg_mapping();
+		if (!spg) {
+			pr_err("%s failed to allocate spg\n", __func__);
+			return;
+		}
+		spg->spgid = m->spgid; /* struct */
+		insert_spg_mapping(&osd->o_backoff_mappings, spg);
+	}
+
+	backoff = alloc_backoff();
+	if (!backoff) {
+		pr_err("%s failed to allocate backoff\n", __func__);
+		return;
+	}
+	backoff->spgid = m->spgid; /* struct */
+	backoff->id = m->id;
+	backoff->begin = m->begin;
+	m->begin = NULL; /* backoff now owns this */
+	backoff->end = m->end;
+	m->end = NULL;   /* ditto */
+
+	insert_backoff(&spg->backoffs, backoff);
+	insert_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+
+	/*
+	 * Ack with original backoff's epoch so that the OSD can
+	 * discard this if there was a PG split.
+	 */
+	msg = create_backoff_message(backoff, m->map_epoch);
+	if (!msg) {
+		pr_err("%s failed to allocate msg\n", __func__);
+		return;
+	}
+	ceph_con_send(&osd->o_con, msg);
+}
+
+static bool target_contained_by(const struct ceph_osd_request_target *t,
+				const struct ceph_hobject_id *begin,
+				const struct ceph_hobject_id *end)
+{
+	struct ceph_hobject_id hoid;
+	int cmp;
+
+	hoid_fill_from_target(&hoid, t);
+	cmp = hoid_compare(&hoid, begin);
+	return !cmp || (cmp > 0 && hoid_compare(&hoid, end) < 0);
+}
+
+static void handle_backoff_unblock(struct ceph_osd *osd,
+				   const struct MOSDBackoff *m)
+{
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct rb_node *n;
+
+	dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd,
+	     m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id);
+
+	backoff = lookup_backoff_by_id(&osd->o_backoffs_by_id, m->id);
+	if (!backoff) {
+		pr_err("%s osd%d spgid %llu.%xs%d id %llu backoff dne\n",
+		       __func__, osd->o_osd, m->spgid.pgid.pool,
+		       m->spgid.pgid.seed, m->spgid.shard, m->id);
+		return;
+	}
+
+	if (hoid_compare(backoff->begin, m->begin) &&
+	    hoid_compare(backoff->end, m->end)) {
+		pr_err("%s osd%d spgid %llu.%xs%d id %llu bad range?\n",
+		       __func__, osd->o_osd, m->spgid.pgid.pool,
+		       m->spgid.pgid.seed, m->spgid.shard, m->id);
+		/* unblock it anyway... */
+	}
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &backoff->spgid);
+	BUG_ON(!spg);
+
+	erase_backoff(&spg->backoffs, backoff);
+	erase_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+	free_backoff(backoff);
+
+	if (RB_EMPTY_ROOT(&spg->backoffs)) {
+		erase_spg_mapping(&osd->o_backoff_mappings, spg);
+		free_spg_mapping(spg);
+	}
+
+	for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		if (!ceph_spg_compare(&req->r_t.spgid, &m->spgid)) {
+			/*
+			 * Match against @m, not @backoff -- the PG may
+			 * have split on the OSD.
+			 */
+			if (target_contained_by(&req->r_t, m->begin, m->end)) {
+				/*
+				 * If no other installed backoff applies,
+				 * resend.
+				 */
+				send_request(req);
+			}
+		}
+	}
+}
+
+static void handle_backoff(struct ceph_osd *osd, struct ceph_msg *msg)
+{
+	struct ceph_osd_client *osdc = osd->o_osdc;
+	struct MOSDBackoff m;
+	int ret;
+
+	down_read(&osdc->lock);
+	if (!osd_registered(osd)) {
+		dout("%s osd%d unknown\n", __func__, osd->o_osd);
+		up_read(&osdc->lock);
+		return;
+	}
+	WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num));
+
+	mutex_lock(&osd->lock);
+	ret = decode_MOSDBackoff(msg, &m);
+	if (ret) {
+		pr_err("failed to decode MOSDBackoff: %d\n", ret);
+		ceph_msg_dump(msg);
+		goto out_unlock;
+	}
+
+	switch (m.op) {
+	case CEPH_OSD_BACKOFF_OP_BLOCK:
+		handle_backoff_block(osd, &m);
+		break;
+	case CEPH_OSD_BACKOFF_OP_UNBLOCK:
+		handle_backoff_unblock(osd, &m);
+		break;
+	default:
+		pr_err("%s osd%d unknown op %d\n", __func__, osd->o_osd, m.op);
+	}
+
+	free_hoid(m.begin);
+	free_hoid(m.end);
+
+out_unlock:
+	mutex_unlock(&osd->lock);
+	up_read(&osdc->lock);
+}
+
 /*
  * Process osd watch notifications
  */
@@ -4509,6 +5098,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_OSD_OPREPLY:
 		handle_reply(osd, msg);
 		break;
+	case CEPH_MSG_OSD_BACKOFF:
+		handle_backoff(osd, msg);
+		break;
 	case CEPH_MSG_WATCH_NOTIFY:
 		handle_watch_notify(osdc, msg);
 		break;
@@ -4631,6 +5223,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
 	*skip = 0;
 	switch (type) {
 	case CEPH_MSG_OSD_MAP:
+	case CEPH_MSG_OSD_BACKOFF:
 	case CEPH_MSG_WATCH_NOTIFY:
 		return alloc_msg_with_page_vector(hdr);
 	case CEPH_MSG_OSD_OPREPLY:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 1e2e190a4c2a..1d87a736221b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -418,6 +418,22 @@ int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
 	return 0;
 }
 
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
+{
+	int ret;
+
+	ret = ceph_pg_compare(&lhs->pgid, &rhs->pgid);
+	if (ret)
+		return ret;
+
+	if (lhs->shard < rhs->shard)
+		return -1;
+	if (lhs->shard > rhs->shard)
+		return 1;
+
+	return 0;
+}
+
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds) and primary_temp (explicit primary setting)
-- 
cgit v1.2.3


From 278b1d709c6acc6f7d138fed775c76695b068e43 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:17 +0200
Subject: libceph: ceph_decode_skip_* helpers

Some of these won't be as efficient as they could be (e.g.
ceph_decode_skip_set(... 32 ...) could advance by len * sizeof(u32)
once instead of advancing by sizeof(u32) len times), but that's fine
and not worth a bunch of extra macro code.

Replace skip_name_map() with ceph_decode_skip_map as an example.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h | 60 +++++++++++++++++++++++++++++++++++++++++++++
 net/ceph/osdmap.c           | 25 +++----------------
 2 files changed, 63 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index f990f2cc907a..14af9b70d301 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -132,6 +132,66 @@ bad:
 	return ERR_PTR(-ERANGE);
 }
 
+/*
+ * skip helpers
+ */
+#define ceph_decode_skip_n(p, end, n, bad)			\
+	do {							\
+		ceph_decode_need(p, end, n, bad);		\
+                *p += n;					\
+	} while (0)
+
+#define ceph_decode_skip_64(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u64), bad)
+
+#define ceph_decode_skip_32(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u32), bad)
+
+#define ceph_decode_skip_16(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u16), bad)
+
+#define ceph_decode_skip_8(p, end, bad)				\
+ceph_decode_skip_n(p, end, sizeof(u8), bad)
+
+#define ceph_decode_skip_string(p, end, bad)			\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		ceph_decode_skip_n(p, end, len, bad);		\
+	} while (0)
+
+#define ceph_decode_skip_set(p, end, type, bad)			\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--)					\
+			ceph_decode_skip_##type(p, end, bad);	\
+	} while (0)
+
+#define ceph_decode_skip_map(p, end, ktype, vtype, bad)		\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--) {					\
+			ceph_decode_skip_##ktype(p, end, bad);	\
+			ceph_decode_skip_##vtype(p, end, bad);	\
+		}						\
+	} while (0)
+
+#define ceph_decode_skip_map_of_map(p, end, ktype1, ktype2, vtype2, bad) \
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--) {					\
+			ceph_decode_skip_##ktype1(p, end, bad);	\
+			ceph_decode_skip_map(p, end, ktype2, vtype2, bad); \
+		}						\
+	} while (0)
+
 /*
  * struct ceph_timespec <-> struct timespec
  */
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 423747714017..f6d561edd511 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -138,21 +138,6 @@ bad:
 	return -EINVAL;
 }
 
-static int skip_name_map(void **p, void *end)
-{
-        int len;
-        ceph_decode_32_safe(p, end, len ,bad);
-        while (len--) {
-                int strlen;
-                *p += sizeof(u32);
-                ceph_decode_32_safe(p, end, strlen, bad);
-                *p += strlen;
-}
-        return 0;
-bad:
-        return -EINVAL;
-}
-
 static void crush_finalize(struct crush_map *c)
 {
 	__s32 b;
@@ -187,7 +172,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	void **p = &pbyval;
 	void *start = pbyval;
 	u32 magic;
-	u32 num_name_maps;
 
 	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
 
@@ -353,12 +337,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		}
 	}
 
-	/* ignore trailing name maps. */
-        for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
-                err = skip_name_map(p, end);
-                if (err < 0)
-                        goto done;
-        }
+	ceph_decode_skip_map(p, end, 32, string, bad); /* type_map */
+	ceph_decode_skip_map(p, end, 32, string, bad); /* name_map */
+	ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */
 
         /* tunables */
         ceph_decode_need(p, end, 3*sizeof(u32), done);
-- 
cgit v1.2.3


From 6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:18 +0200
Subject: libceph: pg_upmap[_items] infrastructure

pg_temp and pg_upmap encodings are the same (PG -> array of osds),
except for the incremental remove: it's an empty mapping in new_pg_temp
for pg_temp and a separate old_pg_upmap set for pg_upmap.  (This isn't
to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't
looked at as an example for pg_upmap encoding.)

Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap.
__decode_pg_temp() stores into pg_temp union member, but since pg_upmap
union member is identical, reading through pg_upmap later is OK.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h |  10 +++-
 net/ceph/debugfs.c          |  23 ++++++++
 net/ceph/osdmap.c           | 135 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 164 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index fe6d189bdd30..c612cff81f5c 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -143,10 +143,14 @@ struct ceph_pg_mapping {
 		struct {
 			int len;
 			int osds[];
-		} pg_temp;
+		} pg_temp, pg_upmap;
 		struct {
 			int osd;
 		} primary_temp;
+		struct {
+			int len;
+			int from_to[][2];
+		} pg_upmap_items;
 	};
 };
 
@@ -165,6 +169,10 @@ struct ceph_osdmap {
 	struct rb_root pg_temp;
 	struct rb_root primary_temp;
 
+	/* remap (post-CRUSH, pre-up) */
+	struct rb_root pg_upmap;	/* PG := raw set */
+	struct rb_root pg_upmap_items;	/* from -> to within raw set */
+
 	u32 *osd_primary_affinity;
 
 	struct rb_root pg_pools;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 017f15c575f8..4f57d5bcaba2 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p)
 		seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
 			   pg->pgid.seed, pg->primary_temp.osd);
 	}
+	for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_upmap.len; i++)
+			seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+				   pg->pg_upmap.osds[i]);
+		seq_printf(s, "]\n");
+	}
+	for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_upmap_items.len; i++)
+			seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","),
+				   pg->pg_upmap_items.from_to[i][0],
+				   pg->pg_upmap_items.from_to[i][1]);
+		seq_printf(s, "]\n");
+	}
 
 	up_read(&osdc->lock);
 	return 0;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f6d561edd511..a3f60d0bfd13 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -735,6 +735,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
 	map->pool_max = -1;
 	map->pg_temp = RB_ROOT;
 	map->primary_temp = RB_ROOT;
+	map->pg_upmap = RB_ROOT;
+	map->pg_upmap_items = RB_ROOT;
 	mutex_init(&map->crush_workspace_mutex);
 
 	return map;
@@ -759,6 +761,20 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
 		erase_pg_mapping(&map->primary_temp, pg);
 		free_pg_mapping(pg);
 	}
+	while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_upmap),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_upmap);
+		kfree(pg);
+	}
+	while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_upmap_items),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_upmap_items);
+		kfree(pg);
+	}
 	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
 		struct ceph_pg_pool_info *pi =
 			rb_entry(rb_first(&map->pg_pools),
@@ -1161,6 +1177,75 @@ e_inval:
 	return -EINVAL;
 }
 
+static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
+						 bool __unused)
+{
+	return __decode_pg_temp(p, end, false);
+}
+
+static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
+				 false);
+}
+
+static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
+				 true);
+}
+
+static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
+}
+
+static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
+						       bool __unused)
+{
+	struct ceph_pg_mapping *pg;
+	u32 len, i;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
+		return ERR_PTR(-EINVAL);
+
+	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
+	pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO);
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
+
+	pg->pg_upmap_items.len = len;
+	for (i = 0; i < len; i++) {
+		pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
+		pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
+	}
+
+	return pg;
+
+e_inval:
+	return ERR_PTR(-EINVAL);
+}
+
+static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap_items,
+				 __decode_pg_upmap_items, false);
+}
+
+static int decode_new_pg_upmap_items(void **p, void *end,
+				     struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap_items,
+				 __decode_pg_upmap_items, true);
+}
+
+static int decode_old_pg_upmap_items(void **p, void *end,
+				     struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
+}
+
 /*
  * decode a full map.
  */
@@ -1250,9 +1335,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 		if (err)
 			goto bad;
 	} else {
-		/* XXX can this happen? */
-		kfree(map->osd_primary_affinity);
-		map->osd_primary_affinity = NULL;
+		WARN_ON(map->osd_primary_affinity);
 	}
 
 	/* crush */
@@ -1261,6 +1344,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 	if (err)
 		goto bad;
 
+	*p += len;
+	if (struct_v >= 3) {
+		/* erasure_code_profiles */
+		ceph_decode_skip_map_of_map(p, end, string, string, string,
+					    bad);
+	}
+
+	if (struct_v >= 4) {
+		err = decode_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+	} else {
+		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
+		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
+	}
+
 	/* ignore the rest */
 	*p = end;
 
@@ -1520,6 +1623,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			goto bad;
 	}
 
+	if (struct_v >= 3) {
+		/* new_erasure_code_profiles */
+		ceph_decode_skip_map_of_map(p, end, string, string, string,
+					    bad);
+		/* old_erasure_code_profiles */
+		ceph_decode_skip_set(p, end, string, bad);
+	}
+
+	if (struct_v >= 4) {
+		err = decode_new_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_old_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_new_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_old_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+	}
+
 	/* ignore the rest */
 	*p = end;
 
-- 
cgit v1.2.3


From 069f3222ca96acfe8c59937e98c401bda5475b48 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:05 +0200
Subject: crush: implement weight and id overrides for straw2

bucket_straw2_choose needs to use weights that may be different from
weight_items. For instance to compensate for an uneven distribution
caused by a low number of values. Or to fix the probability biais
introduced by conditional probabilities (see
http://tracker.ceph.com/issues/15653 for more information).

We introduce a weight_set for each straw2 bucket to set the desired
weight for a given item at a given position. The weight of a given item
when picking the first replica (first position) may be different from
the weight the second replica (second position). For instance the weight
matrix for a given bucket containing items 3, 7 and 13 could be as
follows:

          position 0   position 1

item 3     0x10000      0x100000
item 7     0x40000       0x10000
item 13    0x40000       0x10000

When crush_do_rule picks the first of two replicas (position 0), item 7,
3 are four times more likely to be choosen by bucket_straw2_choose than
item 13. When choosing the second replica (position 1), item 3 is ten
times more likely to be choosen than item 7, 13.

By default the weight_set of each bucket exactly matches the content of
item_weights for each position to ensure backward compatibility.

bucket_straw2_choose compares items by using their id. The same ids are
also used to index buckets and they must be unique. For each item in a
bucket an array of ids can be provided for placement purposes and they
are used instead of the ids. If no replacement ids are provided, the
legacy behavior is preserved.

Reflects ceph.git commit 19537a450fd5c5a0bb8b7830947507a76db2ceca.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/crush/crush.h  | 58 ++++++++++++++++++++++++++++++++++
 include/linux/crush/mapper.h |  9 +++---
 net/ceph/crush/mapper.c      | 74 +++++++++++++++++++++++++++++++++-----------
 net/ceph/osdmap.c            |  2 +-
 4 files changed, 119 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index fbecbd089d75..d8676e56fa23 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -137,6 +137,64 @@ struct crush_bucket {
 
 };
 
+/** @ingroup API
+ *
+ * Replacement weights for each item in a bucket. The size of the
+ * array must be exactly the size of the straw2 bucket, just as the
+ * item_weights array.
+ *
+ */
+struct crush_weight_set {
+	__u32 *weights; /*!< 16.16 fixed point weights
+                             in the same order as items */
+	__u32 size;     /*!< size of the __weights__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for a given straw2 bucket, for
+ * placement purposes.
+ *
+ * When crush_do_rule() chooses the Nth item from a straw2 bucket, the
+ * replacement weights found at __weight_set[N]__ are used instead of
+ * the weights from __item_weights__. If __N__ is greater than
+ * __weight_set_size__, the weights found at __weight_set_size-1__ are
+ * used instead. For instance if __weight_set__ is:
+ *
+ *    [ [ 0x10000, 0x20000 ],   // position 0
+ *      [ 0x20000, 0x40000 ] ]  // position 1
+ *
+ * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ]
+ * choosing the 1th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * choosing the 2th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * etc.
+ *
+ */
+struct crush_choose_arg {
+	__s32 *ids;            /*!< values to use instead of items */
+	__u32 ids_size;        /*!< size of the __ids__ array */
+	struct crush_weight_set *weight_set; /*!< weight replacements for
+                                                  a given position */
+	__u32 weight_set_size; /*!< size of the __weight_set__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for each bucket in the crushmap. The
+ * __size__ of the __args__ array must be exactly the same as the
+ * __map->max_buckets__.
+ *
+ * The __crush_choose_arg__ at index N will be used when choosing
+ * an item from the bucket __map->buckets[N]__ bucket, provided it
+ * is a straw2 bucket.
+ *
+ */
+struct crush_choose_arg_map {
+	struct crush_choose_arg *args; /*!< replacement for each bucket
+                                            in the crushmap */
+	__u32 size;                    /*!< size of the __args__ array */
+};
+
 struct crush_bucket_uniform {
 	struct crush_bucket h;
 	__u32 item_weight;  /* 16-bit fixed point; all items equally weighted */
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c95e19e1ff11..141edabb947e 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -11,11 +11,10 @@
 #include "crush.h"
 
 extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
-extern int crush_do_rule(const struct crush_map *map,
-			 int ruleno,
-			 int x, int *result, int result_max,
-			 const __u32 *weights, int weight_max,
-			 void *cwin);
+int crush_do_rule(const struct crush_map *map,
+		  int ruleno, int x, int *result, int result_max,
+		  const __u32 *weight, int weight_max,
+		  void *cwin, const struct crush_choose_arg *choose_args);
 
 /*
  * Returns the exact amount of workspace that will need to be used
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b5cd8c21bfdf..0b2646a9cc50 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -302,19 +302,42 @@ static __u64 crush_ln(unsigned int xin)
  *
  */
 
+static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
+				     const struct crush_choose_arg *arg,
+				     int position)
+{
+	if (!arg || !arg->weight_set || arg->weight_set_size == 0)
+		return bucket->item_weights;
+
+	if (position >= arg->weight_set_size)
+		position = arg->weight_set_size - 1;
+	return arg->weight_set[position].weights;
+}
+
+static __s32 *get_choose_arg_ids(const struct crush_bucket_straw2 *bucket,
+				 const struct crush_choose_arg *arg)
+{
+	if (!arg || !arg->ids)
+		return bucket->h.items;
+
+	return arg->ids;
+}
+
 static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
-				int x, int r)
+				int x, int r,
+				const struct crush_choose_arg *arg,
+				int position)
 {
 	unsigned int i, high = 0;
 	unsigned int u;
-	unsigned int w;
 	__s64 ln, draw, high_draw = 0;
+	__u32 *weights = get_choose_arg_weights(bucket, arg, position);
+	__s32 *ids = get_choose_arg_ids(bucket, arg);
 
 	for (i = 0; i < bucket->h.size; i++) {
-		w = bucket->item_weights[i];
-		if (w) {
-			u = crush_hash32_3(bucket->h.hash, x,
-					   bucket->h.items[i], r);
+		dprintk("weight 0x%x item %d\n", weights[i], ids[i]);
+		if (weights[i]) {
+			u = crush_hash32_3(bucket->h.hash, x, ids[i], r);
 			u &= 0xffff;
 
 			/*
@@ -335,7 +358,7 @@ static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 			 * weight means a larger (less negative) value
 			 * for draw.
 			 */
-			draw = div64_s64(ln, w);
+			draw = div64_s64(ln, weights[i]);
 		} else {
 			draw = S64_MIN;
 		}
@@ -352,7 +375,9 @@ static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 
 static int crush_bucket_choose(const struct crush_bucket *in,
 			       struct crush_work_bucket *work,
-			       int x, int r)
+			       int x, int r,
+			       const struct crush_choose_arg *arg,
+			       int position)
 {
 	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
 	BUG_ON(in->size == 0);
@@ -374,7 +399,7 @@ static int crush_bucket_choose(const struct crush_bucket *in,
 	case CRUSH_BUCKET_STRAW2:
 		return bucket_straw2_choose(
 			(const struct crush_bucket_straw2 *)in,
-			x, r);
+			x, r, arg, position);
 	default:
 		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
 		return in->items[0];
@@ -436,7 +461,8 @@ static int crush_choose_firstn(const struct crush_map *map,
 			       unsigned int vary_r,
 			       unsigned int stable,
 			       int *out2,
-			       int parent_r)
+			       int parent_r,
+			       const struct crush_choose_arg *choose_args)
 {
 	int rep;
 	unsigned int ftotal, flocal;
@@ -486,7 +512,10 @@ static int crush_choose_firstn(const struct crush_map *map,
 				else
 					item = crush_bucket_choose(
 						in, work->work[-1-in->id],
-						x, r);
+						x, r,
+						(choose_args ?
+						 &choose_args[-1-in->id] : 0),
+						outpos);
 				if (item >= map->max_devices) {
 					dprintk("   bad item %d\n", item);
 					skip_rep = 1;
@@ -543,7 +572,8 @@ static int crush_choose_firstn(const struct crush_map *map,
 							    vary_r,
 							    stable,
 							    NULL,
-							    sub_r) <= outpos)
+							    sub_r,
+							    choose_args) <= outpos)
 							/* didn't get leaf */
 							reject = 1;
 					} else {
@@ -620,7 +650,8 @@ static void crush_choose_indep(const struct crush_map *map,
 			       unsigned int recurse_tries,
 			       int recurse_to_leaf,
 			       int *out2,
-			       int parent_r)
+			       int parent_r,
+			       const struct crush_choose_arg *choose_args)
 {
 	const struct crush_bucket *in = bucket;
 	int endpos = outpos + left;
@@ -692,7 +723,10 @@ static void crush_choose_indep(const struct crush_map *map,
 
 				item = crush_bucket_choose(
 					in, work->work[-1-in->id],
-					x, r);
+					x, r,
+					(choose_args ?
+					 &choose_args[-1-in->id] : 0),
+					outpos);
 				if (item >= map->max_devices) {
 					dprintk("   bad item %d\n", item);
 					out[rep] = CRUSH_ITEM_NONE;
@@ -746,7 +780,8 @@ static void crush_choose_indep(const struct crush_map *map,
 							x, 1, numrep, 0,
 							out2, rep,
 							recurse_tries, 0,
-							0, NULL, r);
+							0, NULL, r,
+							choose_args);
 						if (out2[rep] == CRUSH_ITEM_NONE) {
 							/* placed nothing; no leaf */
 							break;
@@ -854,11 +889,12 @@ void crush_init_workspace(const struct crush_map *map, void *v)
  * @weight: weight vector (for map leaves)
  * @weight_max: size of weight vector
  * @cwin: pointer to at least crush_work_size() bytes of memory
+ * @choose_args: weights and ids for each known bucket
  */
 int crush_do_rule(const struct crush_map *map,
 		  int ruleno, int x, int *result, int result_max,
 		  const __u32 *weight, int weight_max,
-		  void *cwin)
+		  void *cwin, const struct crush_choose_arg *choose_args)
 {
 	int result_len;
 	struct crush_work *cw = cwin;
@@ -1013,7 +1049,8 @@ int crush_do_rule(const struct crush_map *map,
 						vary_r,
 						stable,
 						c+osize,
-						0);
+						0,
+						choose_args);
 				} else {
 					out_size = ((numrep < (result_max-osize)) ?
 						    numrep : (result_max-osize));
@@ -1030,7 +1067,8 @@ int crush_do_rule(const struct crush_map *map,
 						   choose_leaf_tries : 1,
 						recurse_to_leaf,
 						c+osize,
-						0);
+						0,
+						choose_args);
 					osize += out_size;
 				}
 			}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 93baa69407c5..9da0ee61aca5 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2111,7 +2111,7 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-			  weight, weight_max, map->crush_workspace);
+			  weight, weight_max, map->crush_workspace, NULL);
 	mutex_unlock(&map->crush_workspace_mutex);
 
 	return r;
-- 
cgit v1.2.3


From 5cf9c4a9959b6273675310d14a834ef14fbca37c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:05 +0200
Subject: libceph, crush: per-pool crush_choose_arg_map for crush_do_rule()

If there is no crush_choose_arg_map for a given pool, a NULL pointer is
passed to preserve existing crush_do_rule() behavior.

Reflects ceph.git commits 55fb91d64071552ea1bc65ab4ea84d3c8b73ab4b,
                          dbe36e08be00c6519a8c89718dd47b0219c20516.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/crush/crush.h |   8 ++
 net/ceph/crush/crush.c      |   3 +
 net/ceph/osdmap.c           | 200 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 208 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index d8676e56fa23..92e165d417a6 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -2,6 +2,7 @@
 #define CEPH_CRUSH_CRUSH_H
 
 #ifdef __KERNEL__
+# include <linux/rbtree.h>
 # include <linux/types.h>
 #else
 # include "crush_compat.h"
@@ -190,6 +191,10 @@ struct crush_choose_arg {
  *
  */
 struct crush_choose_arg_map {
+#ifdef __KERNEL__
+	struct rb_node node;
+	u64 choose_args_index;
+#endif
 	struct crush_choose_arg *args; /*!< replacement for each bucket
                                             in the crushmap */
 	__u32 size;                    /*!< size of the __args__ array */
@@ -294,6 +299,9 @@ struct crush_map {
 	__u32 allowed_bucket_algs;
 
 	__u32 *choose_tries;
+#else
+	/* CrushWrapper::choose_args */
+	struct rb_root choose_args;
 #endif
 };
 
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 5bf94c04f645..4b428f46a8ca 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,6 +1,7 @@
 #ifdef __KERNEL__
 # include <linux/slab.h>
 # include <linux/crush/crush.h>
+void clear_choose_args(struct crush_map *c);
 #else
 # include "crush_compat.h"
 # include "crush.h"
@@ -127,6 +128,8 @@ void crush_destroy(struct crush_map *map)
 
 #ifndef __KERNEL__
 	kfree(map->choose_tries);
+#else
+	clear_choose_args(map);
 #endif
 	kfree(map);
 }
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9da0ee61aca5..f630d1072299 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -138,6 +138,177 @@ bad:
 	return -EINVAL;
 }
 
+static struct crush_choose_arg_map *alloc_choose_arg_map(void)
+{
+	struct crush_choose_arg_map *arg_map;
+
+	arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
+	if (!arg_map)
+		return NULL;
+
+	RB_CLEAR_NODE(&arg_map->node);
+	return arg_map;
+}
+
+static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
+{
+	if (arg_map) {
+		int i, j;
+
+		WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
+
+		for (i = 0; i < arg_map->size; i++) {
+			struct crush_choose_arg *arg = &arg_map->args[i];
+
+			for (j = 0; j < arg->weight_set_size; j++)
+				kfree(arg->weight_set[j].weights);
+			kfree(arg->weight_set);
+			kfree(arg->ids);
+		}
+		kfree(arg_map->args);
+		kfree(arg_map);
+	}
+}
+
+DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
+		node);
+
+void clear_choose_args(struct crush_map *c)
+{
+	while (!RB_EMPTY_ROOT(&c->choose_args)) {
+		struct crush_choose_arg_map *arg_map =
+		    rb_entry(rb_first(&c->choose_args),
+			     struct crush_choose_arg_map, node);
+
+		erase_choose_arg_map(&c->choose_args, arg_map);
+		free_choose_arg_map(arg_map);
+	}
+}
+
+static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
+{
+	u32 *a = NULL;
+	u32 len;
+	int ret;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len) {
+		u32 i;
+
+		a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
+		if (!a) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_need(p, end, len * sizeof(u32), e_inval);
+		for (i = 0; i < len; i++)
+			a[i] = ceph_decode_32(p);
+	}
+
+	*plen = len;
+	return a;
+
+e_inval:
+	ret = -EINVAL;
+fail:
+	kfree(a);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Assumes @arg is zero-initialized.
+ */
+static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
+{
+	int ret;
+
+	ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
+	if (arg->weight_set_size) {
+		u32 i;
+
+		arg->weight_set = kmalloc_array(arg->weight_set_size,
+						sizeof(*arg->weight_set),
+						GFP_NOIO);
+		if (!arg->weight_set)
+			return -ENOMEM;
+
+		for (i = 0; i < arg->weight_set_size; i++) {
+			struct crush_weight_set *w = &arg->weight_set[i];
+
+			w->weights = decode_array_32_alloc(p, end, &w->size);
+			if (IS_ERR(w->weights)) {
+				ret = PTR_ERR(w->weights);
+				w->weights = NULL;
+				return ret;
+			}
+		}
+	}
+
+	arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
+	if (IS_ERR(arg->ids)) {
+		ret = PTR_ERR(arg->ids);
+		arg->ids = NULL;
+		return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_choose_args(void **p, void *end, struct crush_map *c)
+{
+	struct crush_choose_arg_map *arg_map = NULL;
+	u32 num_choose_arg_maps, num_buckets;
+	int ret;
+
+	ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
+	while (num_choose_arg_maps--) {
+		arg_map = alloc_choose_arg_map();
+		if (!arg_map) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_64_safe(p, end, arg_map->choose_args_index,
+				    e_inval);
+		arg_map->size = c->max_buckets;
+		arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
+					GFP_NOIO);
+		if (!arg_map->args) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_32_safe(p, end, num_buckets, e_inval);
+		while (num_buckets--) {
+			struct crush_choose_arg *arg;
+			u32 bucket_index;
+
+			ceph_decode_32_safe(p, end, bucket_index, e_inval);
+			if (bucket_index >= arg_map->size)
+				goto e_inval;
+
+			arg = &arg_map->args[bucket_index];
+			ret = decode_choose_arg(p, end, arg);
+			if (ret)
+				goto fail;
+		}
+
+		insert_choose_arg_map(&c->choose_args, arg_map);
+	}
+
+	return 0;
+
+e_inval:
+	ret = -EINVAL;
+fail:
+	free_choose_arg_map(arg_map);
+	return ret;
+}
+
 static void crush_finalize(struct crush_map *c)
 {
 	__s32 b;
@@ -179,6 +350,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	if (c == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	c->choose_args = RB_ROOT;
+
         /* set tunables to default values */
         c->choose_local_tries = 2;
         c->choose_local_fallback_tries = 5;
@@ -372,6 +545,21 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	dout("crush decode tunable chooseleaf_stable = %d\n",
 	     c->chooseleaf_stable);
 
+	if (*p != end) {
+		/* class_map */
+		ceph_decode_skip_map(p, end, 32, 32, bad);
+		/* class_name */
+		ceph_decode_skip_map(p, end, 32, string, bad);
+		/* class_bucket */
+		ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
+	}
+
+	if (*p != end) {
+		err = decode_choose_args(p, end, c);
+		if (err)
+			goto bad;
+	}
+
 done:
 	crush_finalize(c);
 	dout("crush_decode success\n");
@@ -2103,15 +2291,21 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
 
 static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 		    int *result, int result_max,
-		    const __u32 *weight, int weight_max)
+		    const __u32 *weight, int weight_max,
+		    u64 choose_args_index)
 {
+	struct crush_choose_arg_map *arg_map;
 	int r;
 
 	BUG_ON(result_max > CEPH_PG_MAX_SIZE);
 
+	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
+					choose_args_index);
+
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-			  weight, weight_max, map->crush_workspace, NULL);
+			  weight, weight_max, map->crush_workspace,
+			  arg_map ? arg_map->args : NULL);
 	mutex_unlock(&map->crush_workspace_mutex);
 
 	return r;
@@ -2181,7 +2375,7 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
 	}
 
 	len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
-		       osdmap->osd_weight, osdmap->max_osd);
+		       osdmap->osd_weight, osdmap->max_osd, pi->id);
 	if (len < 0) {
 		pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
 		       len, ruleno, pi->id, pi->crush_ruleset, pi->type,
-- 
cgit v1.2.3


From 0bb05da2ec57163b7a25efef001ed8f52b18b070 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:06 +0200
Subject: libceph: osd_state is 32 bits wide in luminous

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h |  4 ++--
 net/ceph/debugfs.c          |  2 +-
 net/ceph/osdmap.c           | 29 ++++++++++++++++++++---------
 3 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index c612cff81f5c..a0996cb9faed 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -162,7 +162,7 @@ struct ceph_osdmap {
 	u32 flags;         /* CEPH_OSDMAP_* */
 
 	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
-	u8 *osd_state;     /* CEPH_OSD_* */
+	u32 *osd_state;    /* CEPH_OSD_* */
 	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
 	struct ceph_entity_addr *osd_addr;
 
@@ -203,7 +203,7 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd)
 	return !ceph_osd_is_up(map, osd);
 }
 
-extern char *ceph_osdmap_state_str(char *str, int len, int state);
+char *ceph_osdmap_state_str(char *str, int len, u32 state);
 extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
 
 static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 4f57d5bcaba2..fa5233e0d01c 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -77,7 +77,7 @@ static int osdmap_show(struct seq_file *s, void *p)
 	}
 	for (i = 0; i < map->max_osd; i++) {
 		struct ceph_entity_addr *addr = &map->osd_addr[i];
-		int state = map->osd_state[i];
+		u32 state = map->osd_state[i];
 		char sb[64];
 
 		seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f630d1072299..864789c5974e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -11,7 +11,7 @@
 #include <linux/crush/hash.h>
 #include <linux/crush/mapper.h>
 
-char *ceph_osdmap_state_str(char *str, int len, int state)
+char *ceph_osdmap_state_str(char *str, int len, u32 state)
 {
 	if (!len)
 		return str;
@@ -984,7 +984,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
  */
 static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
 {
-	u8 *state;
+	u32 *state;
 	u32 *weight;
 	struct ceph_entity_addr *addr;
 	int i;
@@ -1484,13 +1484,21 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 
 	/* osd_state, osd_weight, osd_addrs->client_addr */
 	ceph_decode_need(p, end, 3*sizeof(u32) +
-			 map->max_osd*(1 + sizeof(*map->osd_weight) +
+			 map->max_osd*((struct_v >= 5 ? sizeof(u32) :
+							sizeof(u8)) +
+				       sizeof(*map->osd_weight) +
 				       sizeof(*map->osd_addr)), e_inval);
 
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
 
-	ceph_decode_copy(p, map->osd_state, map->max_osd);
+	if (struct_v >= 5) {
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_state[i] = ceph_decode_32(p);
+	} else {
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_state[i] = ceph_decode_8(p);
+	}
 
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
@@ -1598,7 +1606,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
  *     new_up_client: { osd=6, addr=... } # set osd_state and addr
  *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
  */
-static int decode_new_up_state_weight(void **p, void *end,
+static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
 				      struct ceph_osdmap *map)
 {
 	void *new_up_client;
@@ -1614,7 +1622,7 @@ static int decode_new_up_state_weight(void **p, void *end,
 
 	new_state = *p;
 	ceph_decode_32_safe(p, end, len, e_inval);
-	len *= sizeof(u32) + sizeof(u8);
+	len *= sizeof(u32) + (struct_v >= 5 ? sizeof(u32) : sizeof(u8));
 	ceph_decode_need(p, end, len, e_inval);
 	*p += len;
 
@@ -1650,11 +1658,14 @@ static int decode_new_up_state_weight(void **p, void *end,
 	len = ceph_decode_32(p);
 	while (len--) {
 		s32 osd;
-		u8 xorstate;
+		u32 xorstate;
 		int ret;
 
 		osd = ceph_decode_32(p);
-		xorstate = ceph_decode_8(p);
+		if (struct_v >= 5)
+			xorstate = ceph_decode_32(p);
+		else
+			xorstate = ceph_decode_8(p);
 		if (xorstate == 0)
 			xorstate = CEPH_OSD_UP;
 		BUG_ON(osd >= map->max_osd);
@@ -1788,7 +1799,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_up_client, new_state, new_weight */
-	err = decode_new_up_state_weight(p, end, map);
+	err = decode_new_up_state_weight(p, end, struct_v, map);
 	if (err)
 		goto bad;
 
-- 
cgit v1.2.3


From 33e9c8dbfbcef8e4cda8e43a445e692ab7e0d8c0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 26 Jun 2017 12:05:55 +0200
Subject: libceph: advertise support for NEW_OSDOP_ENCODING and SERVER_LUMINOUS

All four SERVER_LUMINOUS feature bits are implemented, switch it on!

NEW_OSDOP_ENCODING doesn't mean much for the client (it signifies
support for MOSDOp v6) but needs to be enabled in order to get the
latest (currently v25) pg_pool_t.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Acked-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 78a58770e6e9..f0f6c537b64c 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -184,6 +184,11 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
 	 CEPH_FEATURE_CRUSH_TUNABLES |		\
+	 CEPH_FEATURE_SERVER_LUMINOUS |		\
+	 CEPH_FEATURE_RESEND_ON_SPLIT |		\
+	 CEPH_FEATURE_RADOS_BACKOFF |		\
+	 CEPH_FEATURE_OSDMAP_PG_UPMAP |		\
+	 CEPH_FEATURE_CRUSH_CHOOSE_ARGS |	\
 	 CEPH_FEATURE_MSG_AUTH |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
@@ -199,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_NEW_OSDOP_ENCODING |	\
 	 CEPH_FEATURE_SERVER_JEWEL |		\
 	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
-- 
cgit v1.2.3


From 5e14e9fac308daf5607362f879e6de67e0b8dd5b Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Tue, 20 Jun 2017 10:17:40 +0200
Subject: PCI: tango: Add Sigma Designs Tango SMP8759 PCIe host bridge support

This driver is required to work around several hardware bugs in the PCIe
controller.

The SMP8759 does not support legacy interrupts or IO space.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
[bhelgaas: add CONFIG_BROKEN dependency, various cleanups]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/host/Kconfig      |  14 +++++
 drivers/pci/host/Makefile     |   1 +
 drivers/pci/host/pcie-tango.c | 141 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h       |   2 +
 4 files changed, 158 insertions(+)
 create mode 100644 drivers/pci/host/pcie-tango.c

(limited to 'include/linux')

diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 7f47cd5e10a5..8e54115681d8 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -180,6 +180,20 @@ config PCIE_ROCKCHIP
 	  There is 1 internal PCIe port available to support GEN2 with
 	  4 slots.
 
+config PCIE_TANGO_SMP8759
+	bool "Tango SMP8759 PCIe controller (DANGEROUS)"
+	depends on ARCH_TANGO && PCI_MSI && OF
+	depends on BROKEN
+	select PCI_HOST_COMMON
+	help
+	  Say Y here to enable PCIe controller support for Sigma Designs
+	  Tango SMP8759-based systems.
+
+	  Note: The SMP8759 controller multiplexes PCI config and MMIO
+	  accesses, and Linux doesn't provide a way to serialize them.
+	  This can lead to data corruption if drivers perform concurrent
+	  config and MMIO accesses.
+
 config VMD
 	depends on PCI_MSI && X86_64 && SRCU
 	tristate "Intel Volume Management Device Driver"
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index cab879578003..eb48aa6377c8 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_PCIE_IPROC_BCMA) += pcie-iproc-bcma.o
 obj-$(CONFIG_PCIE_ALTERA) += pcie-altera.o
 obj-$(CONFIG_PCIE_ALTERA_MSI) += pcie-altera-msi.o
 obj-$(CONFIG_PCIE_ROCKCHIP) += pcie-rockchip.o
+obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
 obj-$(CONFIG_VMD) += vmd.o
 
 # The following drivers are for devices that use the generic ACPI
diff --git a/drivers/pci/host/pcie-tango.c b/drivers/pci/host/pcie-tango.c
new file mode 100644
index 000000000000..6bbb81f06a53
--- /dev/null
+++ b/drivers/pci/host/pcie-tango.c
@@ -0,0 +1,141 @@
+#include <linux/pci-ecam.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+
+#define SMP8759_MUX		0x48
+#define SMP8759_TEST_OUT	0x74
+
+struct tango_pcie {
+	void __iomem *base;
+};
+
+static int smp8759_config_read(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct tango_pcie *pcie = dev_get_drvdata(cfg->parent);
+	int ret;
+
+	/* Reads in configuration space outside devfn 0 return garbage */
+	if (devfn != 0)
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+
+	/*
+	 * PCI config and MMIO accesses are muxed.  Linux doesn't have a
+	 * mutual exclusion mechanism for config vs. MMIO accesses, so
+	 * concurrent accesses may cause corruption.
+	 */
+	writel_relaxed(1, pcie->base + SMP8759_MUX);
+	ret = pci_generic_config_read(bus, devfn, where, size, val);
+	writel_relaxed(0, pcie->base + SMP8759_MUX);
+
+	return ret;
+}
+
+static int smp8759_config_write(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct tango_pcie *pcie = dev_get_drvdata(cfg->parent);
+	int ret;
+
+	writel_relaxed(1, pcie->base + SMP8759_MUX);
+	ret = pci_generic_config_write(bus, devfn, where, size, val);
+	writel_relaxed(0, pcie->base + SMP8759_MUX);
+
+	return ret;
+}
+
+static struct pci_ecam_ops smp8759_ecam_ops = {
+	.bus_shift	= 20,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= smp8759_config_read,
+		.write		= smp8759_config_write,
+	}
+};
+
+static int tango_pcie_link_up(struct tango_pcie *pcie)
+{
+	void __iomem *test_out = pcie->base + SMP8759_TEST_OUT;
+	int i;
+
+	writel_relaxed(16, test_out);
+	for (i = 0; i < 10; ++i) {
+		u32 ltssm_state = readl_relaxed(test_out) >> 8;
+		if ((ltssm_state & 0x1f) == 0xf) /* L0 */
+			return 1;
+		usleep_range(3000, 4000);
+	}
+
+	return 0;
+}
+
+static int tango_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct tango_pcie *pcie;
+	struct resource *res;
+	int ret;
+
+	dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n");
+	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	pcie->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	platform_set_drvdata(pdev, pcie);
+
+	if (!tango_pcie_link_up(pcie))
+		return -ENODEV;
+
+	return pci_host_common_probe(pdev, &smp8759_ecam_ops);
+}
+
+static const struct of_device_id tango_pcie_ids[] = {
+	{ .compatible = "sigma,smp8759-pcie" },
+	{ },
+};
+
+static struct platform_driver tango_pcie_driver = {
+	.probe	= tango_pcie_probe,
+	.driver	= {
+		.name = KBUILD_MODNAME,
+		.of_match_table = tango_pcie_ids,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(tango_pcie_driver);
+
+/*
+ * The root complex advertises the wrong device class.
+ * Header Type 1 is for PCI-to-PCI bridges.
+ */
+static void tango_fixup_class(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0024, tango_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0028, tango_fixup_class);
+
+/*
+ * The root complex exposes a "fake" BAR, which is used to filter
+ * bus-to-system accesses.  Only accesses within the range defined by this
+ * BAR are forwarded to the host, others are ignored.
+ *
+ * By default, the DMA framework expects an identity mapping, and DRAM0 is
+ * mapped at 0x80000000.
+ */
+static void tango_fixup_bar(struct pci_dev *dev)
+{
+	dev->non_compliant_bars = true;
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0x80000000);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0024, tango_fixup_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0028, tango_fixup_bar);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 5f6b71d15393..c71e532da458 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1373,6 +1373,8 @@
 #define PCI_DEVICE_ID_TTI_HPT374	0x0008
 #define PCI_DEVICE_ID_TTI_HPT372N	0x0009	/* apparently a 372N variant? */
 
+#define PCI_VENDOR_ID_SIGMA		0x1105
+
 #define PCI_VENDOR_ID_VIA		0x1106
 #define PCI_DEVICE_ID_VIA_8763_0	0x0198
 #define PCI_DEVICE_ID_VIA_8380_0	0x0204
-- 
cgit v1.2.3


From 49d31c2f389acfe83417083e1208422b4091cd9e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 7 Jul 2017 14:51:19 -0400
Subject: dentry name snapshots

take_dentry_name_snapshot() takes a safe snapshot of dentry name;
if the name is a short one, it gets copied into caller-supplied
structure, otherwise an extra reference to external name is grabbed
(those are never modified).  In either case the pointer to stable
string is stored into the same structure.

dentry must be held by the caller of take_dentry_name_snapshot(),
but may be freely dropped afterwards - the snapshot will stay
until destroyed by release_dentry_name_snapshot().

Intended use:
	struct name_snapshot s;

	take_dentry_name_snapshot(&s, dentry);
	...
	access s.name
	...
	release_dentry_name_snapshot(&s);

Replaces fsnotify_oldname_...(), gets used in fsnotify to obtain the name
to pass down with event.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c              | 27 +++++++++++++++++++++++++++
 fs/debugfs/inode.c       | 10 +++++-----
 fs/namei.c               |  8 ++++----
 fs/notify/fsnotify.c     |  8 ++++++--
 include/linux/dcache.h   |  6 ++++++
 include/linux/fsnotify.h | 31 -------------------------------
 6 files changed, 48 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index b85da8897ffa..831f3a9a8f05 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -277,6 +277,33 @@ static inline int dname_external(const struct dentry *dentry)
 	return dentry->d_name.name != dentry->d_iname;
 }
 
+void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	if (unlikely(dname_external(dentry))) {
+		struct external_name *p = external_name(dentry);
+		atomic_inc(&p->u.count);
+		spin_unlock(&dentry->d_lock);
+		name->name = p->name;
+	} else {
+		memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
+		spin_unlock(&dentry->d_lock);
+		name->name = name->inline_name;
+	}
+}
+EXPORT_SYMBOL(take_dentry_name_snapshot);
+
+void release_dentry_name_snapshot(struct name_snapshot *name)
+{
+	if (unlikely(name->name != name->inline_name)) {
+		struct external_name *p;
+		p = container_of(name->name, struct external_name, name[0]);
+		if (unlikely(atomic_dec_and_test(&p->u.count)))
+			kfree_rcu(p, u.head);
+	}
+}
+EXPORT_SYMBOL(release_dentry_name_snapshot);
+
 static inline void __d_set_inode_and_type(struct dentry *dentry,
 					  struct inode *inode,
 					  unsigned type_flags)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e892ae7d89f8..acd3be2cc691 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -766,7 +766,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 {
 	int error;
 	struct dentry *dentry = NULL, *trap;
-	const char *old_name;
+	struct name_snapshot old_name;
 
 	trap = lock_rename(new_dir, old_dir);
 	/* Source or destination directories don't exist? */
@@ -781,19 +781,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 	if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
 		goto exit;
 
-	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+	take_dentry_name_snapshot(&old_name, old_dentry);
 
 	error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
 			      dentry, 0);
 	if (error) {
-		fsnotify_oldname_free(old_name);
+		release_dentry_name_snapshot(&old_name);
 		goto exit;
 	}
 	d_move(old_dentry, dentry);
-	fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name,
+	fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name,
 		d_is_dir(old_dentry),
 		NULL, old_dentry);
-	fsnotify_oldname_free(old_name);
+	release_dentry_name_snapshot(&old_name);
 	unlock_rename(new_dir, old_dir);
 	dput(dentry);
 	return old_dentry;
diff --git a/fs/namei.c b/fs/namei.c
index efe53a5d0737..c5588e837b15 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4362,11 +4362,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	int error;
 	bool is_dir = d_is_dir(old_dentry);
-	const unsigned char *old_name;
 	struct inode *source = old_dentry->d_inode;
 	struct inode *target = new_dentry->d_inode;
 	bool new_is_dir = false;
 	unsigned max_links = new_dir->i_sb->s_max_links;
+	struct name_snapshot old_name;
 
 	if (source == target)
 		return 0;
@@ -4413,7 +4413,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		return error;
 
-	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+	take_dentry_name_snapshot(&old_name, old_dentry);
 	dget(new_dentry);
 	if (!is_dir || (flags & RENAME_EXCHANGE))
 		lock_two_nondirectories(source, target);
@@ -4468,14 +4468,14 @@ out:
 		inode_unlock(target);
 	dput(new_dentry);
 	if (!error) {
-		fsnotify_move(old_dir, new_dir, old_name, is_dir,
+		fsnotify_move(old_dir, new_dir, old_name.name, is_dir,
 			      !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
 		if (flags & RENAME_EXCHANGE) {
 			fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
 				      new_is_dir, NULL, new_dentry);
 		}
 	}
-	fsnotify_oldname_free(old_name);
+	release_dentry_name_snapshot(&old_name);
 
 	return error;
 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 01a9f0f007d4..0c4583b61717 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -161,16 +161,20 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask
 	if (unlikely(!fsnotify_inode_watches_children(p_inode)))
 		__fsnotify_update_child_dentry_flags(p_inode);
 	else if (p_inode->i_fsnotify_mask & mask) {
+		struct name_snapshot name;
+
 		/* we are notifying a parent so come up with the new mask which
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
+		take_dentry_name_snapshot(&name, dentry);
 		if (path)
 			ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
-				       dentry->d_name.name, 0);
+				       name.name, 0);
 		else
 			ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-				       dentry->d_name.name, 0);
+				       name.name, 0);
+		release_dentry_name_snapshot(&name);
 	}
 
 	dput(parent);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d2e38dc6172c..025727bf6797 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -591,5 +591,11 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
 }
 
+struct name_snapshot {
+	const char *name;
+	char inline_name[DNAME_INLINE_LEN];
+};
+void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
+void release_dentry_name_snapshot(struct name_snapshot *);
 
 #endif	/* __LINUX_DCACHE_H */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index b43d3f5bd9ea..b78aa7ac77ce 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -293,35 +293,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 	}
 }
 
-#if defined(CONFIG_FSNOTIFY)	/* notify helpers */
-
-/*
- * fsnotify_oldname_init - save off the old filename before we change it
- */
-static inline const unsigned char *fsnotify_oldname_init(const unsigned char *name)
-{
-	return kstrdup(name, GFP_KERNEL);
-}
-
-/*
- * fsnotify_oldname_free - free the name we got from fsnotify_oldname_init
- */
-static inline void fsnotify_oldname_free(const unsigned char *old_name)
-{
-	kfree(old_name);
-}
-
-#else	/* CONFIG_FSNOTIFY */
-
-static inline const char *fsnotify_oldname_init(const unsigned char *name)
-{
-	return NULL;
-}
-
-static inline void fsnotify_oldname_free(const unsigned char *old_name)
-{
-}
-
-#endif	/*  CONFIG_FSNOTIFY */
-
 #endif	/* _LINUX_FS_NOTIFY_H */
-- 
cgit v1.2.3


From 659b957f20c78fd470083c80af5e79eedfb39e5b Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Fri, 7 Jul 2017 22:37:24 +0530
Subject: kprobes: Rename [arch_]function_offset_within_entry() to
 [arch_]kprobe_on_func_entry()

Rename function_offset_within_entry() to scope it to kprobe namespace by
using kprobe_ prefix, and to also simplify it.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Suggested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3aa6c7e2e4fb6e00f3c24fa306496a66edb558ea.1499443367.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/kprobes.c | 2 +-
 include/linux/kprobes.h       | 4 ++--
 kernel/kprobes.c              | 8 ++++----
 kernel/trace/trace_kprobe.c   | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01addfb0ed0a..586508e949f0 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -221,7 +221,7 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs
 	kcb->kprobe_saved_msr = regs->msr;
 }
 
-bool arch_function_offset_within_entry(unsigned long offset)
+bool arch_kprobe_on_func_entry(unsigned long offset)
 {
 #ifdef PPC64_ELF_ABI_v2
 #ifdef CONFIG_KPROBES_ON_FTRACE
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 541df0b5b815..bd2684700b74 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -267,8 +267,8 @@ extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
-extern bool arch_function_offset_within_entry(unsigned long offset);
-extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
+extern bool arch_kprobe_on_func_entry(unsigned long offset);
+extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
 
 extern bool within_kprobe_blacklist(unsigned long addr);
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6756d750b31b..a519219169fd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1888,12 +1888,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
-bool __weak arch_function_offset_within_entry(unsigned long offset)
+bool __weak arch_kprobe_on_func_entry(unsigned long offset)
 {
 	return !offset;
 }
 
-bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
 {
 	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
 
@@ -1901,7 +1901,7 @@ bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsign
 		return false;
 
 	if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
-						!arch_function_offset_within_entry(offset))
+						!arch_kprobe_on_func_entry(offset))
 		return false;
 
 	return true;
@@ -1914,7 +1914,7 @@ int register_kretprobe(struct kretprobe *rp)
 	int i;
 	void *addr;
 
-	if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
+	if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
 		return -EINVAL;
 
 	if (kretprobe_blacklist_size) {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b53c8d369163..2c5221819be5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -720,7 +720,7 @@ static int create_trace_kprobe(int argc, char **argv)
 			return ret;
 		}
 		if (offset && is_return &&
-		    !function_offset_within_entry(NULL, symbol, offset)) {
+		    !kprobe_on_func_entry(NULL, symbol, offset)) {
 			pr_info("Given offset is not valid for return probe.\n");
 			return -EINVAL;
 		}
-- 
cgit v1.2.3


From f51048c3e07b68c90b21a77541fc4b208f9244d7 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 6 Jul 2017 15:01:57 -0700
Subject: bonding: avoid NETDEV_CHANGEMTU event when unregistering slave

As Hongjun/Nicolas summarized in their original patch:

"
When a device changes from one netns to another, it's first unregistered,
then the netns reference is updated and the dev is registered in the new
netns. Thus, when a slave moves to another netns, it is first
unregistered. This triggers a NETDEV_UNREGISTER event which is caught by
the bonding driver. The driver calls bond_release(), which calls
dev_set_mtu() and thus triggers NETDEV_CHANGEMTU (the device is still in
the old netns).
"

This is a very special case, because the device is being unregistered
no one should still care about the NETDEV_CHANGEMTU event triggered
at this point, we can avoid broadcasting this event on this path,
and avoid touching inetdev_event()/addrconf_notify() path.

It requires to export __dev_set_mtu() to bonding driver.

Reported-by: Hongjun Li <hongjun.li@6wind.com>
Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 15 +++++++++------
 include/linux/netdevice.h       |  1 +
 net/core/dev.c                  |  3 ++-
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2865f31c6076..14ff622190a5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1820,7 +1820,7 @@ err_undo_flags:
  */
 static int __bond_release_one(struct net_device *bond_dev,
 			      struct net_device *slave_dev,
-			      bool all)
+			      bool all, bool unregister)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave, *oldcurrent;
@@ -1965,7 +1965,10 @@ static int __bond_release_one(struct net_device *bond_dev,
 		dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
 	}
 
-	dev_set_mtu(slave_dev, slave->original_mtu);
+	if (unregister)
+		__dev_set_mtu(slave_dev, slave->original_mtu);
+	else
+		dev_set_mtu(slave_dev, slave->original_mtu);
 
 	slave_dev->priv_flags &= ~IFF_BONDING;
 
@@ -1977,7 +1980,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 /* A wrapper used because of ndo_del_link */
 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 {
-	return __bond_release_one(bond_dev, slave_dev, false);
+	return __bond_release_one(bond_dev, slave_dev, false, false);
 }
 
 /* First release a slave and then destroy the bond if no more slaves are left.
@@ -1989,7 +1992,7 @@ static int  bond_release_and_destroy(struct net_device *bond_dev,
 	struct bonding *bond = netdev_priv(bond_dev);
 	int ret;
 
-	ret = bond_release(bond_dev, slave_dev);
+	ret = __bond_release_one(bond_dev, slave_dev, false, true);
 	if (ret == 0 && !bond_has_slaves(bond)) {
 		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
 		netdev_info(bond_dev, "Destroying bond %s\n",
@@ -3060,7 +3063,7 @@ static int bond_slave_netdev_event(unsigned long event,
 		if (bond_dev->type != ARPHRD_ETHER)
 			bond_release_and_destroy(bond_dev, slave_dev);
 		else
-			bond_release(bond_dev, slave_dev);
+			__bond_release_one(bond_dev, slave_dev, false, true);
 		break;
 	case NETDEV_UP:
 	case NETDEV_CHANGE:
@@ -4252,7 +4255,7 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	/* Release the bonded slaves */
 	bond_for_each_slave(bond, slave, iter)
-		__bond_release_one(bond_dev, slave->dev, true);
+		__bond_release_one(bond_dev, slave->dev, true, true);
 	netdev_info(bond_dev, "Released all slaves\n");
 
 	arr = rtnl_dereference(bond->slave_arr);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e48ee2eaaa3e..779b23595596 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3284,6 +3284,7 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags,
 int dev_change_name(struct net_device *, const char *);
 int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int __dev_set_mtu(struct net_device *, int);
 int dev_set_mtu(struct net_device *, int);
 void dev_set_group(struct net_device *, int);
 int dev_set_mac_address(struct net_device *, struct sockaddr *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7098fba52be1..02440518dd69 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6765,7 +6765,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 }
 EXPORT_SYMBOL(dev_change_flags);
 
-static int __dev_set_mtu(struct net_device *dev, int new_mtu)
+int __dev_set_mtu(struct net_device *dev, int new_mtu)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
@@ -6775,6 +6775,7 @@ static int __dev_set_mtu(struct net_device *dev, int new_mtu)
 	dev->mtu = new_mtu;
 	return 0;
 }
+EXPORT_SYMBOL(__dev_set_mtu);
 
 /**
  *	dev_set_mtu - Change maximum transfer unit
-- 
cgit v1.2.3


From 8f1a357d41a22009150cf404b5aa5876efdb59b1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 6 Jul 2017 20:26:17 +0300
Subject: i2c: Provide a stub for i2c_detect_slave_mode()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers would like to call i2c_detect_slave_mode() even if !I2C_SLAVE.
Give them what they want to,

Otherwise kernel will not compile:
drivers/i2c/busses/i2c-designware-platdrv.c: In function ‘dw_i2c_plat_probe’:
drivers/i2c/busses/i2c-designware-platdrv.c:331:6: error: implicit declaration of function ‘i2c_detect_slave_mode’ [-Werror=implicit-function-declaration]
  if (i2c_detect_slave_mode(&pdev->dev))
      ^~~~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors

Fixes: 6e38cf3b4421 ("i2c: designware: Let slave adapter support be optional")
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 72d0ece70ed3..00ca5b86a753 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -295,6 +295,8 @@ static inline int i2c_slave_event(struct i2c_client *client,
 {
 	return client->slave_cb(client, event, val);
 }
+#else
+static inline bool i2c_detect_slave_mode(struct device *dev) { return false; }
 #endif
 
 /**
-- 
cgit v1.2.3


From d1438ad8f3eec7207618b8e01f9f3eec7b6f67c4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 7 Jul 2017 18:08:25 -0700
Subject: nvme_fc/nvmet_fc: revise Create Association descriptor length

Revises the Create Association LS for the amount of pad expected in 1.16.

Add defines for the minimum lengths that a target can accept (e.g. variable
pad lengths)

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme-fc.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index bc711a10be05..21c37e39e41a 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -17,6 +17,7 @@
 
 /*
  * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
+ * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
  */
 
 #ifndef _NVME_FC_H
@@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
 	uuid_t	hostid;
 	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
 	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
-	u8	rsvd632[384];
+	__be32	rsvd584[108];		/* pad to 1016 bytes,
+					 * which makes overall LS rqst
+					 * payload 1024 bytes
+					 */
 };
 
+#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN	\
+		offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584)
+
+#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN	\
+		(FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \
+		 offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio))
+
+
+
 /* FCNVME_LSDESC_CREATE_CONN_CMD */
 struct fcnvme_lsdesc_cr_conn_cmd {
 	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
@@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst {
 	struct fcnvme_lsdesc_cr_assoc_cmd	assoc_cmd;
 };
 
+#define FCNVME_LSDESC_CRA_RQST_MINLEN	\
+		(offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \
+			FCNVME_LSDESC_CRA_CMD_DESC_MINLEN)
+
+#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN	\
+		FCNVME_LSDESC_CRA_CMD_DESC_MINLEN
+
+
 struct fcnvme_ls_cr_assoc_acc {
 	struct fcnvme_ls_acc_hdr		hdr;
 	struct fcnvme_lsdesc_assoc_id		associd;
-- 
cgit v1.2.3


From 7e988b103d0d52190244517edc76e649071284bb Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 7 Jul 2017 15:49:00 +0200
Subject: KVM: use correct accessor function for __kvm_memslots

kvm memslots are protected by srcu and not by rcu. We must use
srcu_dereference_check instead of rcu_dereference_check.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b3ca77a96b2d..648b34cabb38 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -568,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm);
 
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-	return rcu_dereference_check(kvm->memslots[as_id],
-			srcu_read_lock_held(&kvm->srcu)
-			|| lockdep_is_held(&kvm->slots_lock));
+	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
+			lockdep_is_held(&kvm->slots_lock));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
-- 
cgit v1.2.3


From c43aeb198048f64abda8655fdcdebe71cf1877ba Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 10 Jul 2017 07:40:49 -0400
Subject: fix brown paperbag bug in inlined copy_..._iter()

"copied nothing" == "return 0", not "return full size".

Fixes: aa28de275a24 "iov_iter/hardening: move object size checks to inlined part"
Spotted-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 342d2dc225b9..8a642cda641c 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -103,7 +103,7 @@ static __always_inline __must_check
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return bytes;
+		return 0;
 	else
 		return _copy_to_iter(addr, bytes, i);
 }
@@ -112,7 +112,7 @@ static __always_inline __must_check
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter(addr, bytes, i);
 }
@@ -130,7 +130,7 @@ static __always_inline __must_check
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter_nocache(addr, bytes, i);
 }
@@ -160,7 +160,7 @@ static __always_inline __must_check
 size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter_flushcache(addr, bytes, i);
 }
-- 
cgit v1.2.3


From 42a6e0996084972574e0a2b23e7326b78b0f64c5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 10 Jul 2017 13:22:50 +0200
Subject: nvmem: include linux/err.h from header

The new support for nvmem devices from the rtc layer caused a build
error in some configurations:

include/linux/nvmem-provider.h: In function 'nvmem_register':
include/linux/nvmem-provider.h:51:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration]

This adds the missing include to ensure we can always include
the header.

Fixes: 697e5a47aa12 ("rtc: add generic nvmem support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/nvmem-provider.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index cd93416d762e..497706f5adca 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -12,6 +12,9 @@
 #ifndef _LINUX_NVMEM_PROVIDER_H
 #define _LINUX_NVMEM_PROVIDER_H
 
+#include <linux/err.h>
+#include <linux/errno.h>
+
 struct nvmem_device;
 struct nvmem_cell_info;
 typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset,
-- 
cgit v1.2.3


From 23955622ff8d231bcc9650b3d06583f117a6e3ba Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Mon, 10 Jul 2017 15:47:11 -0700
Subject: swap: add block io poll in swapin path

For fast flash disk, async IO could introduce overhead because of
context switch.  block-mq now supports IO poll, which improves
performance and latency a lot.  swapin is a good place to use this
technique, because the task is waiting for the swapin page to continue
execution.

In my virtual machine, directly read 4k data from a NVMe with iopoll is
about 60% better than that without poll.  With iopoll support in swapin
patch, my microbenchmark (a task does random memory write) is about
10%~25% faster.  CPU utilization increases a lot though, 2x and even 3x
CPU utilization.  This will depend on disk speed.

While iopoll in swapin isn't intended for all usage cases, it's a win
for latency sensistive workloads with high speed swap disk.  block layer
has knob to control poll in runtime.  If poll isn't enabled in block
layer, there should be no noticeable change in swapin.

I got a chance to run the same test in a NVMe with DRAM as the media.
In simple fio IO test, blkpoll boosts 50% performance in single thread
test and ~20% in 8 threads test.  So this is the base line.  In above
swap test, blkpoll boosts ~27% performance in single thread test.
blkpoll uses 2x CPU time though.

If we enable hybid polling, the performance gain has very slight drop
but CPU time is only 50% worse than that without blkpoll.  Also we can
adjust parameter of hybid poll, with it, the CPU time penality is
reduced further.  In 8 threads test, blkpoll doesn't help though.  The
performance is similar to that without blkpoll, but cpu utilization is
similar too.  There is lock contention in swap path.  The cpu time
spending on blkpoll isn't high.  So overall, blkpoll swapin isn't worse
than that without it.

The swapin readahead might read several pages in in the same time and
form a big IO request.  Since the IO will take longer time, it doesn't
make sense to do poll, so the patch only does iopoll for single page
swapin.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c
Signed-off-by: Shaohua Li <shli@fb.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  5 +++--
 mm/madvise.c         |  4 ++--
 mm/page_io.c         | 23 +++++++++++++++++++++--
 mm/swap_state.c      | 10 ++++++----
 mm/swapfile.c        |  2 +-
 5 files changed, 33 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5ab1c98c7d27..61e7180cee21 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -331,7 +331,7 @@ extern void kswapd_stop(int nid);
 #include <linux/blk_types.h> /* for bio_end_io_t */
 
 /* linux/mm/page_io.c */
-extern int swap_readpage(struct page *);
+extern int swap_readpage(struct page *page, bool do_poll);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern void end_swap_bio_write(struct bio *bio);
 extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -362,7 +362,8 @@ extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 extern struct page *lookup_swap_cache(swp_entry_t);
 extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
-			struct vm_area_struct *vma, unsigned long addr);
+			struct vm_area_struct *vma, unsigned long addr,
+			bool do_poll);
 extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr,
 			bool *new_page_allocated);
diff --git a/mm/madvise.c b/mm/madvise.c
index 25b78ee4fc2c..8eda1841c576 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -205,7 +205,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 			continue;
 
 		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
-								vma, index);
+							vma, index, false);
 		if (page)
 			put_page(page);
 	}
@@ -246,7 +246,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		}
 		swap = radix_to_swp_entry(page);
 		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
-								NULL, 0);
+							NULL, 0, false);
 		if (page)
 			put_page(page);
 	}
diff --git a/mm/page_io.c b/mm/page_io.c
index 2da71e627812..b6c4ac388209 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -117,6 +117,7 @@ static void swap_slot_free_notify(struct page *page)
 static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
+	struct task_struct *waiter = bio->bi_private;
 
 	if (bio->bi_status) {
 		SetPageError(page);
@@ -132,7 +133,9 @@ static void end_swap_bio_read(struct bio *bio)
 	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
+	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
+	wake_up_process(waiter);
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -329,11 +332,13 @@ out:
 	return ret;
 }
 
-int swap_readpage(struct page *page)
+int swap_readpage(struct page *page, bool do_poll)
 {
 	struct bio *bio;
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
+	blk_qc_t qc;
+	struct block_device *bdev;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -372,9 +377,23 @@ int swap_readpage(struct page *page)
 		ret = -ENOMEM;
 		goto out;
 	}
+	bdev = bio->bi_bdev;
+	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 	count_vm_event(PSWPIN);
-	submit_bio(bio);
+	bio_get(bio);
+	qc = submit_bio(bio);
+	while (do_poll) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(bio->bi_private))
+			break;
+
+		if (!blk_mq_poll(bdev_get_queue(bdev), qc))
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	bio_put(bio);
+
 out:
 	return ret;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 9c71b6b2562f..b68c93014f50 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -412,14 +412,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  * the swap entry is no longer in use.
  */
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-			struct vm_area_struct *vma, unsigned long addr)
+		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
 {
 	bool page_was_allocated;
 	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
 			vma, addr, &page_was_allocated);
 
 	if (page_was_allocated)
-		swap_readpage(retpage);
+		swap_readpage(retpage, do_poll);
 
 	return retpage;
 }
@@ -496,11 +496,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long start_offset, end_offset;
 	unsigned long mask;
 	struct blk_plug plug;
+	bool do_poll = true;
 
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
 		goto skip;
 
+	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -511,7 +513,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	for (offset = start_offset; offset <= end_offset ; offset++) {
 		/* Ok, do the async read-ahead now */
 		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
-						gfp_mask, vma, addr);
+						gfp_mask, vma, addr, false);
 		if (!page)
 			continue;
 		if (offset != entry_offset && likely(!PageTransCompound(page)))
@@ -522,7 +524,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 skip:
-	return read_swap_cache_async(entry, gfp_mask, vma, addr);
+	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
 }
 
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 811d90e1c929..6ba4aab2db0b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1868,7 +1868,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
 		swap_map = &si->swap_map[i];
 		entry = swp_entry(type, i);
 		page = read_swap_cache_async(entry,
-					GFP_HIGHUSER_MOVABLE, NULL, 0);
+					GFP_HIGHUSER_MOVABLE, NULL, 0, false);
 		if (!page) {
 			/*
 			 * Either swap_duplicate() failed because entry
-- 
cgit v1.2.3


From b37ff71cc626a0c1b5e098ff9a0b723815f6aaeb Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:38 -0700
Subject: mm: hwpoison: change PageHWPoison behavior on hugetlb pages

We'd like to narrow down the error region in memory error on hugetlb
pages.  However, currently we set PageHWPoison flags on all subpages in
the error hugepage and add # of subpages to num_hwpoison_pages, which
doesn't fit our purpose.

So this patch changes the behavior and we only set PageHWPoison on the
head page then increase num_hwpoison_pages only by 1.  This is a
preparation for narrow-down part which comes in later patches.

Link: http://lkml.kernel.org/r/1496305019-5493-4-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h |  9 -----
 mm/memory-failure.c     | 87 ++++++++++++++-----------------------------------
 2 files changed, 24 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 5c3a5f3e7eec..c5ff7b217ee6 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void)
 	atomic_long_dec(&num_poisoned_pages);
 }
 
-static inline void num_poisoned_pages_add(long num)
-{
-	atomic_long_add(num, &num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_sub(long num)
-{
-	atomic_long_sub(num, &num_poisoned_pages);
-}
 #else
 
 static inline swp_entry_t make_hwpoison_entry(struct page *page)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e3bf6432ed25..a9ddb0e72f5b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1009,22 +1009,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	return unmap_success;
 }
 
-static void set_page_hwpoison_huge_page(struct page *hpage)
-{
-	int i;
-	int nr_pages = 1 << compound_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		SetPageHWPoison(hpage + i);
-}
-
-static void clear_page_hwpoison_huge_page(struct page *hpage)
-{
-	int i;
-	int nr_pages = 1 << compound_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		ClearPageHWPoison(hpage + i);
-}
-
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -1050,7 +1034,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	struct page *hpage;
 	struct page *orig_head;
 	int res;
-	unsigned int nr_pages;
 	unsigned long page_flags;
 
 	if (!sysctl_memory_failure_recovery)
@@ -1064,24 +1047,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 
 	p = pfn_to_page(pfn);
 	orig_head = hpage = compound_head(p);
+
+	/* tmporary check code, to be updated in later patches */
+	if (PageHuge(p)) {
+		if (TestSetPageHWPoison(hpage)) {
+			pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+			return 0;
+		}
+		goto tmp;
+	}
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
 		return 0;
 	}
 
-	/*
-	 * Currently errors on hugetlbfs pages are measured in hugepage units,
-	 * so nr_pages should be 1 << compound_order.  OTOH when errors are on
-	 * transparent hugepages, they are supposed to be split and error
-	 * measurement is done in normal page units.  So nr_pages should be one
-	 * in this case.
-	 */
-	if (PageHuge(p))
-		nr_pages = 1 << compound_order(hpage);
-	else /* normal page or thp */
-		nr_pages = 1;
-	num_poisoned_pages_add(nr_pages);
+tmp:
+	num_poisoned_pages_inc();
 
 	/*
 	 * We need/can do nothing about count=0 pages.
@@ -1109,12 +1091,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 			if (PageHWPoison(hpage)) {
 				if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
 				    || (p != hpage && TestSetPageHWPoison(hpage))) {
-					num_poisoned_pages_sub(nr_pages);
+					num_poisoned_pages_dec();
 					unlock_page(hpage);
 					return 0;
 				}
 			}
-			set_page_hwpoison_huge_page(hpage);
 			res = dequeue_hwpoisoned_huge_page(hpage);
 			action_result(pfn, MF_MSG_FREE_HUGE,
 				      res ? MF_IGNORED : MF_DELAYED);
@@ -1137,7 +1118,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 				pr_err("Memory failure: %#lx: thp split failed\n",
 					pfn);
 			if (TestClearPageHWPoison(p))
-				num_poisoned_pages_sub(nr_pages);
+				num_poisoned_pages_dec();
 			put_hwpoison_page(p);
 			return -EBUSY;
 		}
@@ -1193,14 +1174,14 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 */
 	if (!PageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
-		num_poisoned_pages_sub(nr_pages);
+		num_poisoned_pages_dec();
 		unlock_page(hpage);
 		put_hwpoison_page(hpage);
 		return 0;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
-			num_poisoned_pages_sub(nr_pages);
+			num_poisoned_pages_dec();
 		unlock_page(hpage);
 		put_hwpoison_page(hpage);
 		return 0;
@@ -1219,14 +1200,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		put_hwpoison_page(hpage);
 		return 0;
 	}
-	/*
-	 * Set PG_hwpoison on all pages in an error hugepage,
-	 * because containment is done in hugepage unit for now.
-	 * Since we have done TestSetPageHWPoison() for the head page with
-	 * page lock held, we can safely set PG_hwpoison bits on tail pages.
-	 */
-	if (PageHuge(p))
-		set_page_hwpoison_huge_page(hpage);
 
 	/*
 	 * It's very difficult to mess with pages currently under IO
@@ -1397,7 +1370,6 @@ int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int freeit = 0;
-	unsigned int nr_pages;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
 
@@ -1442,8 +1414,6 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_order(page);
-
 	if (!get_hwpoison_page(p)) {
 		/*
 		 * Since HWPoisoned hugepage should have non-zero refcount,
@@ -1473,10 +1443,8 @@ int unpoison_memory(unsigned long pfn)
 	if (TestClearPageHWPoison(page)) {
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 pfn, &unpoison_rs);
-		num_poisoned_pages_sub(nr_pages);
+		num_poisoned_pages_dec();
 		freeit = 1;
-		if (PageHuge(page))
-			clear_page_hwpoison_huge_page(page);
 	}
 	unlock_page(page);
 
@@ -1608,14 +1576,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
 			ret = -EIO;
 	} else {
 		/* overcommit hugetlb page will be freed to buddy */
-		if (PageHuge(page)) {
-			set_page_hwpoison_huge_page(hpage);
+		SetPageHWPoison(page);
+		if (PageHuge(page))
 			dequeue_hwpoisoned_huge_page(hpage);
-			num_poisoned_pages_add(1 << compound_order(hpage));
-		} else {
-			SetPageHWPoison(page);
-			num_poisoned_pages_inc();
-		}
+		num_poisoned_pages_inc();
 	}
 	return ret;
 }
@@ -1731,15 +1695,12 @@ static int soft_offline_in_use_page(struct page *page, int flags)
 
 static void soft_offline_free_page(struct page *page)
 {
-	if (PageHuge(page)) {
-		struct page *hpage = compound_head(page);
+	struct page *head = compound_head(page);
 
-		set_page_hwpoison_huge_page(hpage);
-		if (!dequeue_hwpoisoned_huge_page(hpage))
-			num_poisoned_pages_add(1 << compound_order(hpage));
-	} else {
-		if (!TestSetPageHWPoison(page))
-			num_poisoned_pages_inc();
+	if (!TestSetPageHWPoison(head)) {
+		num_poisoned_pages_inc();
+		if (PageHuge(head))
+			dequeue_hwpoisoned_huge_page(head);
 	}
 }
 
-- 
cgit v1.2.3


From c3114a84f7f96c9d5c73c8bfa7e21ff42fda97e2 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Mon, 10 Jul 2017 15:47:41 -0700
Subject: mm: hugetlb: soft-offline: dissolve source hugepage after successful
 migration

Currently hugepage migrated by soft-offline (i.e.  due to correctable
memory errors) is contained as a hugepage, which means many non-error
pages in it are unreusable, i.e.  wasted.

This patch solves this issue by dissolving source hugepages into buddy.
As done in previous patch, PageHWPoison is set only on a head page of
the error hugepage.  Then in dissoliving we move the PageHWPoison flag
to the raw error page so that all healthy subpages return back to buddy.

[arnd@arndb.de: fix warnings: replace some macros with inline functions]
  Link: http://lkml.kernel.org/r/20170609102544.2947326-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/1496305019-5493-5-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 31 +++++++++++++++++++++++++++----
 mm/hugetlb.c            | 10 +++++++++-
 mm/memory-failure.c     |  5 +----
 mm/migrate.c            |  2 ++
 4 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 46bfb702e7d6..668ab1742ef6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -472,6 +472,7 @@ static inline pgoff_t basepage_index(struct page *page)
 	return __basepage_index(page);
 }
 
+extern int dissolve_free_huge_page(struct page *page);
 extern int dissolve_free_huge_pages(unsigned long start_pfn,
 				    unsigned long end_pfn);
 static inline bool hugepage_migration_supported(struct hstate *h)
@@ -550,15 +551,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
 {
 	return 1;
 }
-#define hstate_index_to_shift(index) 0
-#define hstate_index(h) 0
+
+static inline unsigned hstate_index_to_shift(unsigned index)
+{
+	return 0;
+}
+
+static inline int hstate_index(struct hstate *h)
+{
+	return 0;
+}
 
 static inline pgoff_t basepage_index(struct page *page)
 {
 	return page->index;
 }
-#define dissolve_free_huge_pages(s, e)	0
-#define hugepage_migration_supported(h)	false
+
+static inline int dissolve_free_huge_page(struct page *page)
+{
+	return 0;
+}
+
+static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+					   unsigned long end_pfn)
+{
+	return 0;
+}
+
+static inline bool hugepage_migration_supported(struct hstate *h)
+{
+	return false;
+}
 
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 41a1b48cefbf..b2d44363837a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1459,7 +1459,7 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
  * number of free hugepages would be reduced below the number of reserved
  * hugepages.
  */
-static int dissolve_free_huge_page(struct page *page)
+int dissolve_free_huge_page(struct page *page)
 {
 	int rc = 0;
 
@@ -1472,6 +1472,14 @@ static int dissolve_free_huge_page(struct page *page)
 			rc = -EBUSY;
 			goto out;
 		}
+		/*
+		 * Move PageHWPoison flag from head page to the raw error page,
+		 * which makes any subpages rather than the error page reusable.
+		 */
+		if (PageHWPoison(head) && page != head) {
+			SetPageHWPoison(page);
+			ClearPageHWPoison(head);
+		}
 		list_del(&head->lru);
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a9ddb0e72f5b..42c5803e6275 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1575,11 +1575,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
 		if (ret > 0)
 			ret = -EIO;
 	} else {
-		/* overcommit hugetlb page will be freed to buddy */
-		SetPageHWPoison(page);
 		if (PageHuge(page))
-			dequeue_hwpoisoned_huge_page(hpage);
-		num_poisoned_pages_inc();
+			dissolve_free_huge_page(page);
 	}
 	return ret;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 051cc1555d36..8935cbe362ce 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1252,6 +1252,8 @@ put_anon:
 out:
 	if (rc != -EAGAIN)
 		putback_active_hugepage(hpage);
+	if (reason == MR_MEMORY_FAILURE && !test_set_page_hwpoison(hpage))
+		num_poisoned_pages_inc();
 
 	/*
 	 * If migration was not successful and there's a freeing callback, use
-- 
cgit v1.2.3


From ddd40d8a2c4ef8f2152ea6d227e11475cf7e5bfa Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:53 -0700
Subject: mm: hugetlb: delete dequeue_hwpoisoned_huge_page()

dequeue_hwpoisoned_huge_page() is no longer used, so let's remove it.

Link: http://lkml.kernel.org/r/1496305019-5493-9-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  5 -----
 mm/hugetlb.c            | 34 ----------------------------------
 mm/memory-failure.c     | 11 -----------
 3 files changed, 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 668ab1742ef6..57f700ac127e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -116,7 +116,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						vm_flags_t vm_flags);
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
-int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
 void free_huge_page(struct page *page);
@@ -192,10 +191,6 @@ static inline void hugetlb_show_meminfo(void)
 #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
 				src_addr, pagep)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address, sz)	0
-static inline int dequeue_hwpoisoned_huge_page(struct page *page)
-{
-	return 0;
-}
 
 static inline bool isolate_huge_page(struct page *page, struct list_head *list)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b2d44363837a..8254e8f6db6b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4746,40 +4746,6 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
 	return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
 }
 
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * This function is called from memory failure code.
- */
-int dequeue_hwpoisoned_huge_page(struct page *hpage)
-{
-	struct hstate *h = page_hstate(hpage);
-	int nid = page_to_nid(hpage);
-	int ret = -EBUSY;
-
-	spin_lock(&hugetlb_lock);
-	/*
-	 * Just checking !page_huge_active is not enough, because that could be
-	 * an isolated/hwpoisoned hugepage (which have >0 refcount).
-	 */
-	if (!page_huge_active(hpage) && !page_count(hpage)) {
-		/*
-		 * Hwpoisoned hugepage isn't linked to activelist or freelist,
-		 * but dangling hpage->lru can trigger list-debug warnings
-		 * (this happens when we call unpoison_memory() on it),
-		 * so let it point to itself with list_del_init().
-		 */
-		list_del_init(&hpage->lru);
-		set_page_refcounted(hpage);
-		h->free_huge_pages--;
-		h->free_huge_pages_node[nid]--;
-		ret = 0;
-	}
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-#endif
-
 bool isolate_huge_page(struct page *page, struct list_head *list)
 {
 	bool ret = true;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6f8f69f4a986..2aec57c07652 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1458,17 +1458,6 @@ int unpoison_memory(unsigned long pfn)
 	}
 
 	if (!get_hwpoison_page(p)) {
-		/*
-		 * Since HWPoisoned hugepage should have non-zero refcount,
-		 * race between memory failure and unpoison seems to happen.
-		 * In such case unpoison fails and memory failure runs
-		 * to the end.
-		 */
-		if (PageHuge(page)) {
-			unpoison_pr_info("Unpoison: Memory failure is now running on free hugepage %#lx\n",
-					 pfn, &unpoison_rs);
-			return 0;
-		}
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
-- 
cgit v1.2.3


From 1860033237d4be09c5d7382585f0c7229367a534 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:02 -0700
Subject: mm: make PR_SET_THP_DISABLE immediately active

PR_SET_THP_DISABLE has a rather subtle semantic.  It doesn't affect any
existing mapping because it only updated mm->def_flags which is a
template for new mappings.

The mappings created after prctl(PR_SET_THP_DISABLE) have VM_NOHUGEPAGE
flag set.  This can be quite surprising for all those applications which
do not do prctl(); fork() & exec() and want to control their own THP
behavior.

Another usecase when the immediate semantic of the prctl might be useful
is a combination of pre- and post-copy migration of containers with
CRIU.  In this case CRIU populates a part of a memory region with data
that was saved during the pre-copy stage.  Afterwards, the region is
registered with userfaultfd and CRIU expects to get page faults for the
parts of the region that were not yet populated.  However, khugepaged
collapses the pages and the expected page faults do not occur.

In more general case, the prctl(PR_SET_THP_DISABLE) could be used as a
temporary mechanism for enabling/disabling THP process wide.

Implementation wise, a new MMF_DISABLE_THP flag is added.  This flag is
tested when decision whether to use huge pages is taken either during
page fault of at the time of THP collapse.

It should be noted, that the new implementation makes PR_SET_THP_DISABLE
master override to any per-VMA setting, which was not the case
previously.

Fixes: a0715cc22601 ("mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE")
Link: http://lkml.kernel.org/r/1496415802-30944-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h        | 1 +
 include/linux/khugepaged.h     | 3 ++-
 include/linux/sched/coredump.h | 5 ++++-
 kernel/sys.c                   | 6 +++---
 mm/khugepaged.c                | 3 ++-
 mm/shmem.c                     | 8 +++++---
 6 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d3b3e8fcc717..40d7b7dd2653 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -92,6 +92,7 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 	   (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
 	   ((__vma)->vm_flags & VM_HUGEPAGE))) &&			\
 	 !((__vma)->vm_flags & VM_NOHUGEPAGE) &&			\
+	 !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) &&		\
 	 !is_vma_temporary_stack(__vma))
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 5d9a400af509..f0d7335336cd 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
 		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
-		    !(vm_flags & VM_NOHUGEPAGE))
+		    !(vm_flags & VM_NOHUGEPAGE) &&
+		    !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 69eedcef8f03..98ae0d05aa32 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
 #define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
+#define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
+				 MMF_DISABLE_THP_MASK)
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 47d901586b4e..73fc0af147d0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2360,7 +2360,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_THP_DISABLE:
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
-		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+		error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
 		break;
 	case PR_SET_THP_DISABLE:
 		if (arg3 || arg4 || arg5)
@@ -2368,9 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		if (down_write_killable(&me->mm->mmap_sem))
 			return -EINTR;
 		if (arg2)
-			me->mm->def_flags |= VM_NOHUGEPAGE;
+			set_bit(MMF_DISABLE_THP, &me->mm->flags);
 		else
-			me->mm->def_flags &= ~VM_NOHUGEPAGE;
+			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
 		up_write(&me->mm->mmap_sem);
 		break;
 	case PR_MPX_ENABLE_MANAGEMENT:
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index df4ebdb2b10a..c01f177a1120 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -816,7 +816,8 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
 static bool hugepage_vma_check(struct vm_area_struct *vma)
 {
 	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-	    (vma->vm_flags & VM_NOHUGEPAGE))
+	    (vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
 	if (shmem_file(vma->vm_file)) {
 		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
diff --git a/mm/shmem.c b/mm/shmem.c
index 9418f5a9bc46..b0aa6075d164 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1977,10 +1977,12 @@ static int shmem_fault(struct vm_fault *vmf)
 	}
 
 	sgp = SGP_CACHE;
-	if (vma->vm_flags & VM_HUGEPAGE)
-		sgp = SGP_HUGE;
-	else if (vma->vm_flags & VM_NOHUGEPAGE)
+
+	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		sgp = SGP_NOHUGE;
+	else if (vma->vm_flags & VM_HUGEPAGE)
+		sgp = SGP_HUGE;
 
 	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
 				  gfp, vma, vmf, &ret);
-- 
cgit v1.2.3


From 16981d763501c0e06e434cf6b59f964c520e0ccc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Jul 2017 15:48:22 -0700
Subject: mm: improve readability of transparent_hugepage_enabled()

Turn the macro into a static inline and rewrite the condition checks for
better readability in preparation for adding another condition.

[ross.zwisler@linux.intel.com: fix logic to make conversion equivalent]
[akpm@linux-foundation.org: resolve vs mm-make-pr_set_thp_disable-immediately-active.patch]
[akpm@linux-foundation.org: include coredump.h for MMF_DISABLE_THP]
Link: http://lkml.kernel.org/r/149739530612.20686.14760671150202647861.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 40d7b7dd2653..f4239d3c9c73 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
+#include <linux/sched/coredump.h>
+
 extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -85,15 +87,29 @@ extern struct kobj_attribute shmem_enabled_attr;
 
 extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
-#define transparent_hugepage_enabled(__vma)				\
-	((transparent_hugepage_flags &					\
-	  (1<<TRANSPARENT_HUGEPAGE_FLAG) ||				\
-	  (transparent_hugepage_flags &					\
-	   (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
-	   ((__vma)->vm_flags & VM_HUGEPAGE))) &&			\
-	 !((__vma)->vm_flags & VM_NOHUGEPAGE) &&			\
-	 !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) &&		\
-	 !is_vma_temporary_stack(__vma))
+extern unsigned long transparent_hugepage_flags;
+
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_NOHUGEPAGE)
+		return false;
+
+	if (is_vma_temporary_stack(vma))
+		return false;
+
+	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+		return false;
+
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
+		return true;
+
+	if (transparent_hugepage_flags &
+				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+		return !!(vma->vm_flags & VM_HUGEPAGE);
+
+	return false;
+}
+
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
 	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
@@ -105,8 +121,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 #define transparent_hugepage_debug_cow() 0
 #endif /* CONFIG_DEBUG_VM */
 
-extern unsigned long transparent_hugepage_flags;
-
 extern unsigned long thp_get_unmapped_area(struct file *filp,
 		unsigned long addr, unsigned long len, unsigned long pgoff,
 		unsigned long flags);
@@ -225,7 +239,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
 
 #define hpage_nr_pages(x) 1
 
-#define transparent_hugepage_enabled(__vma) 0
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	return false;
+}
 
 static inline void prep_transhuge_page(struct page *page) {}
 
-- 
cgit v1.2.3


From baabda261424517110ea98c6651f632ebf2561e3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Jul 2017 15:48:25 -0700
Subject: mm: always enable thp for dax mappings

The madvise policy for transparent huge pages is meant to avoid unwanted
allocations of transparent huge pages.  It allows a policy of disabling
the extra memory pressure and effort to arrange for a huge page when it
is not needed.

DAX by definition never incurs this overhead since it is statically
allocated.  The policy choice makes even less sense for device-dax which
tries to guarantee a given tlb-fault size.  Specifically, the following
setting:

	echo never > /sys/kernel/mm/transparent_hugepage/enabled

...violates that guarantee and silently disables all device-dax
instances with a 2M or 1G alignment.  So, let's avoid that non-obvious
side effect by force enabling thp for dax mappings in all cases.

It is worth noting that the reason this uses vma_is_dax(), and the
resulting header include changes, is that previous attempts to add a
VM_DAX flag were NAKd.

Link: http://lkml.kernel.org/r/149739531127.20686.15813586620597484283.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h     | 5 -----
 include/linux/fs.h      | 6 ++++++
 include/linux/huge_mm.h | 5 +++++
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8f39db7439c3..794811875732 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -154,11 +154,6 @@ static inline unsigned int dax_radix_order(void *entry)
 #endif
 int dax_pfn_mkwrite(struct vm_fault *vmf);
 
-static inline bool vma_is_dax(struct vm_area_struct *vma)
-{
-	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
-}
-
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cfa47125d52..78e1dbbe4cfd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
 #include <linux/bug.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/mm_types.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fcntl.h>
@@ -3127,6 +3128,11 @@ static inline bool io_is_direct(struct file *filp)
 	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
+static inline bool vma_is_dax(struct vm_area_struct *vma)
+{
+	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
+}
+
 static inline int iocb_flags(struct file *file)
 {
 	int res = 0;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f4239d3c9c73..ee696347f928 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -3,6 +3,8 @@
 
 #include <linux/sched/coredump.h>
 
+#include <linux/fs.h> /* only for vma_is_dax() */
+
 extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -103,6 +105,9 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
 		return true;
 
+	if (vma_is_dax(vma))
+		return true;
+
 	if (transparent_hugepage_flags &
 				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
 		return !!(vma->vm_flags & VM_HUGEPAGE);
-- 
cgit v1.2.3


From 108a7ac448caff8e35e8c3f92f65faad893e5aca Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Jul 2017 15:48:28 -0700
Subject: include/linux/page_ref.h: ensure page_ref_unfreeze is ordered against
 prior accesses

page_ref_freeze and page_ref_unfreeze are designed to be used as a pair,
wrapping a critical section where struct pages can be modified without
having to worry about consistency for a concurrent fast-GUP.

Whilst page_ref_freeze has full barrier semantics due to its use of
atomic_cmpxchg, page_ref_unfreeze is implemented using atomic_set, which
doesn't provide any barrier semantics and allows the operation to be
reordered with respect to page modifications in the critical section.

This patch ensures that page_ref_unfreeze is ordered after any critical
section updates, by invoking smp_mb() prior to the atomic_set.

Link: http://lkml.kernel.org/r/1497349722-6731-3-git-send-email-will.deacon@arm.com
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ref.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 610e13271918..1fd71733aa68 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -174,6 +174,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 	VM_BUG_ON_PAGE(page_count(page) != 0, page);
 	VM_BUG_ON(count == 0);
 
+	smp_mb();
 	atomic_set(&page->_refcount, count);
 	if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
 		__page_ref_unfreeze(page, count);
-- 
cgit v1.2.3


From 4db9b2efe94967be34e3b136a93251a3c1736dd5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:44 -0700
Subject: hugetlb, memory_hotplug: prefer to use reserved pages for migration

new_node_page will try to use the origin's next NUMA node as the
migration destination for hugetlb pages.  If such a node doesn't have
any preallocated pool it falls back to __alloc_buddy_huge_page_no_mpol
to allocate a surplus page instead.  This is quite subotpimal for any
configuration when hugetlb pages are no distributed to all NUMA nodes
evenly.  Say we have a hotplugable node 4 and spare hugetlb pages are
node 0

  /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages:10000
  /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node3/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node4/hugepages/hugepages-2048kB/nr_hugepages:10000
  /sys/devices/system/node/node5/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node6/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node7/hugepages/hugepages-2048kB/nr_hugepages:0

Now we consume the whole pool on node 4 and try to offline this node.
All the allocated pages should be moved to node0 which has enough
preallocated pages to hold them.  With the current implementation
offlining very likely fails because hugetlb allocations during runtime
are much less reliable.

Fix this by reusing the nodemask which excludes migration source and try
to find a first node which has a page in the preallocated pool first and
fall back to __alloc_buddy_huge_page_no_mpol only when the whole pool is
consumed.

[akpm@linux-foundation.org: remove bogus arg from alloc_huge_page_nodemask() stub]
Link: http://lkml.kernel.org/r/20170608074553.22152-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: zhong jiang <zhongjiang@huawei.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  2 ++
 mm/hugetlb.c            | 27 +++++++++++++++++++++++++++
 mm/memory_hotplug.c     |  9 ++-------
 3 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 57f700ac127e..8fd0725d3f30 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,6 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
+struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
@@ -524,6 +525,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
 #define alloc_huge_page_node(h, nid) NULL
+#define alloc_huge_page_nodemask(h, nmask) NULL
 #define alloc_huge_page_noerr(v, a, r) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 761a669d0b62..01c11ceb47d6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1723,6 +1723,33 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
+struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
+{
+	struct page *page = NULL;
+	int node;
+
+	spin_lock(&hugetlb_lock);
+	if (h->free_huge_pages - h->resv_huge_pages > 0) {
+		for_each_node_mask(node, *nmask) {
+			page = dequeue_huge_page_node_exact(h, node);
+			if (page)
+				break;
+		}
+	}
+	spin_unlock(&hugetlb_lock);
+	if (page)
+		return page;
+
+	/* No reservations, try to overcommit */
+	for_each_node_mask(node, *nmask) {
+		page = __alloc_buddy_huge_page_no_mpol(h, node);
+		if (page)
+			return page;
+	}
+
+	return NULL;
+}
+
 /*
  * Increase the hugetlb pool such that it can accommodate a reservation
  * of size 'delta'.
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f42a8ef93ec4..1cf3404bd065 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1446,14 +1446,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 	if (nodes_empty(nmask))
 		node_set(nid, nmask);
 
-	/*
-	 * TODO: allocate a destination hugepage from a nearest neighbor node,
-	 * accordance with memory policy of the user process if possible. For
-	 * now as a simple work-around, we use the next node for destination.
-	 */
 	if (PageHuge(page))
-		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					next_node_in(nid, nmask));
+		return alloc_huge_page_nodemask(
+				page_hstate(compound_head(page)), &nmask);
 
 	if (PageHighMem(page)
 	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
-- 
cgit v1.2.3


From 8b9132388964df2cfe151a88fd1dd8219dabf23c Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:47 -0700
Subject: mm: unify new_node_page and alloc_migrate_target

Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest
neighbor node when mem-offline") has duplicated a large part of
alloc_migrate_target with some hotplug specific special casing.

To be more precise it tried to enfore the allocation from a different
node than the original page.  As a result the two function diverged in
their shared logic, e.g.  the hugetlb allocation strategy.

Let's unify the two and express different NUMA requirements by the given
nodemask.  new_node_page will simply exclude the node it doesn't care
about and alloc_migrate_target will use all the available nodes.
alloc_migrate_target will then learn to migrate hugetlb pages more
sanely and use preallocated pool when possible.

Please note that alloc_migrate_target used to call alloc_page resp.
alloc_pages_current so the memory policy of the current context which is
quite strange when we consider that it is used in the context of
alloc_contig_range which just tries to migrate pages which stand in the
way.

Link: http://lkml.kernel.org/r/20170608074553.22152-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: zhong jiang <zhongjiang@huawei.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 16 ++++++++++++++++
 mm/memory_hotplug.c     | 11 +----------
 mm/page_isolation.c     | 18 ++----------------
 3 files changed, 19 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 48e24844b3c5..d9675b665cc4 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -4,6 +4,7 @@
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
 #include <linux/migrate_mode.h>
+#include <linux/hugetlb.h>
 
 typedef struct page *new_page_t(struct page *page, unsigned long private,
 				int **reason);
@@ -30,6 +31,21 @@ enum migrate_reason {
 /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
 extern char *migrate_reason_names[MR_TYPES];
 
+static inline struct page *new_page_nodemask(struct page *page,
+				int preferred_nid, nodemask_t *nodemask)
+{
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+
+	if (PageHuge(page))
+		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
+				nodemask);
+
+	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
+		gfp_mask |= __GFP_HIGHMEM;
+
+	return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask);
+}
+
 #ifdef CONFIG_MIGRATION
 
 extern void putback_movable_pages(struct list_head *l);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1cf3404bd065..203c46306a74 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1433,7 +1433,6 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 static struct page *new_node_page(struct page *page, unsigned long private,
 		int **result)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 	int nid = page_to_nid(page);
 	nodemask_t nmask = node_states[N_MEMORY];
 
@@ -1446,15 +1445,7 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 	if (nodes_empty(nmask))
 		node_set(nid, nmask);
 
-	if (PageHuge(page))
-		return alloc_huge_page_nodemask(
-				page_hstate(compound_head(page)), &nmask);
-
-	if (PageHighMem(page)
-	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
+	return new_page_nodemask(page, nid, &nmask);
 }
 
 #define NR_OFFLINE_AT_ONCE_PAGES	(256)
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 3606104893e0..757410d9f758 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -8,6 +8,7 @@
 #include <linux/memory.h>
 #include <linux/hugetlb.h>
 #include <linux/page_owner.h>
+#include <linux/migrate.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -294,20 +295,5 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 struct page *alloc_migrate_target(struct page *page, unsigned long private,
 				  int **resultp)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
-
-	/*
-	 * TODO: allocate a destination hugepage from a nearest neighbor node,
-	 * accordance with memory policy of the user process if possible. For
-	 * now as a simple work-around, we use the next node for destination.
-	 */
-	if (PageHuge(page))
-		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					    next_node_in(page_to_nid(page),
-							 node_online_map));
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_mask);
+	return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
 }
-- 
cgit v1.2.3


From aaf14e40a33a2c9350471387031ca40c00f5a006 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:08 -0700
Subject: mm, hugetlb: unclutter hugetlb allocation layers

Patch series "mm, hugetlb: allow proper node fallback dequeue".

While working on a hugetlb migration issue addressed in a separate
patchset[1] I have noticed that the hugetlb allocations from the
preallocated pool are quite subotimal.

 [1] //lkml.kernel.org/r/20170608074553.22152-1-mhocko@kernel.org

There is no fallback mechanism implemented and no notion of preferred
node.  I have tried to work around it but Vlastimil was right to push
back for a more robust solution.  It seems that such a solution is to
reuse zonelist approach we use for the page alloctor.

This series has 3 patches.  The first one tries to make hugetlb
allocation layers more clear.  The second one implements the zonelist
hugetlb pool allocation and introduces a preferred node semantic which
is used by the migration callbacks.  The last patch is a clean up.

This patch (of 3):

Hugetlb allocation path for fresh huge pages is unnecessarily complex
and it mixes different interfaces between layers.

__alloc_buddy_huge_page is the central place to perform a new
allocation.  It checks for the hugetlb overcommit and then relies on
__hugetlb_alloc_buddy_huge_page to invoke the page allocator.  This is
all good except that __alloc_buddy_huge_page pushes vma and address down
the callchain and so __hugetlb_alloc_buddy_huge_page has to deal with
two different allocation modes - one for memory policy and other node
specific (or to make it more obscure node non-specific) requests.

This just screams for a reorganization.

This patch pulls out all the vma specific handling up to
__alloc_buddy_huge_page_with_mpol where it belongs.
__alloc_buddy_huge_page will get nodemask argument and
__hugetlb_alloc_buddy_huge_page will become a trivial wrapper over the
page allocator.

In short:
__alloc_buddy_huge_page_with_mpol - memory policy handling
  __alloc_buddy_huge_page - overcommit handling and accounting
    __hugetlb_alloc_buddy_huge_page - page allocator layer

Also note that __hugetlb_alloc_buddy_huge_page and its cpuset retry loop
is not really needed because the page allocator already handles the
cpusets update.

Finally __hugetlb_alloc_buddy_huge_page had a special case for node
specific allocations (when no policy is applied and there is a node
given).  This has relied on __GFP_THISNODE to not fallback to a different
node.  alloc_huge_page_node is the only caller which relies on this
behavior so move the __GFP_THISNODE there.

Not only does this remove quite some code it also should make those
layers easier to follow and clear wrt responsibilities.

Link: http://lkml.kernel.org/r/20170622193034.28972-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |   2 +-
 mm/hugetlb.c            | 133 +++++++++++-------------------------------------
 2 files changed, 30 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8fd0725d3f30..66b621469f52 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,7 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask);
+struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 907786581812..fd6e0c50f949 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1521,82 +1521,19 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 	return rc;
 }
 
-/*
- * There are 3 ways this can get called:
- * 1. With vma+addr: we use the VMA's memory policy
- * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
- *    page from any node, and let the buddy allocator itself figure
- *    it out.
- * 3. With !vma, but nid!=NUMA_NO_NODE.  We allocate a huge page
- *    strictly from 'nid'
- */
 static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr, int nid)
+		gfp_t gfp_mask, int nid, nodemask_t *nmask)
 {
 	int order = huge_page_order(h);
-	gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
-	unsigned int cpuset_mems_cookie;
 
-	/*
-	 * We need a VMA to get a memory policy.  If we do not
-	 * have one, we use the 'nid' argument.
-	 *
-	 * The mempolicy stuff below has some non-inlined bits
-	 * and calls ->vm_ops.  That makes it hard to optimize at
-	 * compile-time, even when NUMA is off and it does
-	 * nothing.  This helps the compiler optimize it out.
-	 */
-	if (!IS_ENABLED(CONFIG_NUMA) || !vma) {
-		/*
-		 * If a specific node is requested, make sure to
-		 * get memory from there, but only when a node
-		 * is explicitly specified.
-		 */
-		if (nid != NUMA_NO_NODE)
-			gfp |= __GFP_THISNODE;
-		/*
-		 * Make sure to call something that can handle
-		 * nid=NUMA_NO_NODE
-		 */
-		return alloc_pages_node(nid, gfp, order);
-	}
-
-	/*
-	 * OK, so we have a VMA.  Fetch the mempolicy and try to
-	 * allocate a huge page with it.  We will only reach this
-	 * when CONFIG_NUMA=y.
-	 */
-	do {
-		struct page *page;
-		struct mempolicy *mpol;
-		int nid;
-		nodemask_t *nodemask;
-
-		cpuset_mems_cookie = read_mems_allowed_begin();
-		nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
-		mpol_cond_put(mpol);
-		page = __alloc_pages_nodemask(gfp, order, nid, nodemask);
-		if (page)
-			return page;
-	} while (read_mems_allowed_retry(cpuset_mems_cookie));
-
-	return NULL;
+	gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+	return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
 }
 
-/*
- * There are two ways to allocate a huge page:
- * 1. When you have a VMA and an address (like a fault)
- * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
- *
- * 'vma' and 'addr' are only for (1).  'nid' is always NUMA_NO_NODE in
- * this case which signifies that the allocation should be done with
- * respect for the VMA's memory policy.
- *
- * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
- * implies that memory policies will not be taken in to account.
- */
-static struct page *__alloc_buddy_huge_page(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr, int nid)
+static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask)
 {
 	struct page *page;
 	unsigned int r_nid;
@@ -1604,15 +1541,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	if (hstate_is_gigantic(h))
 		return NULL;
 
-	/*
-	 * Make sure that anyone specifying 'nid' is not also specifying a VMA.
-	 * This makes sure the caller is picking _one_ of the modes with which
-	 * we can call this function, not both.
-	 */
-	if (vma || (addr != -1)) {
-		VM_WARN_ON_ONCE(addr == -1);
-		VM_WARN_ON_ONCE(nid != NUMA_NO_NODE);
-	}
 	/*
 	 * Assume we will successfully allocate the surplus page to
 	 * prevent racing processes from causing the surplus to exceed
@@ -1646,7 +1574,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
+	page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -1670,19 +1598,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	return page;
 }
 
-/*
- * Allocate a huge page from 'nid'.  Note, 'nid' may be
- * NUMA_NO_NODE, which means that it may be allocated
- * anywhere.
- */
-static
-struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
-{
-	unsigned long addr = -1;
-
-	return __alloc_buddy_huge_page(h, NULL, addr, nid);
-}
-
 /*
  * Use the VMA's mpolicy to allocate a huge page from the buddy.
  */
@@ -1690,7 +1605,17 @@ static
 struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+	struct page *page;
+	struct mempolicy *mpol;
+	gfp_t gfp_mask = htlb_alloc_mask(h);
+	int nid;
+	nodemask_t *nodemask;
+
+	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
+	page = __alloc_buddy_huge_page(h, gfp_mask, nid, nodemask);
+	mpol_cond_put(mpol);
+
+	return page;
 }
 
 /*
@@ -1700,21 +1625,26 @@ struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
  */
 struct page *alloc_huge_page_node(struct hstate *h, int nid)
 {
+	gfp_t gfp_mask = htlb_alloc_mask(h);
 	struct page *page = NULL;
 
+	if (nid != NUMA_NO_NODE)
+		gfp_mask |= __GFP_THISNODE;
+
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0)
 		page = dequeue_huge_page_node(h, nid);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = __alloc_buddy_huge_page_no_mpol(h, nid);
+		page = __alloc_buddy_huge_page(h, gfp_mask, nid, NULL);
 
 	return page;
 }
 
-struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
+struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask)
 {
+	gfp_t gfp_mask = htlb_alloc_mask(h);
 	struct page *page = NULL;
 	int node;
 
@@ -1731,13 +1661,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
 		return page;
 
 	/* No reservations, try to overcommit */
-	for_each_node_mask(node, *nmask) {
-		page = __alloc_buddy_huge_page_no_mpol(h, node);
-		if (page)
-			return page;
-	}
-
-	return NULL;
+	return __alloc_buddy_huge_page(h, gfp_mask, NUMA_NO_NODE, nmask);
 }
 
 /*
@@ -1765,7 +1689,8 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page(h, htlb_alloc_mask(h),
+				NUMA_NO_NODE, NULL);
 		if (!page) {
 			alloc_ok = false;
 			break;
-- 
cgit v1.2.3


From 3e59fcb0e8c1c40aecb60fa4c2d1543d6a097184 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:11 -0700
Subject: hugetlb: add support for preferred node to alloc_huge_page_nodemask

alloc_huge_page_nodemask tries to allocate from any numa node in the
allowed node mask starting from lower numa nodes.  This might lead to
filling up those low NUMA nodes while others are not used.  We can
reduce this risk by introducing a concept of the preferred node similar
to what we have in the regular page allocator.  We will start allocating
from the preferred nid and then iterate over all allowed nodes in the
zonelist order until we try them all.

This is mimicing the page allocator logic except it operates on per-node
mempools.  dequeue_huge_page_vma already does this so distill the
zonelist logic into a more generic dequeue_huge_page_nodemask and use it
in alloc_huge_page_nodemask.

This will allow us to use proper per numa distance fallback also for
alloc_huge_page_node which can use alloc_huge_page_nodemask now and we
can get rid of alloc_huge_page_node helper which doesn't have any user
anymore.

Link: http://lkml.kernel.org/r/20170622193034.28972-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  5 +--
 include/linux/migrate.h |  2 +-
 mm/hugetlb.c            | 88 ++++++++++++++++++++++++-------------------------
 3 files changed, 48 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 66b621469f52..8d9fe131a240 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,7 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask);
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+				nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
@@ -525,7 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
 #define alloc_huge_page_node(h, nid) NULL
-#define alloc_huge_page_nodemask(h, nmask) NULL
+#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL
 #define alloc_huge_page_noerr(v, a, r) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index d9675b665cc4..4634da521238 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -38,7 +38,7 @@ static inline struct page *new_page_nodemask(struct page *page,
 
 	if (PageHuge(page))
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
-				nodemask);
+				preferred_nid, nodemask);
 
 	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
 		gfp_mask |= __GFP_HIGHMEM;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fd6e0c50f949..1e516520433d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -887,19 +887,39 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 	return page;
 }
 
-static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
+		nodemask_t *nmask)
 {
-	struct page *page;
-	int node;
+	unsigned int cpuset_mems_cookie;
+	struct zonelist *zonelist;
+	struct zone *zone;
+	struct zoneref *z;
+	int node = -1;
 
-	if (nid != NUMA_NO_NODE)
-		return dequeue_huge_page_node_exact(h, nid);
+	zonelist = node_zonelist(nid, gfp_mask);
+
+retry_cpuset:
+	cpuset_mems_cookie = read_mems_allowed_begin();
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
+		struct page *page;
+
+		if (!cpuset_zone_allowed(zone, gfp_mask))
+			continue;
+		/*
+		 * no need to ask again on the same node. Pool is node rather than
+		 * zone aware
+		 */
+		if (zone_to_nid(zone) == node)
+			continue;
+		node = zone_to_nid(zone);
 
-	for_each_online_node(node) {
 		page = dequeue_huge_page_node_exact(h, node);
 		if (page)
 			return page;
 	}
+	if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie)))
+		goto retry_cpuset;
+
 	return NULL;
 }
 
@@ -917,15 +937,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page = NULL;
+	struct page *page;
 	struct mempolicy *mpol;
-	nodemask_t *nodemask;
 	gfp_t gfp_mask;
+	nodemask_t *nodemask;
 	int nid;
-	struct zonelist *zonelist;
-	struct zone *zone;
-	struct zoneref *z;
-	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * A child process with MAP_PRIVATE mappings created by their parent
@@ -940,32 +956,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 	if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
 		goto err;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	zonelist = node_zonelist(nid, gfp_mask);
-
-	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed(zone, gfp_mask)) {
-			page = dequeue_huge_page_node(h, zone_to_nid(zone));
-			if (page) {
-				if (avoid_reserve)
-					break;
-				if (!vma_has_reserves(vma, chg))
-					break;
-
-				SetPagePrivate(page);
-				h->resv_huge_pages--;
-				break;
-			}
-		}
+	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
+		SetPagePrivate(page);
+		h->resv_huge_pages--;
 	}
 
 	mpol_cond_put(mpol);
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-		goto retry_cpuset;
 	return page;
 
 err:
@@ -1633,7 +1632,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0)
-		page = dequeue_huge_page_node(h, nid);
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
@@ -1642,26 +1641,27 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
-struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask)
+
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+		nodemask_t *nmask)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h);
-	struct page *page = NULL;
-	int node;
 
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0) {
-		for_each_node_mask(node, *nmask) {
-			page = dequeue_huge_page_node_exact(h, node);
-			if (page)
-				break;
+		struct page *page;
+
+		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
+		if (page) {
+			spin_unlock(&hugetlb_lock);
+			return page;
 		}
 	}
 	spin_unlock(&hugetlb_lock);
-	if (page)
-		return page;
 
 	/* No reservations, try to overcommit */
-	return __alloc_buddy_huge_page(h, gfp_mask, NUMA_NO_NODE, nmask);
+
+	return __alloc_buddy_huge_page(h, gfp_mask, preferred_nid, nmask);
 }
 
 /*
-- 
cgit v1.2.3


From 618b8c20d03c9ea06711bd36d906322ba35c0add Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 10 Jul 2017 15:49:32 -0700
Subject: include/linux/mmzone.h: remove ancient/ambiguous comment

Currently pg_data_t is just a struct which describes a NUMA node memory
layout.  Let's keep the comment simple and remove ambiguity.

Link: http://lkml.kernel.org/r/1498220534-22717-1-git-send-email-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7e8f100cb56d..16532fa0bb64 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -603,12 +603,9 @@ extern struct page *mem_map;
 #endif
 
 /*
- * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
- * (mostly NUMA machines?) to denote a higher-level memory zone than the
- * zone denotes.
- *
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
- * it's memory layout.
+ * it's memory layout. On UMA machines there is a single pglist_data which
+ * describes the whole memory.
  *
  * Memory statistics and page replacement data structures are maintained on a
  * per-zone basis.
-- 
cgit v1.2.3


From e3d3910a57ab9c70cddb2522ae711ff9bff89e7c Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 10 Jul 2017 15:49:35 -0700
Subject: include/linux/backing-dev.h: simplify wb_stat_sum

wb_stat_sum() disables interrupts and calls __wb_stat_sum() which
eventually calls __percpu_counter_sum().  However, the percpu routine is
already irq-safe.  Simplify the code a bit by making wb_stat_sum()
directly call percpu_counter_sum_positive() and not disable interrupts.

Also remove the now-uneeded __wb_stat_sum() which was just a wrapper
over percpu_counter_sum_positive().

Link: http://lkml.kernel.org/r/1498230681-29103-1-git-send-email-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ace73f96eb1e..334165c911f0 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -104,22 +104,9 @@ static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 	return percpu_counter_read_positive(&wb->stat[item]);
 }
 
-static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
-				enum wb_stat_item item)
-{
-	return percpu_counter_sum_positive(&wb->stat[item]);
-}
-
 static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	s64 sum;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	sum = __wb_stat_sum(wb, item);
-	local_irq_restore(flags);
-
-	return sum;
+	return percpu_counter_sum_positive(&wb->stat[item]);
 }
 
 extern void wb_writeout_inc(struct bdi_writeback *wb);
-- 
cgit v1.2.3


From 2c80cd57c74339889a8752b20862a16c28929c3a Mon Sep 17 00:00:00 2001
From: Sahitya Tummala <stummala@codeaurora.org>
Date: Mon, 10 Jul 2017 15:49:57 -0700
Subject: mm/list_lru.c: fix list_lru_count_node() to be race free

list_lru_count_node() iterates over all memcgs to get the total number of
entries on the node but it can race with memcg_drain_all_list_lrus(),
which migrates the entries from a dead cgroup to another.  This can return
incorrect number of entries from list_lru_count_node().

Fix this by keeping track of entries per node and simply return it in
list_lru_count_node().

Link: http://lkml.kernel.org/r/1498707555-30525-1-git-send-email-stummala@codeaurora.org
Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Alexander Polakov <apolyakov@beget.ru>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h |  1 +
 mm/list_lru.c            | 14 ++++++--------
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index cb0ba9f2a9a2..fa7fd03cb5f9 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -44,6 +44,7 @@ struct list_lru_node {
 	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
 	struct list_lru_memcg	*memcg_lrus;
 #endif
+	long nr_items;
 } ____cacheline_aligned_in_smp;
 
 struct list_lru {
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 234676e31edd..7a40fa2be858 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_add_tail(item, &l->list);
 		l->nr_items++;
+		nlru->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_del_init(item);
 		l->nr_items--;
+		nlru->nr_items--;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one);
 
 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
 {
-	long count = 0;
-	int memcg_idx;
+	struct list_lru_node *nlru;
 
-	count += __list_lru_count_one(lru, nid, -1);
-	if (list_lru_memcg_aware(lru)) {
-		for_each_memcg_cache_index(memcg_idx)
-			count += __list_lru_count_one(lru, nid, memcg_idx);
-	}
-	return count;
+	nlru = &lru->node[nid];
+	return nlru->nr_items;
 }
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
@@ -226,6 +223,7 @@ restart:
 			assert_spin_locked(&nlru->lock);
 		case LRU_REMOVED:
 			isolated++;
+			nlru->nr_items--;
 			/*
 			 * If the lru lock has been dropped, our list
 			 * traversal is now invalid and so we have to
-- 
cgit v1.2.3


From a47fed5b5b014f5a13878b90ef2c3a7dc294189f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 10 Jul 2017 15:50:06 -0700
Subject: mm: swap: provide lru_add_drain_all_cpuslocked()

The rework of the cpu hotplug locking unearthed potential deadlocks with
the memory hotplug locking code.

The solution for these is to rework the memory hotplug locking code as
well and take the cpu hotplug lock before the memory hotplug lock in
mem_hotplug_begin(), but this will cause a recursive locking of the cpu
hotplug lock when the memory hotplug code calls lru_add_drain_all().

Split out the inner workings of lru_add_drain_all() into
lru_add_drain_all_cpuslocked() so this function can be invoked from the
memory hotplug code with the cpu hotplug lock held.

Link: http://lkml.kernel.org/r/20170704093421.419329357@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  1 +
 mm/swap.c            | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 61e7180cee21..d83d28e53e62 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -277,6 +277,7 @@ extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
+extern void lru_add_drain_all_cpuslocked(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
diff --git a/mm/swap.c b/mm/swap.c
index 4f44dbd7f780..60b1d2a75852 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -688,7 +688,7 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-void lru_add_drain_all(void)
+void lru_add_drain_all_cpuslocked(void)
 {
 	static DEFINE_MUTEX(lock);
 	static struct cpumask has_work;
@@ -702,7 +702,6 @@ void lru_add_drain_all(void)
 		return;
 
 	mutex_lock(&lock);
-	get_online_cpus();
 	cpumask_clear(&has_work);
 
 	for_each_online_cpu(cpu) {
@@ -722,10 +721,16 @@ void lru_add_drain_all(void)
 	for_each_cpu(cpu, &has_work)
 		flush_work(&per_cpu(lru_add_drain_work, cpu));
 
-	put_online_cpus();
 	mutex_unlock(&lock);
 }
 
+void lru_add_drain_all(void)
+{
+	get_online_cpus();
+	lru_add_drain_all_cpuslocked();
+	put_online_cpus();
+}
+
 /**
  * release_pages - batched put_page()
  * @pages: array of pages to release
-- 
cgit v1.2.3


From 9d1f4b3f5b29bea431525e528a3ff2dc806ad904 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:50:12 -0700
Subject: mm: disallow early_pfn_to_nid on configurations which do not
 implement it

early_pfn_to_nid will return node 0 if both HAVE_ARCH_EARLY_PFN_TO_NID
and HAVE_MEMBLOCK_NODE_MAP are disabled.  It seems we are safe now
because all architectures which support NUMA define one of them (with an
exception of alpha which however has CONFIG_NUMA marked as broken) so
this works as expected.  It can get silently and subtly broken too
easily, though.  Make sure we fail the compilation if NUMA is enabled
and there is no proper implementation for this function.  If that ever
happens we know that either the specific configuration is invalid and
the fix should either disable NUMA or enable one of the above configs.

Link: http://lkml.kernel.org/r/20170704075803.15979-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 16532fa0bb64..fc14b8b3f6ce 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1055,6 +1055,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 	!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 {
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
 	return 0;
 }
 #endif
-- 
cgit v1.2.3


From e9d5a48499391fe5b0615610858665ba8149e255 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:50:58 -0700
Subject: linux/bug.h: correct formatting of block comment

Correct these checkpatch.pl warnings:

|WARNING: Block comments use * on subsequent lines
|#34: FILE: include/linux/bug.h:34:
|+/* Force a compilation error if condition is true, but also produce a
|+   result (of value 0 and type size_t), so the expression can be used

|WARNING: Block comments use a trailing */ on a separate line
|#36: FILE: include/linux/bug.h:36:
|+   aren't permitted). */

Link: http://lkml.kernel.org/r/20170525120316.24473-3-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 687b557fc5eb..ca24007e2dc3 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -30,10 +30,12 @@ struct pt_regs;
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
 	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
 
-/* Force a compilation error if condition is true, but also produce a
-   result (of value 0 and type size_t), so the expression can be used
-   e.g. in a structure initializer (or where-ever else comma expressions
-   aren't permitted). */
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 
-- 
cgit v1.2.3


From 8cdd7cca9287abf4c849c01e2a4e8207ad3e3a82 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:51:01 -0700
Subject: linux/bug.h: correct "(foo*)" should be "(foo *)"

Correct this checkpatch.pl error:

|ERROR: "(foo*)" should be "(foo *)"
|#19: FILE: include/linux/bug.h:19:
|+#define BUILD_BUG_ON_NULL(e) ((void*)0)

Link: http://lkml.kernel.org/r/20170525120316.24473-4-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index ca24007e2dc3..216a1b79653d 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -16,7 +16,7 @@ struct pt_regs;
 #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_ZERO(e) (0)
-#define BUILD_BUG_ON_NULL(e) ((void*)0)
+#define BUILD_BUG_ON_NULL(e) ((void *)0)
 #define BUILD_BUG_ON_INVALID(e) (0)
 #define BUILD_BUG_ON_MSG(cond, msg) (0)
 #define BUILD_BUG_ON(condition) (0)
-- 
cgit v1.2.3


From 47e81e59d98b90727a02ceb486407eeed5eb8727 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:51:04 -0700
Subject: linux/bug.h: correct "space required before that '-'"

Correct these checkpatch.pl errors:

|ERROR: space required before that '-' (ctx:OxO)
|#37: FILE: include/linux/bug.h:37:
|+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))

|ERROR: space required before that '-' (ctx:OxO)
|#38: FILE: include/linux/bug.h:38:
|+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))

I decided to wrap the bitfield expressions that begin with minus signs
in parentheses rather than insert spaces before the minus signs.

Link: http://lkml.kernel.org/r/20170525120316.24473-5-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 216a1b79653d..483207cb99fb 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -36,8 +36,8 @@ struct pt_regs;
  * e.g. in a structure initializer (or where-ever else comma expressions
  * aren't permitted).
  */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
-#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
 
 /*
  * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
-- 
cgit v1.2.3


From bc6245e5efd70c41eaf9334b1b5e646745cb0fb3 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:51:07 -0700
Subject: bug: split BUILD_BUG stuff out into <linux/build_bug.h>

Including <linux/bug.h> pulls in a lot of bloat from <asm/bug.h> and
<asm-generic/bug.h> that is not needed to call the BUILD_BUG() family of
macros.  Split them out into their own header, <linux/build_bug.h>.

Also correct some checkpatch.pl errors for the BUILD_BUG_ON_ZERO() and
BUILD_BUG_ON_NULL() macros by adding parentheses around the bitfield
widths that begin with a minus sign.

Link: http://lkml.kernel.org/r/20170525120316.24473-6-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h       | 74 +----------------------------------------
 include/linux/build_bug.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 73 deletions(-)
 create mode 100644 include/linux/build_bug.h

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 483207cb99fb..5d5554c874fd 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -3,6 +3,7 @@
 
 #include <asm/bug.h>
 #include <linux/compiler.h>
+#include <linux/build_bug.h>
 
 enum bug_trap_type {
 	BUG_TRAP_TYPE_NONE = 0,
@@ -13,82 +14,9 @@ enum bug_trap_type {
 struct pt_regs;
 
 #ifdef __CHECKER__
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_ZERO(e) (0)
-#define BUILD_BUG_ON_NULL(e) ((void *)0)
-#define BUILD_BUG_ON_INVALID(e) (0)
-#define BUILD_BUG_ON_MSG(cond, msg) (0)
-#define BUILD_BUG_ON(condition) (0)
-#define BUILD_BUG() (0)
 #define MAYBE_BUILD_BUG_ON(cond) (0)
 #else /* __CHECKER__ */
 
-/* Force a compilation error if a constant expression is not a power of 2 */
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
-	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
-	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
-
-/*
- * Force a compilation error if condition is true, but also produce a
- * result (of value 0 and type size_t), so the expression can be used
- * e.g. in a structure initializer (or where-ever else comma expressions
- * aren't permitted).
- */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
-#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
-
-/*
- * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
- * expression but avoids the generation of any code, even if that expression
- * has side-effects.
- */
-#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
-
-/**
- * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
- *		      error message.
- * @condition: the condition which the compiler should know is false.
- *
- * See BUILD_BUG_ON for description.
- */
-#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
-
-/**
- * BUILD_BUG_ON - break compile if a condition is true.
- * @condition: the condition which the compiler should know is false.
- *
- * If you have some code which relies on certain constants being equal, or
- * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
- * detect if someone changes it.
- *
- * The implementation uses gcc's reluctance to create a negative array, but gcc
- * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
- * inline functions).  Luckily, in 4.3 they added the "error" function
- * attribute just for this type of case.  Thus, we use a negative sized array
- * (should always create an error on gcc versions older than 4.4) and then call
- * an undefined function with the error attribute (should always create an
- * error on gcc 4.3 and later).  If for some reason, neither creates a
- * compile-time error, we'll still have a link-time error, which is harder to
- * track down.
- */
-#ifndef __OPTIMIZE__
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#else
-#define BUILD_BUG_ON(condition) \
-	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
-#endif
-
-/**
- * BUILD_BUG - break compile if used.
- *
- * If you have some code that you expect the compiler to eliminate at
- * build time, you should use BUILD_BUG to detect if it is
- * unexpectedly used.
- */
-#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
-
 #define MAYBE_BUILD_BUG_ON(cond)			\
 	do {						\
 		if (__builtin_constant_p((cond)))       \
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
new file mode 100644
index 000000000000..b7d22d60008a
--- /dev/null
+++ b/include/linux/build_bug.h
@@ -0,0 +1,84 @@
+#ifndef _LINUX_BUILD_BUG_H
+#define _LINUX_BUILD_BUG_H
+
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_ZERO(e) (0)
+#define BUILD_BUG_ON_NULL(e) ((void *)0)
+#define BUILD_BUG_ON_INVALID(e) (0)
+#define BUILD_BUG_ON_MSG(cond, msg) (0)
+#define BUILD_BUG_ON(condition) (0)
+#define BUILD_BUG() (0)
+#else /* __CHECKER__ */
+
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
+	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
+	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
+
+/*
+ * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
+ * expression but avoids the generation of any code, even if that expression
+ * has side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
+/**
+ * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
+ *		      error message.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * See BUILD_BUG_ON for description.
+ */
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array, but gcc
+ * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
+ * inline functions).  Luckily, in 4.3 they added the "error" function
+ * attribute just for this type of case.  Thus, we use a negative sized array
+ * (should always create an error on gcc versions older than 4.4) and then call
+ * an undefined function with the error attribute (should always create an
+ * error on gcc 4.3 and later).  If for some reason, neither creates a
+ * compile-time error, we'll still have a link-time error, which is harder to
+ * track down.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+#define BUILD_BUG_ON(condition) \
+	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+#endif
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
+
+#endif	/* __CHECKER__ */
+
+#endif	/* _LINUX_BUILD_BUG_H */
-- 
cgit v1.2.3


From 287f3ca563d8ba0ede4ac0cec84218a1ea5e848f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Mon, 10 Jul 2017 15:51:10 -0700
Subject: ARM: fix rd_size declaration

The global variable 'rd_size' is declared as 'int' in source file
arch/arm/kernel/atags_parse.c and as 'unsigned long' in
drivers/block/brd.c.  Fix this inconsistency.

Additionally, remove the declarations of rd_image_start, rd_prompt and
rd_doload from parse_tag_ramdisk() since these duplicate existing
declarations in <linux/initrd.h>.

Link: http://lkml.kernel.org/r/20170627065024.12347-1-bart.vanassche@wdc.com
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Yan <yanaijie@huawei.com>
Cc: Zhaohongjiang <zhaohongjiang@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/atags_parse.c | 3 +--
 drivers/block/brd.c           | 1 +
 include/linux/initrd.h        | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 68c6ae0b9e4c..98fbfd235ac8 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/initrd.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/root_dev.h>
@@ -91,8 +92,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
 #ifdef CONFIG_BLK_DEV_RAM
 static int __init parse_tag_ramdisk(const struct tag *tag)
 {
-	extern int rd_size, rd_image_start, rd_prompt, rd_doload;
-
 	rd_image_start = tag->u.ramdisk.start;
 	rd_doload = (tag->u.ramdisk.flags & 1) == 0;
 	rd_prompt = (tag->u.ramdisk.flags & 2) == 0;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 17723fd50a53..104b71c0490d 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/major.h>
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 55289d261b4f..bc67b767f9ce 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -10,6 +10,9 @@ extern int rd_prompt;
 /* starting block # of image */
 extern int rd_image_start;
 
+/* size of a single RAM disk */
+extern unsigned long rd_size;
+
 /* 1 if it is not an error if initrd_start < memory_start */
 extern int initrd_below_start_ok;
 
-- 
cgit v1.2.3


From e5af323c9badd5dc09af7ccf9d45616ebffc623c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:29 -0700
Subject: bitmap: optimise bitmap_set and bitmap_clear of a single bit

We have eight users calling bitmap_clear for a single bit and seventeen
calling bitmap_set for a single bit.  Rather than fix all of them to
call __clear_bit or __set_bit, turn bitmap_clear and bitmap_set into
inline functions and make this special case efficient.

Link: http://lkml.kernel.org/r/20170628153221.11322-3-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 23 ++++++++++++++++++++---
 lib/bitmap.c           |  8 ++++----
 lib/test_bitmap.c      |  3 ---
 3 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3b77588a9360..4e0f0c8167af 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1,
 extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-
-extern void bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
 						    unsigned long size,
@@ -315,6 +314,24 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int
 	return __bitmap_weight(src, nbits);
 }
 
+static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__set_bit(start, map);
+	else
+		__bitmap_set(map, start, nbits);
+}
+
+static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
 				unsigned int shift, int nbits)
 {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 08c6ef3a2b6f..9a532805364b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -251,7 +251,7 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, unsigned int start, int len)
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -270,9 +270,9 @@ void bitmap_set(unsigned long *map, unsigned int start, int len)
 		*p |= mask_to_set;
 	}
 }
-EXPORT_SYMBOL(bitmap_set);
+EXPORT_SYMBOL(__bitmap_set);
 
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -291,7 +291,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len)
 		*p &= ~mask_to_clear;
 	}
 }
-EXPORT_SYMBOL(bitmap_clear);
+EXPORT_SYMBOL(__bitmap_clear);
 
 /**
  * bitmap_find_next_zero_area_off - find a contiguous aligned zero area
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 252d3bddbe7d..2526a2975c51 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -333,9 +333,6 @@ static void __init test_bitmap_u32_array_conversions(void)
 	}
 }
 
-#define __bitmap_set(a, b, c)	bitmap_set(a, b, c)
-#define __bitmap_clear(a, b, c)	bitmap_clear(a, b, c)
-
 static void noinline __init test_mem_optimisations(void)
 {
 	DECLARE_BITMAP(bmap1, 1024);
-- 
cgit v1.2.3


From 2a98dc028f911a7c59c87d11d4eed6626be1605b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:32 -0700
Subject: include/linux/bitmap.h: turn bitmap_set and bitmap_clear into memset
 when possible

Several callers have constant 'start' and an 'nbits' that is a multiple
of 8, so we can turn them into calls to memset.  We don't need the
entirety of 'start' and 'nbits' to be constant, we just need to know
whether they're divisible by 8.

Link: http://lkml.kernel.org/r/20170628153221.11322-4-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 4e0f0c8167af..c04c9d155e59 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -319,6 +319,9 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
 {
 	if (__builtin_constant_p(nbits) && nbits == 1)
 		__set_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0xff, nbits / 8);
 	else
 		__bitmap_set(map, start, nbits);
 }
@@ -328,6 +331,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
 {
 	if (__builtin_constant_p(nbits) && nbits == 1)
 		__clear_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
 	else
 		__bitmap_clear(map, start, nbits);
 }
-- 
cgit v1.2.3


From 2c6deb01525ac11cc03c44fe31e3f45ce2cadaf9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:35 -0700
Subject: bitmap: use memcmp optimisation in more situations

Commit 7dd968163f7c ("bitmap: bitmap_equal memcmp optimization") was
rather more restrictive than necessary; we can use memcmp() to implement
bitmap_equal() as long as the number of bits can be proved to be a
multiple of 8.  And architectures other than s390 may be able to make
good use of this optimisation.

[arnd@arndb.de: fix build: add a memcmp() declaration]
  Link: http://lkml.kernel.org/r/20170630153908.3439707-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170628153221.11322-5-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/boot/compressed/decompress.c | 1 +
 include/linux/bitmap.h                | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index ea7832702a8f..f3a4bedd1afc 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -33,6 +33,7 @@ extern void error(char *);
 /* Not needed, but used in some headers pulled in by decompressors */
 extern char * strstr(const char * s1, const char *s2);
 extern size_t strlen(const char *s);
+extern int memcmp(const void *cs, const void *ct, size_t count);
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index c04c9d155e59..5797ca6fdfe2 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -266,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1,
 {
 	if (small_const_nbits(nbits))
 		return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
-#ifdef CONFIG_S390
-	if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0)
+	if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
 		return !memcmp(src1, src2, nbits / 8);
-#endif
 	return __bitmap_equal(src1, src2, nbits);
 }
 
-- 
cgit v1.2.3


From a94c33dd1f677d16c4f1a162b4b3e9eba1b07c24 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Mon, 10 Jul 2017 15:51:58 -0700
Subject: lib/extable.c: use bsearch() library function in search_extable()

[thomas@m3y3r.de: v3: fix arch specific implementations]
  Link: http://lkml.kernel.org/r/1497890858.12931.7.camel@m3y3r.de
Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/kernel/module.c |  3 ++-
 arch/mips/kernel/traps.c  |  3 ++-
 arch/sh/mm/extable_64.c   | 34 ++++++++++++++++++----------------
 arch/sparc/mm/extable.c   | 28 ++++++++++++++--------------
 include/linux/extable.h   |  5 +++--
 kernel/extable.c          |  3 ++-
 kernel/module.c           |  2 +-
 lib/extable.c             | 41 +++++++++++++++++++++--------------------
 8 files changed, 63 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 94627a3a6a0d..50c020c47e54 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -317,7 +317,8 @@ const struct exception_table_entry *search_module_dbetables(unsigned long addr)
 
 	spin_lock_irqsave(&dbe_lock, flags);
 	list_for_each_entry(dbe, &dbe_list, dbe_list) {
-		e = search_extable(dbe->dbe_start, dbe->dbe_end - 1, addr);
+		e = search_extable(dbe->dbe_start,
+				   dbe->dbe_end - dbe->dbe_start, addr);
 		if (e)
 			break;
 	}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 38dfa27730ff..b68b4d0726d3 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -429,7 +429,8 @@ static const struct exception_table_entry *search_dbe_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___dbe_table, __stop___dbe_table - 1, addr);
+	e = search_extable(__start___dbe_table,
+			   __stop___dbe_table - __start___dbe_table, addr);
 	if (!e)
 		e = search_module_dbetables(addr);
 	return e;
diff --git a/arch/sh/mm/extable_64.c b/arch/sh/mm/extable_64.c
index b90cdfad2c78..7a3b4d33d2e7 100644
--- a/arch/sh/mm/extable_64.c
+++ b/arch/sh/mm/extable_64.c
@@ -10,6 +10,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/bsearch.h>
 #include <linux/rwsem.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
@@ -40,10 +41,23 @@ static const struct exception_table_entry *check_exception_ranges(unsigned long
 	return NULL;
 }
 
+static int cmp_ex_search(const void *key, const void *elt)
+{
+	const struct exception_table_entry *_elt = elt;
+	unsigned long _key = *(unsigned long *)key;
+
+	/* avoid overflow */
+	if (_key > _elt->insn)
+		return 1;
+	if (_key < _elt->insn)
+		return -1;
+	return 0;
+}
+
 /* Simple binary search */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-		 const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+		 const size_t num,
 		 unsigned long value)
 {
 	const struct exception_table_entry *mid;
@@ -52,20 +66,8 @@ search_extable(const struct exception_table_entry *first,
 	if (mid)
 		return mid;
 
-        while (first <= last) {
-		long diff;
-
-		mid = (last - first) / 2 + first;
-		diff = mid->insn - value;
-                if (diff == 0)
-                        return mid;
-                else if (diff < 0)
-                        first = mid+1;
-                else
-                        last = mid-1;
-        }
-
-        return NULL;
+	return bsearch(&value, base, num,
+		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
 
 int fixup_exception(struct pt_regs *regs)
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index db214e9931d9..2422511dc8c5 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c
@@ -13,11 +13,11 @@ void sort_extable(struct exception_table_entry *start,
 
 /* Caller knows they are in a range if ret->fixup == 0 */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *start,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value)
 {
-	const struct exception_table_entry *walk;
+	int i;
 
 	/* Single insn entries are encoded as:
 	 *	word 1:	insn address
@@ -37,30 +37,30 @@ search_extable(const struct exception_table_entry *start,
 	 */
 
 	/* 1. Try to find an exact match. */
-	for (walk = start; walk <= last; walk++) {
-		if (walk->fixup == 0) {
+	for (i = 0; i < num; i++) {
+		if (base[i].fixup == 0) {
 			/* A range entry, skip both parts. */
-			walk++;
+			i++;
 			continue;
 		}
 
 		/* A deleted entry; see trim_init_extable */
-		if (walk->fixup == -1)
+		if (base[i].fixup == -1)
 			continue;
 
-		if (walk->insn == value)
-			return walk;
+		if (base[i].insn == value)
+			return &base[i];
 	}
 
 	/* 2. Try to find a range match. */
-	for (walk = start; walk <= (last - 1); walk++) {
-		if (walk->fixup)
+	for (i = 0; i < (num - 1); i++) {
+		if (base[i].fixup)
 			continue;
 
-		if (walk[0].insn <= value && walk[1].insn > value)
-			return walk;
+		if (base[i].insn <= value && base[i + 1].insn > value)
+			return &base[i];
 
-		walk++;
+		i++;
 	}
 
         return NULL;
diff --git a/include/linux/extable.h b/include/linux/extable.h
index 7effea4b257d..28addad0dda7 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -2,13 +2,14 @@
 #define _LINUX_EXTABLE_H
 
 #include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 
 struct module;
 struct exception_table_entry;
 
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value);
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);
diff --git a/kernel/extable.c b/kernel/extable.c
index 223df4a328a4..38c2412401a1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -55,7 +55,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
+	e = search_extable(__start___ex_table,
+			   __stop___ex_table - __start___ex_table, addr);
 	if (!e)
 		e = search_module_extables(addr);
 	return e;
diff --git a/kernel/module.c b/kernel/module.c
index b3dbdde82e80..b0f92a365140 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4196,7 +4196,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 		goto out;
 
 	e = search_extable(mod->extable,
-			   mod->extable + mod->num_exentries - 1,
+			   mod->num_exentries,
 			   addr);
 out:
 	preempt_enable();
diff --git a/lib/extable.c b/lib/extable.c
index 62968daa66a9..f54996fdd0b8 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bsearch.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
@@ -51,7 +52,7 @@ static void swap_ex(void *a, void *b, int size)
  * This is used both for the kernel exception table and for
  * the exception tables of modules that get loaded.
  */
-static int cmp_ex(const void *a, const void *b)
+static int cmp_ex_sort(const void *a, const void *b)
 {
 	const struct exception_table_entry *x = a, *y = b;
 
@@ -67,7 +68,7 @@ void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish)
 {
 	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, swap_ex);
+	     cmp_ex_sort, swap_ex);
 }
 
 #ifdef CONFIG_MODULES
@@ -93,6 +94,20 @@ void trim_init_extable(struct module *m)
 #endif /* !ARCH_HAS_SORT_EXTABLE */
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
+
+static int cmp_ex_search(const void *key, const void *elt)
+{
+	const struct exception_table_entry *_elt = elt;
+	unsigned long _key = *(unsigned long *)key;
+
+	/* avoid overflow */
+	if (_key > ex_to_insn(_elt))
+		return 1;
+	if (_key < ex_to_insn(_elt))
+		return -1;
+	return 0;
+}
+
 /*
  * Search one exception table for an entry corresponding to the
  * given instruction address, and return the address of the entry,
@@ -101,25 +116,11 @@ void trim_init_extable(struct module *m)
  * already sorted.
  */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value)
 {
-	while (first <= last) {
-		const struct exception_table_entry *mid;
-
-		mid = ((last - first) >> 1) + first;
-		/*
-		 * careful, the distance between value and insn
-		 * can be larger than MAX_LONG:
-		 */
-		if (ex_to_insn(mid) < value)
-			first = mid + 1;
-		else if (ex_to_insn(mid) > value)
-			last = mid - 1;
-		else
-			return mid;
-	}
-	return NULL;
+	return bsearch(&value, base, num,
+		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
 #endif
-- 
cgit v1.2.3


From 1d278a879081ddc40286500e58868aaee47de257 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:53 +0100
Subject: VFS: Kill off s_options and helpers

Kill off s_options, save/replace_mount_options() and generic_show_options()
as all filesystems now implement ->show_options() for themselves.  This
should make it easier to implement a context-based mount where the mount
options can be passed individually over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/vfs.txt |  6 ----
 fs/efivarfs/super.c               |  1 -
 fs/namespace.c                    | 59 ---------------------------------------
 fs/super.c                        |  1 -
 include/linux/fs.h                |  9 ------
 5 files changed, 76 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f42b90687d40..ee56a7d10da9 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1187,12 +1187,6 @@ The underlying reason for the above rules is to make sure, that a
 mount can be accurately replicated (e.g. umounting and mounting again)
 based on the information found in /proc/mounts.
 
-A simple method of saving options at mount/remount time and showing
-them is provided with the save_mount_options() and
-generic_show_options() helper functions.  Please note, that using
-these may have drawbacks.  For more info see header comments for these
-functions in fs/namespace.c.
-
 Resources
 =========
 
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index d7a7c53803c1..5b68e4294faa 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -29,7 +29,6 @@ static const struct super_operations efivarfs_ops = {
 	.statfs = simple_statfs,
 	.drop_inode = generic_delete_inode,
 	.evict_inode = efivarfs_evict_inode,
-	.show_options = generic_show_options,
 };
 
 static struct super_block *efivarfs_sb;
diff --git a/fs/namespace.c b/fs/namespace.c
index 544ab84642eb..0e1fdb306133 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1237,65 +1237,6 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
 	return &p->mnt;
 }
 
-static inline void mangle(struct seq_file *m, const char *s)
-{
-	seq_escape(m, s, " \t\n\\");
-}
-
-/*
- * Simple .show_options callback for filesystems which don't want to
- * implement more complex mount option showing.
- *
- * See also save_mount_options().
- */
-int generic_show_options(struct seq_file *m, struct dentry *root)
-{
-	const char *options;
-
-	rcu_read_lock();
-	options = rcu_dereference(root->d_sb->s_options);
-
-	if (options != NULL && options[0]) {
-		seq_putc(m, ',');
-		mangle(m, options);
-	}
-	rcu_read_unlock();
-
-	return 0;
-}
-EXPORT_SYMBOL(generic_show_options);
-
-/*
- * If filesystem uses generic_show_options(), this function should be
- * called from the fill_super() callback.
- *
- * The .remount_fs callback usually needs to be handled in a special
- * way, to make sure, that previous options are not overwritten if the
- * remount fails.
- *
- * Also note, that if the filesystem's .remount_fs function doesn't
- * reset all options to their default value, but changes only newly
- * given options, then the displayed options will not reflect reality
- * any more.
- */
-void save_mount_options(struct super_block *sb, char *options)
-{
-	BUG_ON(sb->s_options);
-	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
-}
-EXPORT_SYMBOL(save_mount_options);
-
-void replace_mount_options(struct super_block *sb, char *options)
-{
-	char *old = sb->s_options;
-	rcu_assign_pointer(sb->s_options, options);
-	if (old) {
-		synchronize_rcu();
-		kfree(old);
-	}
-}
-EXPORT_SYMBOL(replace_mount_options);
-
 #ifdef CONFIG_PROC_FS
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
diff --git a/fs/super.c b/fs/super.c
index dfb56a9665d8..6bc3352adcf3 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -168,7 +168,6 @@ static void destroy_super(struct super_block *s)
 	WARN_ON(!list_empty(&s->s_mounts));
 	put_user_ns(s->s_user_ns);
 	kfree(s->s_subtype);
-	kfree(s->s_options);
 	call_rcu(&s->rcu, destroy_super_rcu);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bc0c054894b9..e265b2ea72c6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1351,11 +1351,6 @@ struct super_block {
 	 */
 	char *s_subtype;
 
-	/*
-	 * Saved mount options for lazy filesystems using
-	 * generic_show_options()
-	 */
-	char __rcu *s_options;
 	const struct dentry_operations *s_d_op; /* default d_op for dentries */
 
 	/*
@@ -3033,10 +3028,6 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr);
 
 extern int file_update_time(struct file *file);
 
-extern int generic_show_options(struct seq_file *m, struct dentry *root);
-extern void save_mount_options(struct super_block *sb, char *options);
-extern void replace_mount_options(struct super_block *sb, char *options);
-
 static inline bool io_is_direct(struct file *filp)
 {
 	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
-- 
cgit v1.2.3


From 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 6 Jul 2017 08:41:06 -0500
Subject: proc: Fix proc_sys_prune_dcache to hold a sb reference

Andrei Vagin writes:
FYI: This bug has been reproduced on 4.11.7
> BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo}  still in use (1) [unmount of proc proc]
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80
> CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1
> Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015
> Workqueue: events proc_cleanup_work
> Call Trace:
>  dump_stack+0x63/0x86
>  __warn+0xcb/0xf0
>  warn_slowpath_null+0x1d/0x20
>  umount_check+0x6e/0x80
>  d_walk+0xc6/0x270
>  ? dentry_free+0x80/0x80
>  do_one_tree+0x26/0x40
>  shrink_dcache_for_umount+0x2d/0x90
>  generic_shutdown_super+0x1f/0xf0
>  kill_anon_super+0x12/0x20
>  proc_kill_sb+0x40/0x50
>  deactivate_locked_super+0x43/0x70
>  deactivate_super+0x5a/0x60
>  cleanup_mnt+0x3f/0x90
>  mntput_no_expire+0x13b/0x190
>  kern_unmount+0x3e/0x50
>  pid_ns_release_proc+0x15/0x20
>  proc_cleanup_work+0x15/0x20
>  process_one_work+0x197/0x450
>  worker_thread+0x4e/0x4a0
>  kthread+0x109/0x140
>  ? process_one_work+0x450/0x450
>  ? kthread_park+0x90/0x90
>  ret_from_fork+0x2c/0x40
> ---[ end trace e1c109611e5d0b41 ]---
> VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds.  Have a nice day...
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> IP: _raw_spin_lock+0xc/0x30
> PGD 0

Fix this by taking a reference to the super block in proc_sys_prune_dcache.

The superblock reference is the core of the fix however the sysctl_inodes
list is converted to a hlist so that hlist_del_init_rcu may be used.  This
allows proc_sys_prune_dache to remove inodes the sysctl_inodes list, while
not causing problems for proc_sys_evict_inode when if it later choses to
remove the inode from the sysctl_inodes list.  Removing inodes from the
sysctl_inodes list allows proc_sys_prune_dcache to have a progress
guarantee, while still being able to drop all locks.  The fact that
head->unregistering is set in start_unregistering ensures that no more
inodes will be added to the the sysctl_inodes list.

Previously the code did a dance where it delayed calling iput until the
next entry in the list was being considered to ensure the inode remained on
the sysctl_inodes list until the next entry was walked to.  The structure
of the loop in this patch does not need that so is much easier to
understand and maintain.

Cc: stable@vger.kernel.org
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@openvz.org>
Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.")
Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/internal.h     |  2 +-
 fs/proc/proc_sysctl.c  | 43 ++++++++++++++++++++++++++++++-------------
 include/linux/sysctl.h |  2 +-
 3 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c5ae09b6c726..18694598bebf 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -67,7 +67,7 @@ struct proc_inode {
 	struct proc_dir_entry *pde;
 	struct ctl_table_header *sysctl;
 	struct ctl_table *sysctl_entry;
-	struct list_head sysctl_inodes;
+	struct hlist_node sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
 };
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 67985a7233c2..9bf06e2b1284 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
 	head->set = set;
 	head->parent = NULL;
 	head->node = node;
-	INIT_LIST_HEAD(&head->inodes);
+	INIT_HLIST_HEAD(&head->inodes);
 	if (node) {
 		struct ctl_table *entry;
 		for (entry = table; entry->procname; entry++, node++)
@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
 			complete(p->unregistering);
 }
 
-/* called under sysctl_lock */
 static void proc_sys_prune_dcache(struct ctl_table_header *head)
 {
-	struct inode *inode, *prev = NULL;
+	struct inode *inode;
 	struct proc_inode *ei;
+	struct hlist_node *node;
+	struct super_block *sb;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
-		inode = igrab(&ei->vfs_inode);
-		if (inode) {
-			rcu_read_unlock();
-			iput(prev);
-			prev = inode;
-			d_prune_aliases(inode);
+	for (;;) {
+		node = hlist_first_rcu(&head->inodes);
+		if (!node)
+			break;
+		ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
+		spin_lock(&sysctl_lock);
+		hlist_del_init_rcu(&ei->sysctl_inodes);
+		spin_unlock(&sysctl_lock);
+
+		inode = &ei->vfs_inode;
+		sb = inode->i_sb;
+		if (!atomic_inc_not_zero(&sb->s_active))
+			continue;
+		inode = igrab(inode);
+		rcu_read_unlock();
+		if (unlikely(!inode)) {
+			deactivate_super(sb);
 			rcu_read_lock();
+			continue;
 		}
+
+		d_prune_aliases(inode);
+		iput(inode);
+		deactivate_super(sb);
+
+		rcu_read_lock();
 	}
 	rcu_read_unlock();
-	iput(prev);
 }
 
 /* called under sysctl_lock, will reacquire if has to wait */
@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	}
 	ei->sysctl = head;
 	ei->sysctl_entry = table;
-	list_add_rcu(&ei->sysctl_inodes, &head->inodes);
+	hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
 	head->count++;
 	spin_unlock(&sysctl_lock);
 
@@ -489,7 +506,7 @@ out:
 void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 {
 	spin_lock(&sysctl_lock);
-	list_del_rcu(&PROC_I(inode)->sysctl_inodes);
+	hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
 	if (!--head->count)
 		kfree_rcu(head, rcu);
 	spin_unlock(&sysctl_lock);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def0..1c04a26bfd2f 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -143,7 +143,7 @@ struct ctl_table_header
 	struct ctl_table_set *set;
 	struct ctl_dir *parent;
 	struct ctl_node *node;
-	struct list_head inodes; /* head for proc_inode->sysctl_inodes */
+	struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
 };
 
 struct ctl_dir {
-- 
cgit v1.2.3


From e8158b486d5f3f55cf372c5a32b42f263bf7f123 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 11 Jul 2017 18:20:20 +0300
Subject: device property: Introduce fwnode_call_bool_op() for ops that return
 bool

fwnode_call_int_op() isn't suitable for calling ops that return bool
since it effectively causes the result returned to the user to be
true when an op hasn't been defined or the fwnode is NULL.

Address this by introducing fwnode_call_bool_op() for calling ops
that return bool.

Fixes: 3708184afc77 "device property: Move FW type specific functionality to FW specific files"
Fixes: 2294b3af05e9 "device property: Introduce fwnode_device_is_available()"
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c | 6 +++---
 include/linux/fwnode.h  | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 692007e5a94b..edf02c1b5845 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -253,10 +253,10 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
 {
 	bool ret;
 
-	ret = fwnode_call_int_op(fwnode, property_present, propname);
+	ret = fwnode_call_bool_op(fwnode, property_present, propname);
 	if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = fwnode_call_int_op(fwnode->secondary, property_present,
+		ret = fwnode_call_bool_op(fwnode->secondary, property_present,
 					 propname);
 	return ret;
 }
@@ -1027,7 +1027,7 @@ EXPORT_SYMBOL_GPL(fwnode_handle_put);
  */
 bool fwnode_device_is_available(struct fwnode_handle *fwnode)
 {
-	return fwnode_call_int_op(fwnode, device_is_available);
+	return fwnode_call_bool_op(fwnode, device_is_available);
 }
 EXPORT_SYMBOL_GPL(fwnode_device_is_available);
 
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 9ab375419189..50893a1646cf 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -99,6 +99,10 @@ struct fwnode_operations {
 	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
 		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
 	 -EINVAL)
+#define fwnode_call_bool_op(fwnode, op, ...)				\
+	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
+		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false) : \
+	 false)
 #define fwnode_call_ptr_op(fwnode, op, ...)		\
 	(fwnode_has_op(fwnode, op) ?			\
 	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
-- 
cgit v1.2.3


From 8c6ae4980e70395cbdfdf605c29673c5a6a89d9a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 30 Jun 2017 12:03:54 -0400
Subject: sunrpc: Allocate up to RPCSVC_MAXPAGES per svc_rqst

svcrdma needs 259 pages allocated to receive 1MB NFSv4.0 WRITE requests:

 - 1 page for the transport header and head iovec
 - 256 pages for the data payload
 - 1 page for the trailing GETATTR request (since NFSD XDR decoding
   does not look for a tail iovec, the GETATTR is stuck at the end
   of the rqstp->rq_arg.pages list)
 - 1 page for building the reply xdr_buf

But RPCSVC_MAXPAGES is already 259 (on x86_64). The problem is that
svc_alloc_arg never allocates that many pages. To address this:

1. The final element of rq_pages always points to NULL. To
   accommodate up to 259 pages in rq_pages, add an extra element
   to rq_pages for the array termination sentinel.

2. Adjust the calculation of "pages" to match how RPCSVC_MAXPAGES
   is calculated, so it can go up to 259. Bruce noted that the
   calculation assumes sv_max_mesg is a multiple of PAGE_SIZE,
   which might not always be true. I didn't change this assumption.

3. Change the loop boundaries to allow 259 pages to be allocated.

Additional clean-up: WARN_ON_ONCE adds an extra conditional branch,
which is basically never taken. And there's no need to dump the
stack here because svc_alloc_arg has only one caller.

Keeping that NULL "array termination sentinel"; there doesn't appear to
be any code that depends on it, only code in nfsd_splice_actor() which
needs the 259th element to be initialized to *something*.  So it's
possible we could just keep the array at 259 elements and drop that
final NULL, but we're being conservative for now.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc_xprt.c      | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index eec04982a7ea..a3f8af9bd543 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -246,7 +246,7 @@ struct svc_rqst {
 	size_t			rq_xprt_hlen;	/* xprt header len */
 	struct xdr_buf		rq_arg;
 	struct xdr_buf		rq_res;
-	struct page *		rq_pages[RPCSVC_MAXPAGES];
+	struct page		*rq_pages[RPCSVC_MAXPAGES + 1];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
 	struct page *		*rq_page_end;  /* one past the last page */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7bfe1fb42add..d16a8b423c20 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -659,11 +659,13 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	int i;
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
-	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
-	WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES);
-	if (pages >= RPCSVC_MAXPAGES)
+	pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
+	if (pages > RPCSVC_MAXPAGES) {
+		pr_warn_once("svc: warning: pages=%u > RPCSVC_MAXPAGES=%lu\n",
+			     pages, RPCSVC_MAXPAGES);
 		/* use as many pages as possible */
-		pages = RPCSVC_MAXPAGES - 1;
+		pages = RPCSVC_MAXPAGES;
+	}
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
-- 
cgit v1.2.3


From 026d958b38c628a1b4ced534808945365e2747a5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:24 -0400
Subject: svcrdma: Add recvfrom helpers to svc_rdma_rw.c

svc_rdma_rw.c already contains helpers for the sendto path.
Introduce helpers for the recvfrom path.

The plan is to replace the local NFSD bespoke code that constructs
and posts RDMA Read Work Requests with calls to the rdma_rw API.
This shares code with other RDMA-enabled ULPs that manages the gory
details of buffer registration and posting Work Requests.

This new code also puts all RDMA_NOMSG-specific logic in one place.

Lastly, the use of rqstp->rq_arg.pages is deprecated in favor of
using rqstp->rq_pages directly, for clarity.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h   |   3 +
 net/sunrpc/xprtrdma/svc_rdma_rw.c | 425 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 427 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3ca991657889..cf5d5412298b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -196,6 +196,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 
 /* svc_rdma_rw.c */
 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
+extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
+				    struct svc_rqst *rqstp,
+				    struct svc_rdma_op_ctxt *head, __be32 *p);
 extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
 				     __be32 *wr_ch, struct xdr_buf *xdr);
 extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 3b35364d9a6b..77d38a4b8d99 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -12,6 +12,9 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
+static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
+
 /* Each R/W context contains state for one chain of RDMA Read or
  * Write Work Requests.
  *
@@ -177,6 +180,7 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
 	info->wi_nsegs = be32_to_cpup(++chunk);
 	info->wi_segs = ++chunk;
 	svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+	info->wi_cc.cc_cqe.done = svc_rdma_write_done;
 	return info;
 }
 
@@ -216,6 +220,76 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 	svc_rdma_write_info_free(info);
 }
 
+/* State for pulling a Read chunk.
+ */
+struct svc_rdma_read_info {
+	struct svc_rdma_op_ctxt		*ri_readctxt;
+	unsigned int			ri_position;
+	unsigned int			ri_pageno;
+	unsigned int			ri_pageoff;
+	unsigned int			ri_chunklen;
+
+	struct svc_rdma_chunk_ctxt	ri_cc;
+};
+
+static struct svc_rdma_read_info *
+svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_read_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return info;
+
+	svc_rdma_cc_init(rdma, &info->ri_cc, DMA_FROM_DEVICE);
+	info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
+	return info;
+}
+
+static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
+{
+	svc_rdma_cc_release(&info->ri_cc);
+	kfree(info);
+}
+
+/**
+ * svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion
+ *
+ */
+static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_chunk_ctxt *cc =
+			container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_read_info *info =
+			container_of(cc, struct svc_rdma_read_info, ri_cc);
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
+		svc_rdma_put_context(info->ri_readctxt, 1);
+	} else {
+		spin_lock(&rdma->sc_rq_dto_lock);
+		list_add_tail(&info->ri_readctxt->list,
+			      &rdma->sc_read_complete_q);
+		spin_unlock(&rdma->sc_rq_dto_lock);
+
+		set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+		svc_xprt_enqueue(&rdma->sc_xprt);
+	}
+
+	svc_rdma_read_info_free(info);
+}
+
 /* This function sleeps when the transport's Send Queue is congested.
  *
  * Assumptions:
@@ -335,7 +409,6 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
 	__be32 *seg;
 	int ret;
 
-	cc->cc_cqe.done = svc_rdma_write_done;
 	seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
 	do {
 		unsigned int write_len;
@@ -515,3 +588,353 @@ out_err:
 	svc_rdma_write_info_free(info);
 	return ret;
 }
+
+static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
+				       struct svc_rqst *rqstp,
+				       u32 rkey, u32 len, u64 offset)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
+	struct svc_rdma_rw_ctxt *ctxt;
+	unsigned int sge_no, seg_len;
+	struct scatterlist *sg;
+	int ret;
+
+	sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
+	ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
+	if (!ctxt)
+		goto out_noctx;
+	ctxt->rw_nents = sge_no;
+
+	dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
+		len, offset, rkey, sge_no);
+
+	sg = ctxt->rw_sg_table.sgl;
+	for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
+		seg_len = min_t(unsigned int, len,
+				PAGE_SIZE - info->ri_pageoff);
+
+		head->arg.pages[info->ri_pageno] =
+			rqstp->rq_pages[info->ri_pageno];
+		if (!info->ri_pageoff)
+			head->count++;
+
+		sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
+			    seg_len, info->ri_pageoff);
+		sg = sg_next(sg);
+
+		info->ri_pageoff += seg_len;
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+		len -= seg_len;
+
+		/* Safety check */
+		if (len &&
+		    &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end)
+			goto out_overrun;
+	}
+
+	ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp,
+			       cc->cc_rdma->sc_port_num,
+			       ctxt->rw_sg_table.sgl, ctxt->rw_nents,
+			       0, offset, rkey, DMA_FROM_DEVICE);
+	if (ret < 0)
+		goto out_initerr;
+
+	list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+	cc->cc_sqecount += ret;
+	return 0;
+
+out_noctx:
+	dprintk("svcrdma: no R/W ctxs available\n");
+	return -ENOMEM;
+
+out_overrun:
+	dprintk("svcrdma: request overruns rq_pages\n");
+	return -EINVAL;
+
+out_initerr:
+	svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
+	pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+	return -EIO;
+}
+
+static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
+				     struct svc_rdma_read_info *info,
+				     __be32 *p)
+{
+	int ret;
+
+	info->ri_chunklen = 0;
+	while (*p++ != xdr_zero) {
+		u32 rs_handle, rs_length;
+		u64 rs_offset;
+
+		if (be32_to_cpup(p++) != info->ri_position)
+			break;
+		rs_handle = be32_to_cpup(p++);
+		rs_length = be32_to_cpup(p++);
+		p = xdr_decode_hyper(p, &rs_offset);
+
+		ret = svc_rdma_build_read_segment(info, rqstp,
+						  rs_handle, rs_length,
+						  rs_offset);
+		if (ret < 0)
+			break;
+
+		info->ri_chunklen += rs_length;
+	}
+
+	return ret;
+}
+
+/* If there is inline content following the Read chunk, append it to
+ * the page list immediately following the data payload. This has to
+ * be done after the reader function has determined how many pages
+ * were consumed for RDMA Read.
+ *
+ * On entry, ri_pageno and ri_pageoff point directly to the end of the
+ * page list. On exit, both have been updated to the new "next byte".
+ *
+ * Assumptions:
+ *	- Inline content fits entirely in rq_pages[0]
+ *	- Trailing content is only a handful of bytes
+ */
+static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
+			      struct svc_rdma_read_info *info)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	unsigned int tail_length, remaining;
+	u8 *srcp, *destp;
+
+	/* Assert that all inline content fits in page 0. This is an
+	 * implementation limit, not a protocol limit.
+	 */
+	if (head->arg.head[0].iov_len > PAGE_SIZE) {
+		pr_warn_once("svcrdma: too much trailing inline content\n");
+		return -EINVAL;
+	}
+
+	srcp = head->arg.head[0].iov_base;
+	srcp += info->ri_position;
+	tail_length = head->arg.head[0].iov_len - info->ri_position;
+	remaining = tail_length;
+
+	/* If there is room on the last page in the page list, try to
+	 * fit the trailing content there.
+	 */
+	if (info->ri_pageoff > 0) {
+		unsigned int len;
+
+		len = min_t(unsigned int, remaining,
+			    PAGE_SIZE - info->ri_pageoff);
+		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
+		destp += info->ri_pageoff;
+
+		memcpy(destp, srcp, len);
+		srcp += len;
+		destp += len;
+		info->ri_pageoff += len;
+		remaining -= len;
+
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+	}
+
+	/* Otherwise, a fresh page is needed. */
+	if (remaining) {
+		head->arg.pages[info->ri_pageno] =
+				rqstp->rq_pages[info->ri_pageno];
+		head->count++;
+
+		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
+		memcpy(destp, srcp, remaining);
+		info->ri_pageoff += remaining;
+	}
+
+	head->arg.page_len += tail_length;
+	head->arg.len += tail_length;
+	head->arg.buflen += tail_length;
+	return 0;
+}
+
+/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
+ * data lands in the page list of head->arg.pages.
+ *
+ * Currently NFSD does not look at the head->arg.tail[0] iovec.
+ * Therefore, XDR round-up of the Read chunk and trailing
+ * inline content must both be added at the end of the pagelist.
+ */
+static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
+					    struct svc_rdma_read_info *info,
+					    __be32 *p)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	int ret;
+
+	dprintk("svcrdma: Reading Read chunk at position %u\n",
+		info->ri_position);
+
+	info->ri_pageno = head->hdr_count;
+	info->ri_pageoff = 0;
+
+	ret = svc_rdma_build_read_chunk(rqstp, info, p);
+	if (ret < 0)
+		goto out;
+
+	/* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
+	 */
+	if (info->ri_chunklen & 3) {
+		u32 padlen = 4 - (info->ri_chunklen & 3);
+
+		info->ri_chunklen += padlen;
+
+		/* NB: data payload always starts on XDR alignment,
+		 * thus the pad can never contain a page boundary.
+		 */
+		info->ri_pageoff += padlen;
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+	}
+
+	head->arg.page_len = info->ri_chunklen;
+	head->arg.len += info->ri_chunklen;
+	head->arg.buflen += info->ri_chunklen;
+
+	if (info->ri_position < head->arg.head[0].iov_len) {
+		ret = svc_rdma_copy_tail(rqstp, info);
+		if (ret < 0)
+			goto out;
+	}
+	head->arg.head[0].iov_len = info->ri_position;
+
+out:
+	return ret;
+}
+
+/* Construct RDMA Reads to pull over a Position Zero Read chunk.
+ * The start of the data lands in the first page just after
+ * the Transport header, and the rest lands in the page list of
+ * head->arg.pages.
+ *
+ * Assumptions:
+ *	- A PZRC has an XDR-aligned length (no implicit round-up).
+ *	- There can be no trailing inline content (IOW, we assume
+ *	  a PZRC is never sent in an RDMA_MSG message, though it's
+ *	  allowed by spec).
+ */
+static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
+					struct svc_rdma_read_info *info,
+					__be32 *p)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	int ret;
+
+	dprintk("svcrdma: Reading Position Zero Read chunk\n");
+
+	info->ri_pageno = head->hdr_count - 1;
+	info->ri_pageoff = offset_in_page(head->byte_len);
+
+	ret = svc_rdma_build_read_chunk(rqstp, info, p);
+	if (ret < 0)
+		goto out;
+
+	head->arg.len += info->ri_chunklen;
+	head->arg.buflen += info->ri_chunklen;
+
+	if (head->arg.len <= head->sge[0].length) {
+		/* Transport header and RPC message fit entirely
+		 * in page where head iovec resides.
+		 */
+		head->arg.head[0].iov_len = info->ri_chunklen;
+	} else {
+		/* Transport header and part of RPC message reside
+		 * in the head iovec's page.
+		 */
+		head->arg.head[0].iov_len =
+				head->sge[0].length - head->byte_len;
+		head->arg.page_len =
+				info->ri_chunklen - head->arg.head[0].iov_len;
+	}
+
+out:
+	return ret;
+}
+
+/**
+ * svc_rdma_recv_read_chunk - Pull a Read chunk from the client
+ * @rdma: controlling RDMA transport
+ * @rqstp: set of pages to use as Read sink buffers
+ * @head: pages under I/O collect here
+ * @p: pointer to start of Read chunk
+ *
+ * Returns:
+ *	%0 if all needed RDMA Reads were posted successfully,
+ *	%-EINVAL if client provided too many segments,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ *
+ * Assumptions:
+ * - All Read segments in @p have the same Position value.
+ */
+int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
+			     struct svc_rdma_op_ctxt *head, __be32 *p)
+{
+	struct svc_rdma_read_info *info;
+	struct page **page;
+	int ret;
+
+	/* The request (with page list) is constructed in
+	 * head->arg. Pages involved with RDMA Read I/O are
+	 * transferred there.
+	 */
+	head->hdr_count = head->count;
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = head->pages;
+	head->arg.page_base = 0;
+	head->arg.page_len = 0;
+	head->arg.len = rqstp->rq_arg.len;
+	head->arg.buflen = rqstp->rq_arg.buflen;
+
+	info = svc_rdma_read_info_alloc(rdma);
+	if (!info)
+		return -ENOMEM;
+	info->ri_readctxt = head;
+
+	info->ri_position = be32_to_cpup(p + 1);
+	if (info->ri_position)
+		ret = svc_rdma_build_normal_read_chunk(rqstp, info, p);
+	else
+		ret = svc_rdma_build_pz_read_chunk(rqstp, info, p);
+
+	/* Mark the start of the pages that can be used for the reply */
+	if (info->ri_pageoff > 0)
+		info->ri_pageno++;
+	rqstp->rq_respages = &rqstp->rq_pages[info->ri_pageno];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+	if (ret < 0)
+		goto out;
+
+	ret = svc_rdma_post_chunk_ctxt(&info->ri_cc);
+
+out:
+	/* Read sink pages have been moved from rqstp->rq_pages to
+	 * head->arg.pages. Force svc_recv to refill those slots
+	 * in rq_pages.
+	 */
+	for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
+		*page = NULL;
+
+	if (ret < 0)
+		svc_rdma_read_info_free(info);
+	return ret;
+}
-- 
cgit v1.2.3


From cafc739892f34b9090413179ca259409fc43bfae Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:33 -0400
Subject: svcrdma: Use generic RDMA R/W API in RPC Call path

The current svcrdma recvfrom code path has a lot of detail about
registration mode and the type of port (iWARP, IB, etc).

Instead, use the RDMA core's generic R/W API. This shares code with
other RDMA-enabled ULPs that manages the gory details of buffer
registration and the posting of RDMA Read Work Requests.

Since the Read list marshaling code is being replaced, I took the
opportunity to replace C structure-based XDR encoding code with more
portable code that uses pointer arithmetic.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  14 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 547 ++++++-------------------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  13 -
 3 files changed, 106 insertions(+), 468 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index cf5d5412298b..b1ba19ba1071 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -82,10 +82,7 @@ struct svc_rdma_op_ctxt {
 	int hdr_count;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
-	struct ib_cqe reg_cqe;
-	struct ib_cqe inv_cqe;
 	u32 byte_len;
-	u32 position;
 	struct svcxprt_rdma *xprt;
 	unsigned long flags;
 	enum dma_data_direction direction;
@@ -116,7 +113,6 @@ struct svcxprt_rdma {
 	struct list_head     sc_accept_q;	/* Conn. waiting accept */
 	int		     sc_ord;		/* RDMA read limit */
 	int                  sc_max_sge;
-	int                  sc_max_sge_rd;	/* max sge for read target */
 	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
@@ -141,10 +137,6 @@ struct svcxprt_rdma {
 	struct ib_qp         *sc_qp;
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
-	int		     (*sc_reader)(struct svcxprt_rdma *,
-					  struct svc_rqst *,
-					  struct svc_rdma_op_ctxt *,
-					  int *, u32 *, u32, u32, u64, bool);
 	u32		     sc_dev_caps;	/* distilled device caps */
 	unsigned int	     sc_frmr_pg_list_len;
 	struct list_head     sc_frmr_q;
@@ -187,12 +179,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
-extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
-			       struct svc_rdma_op_ctxt *, int *, u32 *,
-			       u32, u32, u64, bool);
-extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
-				struct svc_rdma_op_ctxt *, int *, u32 *,
-				u32, u32, u64, bool);
 
 /* svc_rdma_rw.c */
 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 1452bd02d857..5fbcb73dd68d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -41,13 +41,66 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
-#include <linux/spinlock.h>
+/* Operation
+ *
+ * The main entry point is svc_rdma_recvfrom. This is called from
+ * svc_recv when the transport indicates there is incoming data to
+ * be read. "Data Ready" is signaled when an RDMA Receive completes,
+ * or when a set of RDMA Reads complete.
+ *
+ * An svc_rqst is passed in. This structure contains an array of
+ * free pages (rq_pages) that will contain the incoming RPC message.
+ *
+ * Short messages are moved directly into svc_rqst::rq_arg, and
+ * the RPC Call is ready to be processed by the Upper Layer.
+ * svc_rdma_recvfrom returns the length of the RPC Call message,
+ * completing the reception of the RPC Call.
+ *
+ * However, when an incoming message has Read chunks,
+ * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
+ * data payload from the client. svc_rdma_recvfrom sets up the
+ * RDMA Reads using pages in svc_rqst::rq_pages, which are
+ * transferred to an svc_rdma_op_ctxt for the duration of the
+ * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
+ * is still not yet ready.
+ *
+ * When the Read chunk payloads have become available on the
+ * server, "Data Ready" is raised again, and svc_recv calls
+ * svc_rdma_recvfrom again. This second call may use a different
+ * svc_rqst than the first one, thus any information that needs
+ * to be preserved across these two calls is kept in an
+ * svc_rdma_op_ctxt.
+ *
+ * The second call to svc_rdma_recvfrom performs final assembly
+ * of the RPC Call message, using the RDMA Read sink pages kept in
+ * the svc_rdma_op_ctxt. The xdr_buf is copied from the
+ * svc_rdma_op_ctxt to the second svc_rqst. The second call returns
+ * the length of the completed RPC Call message.
+ *
+ * Page Management
+ *
+ * Pages under I/O must be transferred from the first svc_rqst to an
+ * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns.
+ *
+ * The first svc_rqst supplies pages for RDMA Reads. These are moved
+ * from rqstp::rq_pages into ctxt::pages. The consumed elements of
+ * the rq_pages array are set to NULL and refilled with the first
+ * svc_rdma_recvfrom call returns.
+ *
+ * During the second svc_rdma_recvfrom call, RDMA Read sink pages
+ * are transferred from the svc_rdma_op_ctxt to the second svc_rqst
+ * (see rdma_read_complete() below).
+ */
+
 #include <asm/unaligned.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+
+#include <linux/spinlock.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
@@ -61,7 +114,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 			       struct svc_rdma_op_ctxt *ctxt,
 			       u32 byte_count)
 {
-	struct rpcrdma_msg *rmsgp;
 	struct page *page;
 	u32 bc;
 	int sge_no;
@@ -85,13 +137,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.page_len = bc;
 	rqstp->rq_arg.page_base = 0;
 
-	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-	if (rmsgp->rm_type == rdma_nomsg)
-		rqstp->rq_arg.pages = &rqstp->rq_pages[0];
-	else
-		rqstp->rq_arg.pages = &rqstp->rq_pages[1];
-
 	sge_no = 1;
 	while (bc && sge_no < ctxt->count) {
 		page = ctxt->pages[sge_no];
@@ -320,395 +365,6 @@ out_inval:
 	return -EINVAL;
 }
 
-/* Issue an RDMA_READ using the local lkey to map the data sink */
-int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
-			struct svc_rqst *rqstp,
-			struct svc_rdma_op_ctxt *head,
-			int *page_no,
-			u32 *page_offset,
-			u32 rs_handle,
-			u32 rs_length,
-			u64 rs_offset,
-			bool last)
-{
-	struct ib_rdma_wr read_wr;
-	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
-	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
-	int ret, read, pno;
-	u32 pg_off = *page_offset;
-	u32 pg_no = *page_no;
-
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->read_hdr = head;
-	pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd);
-	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
-		     rs_length);
-
-	for (pno = 0; pno < pages_needed; pno++) {
-		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
-
-		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
-		head->arg.page_len += len;
-
-		head->arg.len += len;
-		if (!pg_off)
-			head->count++;
-		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-		ctxt->sge[pno].addr =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					head->arg.pages[pg_no], pg_off,
-					PAGE_SIZE - pg_off,
-					DMA_FROM_DEVICE);
-		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
-					   ctxt->sge[pno].addr);
-		if (ret)
-			goto err;
-		svc_rdma_count_mappings(xprt, ctxt);
-
-		ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
-		ctxt->sge[pno].length = len;
-		ctxt->count++;
-
-		/* adjust offset and wrap to next page if needed */
-		pg_off += len;
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			pg_no++;
-		}
-		rs_length -= len;
-	}
-
-	if (last && rs_length == 0)
-		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-
-	memset(&read_wr, 0, sizeof(read_wr));
-	ctxt->cqe.done = svc_rdma_wc_read;
-	read_wr.wr.wr_cqe = &ctxt->cqe;
-	read_wr.wr.opcode = IB_WR_RDMA_READ;
-	read_wr.wr.send_flags = IB_SEND_SIGNALED;
-	read_wr.rkey = rs_handle;
-	read_wr.remote_addr = rs_offset;
-	read_wr.wr.sg_list = ctxt->sge;
-	read_wr.wr.num_sge = pages_needed;
-
-	ret = svc_rdma_send(xprt, &read_wr.wr);
-	if (ret) {
-		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		goto err;
-	}
-
-	/* return current location in page array */
-	*page_no = pg_no;
-	*page_offset = pg_off;
-	ret = read;
-	atomic_inc(&rdma_stat_read);
-	return ret;
- err:
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-	return ret;
-}
-
-/* Issue an RDMA_READ using an FRMR to map the data sink */
-int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
-			 struct svc_rqst *rqstp,
-			 struct svc_rdma_op_ctxt *head,
-			 int *page_no,
-			 u32 *page_offset,
-			 u32 rs_handle,
-			 u32 rs_length,
-			 u64 rs_offset,
-			 bool last)
-{
-	struct ib_rdma_wr read_wr;
-	struct ib_send_wr inv_wr;
-	struct ib_reg_wr reg_wr;
-	u8 key;
-	int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
-	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
-	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
-	int ret, read, pno, dma_nents, n;
-	u32 pg_off = *page_offset;
-	u32 pg_no = *page_no;
-
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->frmr = frmr;
-	nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
-	read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
-
-	frmr->direction = DMA_FROM_DEVICE;
-	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->sg_nents = nents;
-
-	for (pno = 0; pno < nents; pno++) {
-		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
-
-		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
-		head->arg.page_len += len;
-		head->arg.len += len;
-		if (!pg_off)
-			head->count++;
-
-		sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
-			    len, pg_off);
-
-		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-		/* adjust offset and wrap to next page if needed */
-		pg_off += len;
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			pg_no++;
-		}
-		rs_length -= len;
-	}
-
-	if (last && rs_length == 0)
-		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-
-	dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
-				  frmr->sg, frmr->sg_nents,
-				  frmr->direction);
-	if (!dma_nents) {
-		pr_err("svcrdma: failed to dma map sg %p\n",
-		       frmr->sg);
-		return -ENOMEM;
-	}
-
-	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
-	if (unlikely(n != frmr->sg_nents)) {
-		pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
-		       frmr->mr, n, frmr->sg_nents);
-		return n < 0 ? n : -EINVAL;
-	}
-
-	/* Bump the key */
-	key = (u8)(frmr->mr->lkey & 0x000000FF);
-	ib_update_fast_reg_key(frmr->mr, ++key);
-
-	ctxt->sge[0].addr = frmr->mr->iova;
-	ctxt->sge[0].lkey = frmr->mr->lkey;
-	ctxt->sge[0].length = frmr->mr->length;
-	ctxt->count = 1;
-	ctxt->read_hdr = head;
-
-	/* Prepare REG WR */
-	ctxt->reg_cqe.done = svc_rdma_wc_reg;
-	reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
-	reg_wr.wr.opcode = IB_WR_REG_MR;
-	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
-	reg_wr.wr.num_sge = 0;
-	reg_wr.mr = frmr->mr;
-	reg_wr.key = frmr->mr->lkey;
-	reg_wr.access = frmr->access_flags;
-	reg_wr.wr.next = &read_wr.wr;
-
-	/* Prepare RDMA_READ */
-	memset(&read_wr, 0, sizeof(read_wr));
-	ctxt->cqe.done = svc_rdma_wc_read;
-	read_wr.wr.wr_cqe = &ctxt->cqe;
-	read_wr.wr.send_flags = IB_SEND_SIGNALED;
-	read_wr.rkey = rs_handle;
-	read_wr.remote_addr = rs_offset;
-	read_wr.wr.sg_list = ctxt->sge;
-	read_wr.wr.num_sge = 1;
-	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
-		read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
-		read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
-	} else {
-		read_wr.wr.opcode = IB_WR_RDMA_READ;
-		read_wr.wr.next = &inv_wr;
-		/* Prepare invalidate */
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		ctxt->inv_cqe.done = svc_rdma_wc_inv;
-		inv_wr.wr_cqe = &ctxt->inv_cqe;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
-		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
-	}
-
-	/* Post the chain */
-	ret = svc_rdma_send(xprt, &reg_wr.wr);
-	if (ret) {
-		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		goto err;
-	}
-
-	/* return current location in page array */
-	*page_no = pg_no;
-	*page_offset = pg_off;
-	ret = read;
-	atomic_inc(&rdma_stat_read);
-	return ret;
- err:
-	svc_rdma_put_context(ctxt, 0);
-	svc_rdma_put_frmr(xprt, frmr);
-	return ret;
-}
-
-/* If there was additional inline content, append it to the end of arg.pages.
- * Tail copy has to be done after the reader function has determined how many
- * pages are needed for RDMA READ.
- */
-static int
-rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
-	       u32 position, u32 byte_count, u32 page_offset, int page_no)
-{
-	char *srcp, *destp;
-
-	srcp = head->arg.head[0].iov_base + position;
-	byte_count = head->arg.head[0].iov_len - position;
-	if (byte_count > PAGE_SIZE) {
-		dprintk("svcrdma: large tail unsupported\n");
-		return 0;
-	}
-
-	/* Fit as much of the tail on the current page as possible */
-	if (page_offset != PAGE_SIZE) {
-		destp = page_address(rqstp->rq_arg.pages[page_no]);
-		destp += page_offset;
-		while (byte_count--) {
-			*destp++ = *srcp++;
-			page_offset++;
-			if (page_offset == PAGE_SIZE && byte_count)
-				goto more;
-		}
-		goto done;
-	}
-
-more:
-	/* Fit the rest on the next page */
-	page_no++;
-	destp = page_address(rqstp->rq_arg.pages[page_no]);
-	while (byte_count--)
-		*destp++ = *srcp++;
-
-	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-done:
-	byte_count = head->arg.head[0].iov_len - position;
-	head->arg.page_len += byte_count;
-	head->arg.len += byte_count;
-	head->arg.buflen += byte_count;
-	return 1;
-}
-
-/* Returns the address of the first read chunk or <nul> if no read chunk
- * is present
- */
-static struct rpcrdma_read_chunk *
-svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
-{
-	struct rpcrdma_read_chunk *ch =
-		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-
-	if (ch->rc_discrim == xdr_zero)
-		return NULL;
-	return ch;
-}
-
-static int rdma_read_chunks(struct svcxprt_rdma *xprt,
-			    struct rpcrdma_msg *rmsgp,
-			    struct svc_rqst *rqstp,
-			    struct svc_rdma_op_ctxt *head)
-{
-	int page_no, ret;
-	struct rpcrdma_read_chunk *ch;
-	u32 handle, page_offset, byte_count;
-	u32 position;
-	u64 rs_offset;
-	bool last;
-
-	/* If no read list is present, return 0 */
-	ch = svc_rdma_get_read_chunk(rmsgp);
-	if (!ch)
-		return 0;
-
-	/* The request is completed when the RDMA_READs complete. The
-	 * head context keeps all the pages that comprise the
-	 * request.
-	 */
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->hdr_count = head->count;
-	head->arg.page_base = 0;
-	head->arg.page_len = 0;
-	head->arg.len = rqstp->rq_arg.len;
-	head->arg.buflen = rqstp->rq_arg.buflen;
-
-	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
-	position = be32_to_cpu(ch->rc_position);
-	if (position == 0) {
-		head->arg.pages = &head->pages[0];
-		page_offset = head->byte_len;
-	} else {
-		head->arg.pages = &head->pages[head->count];
-		page_offset = 0;
-	}
-
-	ret = 0;
-	page_no = 0;
-	for (; ch->rc_discrim != xdr_zero; ch++) {
-		if (be32_to_cpu(ch->rc_position) != position)
-			goto err;
-
-		handle = be32_to_cpu(ch->rc_target.rs_handle),
-		byte_count = be32_to_cpu(ch->rc_target.rs_length);
-		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
-				 &rs_offset);
-
-		while (byte_count > 0) {
-			last = (ch + 1)->rc_discrim == xdr_zero;
-			ret = xprt->sc_reader(xprt, rqstp, head,
-					      &page_no, &page_offset,
-					      handle, byte_count,
-					      rs_offset, last);
-			if (ret < 0)
-				goto err;
-			byte_count -= ret;
-			rs_offset += ret;
-			head->arg.buflen += ret;
-		}
-	}
-
-	/* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
-	if (page_offset & 3) {
-		u32 pad = 4 - (page_offset & 3);
-
-		head->arg.tail[0].iov_len += pad;
-		head->arg.len += pad;
-		head->arg.buflen += pad;
-		page_offset += pad;
-	}
-
-	ret = 1;
-	if (position && position < head->arg.head[0].iov_len)
-		ret = rdma_copy_tail(rqstp, head, position,
-				     byte_count, page_offset, page_no);
-	head->arg.head[0].iov_len = position;
-	head->position = position;
-
- err:
-	/* Detach arg pages. svc_recv will replenish them */
-	for (page_no = 0;
-	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
-		rqstp->rq_pages[page_no] = NULL;
-
-	return ret;
-}
-
 static void rdma_read_complete(struct svc_rqst *rqstp,
 			       struct svc_rdma_op_ctxt *head)
 {
@@ -720,24 +376,9 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
 		rqstp->rq_pages[page_no] = head->pages[page_no];
 	}
 
-	/* Adjustments made for RDMA_NOMSG type requests */
-	if (head->position == 0) {
-		if (head->arg.len <= head->sge[0].length) {
-			head->arg.head[0].iov_len = head->arg.len -
-							head->byte_len;
-			head->arg.page_len = 0;
-		} else {
-			head->arg.head[0].iov_len = head->sge[0].length -
-								head->byte_len;
-			head->arg.page_len = head->arg.len -
-						head->sge[0].length;
-		}
-	}
-
 	/* Point rq_arg.pages past header */
 	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
 	rqstp->rq_arg.page_len = head->arg.page_len;
-	rqstp->rq_arg.page_base = head->arg.page_base;
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_pages[page_no];
@@ -834,10 +475,35 @@ static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
 	return true;
 }
 
-/*
- * Set up the rqstp thread context to point to the RQ buffer. If
- * necessary, pull additional data from the client with an RDMA_READ
- * request.
+/**
+ * svc_rdma_recvfrom - Receive an RPC call
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Returns:
+ *	The positive number of bytes in the RPC Call message,
+ *	%0 if there were no Calls ready to return,
+ *	%-EINVAL if the Read chunk data is too large,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ *
+ * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only
+ * when there are no remaining ctxt's to process.
+ *
+ * The next ctxt is removed from the "receive" lists.
+ *
+ * - If the ctxt completes a Read, then finish assembling the Call
+ *   message and return the number of bytes in the message.
+ *
+ * - If the ctxt completes a Receive, then construct the Call
+ *   message from the contents of the Receive buffer.
+ *
+ *   - If there are no Read chunks in this message, then finish
+ *     assembling the Call message and return the number of bytes
+ *     in the message.
+ *
+ *   - If there are Read chunks in this message, post Read WRs to
+ *     pull that payload and return 0.
  */
 int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 {
@@ -845,11 +511,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svcxprt_rdma *rdma_xprt =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	struct svc_rdma_op_ctxt *ctxt;
-	struct rpcrdma_msg *rmsgp;
+	__be32 *p;
 	int ret;
 
-	dprintk("svcrdma: rqstp=%p\n", rqstp);
-
 	spin_lock(&rdma_xprt->sc_rq_dto_lock);
 	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
 		ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
@@ -870,7 +534,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	}
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
 
-	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
+	dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
 		ctxt, rdma_xprt, rqstp);
 	atomic_inc(&rdma_stat_recv);
 
@@ -878,7 +542,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
 
 	/* Decode the RDMA header. */
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
 	if (ret < 0)
 		goto out_err;
@@ -886,9 +550,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		goto out_drop;
 	rqstp->rq_xprt_hlen = ret;
 
-	if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) {
-		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt,
-					       &rmsgp->rm_xid,
+	if (svc_rdma_is_backchannel_reply(xprt, p)) {
+		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
 					       &rqstp->rq_arg);
 		svc_rdma_put_context(ctxt, 0);
 		if (ret)
@@ -896,16 +559,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		return ret;
 	}
 
-	/* Read read-list data. */
-	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
-	if (ret > 0) {
-		/* read-list posted, defer until data received from client. */
-		goto defer;
-	} else if (ret < 0) {
-		/* Post of read-list failed, free context. */
-		svc_rdma_put_context(ctxt, 1);
-		return 0;
-	}
+	p += rpcrdma_fixed_maxsz;
+	if (*p != xdr_zero)
+		goto out_readchunk;
 
 complete:
 	ret = rqstp->rq_arg.head[0].iov_len
@@ -921,13 +577,22 @@ complete:
 	svc_xprt_copy_addrs(rqstp, xprt);
 	return ret;
 
+out_readchunk:
+	ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
+	if (ret < 0)
+		goto out_postfail;
+	return 0;
+
 out_err:
-	svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret);
+	svc_rdma_send_error(rdma_xprt, p, ret);
 	svc_rdma_put_context(ctxt, 0);
 	return 0;
 
-defer:
-	return 0;
+out_postfail:
+	if (ret == -EINVAL)
+		svc_rdma_send_error(rdma_xprt, p, ret);
+	svc_rdma_put_context(ctxt, 1);
+	return ret;
 
 out_drop:
 	svc_rdma_put_context(ctxt, 1);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index a9d9cb1ba4c6..72d2dcdca2e1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -908,8 +908,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 * capabilities of this particular device */
 	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
 				  (size_t)RPCSVC_MAXPAGES);
-	newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
-				       RPCSVC_MAXPAGES);
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
 	newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
 					 svcrdma_max_requests);
@@ -998,12 +996,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 * NB:	iWARP requires remote write access for the data sink
 	 *	of an RDMA_READ. IB does not.
 	 */
-	newxprt->sc_reader = rdma_read_chunk_lcl;
 	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
 		newxprt->sc_frmr_pg_list_len =
 			dev->attrs.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
-		newxprt->sc_reader = rdma_read_chunk_frmr;
 	} else
 		newxprt->sc_snd_w_inv = false;
 
@@ -1056,7 +1052,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
 	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
 	dprintk("    max_sge         : %d\n", newxprt->sc_max_sge);
-	dprintk("    max_sge_rd      : %d\n", newxprt->sc_max_sge_rd);
 	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", newxprt->sc_ord);
@@ -1117,12 +1112,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		pr_err("svcrdma: sc_xprt still in use? (%d)\n",
 		       kref_read(&xprt->xpt_ref));
 
-	/*
-	 * Destroy queued, but not processed read completions. Note
-	 * that this cleanup has to be done before destroying the
-	 * cm_id because the device ptr is needed to unmap the dma in
-	 * svc_rdma_put_context.
-	 */
 	while (!list_empty(&rdma->sc_read_complete_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_first_entry(&rdma->sc_read_complete_q,
@@ -1130,8 +1119,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		list_del(&ctxt->list);
 		svc_rdma_put_context(ctxt, 1);
 	}
-
-	/* Destroy queued, but not processed recv completions */
 	while (!list_empty(&rdma->sc_rq_dto_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_first_entry(&rdma->sc_rq_dto_q,
-- 
cgit v1.2.3


From c84dc900d737a8d8f08768622226980ee863403b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:49 -0400
Subject: svcrdma: Remove unused Read completion handlers

Clean up:

The generic RDMA R/W API conversion of svc_rdma_recvfrom replaced
the Register, Read, and Invalidate completion handlers. Remove the
old ones, which are no longer used.

These handlers shared some helper code with svc_rdma_wc_send. Fold
the wc_common helper back into the one remaining completion handler.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  4 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 93 ++++----------------------------
 2 files changed, 10 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index b1ba19ba1071..06d58a3f74bc 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -77,17 +77,15 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct list_head list;
-	struct svc_rdma_op_ctxt *read_hdr;
 	struct svc_rdma_fastreg_mr *frmr;
-	int hdr_count;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
 	u32 byte_len;
 	struct svcxprt_rdma *xprt;
-	unsigned long flags;
 	enum dma_data_direction direction;
 	int count;
 	unsigned int mapped_sges;
+	int hdr_count;
 	struct ib_send_wr send_wr;
 	struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
 	struct page *pages[RPCSVC_MAXPAGES];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 72d2dcdca2e1..c915cba0f8e6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -346,36 +346,6 @@ out:
 	svc_xprt_put(&xprt->sc_xprt);
 }
 
-static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
-				    struct ib_wc *wc,
-				    const char *opname)
-{
-	if (wc->status != IB_WC_SUCCESS)
-		goto err;
-
-out:
-	atomic_inc(&xprt->sc_sq_avail);
-	wake_up(&xprt->sc_send_wait);
-	return;
-
-err:
-	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("svcrdma: %s: %s (%u/0x%x)\n",
-		       opname, ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
-	goto out;
-}
-
-static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
-					const char *opname)
-{
-	struct svcxprt_rdma *xprt = cq->cq_context;
-
-	svc_rdma_send_wc_common(xprt, wc, opname);
-	svc_xprt_put(&xprt->sc_xprt);
-}
-
 /**
  * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
  * @cq:        completion queue
@@ -383,74 +353,29 @@ static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
  *
  */
 void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	svc_rdma_send_wc_common_put(cq, wc, "send");
-
-	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 1);
-}
-
-/**
- * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
-{
-	svc_rdma_send_wc_common_put(cq, wc, "fastreg");
-}
-
-/**
- * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct svcxprt_rdma *xprt = cq->cq_context;
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct svc_rdma_op_ctxt *ctxt;
 
-	svc_rdma_send_wc_common(xprt, wc, "read");
+	atomic_inc(&xprt->sc_sq_avail);
+	wake_up(&xprt->sc_send_wait);
 
 	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(xprt, ctxt->frmr);
-
-	if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-		struct svc_rdma_op_ctxt *read_hdr;
-
-		read_hdr = ctxt->read_hdr;
-		spin_lock(&xprt->sc_rq_dto_lock);
-		list_add_tail(&read_hdr->list,
-			      &xprt->sc_read_complete_q);
-		spin_unlock(&xprt->sc_rq_dto_lock);
+	svc_rdma_put_context(ctxt, 1);
 
-		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-		svc_xprt_enqueue(&xprt->sc_xprt);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
 	}
 
-	svc_rdma_put_context(ctxt, 0);
 	svc_xprt_put(&xprt->sc_xprt);
 }
 
-/**
- * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
-{
-	svc_rdma_send_wc_common_put(cq, wc, "localInv");
-}
-
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 					     int listener)
 {
-- 
cgit v1.2.3


From 463e63d7014442002399903af027b63ae38f6e77 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:57 -0400
Subject: svcrdma: Remove frmr cache

Clean up: Now that the svc_rdma_recvfrom path uses the rdma_rw API,
the details of Read sink buffer registration are dealt with by the
kernel's RDMA core. This cache is no longer used, and can be
removed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          | 18 -------
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 86 --------------------------------
 2 files changed, 104 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 06d58a3f74bc..fd7775f70bb5 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -77,7 +77,6 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct list_head list;
-	struct svc_rdma_fastreg_mr *frmr;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
 	u32 byte_len;
@@ -91,17 +90,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-struct svc_rdma_fastreg_mr {
-	struct ib_mr *mr;
-	struct scatterlist *sg;
-	int sg_nents;
-	unsigned long access_flags;
-	enum dma_data_direction direction;
-	struct list_head frmr_list;
-};
-
-#define RDMACTXT_F_LAST_CTXT	2
-
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
 #define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
 
@@ -136,9 +124,6 @@ struct svcxprt_rdma {
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
 	u32		     sc_dev_caps;	/* distilled device caps */
-	unsigned int	     sc_frmr_pg_list_len;
-	struct list_head     sc_frmr_q;
-	spinlock_t	     sc_frmr_q_lock;
 
 	spinlock_t	     sc_lock;		/* transport lock */
 
@@ -210,9 +195,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
-extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
-			      struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c915cba0f8e6..8864105d86ec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -202,7 +202,6 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 out:
 	ctxt->count = 0;
 	ctxt->mapped_sges = 0;
-	ctxt->frmr = NULL;
 	return ctxt;
 
 out_empty:
@@ -387,14 +386,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
-	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
-	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 	spin_lock_init(&cma_xprt->sc_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
@@ -705,86 +702,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	return ERR_PTR(ret);
 }
 
-static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
-{
-	struct ib_mr *mr;
-	struct scatterlist *sg;
-	struct svc_rdma_fastreg_mr *frmr;
-	u32 num_sg;
-
-	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
-	if (!frmr)
-		goto err;
-
-	num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
-	mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
-	if (IS_ERR(mr))
-		goto err_free_frmr;
-
-	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
-	if (!sg)
-		goto err_free_mr;
-
-	sg_init_table(sg, RPCSVC_MAXPAGES);
-
-	frmr->mr = mr;
-	frmr->sg = sg;
-	INIT_LIST_HEAD(&frmr->frmr_list);
-	return frmr;
-
- err_free_mr:
-	ib_dereg_mr(mr);
- err_free_frmr:
-	kfree(frmr);
- err:
-	return ERR_PTR(-ENOMEM);
-}
-
-static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
-{
-	struct svc_rdma_fastreg_mr *frmr;
-
-	while (!list_empty(&xprt->sc_frmr_q)) {
-		frmr = list_entry(xprt->sc_frmr_q.next,
-				  struct svc_rdma_fastreg_mr, frmr_list);
-		list_del_init(&frmr->frmr_list);
-		kfree(frmr->sg);
-		ib_dereg_mr(frmr->mr);
-		kfree(frmr);
-	}
-}
-
-struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
-{
-	struct svc_rdma_fastreg_mr *frmr = NULL;
-
-	spin_lock(&rdma->sc_frmr_q_lock);
-	if (!list_empty(&rdma->sc_frmr_q)) {
-		frmr = list_entry(rdma->sc_frmr_q.next,
-				  struct svc_rdma_fastreg_mr, frmr_list);
-		list_del_init(&frmr->frmr_list);
-		frmr->sg_nents = 0;
-	}
-	spin_unlock(&rdma->sc_frmr_q_lock);
-	if (frmr)
-		return frmr;
-
-	return rdma_alloc_frmr(rdma);
-}
-
-void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
-		       struct svc_rdma_fastreg_mr *frmr)
-{
-	if (frmr) {
-		ib_dma_unmap_sg(rdma->sc_cm_id->device,
-				frmr->sg, frmr->sg_nents, frmr->direction);
-		spin_lock(&rdma->sc_frmr_q_lock);
-		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
-		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
-		spin_unlock(&rdma->sc_frmr_q_lock);
-	}
-}
-
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -922,8 +839,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 *	of an RDMA_READ. IB does not.
 	 */
 	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		newxprt->sc_frmr_pg_list_len =
-			dev->attrs.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
 	} else
 		newxprt->sc_snd_w_inv = false;
@@ -1063,7 +978,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		xprt->xpt_bc_xprt = NULL;
 	}
 
-	rdma_dealloc_frmr_q(rdma);
 	svc_rdma_destroy_rw_ctxts(rdma);
 	svc_rdma_destroy_ctxts(rdma);
 
-- 
cgit v1.2.3


From 9450ca8e2febb0000a5efd4f5870915d59ae62bc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:19:13 -0400
Subject: svcrdma: Clean up after converting svc_rdma_recvfrom to rdma_rw API

Clean up: Registration mode details are now handled by the rdma_rw
API, and thus can be removed from svcrdma.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  4 ----
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 39 ++++----------------------------
 2 files changed, 4 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index fd7775f70bb5..995c6fe9ee90 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -90,9 +90,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-#define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
-#define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
-
 struct svcxprt_rdma {
 	struct svc_xprt      sc_xprt;		/* SVC transport structure */
 	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
@@ -123,7 +120,6 @@ struct svcxprt_rdma {
 	struct ib_qp         *sc_qp;
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
-	u32		     sc_dev_caps;	/* distilled device caps */
 
 	spinlock_t	     sc_lock;		/* transport lock */
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 75bd11f2b69e..e660d4965b18 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -783,7 +783,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	memset(&qp_attr, 0, sizeof qp_attr);
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
-	qp_attr.port_num = newxprt->sc_cm_id->port_num;
+	qp_attr.port_num = newxprt->sc_port_num;
 	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
 	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
@@ -807,43 +807,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	}
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
-	/*
-	 * Use the most secure set of MR resources based on the
-	 * transport type and available memory management features in
-	 * the device. Here's the table implemented below:
-	 *
-	 *		Fast	Global	DMA	Remote WR
-	 *		Reg	LKEY	MR	Access
-	 *		Sup'd	Sup'd	Needed	Needed
-	 *
-	 * IWARP	N	N	Y	Y
-	 *		N	Y	Y	Y
-	 *		Y	N	Y	N
-	 *		Y	Y	N	-
-	 *
-	 * IB		N	N	Y	N
-	 *		N	Y	N	-
-	 *		Y	N	Y	N
-	 *		Y	Y	N	-
-	 *
-	 * NB:	iWARP requires remote write access for the data sink
-	 *	of an RDMA_READ. IB does not.
-	 */
-	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
-	} else
+	if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
 		newxprt->sc_snd_w_inv = false;
-
-	/*
-	 * Determine if a DMA MR is required and if so, what privs are required
-	 */
-	if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
-	    !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
+	if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
+	    !rdma_ib_or_roce(dev, newxprt->sc_port_num))
 		goto errout;
 
-	if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
-		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
-
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
 		ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
-- 
cgit v1.2.3


From 40bf6a35483ee25271ce2a90d8976cf1409a033a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Wed, 12 Jul 2017 10:23:13 +0200
Subject: rtc: Remove wrong deprecation comment

rtc_time_to_tm and rtc_tm_to_time are not deprecated and make perfect sense
for RTCs that are simple 32bit counters.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/rtc.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index d53ecdc060cf..0a0f0d14a5fb 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -33,17 +33,11 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs)
 	return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs);
 }
 
-/**
- * Deprecated. Use rtc_time64_to_tm().
- */
 static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
 {
 	rtc_time64_to_tm(time, tm);
 }
 
-/**
- * Deprecated. Use rtc_tm_to_time64().
- */
 static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
 {
 	*time = rtc_tm_to_time64(tm);
-- 
cgit v1.2.3


From 0a2c13d9cd76c84f2520f573ff83f777eb7464aa Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 12 Jul 2017 14:33:01 -0700
Subject: include/linux/dcache.h: use unsigned chars in struct name_snapshot

"kernel.h: handle pointers to arrays better in container_of()" triggers:

In file included from include/uapi/linux/stddef.h:1:0,
                 from include/linux/stddef.h:4,
                 from include/uapi/linux/posix_types.h:4,
                 from include/uapi/linux/types.h:13,
                 from include/linux/types.h:5,
                 from include/linux/syscalls.h:71,
                 from fs/dcache.c:17:
fs/dcache.c: In function 'release_dentry_name_snapshot':
include/linux/compiler.h:542:38: error: call to '__compiletime_assert_305' declared with attribute error: pointer type mismatch in container_of()
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
                                      ^
include/linux/compiler.h:525:4: note: in definition of macro '__compiletime_assert'
    prefix ## suffix();    \
    ^
include/linux/compiler.h:542:2: note: in expansion of macro '_compiletime_assert'
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  ^
include/linux/build_bug.h:46:37: note: in expansion of macro 'compiletime_assert'
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^
include/linux/kernel.h:860:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
  BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
  ^
fs/dcache.c:305:7: note: in expansion of macro 'container_of'
   p = container_of(name->name, struct external_name, name[0]);

Switch name_snapshot to use unsigned chars, matching struct qstr and
struct external_name.

Link: http://lkml.kernel.org/r/20170710152134.0f78c1e6@canb.auug.org.au
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 025727bf6797..c706eaac692e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 }
 
 struct name_snapshot {
-	const char *name;
-	char inline_name[DNAME_INLINE_LEN];
+	const unsigned char *name;
+	unsigned char inline_name[DNAME_INLINE_LEN];
 };
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);
-- 
cgit v1.2.3


From c7acec713d14c6ce8a20154f9dfda258d6bcad3b Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Wed, 12 Jul 2017 14:33:04 -0700
Subject: kernel.h: handle pointers to arrays better in container_of()

If the first parameter of container_of() is a pointer to a
non-const-qualified array type (and the third parameter names a
non-const-qualified array member), the local variable __mptr will be
defined with a const-qualified array type.  In ISO C, these types are
incompatible.  They work as expected in GNU C, but some versions will
issue warnings.  For example, GCC 4.9 produces the warning
"initialization from incompatible pointer type".

Here is an example of where the problem occurs:

-------------------------------------------------------
   #include <linux/kernel.h>
   #include <linux/module.h>

  MODULE_LICENSE("GPL");

  struct st {
  	int a;
  	char b[16];
  };

  static int __init example_init(void) {
  	struct st t = { .a = 101, .b = "hello" };
  	char (*p)[16] = &t.b;
  	struct st *x = container_of(p, struct st, b);
  	printk(KERN_DEBUG "%p %p\n", (void *)&t, (void *)x);
  	return 0;
  }

  static void __exit example_exit(void) {
  }

  module_init(example_init);
  module_exit(example_exit);
-------------------------------------------------------

Building the module with gcc-4.9 results in these warnings (where '{m}'
is the module source and '{k}' is the kernel source):

-------------------------------------------------------
  In file included from {m}/example.c:1:0:
  {m}/example.c: In function `example_init':
  {k}/include/linux/kernel.h:854:48: warning: initialization from incompatible pointer type
    const typeof( ((type *)0)->member ) *__mptr = (ptr); \
                                                  ^
  {m}/example.c:14:17: note: in expansion of macro `container_of'
    struct st *x = container_of(p, struct st, b);
                   ^
  {k}/include/linux/kernel.h:854:48: warning: (near initialization for `x')
    const typeof( ((type *)0)->member ) *__mptr = (ptr); \
                                                  ^
  {m}/example.c:14:17: note: in expansion of macro `container_of'
    struct st *x = container_of(p, struct st, b);
                   ^
-------------------------------------------------------

Replace the type checking performed by the macro to avoid these
warnings.  Make sure `*(ptr)` either has type compatible with the
member, or has type compatible with `void`, ignoring qualifiers.  Raise
compiler errors if this is not true.  This is stronger than the previous
behaviour, which only resulted in compiler warnings for a type mismatch.

[arnd@arndb.de: fix new warnings for container_of()]
  Link: http://lkml.kernel.org/r/20170620200940.90557-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170525120316.24473-7-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1c91f26e2996..bd6d96cf80b1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -11,6 +11,7 @@
 #include <linux/log2.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
+#include <linux/build_bug.h>
 #include <asm/byteorder.h>
 #include <uapi/linux/kernel.h>
 
@@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @member:	the name of the member within the struct.
  *
  */
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({				\
+	void *__mptr = (void *)(ptr);					\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type *)(__mptr - offsetof(type, member))); })
 
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
-- 
cgit v1.2.3


From 203e9e41219b4e7357104e525e91ac609fba2c6c Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Wed, 12 Jul 2017 14:33:14 -0700
Subject: kexec: move vmcoreinfo out of the kernel's .bss section

As Eric said,
 "what we need to do is move the variable vmcoreinfo_note out of the
  kernel's .bss section. And modify the code to regenerate and keep this
  information in something like the control page.

  Definitely something like this needs a page all to itself, and ideally
  far away from any other kernel data structures. I clearly was not
  watching closely the data someone decided to keep this silly thing in
  the kernel's .bss section."

This patch allocates extra pages for these vmcoreinfo_XXX variables, one
advantage is that it enhances some safety of vmcoreinfo, because
vmcoreinfo now is kept far away from other kernel data structures.

Link: http://lkml.kernel.org/r/1493281021-20737-1-git-send-email-xlpang@redhat.com
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Suggested-by: Eric Biederman <ebiederm@xmission.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/machine_kexec.c |  5 -----
 arch/s390/kernel/machine_kexec.c |  1 +
 arch/s390/kernel/setup.c         |  6 ------
 arch/x86/kernel/crash.c          |  2 +-
 arch/x86/xen/mmu_pv.c            |  4 ++--
 include/linux/crash_core.h       |  4 ++--
 kernel/crash_core.c              | 26 ++++++++++++++++++++++----
 kernel/ksysfs.c                  |  2 +-
 8 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 599507bcec91..c14815dca747 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 }
 
-phys_addr_t paddr_vmcoreinfo_note(void)
-{
-	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
-}
-
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 49a6bd45957b..3d0b14afa232 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3ae756c0db3d..3d1d808ea8a9 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
 	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
 }
 
-static void __init setup_vmcoreinfo(void)
-{
-	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-}
-
 #ifdef CONFIG_CRASH_DUMP
 
 /*
@@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	setup_resources();
-	setup_vmcoreinfo();
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_detect_mhz_feature();
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 22217ece26c8..44404e2307bb 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
 	bufp += sizeof(Elf64_Phdr);
 	phdr->p_type = PT_NOTE;
 	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
 	(ehdr->e_phnum)++;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1d7a7213a310..cab28cf2cffb 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2693,8 +2693,8 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
 	if (xen_pv_domain())
-		return virt_to_machine(&vmcoreinfo_note).maddr;
+		return virt_to_machine(vmcoreinfo_note).maddr;
 	else
-		return __pa_symbol(&vmcoreinfo_note);
+		return __pa(vmcoreinfo_note);
 }
 #endif /* CONFIG_KEXEC_CORE */
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 4090a42578a8..87506a02e914 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -19,7 +19,7 @@
 				     CRASH_CORE_NOTE_NAME_BYTES +	\
 				     CRASH_CORE_NOTE_DESC_BYTES)
 
-#define VMCOREINFO_BYTES	   (4096)
+#define VMCOREINFO_BYTES	   PAGE_SIZE
 #define VMCOREINFO_NOTE_NAME	   "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
 #define VMCOREINFO_NOTE_SIZE	   ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +	\
@@ -56,7 +56,7 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+extern u32 *vmcoreinfo_note;
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index fcbd568f1e95..2837d6164db8 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,10 +14,10 @@
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+static unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
+u32 *vmcoreinfo_note;
 
 /*
  * parsing the "crashkernel" commandline
@@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
 
 void crash_save_vmcoreinfo(void)
 {
+	if (!vmcoreinfo_note)
+		return;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
@@ -356,11 +359,26 @@ void __weak arch_crash_save_vmcoreinfo(void)
 
 phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
-	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+	return __pa(vmcoreinfo_note);
 }
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
+	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+	if (!vmcoreinfo_data) {
+		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+		return -ENOMEM;
+	}
+
+	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+						GFP_KERNEL | __GFP_ZERO);
+	if (!vmcoreinfo_note) {
+		free_page((unsigned long)vmcoreinfo_data);
+		vmcoreinfo_data = NULL;
+		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+		return -ENOMEM;
+	}
+
 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index df1a9aa602a0..46ba853656f6 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
 {
 	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
 	return sprintf(buf, "%pa %x\n", &vmcore_base,
-		       (unsigned int)sizeof(vmcoreinfo_note));
+			(unsigned int)VMCOREINFO_NOTE_SIZE);
 }
 KERNEL_ATTR_RO(vmcoreinfo);
 
-- 
cgit v1.2.3


From 5203f4995d9a87952a83c2ce7866adbbe8f97bb5 Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Wed, 12 Jul 2017 14:33:17 -0700
Subject: powerpc/fadump: use the correct VMCOREINFO_NOTE_SIZE for phdr

vmcoreinfo_max_size stands for the vmcoreinfo_data, the correct one we
should use is vmcoreinfo_note whose total size is VMCOREINFO_NOTE_SIZE.

Like explained in commit 77019967f06b ("kdump: fix exported size of
vmcoreinfo note"), it should not affect the actual function, but we
better fix it, also this change should be safe and backward compatible.

After this, we can get rid of variable vmcoreinfo_max_size, let's use
the corresponding macros directly, fewer variables means more safety for
vmcoreinfo operation.

[xlpang@redhat.com: fix build warning]
  Link: http://lkml.kernel.org/r/1494830606-27736-1-git-send-email-xlpang@redhat.com
Link: http://lkml.kernel.org/r/1493281021-20737-2-git-send-email-xlpang@redhat.com
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Reviewed-by: Dave Young <dyoung@redhat.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/fadump.c | 3 +--
 include/linux/crash_core.h   | 1 -
 kernel/crash_core.c          | 3 +--
 3 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3079518f2245..dc0c49cfd90a 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -999,8 +999,7 @@ static int fadump_create_elfcore_headers(char *bufp)
 
 	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
 	phdr->p_offset	= phdr->p_paddr;
-	phdr->p_memsz	= vmcoreinfo_max_size;
-	phdr->p_filesz	= vmcoreinfo_max_size;
+	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
 
 	/* Increment number of program headers. */
 	(elf->e_phnum)++;
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 87506a02e914..e5df1b3cf072 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -58,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 
 extern u32 *vmcoreinfo_note;
 extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 2837d6164db8..315adbf9cb68 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -16,7 +16,6 @@
 /* vmcoreinfo stuff */
 static unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
 u32 *vmcoreinfo_note;
 
 /*
@@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 	r = vscnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 
-	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+	r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
 
 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 
-- 
cgit v1.2.3


From 1229384f5b856d83698c38f9dedfd836e26711cb Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Wed, 12 Jul 2017 14:33:21 -0700
Subject: kdump: protect vmcoreinfo data under the crash memory

Currently vmcoreinfo data is updated at boot time subsys_initcall(), it
has the risk of being modified by some wrong code during system is
running.

As a result, vmcore dumped may contain the wrong vmcoreinfo.  Later on,
when using "crash", "makedumpfile", etc utility to parse this vmcore, we
probably will get "Segmentation fault" or other unexpected errors.

E.g.  1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
system; 3) trigger kdump, then we obviously will fail to recognize the
crash context correctly due to the corrupted vmcoreinfo.

Now except for vmcoreinfo, all the crash data is well
protected(including the cpu note which is fully updated in the crash
path, thus its correctness is guaranteed).  Given that vmcoreinfo data
is a large chunk prepared for kdump, we better protect it as well.

To solve this, we relocate and copy vmcoreinfo_data to the crash memory
when kdump is loading via kexec syscalls.  Because the whole crash
memory will be protected by existing arch_kexec_protect_crashkres()
mechanism, we naturally protect vmcoreinfo_data from write(even read)
access under kernel direct mapping after kdump is loaded.

Since kdump is usually loaded at the very early stage after boot, we can
trust the correctness of the vmcoreinfo data copied.

On the other hand, we still need to operate the vmcoreinfo safe copy
when crash happens to generate vmcoreinfo_note again, we rely on vmap()
to map out a new kernel virtual address and update to use this new one
instead in the following crash_save_vmcoreinfo().

BTW, we do not touch vmcoreinfo_note, because it will be fully updated
using the protected vmcoreinfo_data after crash which is surely correct
just like the cpu crash note.

Link: http://lkml.kernel.org/r/1493281021-20737-3-git-send-email-xlpang@redhat.com
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_core.h |  2 +-
 include/linux/kexec.h      |  2 ++
 kernel/crash_core.c        | 17 ++++++++++++++++-
 kernel/kexec.c             |  8 ++++++++
 kernel/kexec_core.c        | 39 +++++++++++++++++++++++++++++++++++++++
 kernel/kexec_file.c        |  8 ++++++++
 6 files changed, 74 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index e5df1b3cf072..2df2118fbe13 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -28,6 +28,7 @@
 
 typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
 
+void crash_update_vmcoreinfo_safecopy(void *ptr);
 void crash_save_vmcoreinfo(void);
 void arch_crash_save_vmcoreinfo(void);
 __printf(1, 2)
@@ -57,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
 extern u32 *vmcoreinfo_note;
-extern size_t vmcoreinfo_size;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 65888418fb69..dd056fab9e35 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -172,6 +172,7 @@ struct kimage {
 	unsigned long start;
 	struct page *control_code_page;
 	struct page *swap_page;
+	void *vmcoreinfo_data_copy; /* locates in the crash memory */
 
 	unsigned long nr_segments;
 	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 int kexec_crash_loaded(void);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 315adbf9cb68..6db80fc0810b 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -15,9 +15,12 @@
 
 /* vmcoreinfo stuff */
 static unsigned char *vmcoreinfo_data;
-size_t vmcoreinfo_size;
+static size_t vmcoreinfo_size;
 u32 *vmcoreinfo_note;
 
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
+
 /*
  * parsing the "crashkernel" commandline
  *
@@ -323,11 +326,23 @@ static void update_vmcoreinfo_note(void)
 	final_note(buf);
 }
 
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+	if (ptr)
+		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+	vmcoreinfo_data_safecopy = ptr;
+}
+
 void crash_save_vmcoreinfo(void)
 {
 	if (!vmcoreinfo_note)
 		return;
 
+	/* Use the safe copy to generate vmcoreinfo note if have */
+	if (vmcoreinfo_data_safecopy)
+		vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 980936a90ee6..e62ec4dc6620 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architecture(like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	for (i = 0; i < nr_segments; i++) {
 		ret = kimage_load_segment(image, &image->segment[i]);
 		if (ret)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 154ffb489b93..1ae7c41c33c1 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -482,6 +482,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
 	return pages;
 }
 
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+	struct page *vmcoreinfo_page;
+	void *safecopy;
+
+	if (image->type != KEXEC_TYPE_CRASH)
+		return 0;
+
+	/*
+	 * For kdump, allocate one vmcoreinfo safe copy from the
+	 * crash memory. as we have arch_kexec_protect_crashkres()
+	 * after kexec syscall, we naturally protect it from write
+	 * (even read) access under kernel direct mapping. But on
+	 * the other hand, we still need to operate it when crash
+	 * happens to generate vmcoreinfo note, hereby we rely on
+	 * vmap for this purpose.
+	 */
+	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+	if (!vmcoreinfo_page) {
+		pr_warn("Could not allocate vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+	if (!safecopy) {
+		pr_warn("Could not vmap vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+
+	image->vmcoreinfo_data_copy = safecopy;
+	crash_update_vmcoreinfo_safecopy(safecopy);
+
+	return 0;
+}
+
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
 	if (*image->entry != 0)
@@ -569,6 +603,11 @@ void kimage_free(struct kimage *image)
 	if (!image)
 		return;
 
+	if (image->vmcoreinfo_data_copy) {
+		crash_update_vmcoreinfo_safecopy(NULL);
+		vunmap(image->vmcoreinfo_data_copy);
+	}
+
 	kimage_free_extra_pages(image);
 	for_each_kimage_entry(image, ptr, entry) {
 		if (entry & IND_INDIRECTION) {
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 766e7e4d3ad9..c8f7f77e9fa9 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -298,6 +298,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architecture(like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	ret = kexec_calculate_store_digests(image);
 	if (ret)
 		goto out;
-- 
cgit v1.2.3


From 61d9b56a89208d8cccd0b4cfec7e6959717e16e3 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:40 -0700
Subject: sysctl: add unsigned int range support

To keep parity with regular int interfaces provide the an unsigned int
proc_douintvec_minmax() which allows you to specify a range of allowed
valid numbers.

Adding proc_douintvec_minmax_sysadmin() is easy but we can wait for an
actual user for that.

Link: http://lkml.kernel.org/r/20170519033554.18592-6-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Heinrich Schuchardt <xypron.glpk@gmx.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_sysctl.c  |  4 ++-
 include/linux/sysctl.h |  3 +++
 kernel/sysctl.c        | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index ee6feba8b6c0..8f9d564d0969 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1065,7 +1065,8 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
 {
 	int err = 0;
 
-	if (table->proc_handler == proc_douintvec) {
+	if ((table->proc_handler == proc_douintvec) ||
+	    (table->proc_handler == proc_douintvec_minmax)) {
 		if (table->maxlen != sizeof(unsigned int))
 			err |= sysctl_err(path, table, "array now allowed");
 	}
@@ -1083,6 +1084,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 		if ((table->proc_handler == proc_dostring) ||
 		    (table->proc_handler == proc_dointvec) ||
 		    (table->proc_handler == proc_douintvec) ||
+		    (table->proc_handler == proc_douintvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_jiffies) ||
 		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def0..225001d437ae 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int,
 			 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
 				void __user *, size_t *, loff_t *);
+extern int proc_douintvec_minmax(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d12078fc215f..df9f2a367882 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2567,6 +2567,65 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
+struct do_proc_douintvec_minmax_conv_param {
+	unsigned int *min;
+	unsigned int *max;
+};
+
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data)
+{
+	struct do_proc_douintvec_minmax_conv_param *param = data;
+
+	if (write) {
+		unsigned int val = *lvalp;
+
+		if ((param->min && *param->min > val) ||
+		    (param->max && *param->max < val))
+			return -ERANGE;
+
+		if (*lvalp > UINT_MAX)
+			return -EINVAL;
+		*valp = val;
+	} else {
+		unsigned int val = *valp;
+		*lvalp = (unsigned long) val;
+	}
+
+	return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string. Negative
+ * strings are not allowed.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max). There is a final sanity
+ * check for UINT_MAX to avoid having to support wrap around uses from
+ * userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct do_proc_douintvec_minmax_conv_param param = {
+		.min = (unsigned int *) table->extra1,
+		.max = (unsigned int *) table->extra2,
+	};
+	return do_proc_douintvec(table, write, buffer, lenp, ppos,
+				 do_proc_douintvec_minmax_conv, &param);
+}
+
 static void validate_coredump_safety(void)
 {
 #ifdef CONFIG_COREDUMP
@@ -3066,6 +3125,12 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -3108,6 +3173,7 @@ EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);
-- 
cgit v1.2.3


From 0791e3644e5ef21646fe565b9061788d05ec71d4 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 12 Jul 2017 14:34:28 -0700
Subject: kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files

With current epoll architecture target files are addressed with
file_struct and file descriptor number, where the last is not unique.
Moreover files can be transferred from another process via unix socket,
added into queue and closed then so we won't find this descriptor in the
task fdinfo list.

Thus to checkpoint and restore such processes CRIU needs to find out
where exactly the target file is present to add it into epoll queue.
For this sake one can use kcmp call where some particular target file
from the queue is compared with arbitrary file passed as an argument.

Because epoll target files can have same file descriptor number but
different file_struct a caller should explicitly specify the offset
within.

To test if some particular file is matching entry inside epoll one have
to

 - fill kcmp_epoll_slot structure with epoll file descriptor,
   target file number and target file offset (in case if only
   one target is present then it should be 0)

 - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot)
    - the kernel fetch file pointer matching file descriptor @fd of pid1
    - lookups for file struct in epoll queue of pid2 and returns traditional
      0,1,2 result for sorting purpose

Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c            | 42 ++++++++++++++++++++++++++++++++++
 include/linux/eventpoll.h |  3 +++
 include/uapi/linux/kcmp.h | 10 +++++++++
 kernel/kcmp.c             | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 322904c3ebdf..e7e9901c3790 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1077,6 +1077,48 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	return epir;
 }
 
+static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
+{
+	struct rb_node *rbp;
+	struct epitem *epi;
+
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		epi = rb_entry(rbp, struct epitem, rbn);
+		if (epi->ffd.fd == tfd) {
+			if (toff == 0)
+				return epi;
+			else
+				toff--;
+		}
+		cond_resched();
+	}
+
+	return NULL;
+}
+
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
+				     unsigned long toff)
+{
+	struct file *file_raw;
+	struct eventpoll *ep;
+	struct epitem *epi;
+
+	if (!is_file_epoll(file))
+		return ERR_PTR(-EINVAL);
+
+	ep = file->private_data;
+
+	mutex_lock(&ep->mtx);
+	epi = ep_find_tfd(ep, tfd, toff);
+	if (epi)
+		file_raw = epi->ffd.file;
+	else
+		file_raw = ERR_PTR(-ENOENT);
+	mutex_unlock(&ep->mtx);
+
+	return file_raw;
+}
+
 /*
  * This is the callback that is passed to the wait queue wakeup
  * mechanism. It is called by the stored file descriptors when they
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6daf6d4971f6..d8625d214ea7 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -14,6 +14,7 @@
 #define _LINUX_EVENTPOLL_H
 
 #include <uapi/linux/eventpoll.h>
+#include <uapi/linux/kcmp.h>
 
 
 /* Forward declarations to avoid compiler errors */
@@ -22,6 +23,8 @@ struct file;
 
 #ifdef CONFIG_EPOLL
 
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+
 /* Used to initialize the epoll bits inside the "struct file" */
 static inline void eventpoll_init_file(struct file *file)
 {
diff --git a/include/uapi/linux/kcmp.h b/include/uapi/linux/kcmp.h
index 84df14b37360..481e103da78e 100644
--- a/include/uapi/linux/kcmp.h
+++ b/include/uapi/linux/kcmp.h
@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_KCMP_H
 #define _UAPI_LINUX_KCMP_H
 
+#include <linux/types.h>
+
 /* Comparison type */
 enum kcmp_type {
 	KCMP_FILE,
@@ -10,8 +12,16 @@ enum kcmp_type {
 	KCMP_SIGHAND,
 	KCMP_IO,
 	KCMP_SYSVSEM,
+	KCMP_EPOLL_TFD,
 
 	KCMP_TYPES,
 };
 
+/* Slot for KCMP_EPOLL_TFD */
+struct kcmp_epoll_slot {
+	__u32 efd;		/* epoll file descriptor */
+	__u32 tfd;		/* target file number */
+	__u32 toff;		/* target offset within same numbered sequence */
+};
+
 #endif /* _UAPI_LINUX_KCMP_H */
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 3a47fa998fe0..ea34ed8bb952 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -11,6 +11,10 @@
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
 
 #include <asm/unistd.h>
 
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
 	return err;
 }
 
+#ifdef CONFIG_EPOLL
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	struct file *filp, *filp_epoll, *filp_tgt;
+	struct kcmp_epoll_slot slot;
+	struct files_struct *files;
+
+	if (copy_from_user(&slot, uslot, sizeof(slot)))
+		return -EFAULT;
+
+	filp = get_file_raw_ptr(task1, idx1);
+	if (!filp)
+		return -EBADF;
+
+	files = get_files_struct(task2);
+	if (!files)
+		return -EBADF;
+
+	spin_lock(&files->file_lock);
+	filp_epoll = fcheck_files(files, slot.efd);
+	if (filp_epoll)
+		get_file(filp_epoll);
+	else
+		filp_tgt = ERR_PTR(-EBADF);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+
+	if (filp_epoll) {
+		filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+		fput(filp_epoll);
+	} else
+
+	if (IS_ERR(filp_tgt))
+		return PTR_ERR(filp_tgt);
+
+	return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
 		unsigned long, idx1, unsigned long, idx2)
 {
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
 		ret = -EOPNOTSUPP;
 #endif
 		break;
+	case KCMP_EPOLL_TFD:
+		ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 92ef6da3d06ff551a86de41ae37df9cc4b58d7a0 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 12 Jul 2017 14:34:31 -0700
Subject: kcmp: fs/epoll: wrap kcmp code with CONFIG_CHECKPOINT_RESTORE

kcmp syscall is build iif CONFIG_CHECKPOINT_RESTORE is selected, so wrap
appropriate helpers in epoll code with the config to build it
conditionally.

Link: http://lkml.kernel.org/r/20170513083456.GG1881@uranus.lan
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reported-by: Andrew Morton <akpm@linuxfoundation.org>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c            | 2 ++
 include/linux/eventpoll.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e7e9901c3790..e767e4389cb1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1077,6 +1077,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	return epir;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
 {
 	struct rb_node *rbp;
@@ -1118,6 +1119,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
 
 	return file_raw;
 }
+#endif /* CONFIG_CHECKPOINT_RESTORE */
 
 /*
  * This is the callback that is passed to the wait queue wakeup
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index d8625d214ea7..2f14ac73d01d 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -23,7 +23,9 @@ struct file;
 
 #ifdef CONFIG_EPOLL
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+#endif
 
 /* Used to initialize the epoll bits inside the "struct file" */
 static inline void eventpoll_init_file(struct file *file)
-- 
cgit v1.2.3


From e41d58185f1444368873d4d7422f7664a68be61d Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Wed, 12 Jul 2017 14:34:35 -0700
Subject: fault-inject: support systematic fault injection

Add /proc/self/task/<current-tid>/fail-nth file that allows failing
0-th, 1-st, 2-nd and so on calls systematically.
Excerpt from the added documentation:

 "Write to this file of integer N makes N-th call in the current task
  fail (N is 0-based). Read from this file returns a single char 'Y' or
  'N' that says if the fault setup with a previous write to this file
  was injected or not, and disables the fault if it wasn't yet injected.
  Note that this file enables all types of faults (slab, futex, etc).
  This setting takes precedence over all other generic settings like
  probability, interval, times, etc. But per-capability settings (e.g.
  fail_futex/ignore-private) take precedence over it. This feature is
  intended for systematic testing of faults in a single system call. See
  an example below"

Why add a new setting:
1. Existing settings are global rather than per-task.
   So parallel testing is not possible.
2. attr->interval is close but it depends on attr->count
   which is non reset to 0, so interval does not work as expected.
3. Trying to model this with existing settings requires manipulations
   of all of probability, interval, times, space, task-filter and
   unexposed count and per-task make-it-fail files.
4. Existing settings are per-failure-type, and the set of failure
   types is potentially expanding.
5. make-it-fail can't be changed by unprivileged user and aggressive
   stress testing better be done from an unprivileged user.
   Similarly, this would require opening the debugfs files to the
   unprivileged user, as he would need to reopen at least times file
   (not possible to pre-open before dropping privs).

The proposed interface solves all of the above (see the example).

We want to integrate this into syzkaller fuzzer.  A prototype has found
10 bugs in kernel in first day of usage:

  https://groups.google.com/forum/#!searchin/syzkaller/%22FAULT_INJECTION%22%7Csort:relevance

I've made the current interface work with all types of our sandboxes.
For setuid the secret sauce was prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) to
make /proc entries non-root owned.  So I am fine with the current
version of the code.

[akpm@linux-foundation.org: fix build]
Link: http://lkml.kernel.org/r/20170328130128.101773-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fault-injection/fault-injection.txt | 78 +++++++++++++++++++++++
 fs/proc/base.c                                    | 52 +++++++++++++++
 include/linux/sched.h                             |  1 +
 kernel/fork.c                                     |  4 ++
 lib/fault-inject.c                                |  7 ++
 5 files changed, 142 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 415484f3d59a..192d8cbcc5f9 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -134,6 +134,22 @@ use the boot option:
 	fail_futex=
 	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
+o proc entries
+
+- /proc/self/task/<current-tid>/fail-nth:
+
+	Write to this file of integer N makes N-th call in the current task fail
+	(N is 0-based). Read from this file returns a single char 'Y' or 'N'
+	that says if the fault setup with a previous write to this file was
+	injected or not, and disables the fault if it wasn't yet injected.
+	Note that this file enables all types of faults (slab, futex, etc).
+	This setting takes precedence over all other generic debugfs settings
+	like probability, interval, times, etc. But per-capability settings
+	(e.g. fail_futex/ignore-private) take precedence over it.
+
+	This feature is intended for systematic testing of faults in a single
+	system call. See an example below.
+
 How to add new fault injection capability
 -----------------------------------------
 
@@ -278,3 +294,65 @@ allocation failure.
 	# env FAILCMD_TYPE=fail_page_alloc \
 		./tools/testing/fault-injection/failcmd.sh --times=100 \
                 -- make -C tools/testing/selftests/ run_tests
+
+Systematic faults using fail-nth
+---------------------------------
+
+The following code systematically faults 0-th, 1-st, 2-nd and so on
+capabilities in the socketpair() system call.
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+int main()
+{
+	int i, err, res, fail_nth, fds[2];
+	char buf[128];
+
+	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
+	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
+	fail_nth = open(buf, O_RDWR);
+	for (i = 0;; i++) {
+		sprintf(buf, "%d", i);
+		write(fail_nth, buf, strlen(buf));
+		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
+		err = errno;
+		read(fail_nth, buf, 1);
+		if (res == 0) {
+			close(fds[0]);
+			close(fds[1]);
+		}
+		printf("%d-th fault %c: res=%d/%d\n", i, buf[0], res, err);
+		if (buf[0] != 'Y')
+			break;
+	}
+	return 0;
+}
+
+An example output:
+
+0-th fault Y: res=-1/23
+1-th fault Y: res=-1/23
+2-th fault Y: res=-1/23
+3-th fault Y: res=-1/12
+4-th fault Y: res=-1/12
+5-th fault Y: res=-1/23
+6-th fault Y: res=-1/23
+7-th fault Y: res=-1/23
+8-th fault Y: res=-1/12
+9-th fault Y: res=-1/12
+10-th fault Y: res=-1/12
+11-th fault Y: res=-1/12
+12-th fault Y: res=-1/12
+13-th fault Y: res=-1/12
+14-th fault Y: res=-1/12
+15-th fault Y: res=-1/12
+16-th fault N: res=0/12
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f1e1927ccd48..88b773f318cd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1355,6 +1355,53 @@ static const struct file_operations proc_fault_inject_operations = {
 	.write		= proc_fault_inject_write,
 	.llseek		= generic_file_llseek,
 };
+
+static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	int err, n;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+	put_task_struct(task);
+	if (task != current)
+		return -EPERM;
+	err = kstrtoint_from_user(buf, count, 10, &n);
+	if (err)
+		return err;
+	if (n < 0 || n == INT_MAX)
+		return -EINVAL;
+	current->fail_nth = n + 1;
+	return count;
+}
+
+static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	int err;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+	put_task_struct(task);
+	if (task != current)
+		return -EPERM;
+	if (count < 1)
+		return -EINVAL;
+	err = put_user((char)(current->fail_nth ? 'N' : 'Y'), buf);
+	if (err)
+		return err;
+	current->fail_nth = 0;
+	return 1;
+}
+
+static const struct file_operations proc_fail_nth_operations = {
+	.read		= proc_fail_nth_read,
+	.write		= proc_fail_nth_write,
+};
 #endif
 
 
@@ -3311,6 +3358,11 @@ static const struct pid_entry tid_base_stuff[] = {
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+	/*
+	 * Operations on the file check that the task is current,
+	 * so we create it with 0666 to support testing under unprivileged user.
+	 */
+	REG("fail-nth", 0666, proc_fail_nth_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	ONE("io",	S_IRUSR, proc_tid_io_accounting),
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 20814b7d7d70..3822d749fc9e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,7 @@ struct task_struct {
 
 #ifdef CONFIG_FAULT_INJECTION
 	int				make_it_fail;
+	int fail_nth;
 #endif
 	/*
 	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
diff --git a/kernel/fork.c b/kernel/fork.c
index d2b9d7c31eaf..ade237a96308 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -573,6 +573,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	kcov_task_init(tsk);
 
+#ifdef CONFIG_FAULT_INJECTION
+	tsk->fail_nth = 0;
+#endif
+
 	return tsk;
 
 free_stack:
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4ff157159a0d..09ac73c177fd 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -107,6 +107,12 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail(struct fault_attr *attr, ssize_t size)
 {
+	if (in_task() && current->fail_nth) {
+		if (--current->fail_nth == 0)
+			goto fail;
+		return false;
+	}
+
 	/* No need to check any other properties if the probability is 0 */
 	if (attr->probability == 0)
 		return false;
@@ -134,6 +140,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 	if (!fail_stacktrace(attr))
 		return false;
 
+fail:
 	fail_dump(attr);
 
 	if (atomic_read(&attr->times) != -1)
-- 
cgit v1.2.3


From 1a23395672658969a4035dcc518ea6cab835c579 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:34:38 -0700
Subject: ipc/sem.c: remove sem_base, embed struct sem

sma->sem_base is initialized with

	sma->sem_base = (struct sem *) &sma[1];

The current code has four problems:
 - There is an unnecessary pointer dereference - sem_base is not needed.
 - Alignment for struct sem only works by chance.
 - The current code causes false positive for static code analysis.
 - This is a cast between different non-void types, which the future
   randstruct GCC plugin warns on.

And, as bonus, the code size gets smaller:

  Before:
    0 .text         00003770
  After:
    0 .text         0000374e

[manfred@colorfullife.com: s/[0]/[]/, per hch]
  Link: http://lkml.kernel.org/r/20170525185107.12869-2-manfred@colorfullife.com
Link: http://lkml.kernel.org/r/20170515171912.6298-2-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: <1vier1@web.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 22 +++++++++++++-
 ipc/sem.c           | 88 +++++++++++++++++++++--------------------------------
 2 files changed, 55 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9edec926e9d9..9db14093b73c 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -8,11 +8,29 @@
 
 struct task_struct;
 
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+	int	semval;		/* current value */
+	/*
+	 * PID of the process that last modified the semaphore. For
+	 * Linux, specifically these are:
+	 *  - semop
+	 *  - semctl, via SETVAL and SETALL.
+	 *  - at task exit when performing undo adjustments (see exit_sem).
+	 */
+	int	sempid;
+	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
+	struct list_head pending_alter; /* pending single-sop operations */
+					/* that alter the semaphore */
+	struct list_head pending_const; /* pending single-sop operations */
+					/* that do not alter the semaphore*/
+	time_t	sem_otime;	/* candidate for sem_otime */
+} ____cacheline_aligned_in_smp;
+
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
 	time_t			sem_ctime;	/* last change time */
-	struct sem		*sem_base;	/* ptr to first semaphore in array */
 	struct list_head	pending_alter;	/* pending operations */
 						/* that alter the array */
 	struct list_head	pending_const;	/* pending complex operations */
@@ -21,6 +39,8 @@ struct sem_array {
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
 	unsigned int		use_global_lock;/* >0: global lock required */
+
+	struct sem		sems[];
 };
 
 #ifdef CONFIG_SYSVIPC
diff --git a/ipc/sem.c b/ipc/sem.c
index 947dc2348271..fff8337ebab3 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -87,24 +87,6 @@
 #include <linux/uaccess.h>
 #include "util.h"
 
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
-	int	semval;		/* current value */
-	/*
-	 * PID of the process that last modified the semaphore. For
-	 * Linux, specifically these are:
-	 *  - semop
-	 *  - semctl, via SETVAL and SETALL.
-	 *  - at task exit when performing undo adjustments (see exit_sem).
-	 */
-	int	sempid;
-	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
-	struct list_head pending_alter; /* pending single-sop operations */
-					/* that alter the semaphore */
-	struct list_head pending_const; /* pending single-sop operations */
-					/* that do not alter the semaphore*/
-	time_t	sem_otime;	/* candidate for sem_otime */
-} ____cacheline_aligned_in_smp;
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
@@ -175,7 +157,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
  *	sem_array.sem_undo
  *
  * b) global or semaphore sem_lock() for read/write:
- *	sem_array.sem_base[i].pending_{const,alter}:
+ *	sem_array.sems[i].pending_{const,alter}:
  *
  * c) special:
  *	sem_undo_list.list_proc:
@@ -250,7 +232,7 @@ static void unmerge_queues(struct sem_array *sma)
 	 */
 	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
 		struct sem *curr;
-		curr = &sma->sem_base[q->sops[0].sem_num];
+		curr = &sma->sems[q->sops[0].sem_num];
 
 		list_add_tail(&q->list, &curr->pending_alter);
 	}
@@ -270,7 +252,7 @@ static void merge_queues(struct sem_array *sma)
 {
 	int i;
 	for (i = 0; i < sma->sem_nsems; i++) {
-		struct sem *sem = sma->sem_base + i;
+		struct sem *sem = &sma->sems[i];
 
 		list_splice_init(&sem->pending_alter, &sma->pending_alter);
 	}
@@ -306,7 +288,7 @@ static void complexmode_enter(struct sem_array *sma)
 	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
 	for (i = 0; i < sma->sem_nsems; i++) {
-		sem = sma->sem_base + i;
+		sem = &sma->sems[i];
 		spin_lock(&sem->lock);
 		spin_unlock(&sem->lock);
 	}
@@ -366,7 +348,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	 *
 	 * Both facts are tracked by use_global_mode.
 	 */
-	sem = sma->sem_base + sops->sem_num;
+	sem = &sma->sems[sops->sem_num];
 
 	/*
 	 * Initial check for use_global_lock. Just an optimization,
@@ -421,7 +403,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 		complexmode_tryleave(sma);
 		ipc_unlock_object(&sma->sem_perm);
 	} else {
-		struct sem *sem = sma->sem_base + locknum;
+		struct sem *sem = &sma->sems[locknum];
 		spin_unlock(&sem->lock);
 	}
 }
@@ -487,7 +469,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
-	size = sizeof(*sma) + nsems * sizeof(struct sem);
+	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
 	sma = ipc_rcu_alloc(size);
 	if (!sma)
 		return -ENOMEM;
@@ -504,12 +486,10 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 		return retval;
 	}
 
-	sma->sem_base = (struct sem *) &sma[1];
-
 	for (i = 0; i < nsems; i++) {
-		INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
-		INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
-		spin_lock_init(&sma->sem_base[i].lock);
+		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
+		INIT_LIST_HEAD(&sma->sems[i].pending_const);
+		spin_lock_init(&sma->sems[i].lock);
 	}
 
 	sma->complex_count = 0;
@@ -612,7 +592,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	un = q->undo;
 
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -639,7 +619,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	sop--;
 	pid = q->pid;
 	while (sop >= sops) {
-		sma->sem_base[sop->sem_num].sempid = pid;
+		sma->sems[sop->sem_num].sempid = pid;
 		sop--;
 	}
 
@@ -661,7 +641,7 @@ undo:
 	sop--;
 	while (sop >= sops) {
 		sem_op = sop->sem_op;
-		sma->sem_base[sop->sem_num].semval -= sem_op;
+		sma->sems[sop->sem_num].semval -= sem_op;
 		if (sop->sem_flg & SEM_UNDO)
 			un->semadj[sop->sem_num] += sem_op;
 		sop--;
@@ -692,7 +672,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 	 * until the operations can go through.
 	 */
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -716,7 +696,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 	}
 
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -815,7 +795,7 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
 	if (semnum == -1)
 		pending_list = &sma->pending_const;
 	else
-		pending_list = &sma->sem_base[semnum].pending_const;
+		pending_list = &sma->sems[semnum].pending_const;
 
 	list_for_each_entry_safe(q, tmp, pending_list, list) {
 		int error = perform_atomic_semop(sma, q);
@@ -856,7 +836,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		for (i = 0; i < nsops; i++) {
 			int num = sops[i].sem_num;
 
-			if (sma->sem_base[num].semval == 0) {
+			if (sma->sems[num].semval == 0) {
 				got_zero = 1;
 				semop_completed |= wake_const_ops(sma, num, wake_q);
 			}
@@ -867,7 +847,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		 * Assume all were changed.
 		 */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			if (sma->sem_base[i].semval == 0) {
+			if (sma->sems[i].semval == 0) {
 				got_zero = 1;
 				semop_completed |= wake_const_ops(sma, i, wake_q);
 			}
@@ -909,7 +889,7 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 	if (semnum == -1)
 		pending_list = &sma->pending_alter;
 	else
-		pending_list = &sma->sem_base[semnum].pending_alter;
+		pending_list = &sma->sems[semnum].pending_alter;
 
 again:
 	list_for_each_entry_safe(q, tmp, pending_list, list) {
@@ -922,7 +902,7 @@ again:
 		 * be in the  per semaphore pending queue, and decrements
 		 * cannot be successful if the value is already 0.
 		 */
-		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
+		if (semnum != -1 && sma->sems[semnum].semval == 0)
 			break;
 
 		error = perform_atomic_semop(sma, q);
@@ -959,9 +939,9 @@ again:
 static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 {
 	if (sops == NULL) {
-		sma->sem_base[0].sem_otime = get_seconds();
+		sma->sems[0].sem_otime = get_seconds();
 	} else {
-		sma->sem_base[sops[0].sem_num].sem_otime =
+		sma->sems[sops[0].sem_num].sem_otime =
 							get_seconds();
 	}
 }
@@ -1067,9 +1047,9 @@ static int count_semcnt(struct sem_array *sma, ushort semnum,
 	semcnt = 0;
 	/* First: check the simple operations. They are easy to evaluate */
 	if (count_zero)
-		l = &sma->sem_base[semnum].pending_const;
+		l = &sma->sems[semnum].pending_const;
 	else
-		l = &sma->sem_base[semnum].pending_alter;
+		l = &sma->sems[semnum].pending_alter;
 
 	list_for_each_entry(q, l, list) {
 		/* all task on a per-semaphore list sleep on exactly
@@ -1124,7 +1104,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 	}
 	for (i = 0; i < sma->sem_nsems; i++) {
-		struct sem *sem = sma->sem_base + i;
+		struct sem *sem = &sma->sems[i];
 		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
 			unlink_queue(sma, q);
 			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
@@ -1174,9 +1154,9 @@ static time_t get_semotime(struct sem_array *sma)
 	int i;
 	time_t res;
 
-	res = sma->sem_base[0].sem_otime;
+	res = sma->sems[0].sem_otime;
 	for (i = 1; i < sma->sem_nsems; i++) {
-		time_t to = sma->sem_base[i].sem_otime;
+		time_t to = sma->sems[i].sem_otime;
 
 		if (to > res)
 			res = to;
@@ -1325,7 +1305,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 		return -EIDRM;
 	}
 
-	curr = &sma->sem_base[semnum];
+	curr = &sma->sems[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
 	list_for_each_entry(un, &sma->list_id, list_id)
@@ -1402,7 +1382,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
-			sem_io[i] = sma->sem_base[i].semval;
+			sem_io[i] = sma->sems[i].semval;
 		sem_unlock(sma, -1);
 		rcu_read_unlock();
 		err = 0;
@@ -1450,8 +1430,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		}
 
 		for (i = 0; i < nsems; i++) {
-			sma->sem_base[i].semval = sem_io[i];
-			sma->sem_base[i].sempid = task_tgid_vnr(current);
+			sma->sems[i].semval = sem_io[i];
+			sma->sems[i].sempid = task_tgid_vnr(current);
 		}
 
 		ipc_assert_locked_object(&sma->sem_perm);
@@ -1476,7 +1456,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		err = -EIDRM;
 		goto out_unlock;
 	}
-	curr = &sma->sem_base[semnum];
+	curr = &sma->sems[semnum];
 
 	switch (cmd) {
 	case GETVAL:
@@ -1932,7 +1912,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 */
 	if (nsops == 1) {
 		struct sem *curr;
-		curr = &sma->sem_base[sops->sem_num];
+		curr = &sma->sems[sops->sem_num];
 
 		if (alter) {
 			if (sma->complex_count) {
@@ -2146,7 +2126,7 @@ void exit_sem(struct task_struct *tsk)
 
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *semaphore = &sma->sem_base[i];
+			struct sem *semaphore = &sma->sems[i];
 			if (un->semadj[i]) {
 				semaphore->semval += un->semadj[i];
 				/*
-- 
cgit v1.2.3


From dba4cdd39e698d8dcdad0656825423052ac90ccd Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:34:41 -0700
Subject: ipc: merge ipc_rcu and kern_ipc_perm

ipc has two management structures that exist for every id:
 - struct kern_ipc_perm, it contains e.g. the permissions.
 - struct ipc_rcu, it contains the rcu head for rcu handling and the
   refcount.

The patch merges both structures.

As a bonus, we may save one cacheline, because both structures are
cacheline aligned.  In addition, it reduces the number of casts, instead
most codepaths can use container_of.

To simplify code, the ipc_rcu_alloc initializes the allocation to 0.

[manfred@colorfullife.com: really include the memset() into ipc_alloc_rcu()]
  Link: http://lkml.kernel.org/r/564f8612-0601-b267-514f-a9f650ec9b32@colorfullife.com
Link: http://lkml.kernel.org/r/20170525185107.12869-3-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc.h |  3 +++
 ipc/msg.c           | 19 +++++++++++--------
 ipc/sem.c           | 34 +++++++++++++++++-----------------
 ipc/shm.c           | 18 +++++++++++-------
 ipc/util.c          | 35 +++++++++++++++++------------------
 ipc/util.h          | 18 +++++++-----------
 6 files changed, 66 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 71fd92d81b26..5591f055e13f 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -20,6 +20,9 @@ struct kern_ipc_perm {
 	umode_t		mode;
 	unsigned long	seq;
 	void		*security;
+
+	struct rcu_head rcu;
+	atomic_t refcount;
 } ____cacheline_aligned_in_smp;
 
 #endif /* _LINUX_IPC_H */
diff --git a/ipc/msg.c b/ipc/msg.c
index 104926dc72be..0ed7dae7d4e8 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -97,8 +97,8 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 
 static void msg_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct msg_queue *msq = ipc_rcu_to_struct(p);
+	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 
 	security_msg_queue_free(msq);
 	ipc_rcu_free(head);
@@ -118,7 +118,10 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	msq = ipc_rcu_alloc(sizeof(*msq));
+	BUILD_BUG_ON(offsetof(struct msg_queue, q_perm) != 0);
+
+	msq = container_of(ipc_rcu_alloc(sizeof(*msq)), struct msg_queue,
+				q_perm);
 	if (!msq)
 		return -ENOMEM;
 
@@ -128,7 +131,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq, ipc_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, ipc_rcu_free);
 		return retval;
 	}
 
@@ -144,7 +147,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	/* ipc_addid() locks msq upon success. */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id < 0) {
-		ipc_rcu_putref(msq, msg_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 		return id;
 	}
 
@@ -249,7 +252,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	ipc_rcu_putref(msq, msg_rcu_free);
+	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 }
 
 /*
@@ -688,7 +691,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		/* enqueue the sender and prepare to block */
 		ss_add(msq, &s, msgsz);
 
-		if (!ipc_rcu_getref(msq)) {
+		if (!ipc_rcu_getref(&msq->q_perm)) {
 			err = -EIDRM;
 			goto out_unlock0;
 		}
@@ -700,7 +703,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq, msg_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 		/* raced with RMID? */
 		if (!ipc_valid_object(&msq->q_perm)) {
 			err = -EIDRM;
diff --git a/ipc/sem.c b/ipc/sem.c
index fff8337ebab3..bdff6d93d2c7 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -260,8 +260,8 @@ static void merge_queues(struct sem_array *sma)
 
 static void sem_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct sem_array *sma = ipc_rcu_to_struct(p);
+	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
 
 	security_sem_free(sma);
 	ipc_rcu_free(head);
@@ -438,7 +438,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma, sem_rcu_free);
+	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -469,20 +469,20 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
+	BUILD_BUG_ON(offsetof(struct sem_array, sem_perm) != 0);
+
 	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
-	sma = ipc_rcu_alloc(size);
+	sma = container_of(ipc_rcu_alloc(size), struct sem_array, sem_perm);
 	if (!sma)
 		return -ENOMEM;
 
-	memset(sma, 0, size);
-
 	sma->sem_perm.mode = (semflg & S_IRWXUGO);
 	sma->sem_perm.key = key;
 
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma, ipc_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, ipc_rcu_free);
 		return retval;
 	}
 
@@ -502,7 +502,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		ipc_rcu_putref(sma, sem_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
@@ -1122,7 +1122,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_q(&wake_q);
 	ns->used_sems -= sma->sem_nsems;
-	ipc_rcu_putref(sma, sem_rcu_free);
+	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1362,7 +1362,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			goto out_unlock;
 		}
 		if (nsems > SEMMSL_FAST) {
-			if (!ipc_rcu_getref(sma)) {
+			if (!ipc_rcu_getref(&sma->sem_perm)) {
 				err = -EIDRM;
 				goto out_unlock;
 			}
@@ -1370,7 +1370,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if (sem_io == NULL) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
 			}
 
@@ -1395,7 +1395,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 		struct sem_undo *un;
 
-		if (!ipc_rcu_getref(sma)) {
+		if (!ipc_rcu_getref(&sma->sem_perm)) {
 			err = -EIDRM;
 			goto out_rcu_wakeup;
 		}
@@ -1404,20 +1404,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		if (nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if (sem_io == NULL) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
-			ipc_rcu_putref(sma, sem_rcu_free);
+			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1699,7 +1699,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	}
 
 	nsems = sma->sem_nsems;
-	if (!ipc_rcu_getref(sma)) {
+	if (!ipc_rcu_getref(&sma->sem_perm)) {
 		rcu_read_unlock();
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1709,7 +1709,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		ipc_rcu_putref(sma, sem_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
diff --git a/ipc/shm.c b/ipc/shm.c
index f45c7959b264..5ef6d31a52c5 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -174,9 +174,10 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 
 static void shm_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
-
+	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
+							rcu);
+	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
+							shm_perm);
 	security_shm_free(shp);
 	ipc_rcu_free(head);
 }
@@ -241,7 +242,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(i_size_read(file_inode(shm_file)),
 				shp->mlock_user);
 	fput(shm_file);
-	ipc_rcu_putref(shp, shm_rcu_free);
+	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 }
 
 /*
@@ -542,7 +543,10 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	shp = ipc_rcu_alloc(sizeof(*shp));
+	BUILD_BUG_ON(offsetof(struct shmid_kernel, shm_perm) != 0);
+
+	shp = container_of(ipc_rcu_alloc(sizeof(*shp)), struct shmid_kernel,
+				shm_perm);
 	if (!shp)
 		return -ENOMEM;
 
@@ -553,7 +557,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp, ipc_rcu_free);
+		ipc_rcu_putref(&shp->shm_perm, ipc_rcu_free);
 		return error;
 	}
 
@@ -624,7 +628,7 @@ no_id:
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	ipc_rcu_putref(shp, shm_rcu_free);
+	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 	return error;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index caec7b1bfaa3..5d1ff1035efe 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -418,46 +418,45 @@ void ipc_free(void *ptr)
 }
 
 /**
- * ipc_rcu_alloc - allocate ipc and rcu space
+ * ipc_rcu_alloc - allocate ipc space
  * @size: size desired
  *
- * Allocate memory for the rcu header structure +  the object.
- * Returns the pointer to the object or NULL upon failure.
+ * Allocate memory for an ipc object.
+ * The first member must be struct kern_ipc_perm.
  */
-void *ipc_rcu_alloc(int size)
+struct kern_ipc_perm *ipc_rcu_alloc(int size)
 {
 	/*
 	 * We prepend the allocation with the rcu struct
 	 */
-	struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
+	struct kern_ipc_perm *out = ipc_alloc(size);
 	if (unlikely(!out))
 		return NULL;
+
+	memset(out, 0, size);
 	atomic_set(&out->refcount, 1);
-	return out + 1;
+	return out;
 }
 
-int ipc_rcu_getref(void *ptr)
+int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
-	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-	return atomic_inc_not_zero(&p->refcount);
+	return atomic_inc_not_zero(&ptr->refcount);
 }
 
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+			void (*func)(struct rcu_head *head))
 {
-	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-	if (!atomic_dec_and_test(&p->refcount))
+	if (!atomic_dec_and_test(&ptr->refcount))
 		return;
 
-	call_rcu(&p->rcu, func);
+	call_rcu(&ptr->rcu, func);
 }
 
-void ipc_rcu_free(struct rcu_head *head)
+void ipc_rcu_free(struct rcu_head *h)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct kern_ipc_perm *ptr = container_of(h, struct kern_ipc_perm, rcu);
 
-	kvfree(p);
+	kvfree(ptr);
 }
 
 /**
diff --git a/ipc/util.h b/ipc/util.h
index 60ddccca464d..09d0f918c3e2 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,13 +47,6 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
-struct ipc_rcu {
-	struct rcu_head rcu;
-	atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
-#define ipc_rcu_to_struct(p)  ((void *)(p+1))
-
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -125,11 +118,14 @@ void ipc_free(void *ptr);
  * Objects are reference counted, they start with reference count 1.
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
+ *
+ * struct kern_ipc_perm must be the first member in the allocated structure.
  */
-void *ipc_rcu_alloc(int size);
-int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
-void ipc_rcu_free(struct rcu_head *head);
+struct kern_ipc_perm *ipc_rcu_alloc(int size);
+int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+			void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *h);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
-- 
cgit v1.2.3


From 2cd648c110b5570c3280bd645797658cabbe5f5c Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:34:44 -0700
Subject: include/linux/sem.h: correctly document sem_ctime

sem_ctime is initialized to the semget() time and then updated at every
semctl() that changes the array.

Thus it does not represent the time of the last change.

Especially, semop() calls are only stored in sem_otime, not in
sem_ctime.

This is already described in ipc/sem.c, I just overlooked that there is
a comment in include/linux/sem.h and man semctl(2) as well.

So: Correct wrong comments.

Link: http://lkml.kernel.org/r/20170515171912.6298-4-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: <1vier1@web.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h      | 2 +-
 include/uapi/linux/sem.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9db14093b73c..be5cf2ea14ad 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -30,7 +30,7 @@ struct sem {
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
-	time_t			sem_ctime;	/* last change time */
+	time_t			sem_ctime;	/* create/last semctl() time */
 	struct list_head	pending_alter;	/* pending operations */
 						/* that alter the array */
 	struct list_head	pending_const;	/* pending complex operations */
diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index dd73b908b2f3..67eb90361692 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h
@@ -23,7 +23,7 @@
 struct semid_ds {
 	struct ipc_perm	sem_perm;		/* permissions .. see ipc.h */
 	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
+	__kernel_time_t	sem_ctime;		/* create/last semctl() time */
 	struct sem	*sem_base;		/* ptr to first semaphore in array */
 	struct sem_queue *sem_pending;		/* pending operations to be processed */
 	struct sem_queue **sem_pending_last;	/* last pending operation */
-- 
cgit v1.2.3


From 24bb44612c5f93a1dff1f7e71b7b7b109a988791 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:40 -0700
Subject: kernel/watchdog: remove unused declaration

Patch series "Improve watchdog config for arch watchdogs", v4.

A series to make the hardlockup watchdog more easily replaceable by arch
code.  The last patch provides some justification for why we want to do
this (existing sparc watchdog is another that could benefit).

This patch (of 5):

Remove unused declaration.

Link: http://lkml.kernel.org/r/20170616065715.18390-2-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aa3cd0878270..5e2e57536d98 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -12,9 +12,6 @@ extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-				  void __user *buffer,
-				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern unsigned int  hardlockup_panic;
 void lockup_detector_init(void);
-- 
cgit v1.2.3


From f2e0cff85ed111a3cf24d894c3fa11697dfae628 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:43 -0700
Subject: kernel/watchdog: introduce arch_touch_nmi_watchdog()

For architectures that define HAVE_NMI_WATCHDOG, instead of having them
provide the complete touch_nmi_watchdog() function, just have them
provide arch_touch_nmi_watchdog().

This gives the generic code more flexibility in implementing this
function, and arch implementations don't miss out on touching the
softlockup watchdog or other generic details.

Link: http://lkml.kernel.org/r/20170616065715.18390-3-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/blackfin/include/asm/nmi.h            |  2 ++
 arch/blackfin/kernel/nmi.c                 |  2 +-
 arch/mn10300/include/asm/nmi.h             |  2 ++
 arch/mn10300/kernel/mn10300-watchdog-low.S |  8 ++++----
 arch/mn10300/kernel/mn10300-watchdog.c     |  2 +-
 arch/sparc/include/asm/nmi.h               |  1 +
 arch/sparc/kernel/nmi.c                    |  6 ++----
 include/linux/nmi.h                        | 27 ++++++++++++++++-----------
 kernel/watchdog_hld.c                      |  5 ++---
 9 files changed, 31 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h
index b9caac4fcfd8..107d23705f46 100644
--- a/arch/blackfin/include/asm/nmi.h
+++ b/arch/blackfin/include/asm/nmi.h
@@ -9,4 +9,6 @@
 
 #include <linux/nmi.h>
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
index 633c37083e87..1e714329fe8a 100644
--- a/arch/blackfin/kernel/nmi.c
+++ b/arch/blackfin/kernel/nmi.c
@@ -190,7 +190,7 @@ static int __init init_nmi_wdt(void)
 }
 device_initcall(init_nmi_wdt);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	atomic_set(&nmi_touched[smp_processor_id()], 1);
 }
diff --git a/arch/mn10300/include/asm/nmi.h b/arch/mn10300/include/asm/nmi.h
index f3671cbbc117..b05627597b1b 100644
--- a/arch/mn10300/include/asm/nmi.h
+++ b/arch/mn10300/include/asm/nmi.h
@@ -11,4 +11,6 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif /* _ASM_NMI_H */
diff --git a/arch/mn10300/kernel/mn10300-watchdog-low.S b/arch/mn10300/kernel/mn10300-watchdog-low.S
index f2f5c9cfaabd..34f8773de7d0 100644
--- a/arch/mn10300/kernel/mn10300-watchdog-low.S
+++ b/arch/mn10300/kernel/mn10300-watchdog-low.S
@@ -50,9 +50,9 @@ watchdog_handler:
 #   we can't inline it)
 #
 ###############################################################################
-	.globl	touch_nmi_watchdog
-	.type	touch_nmi_watchdog,@function
-touch_nmi_watchdog:
+	.globl	arch_touch_nmi_watchdog
+	.type	arch_touch_nmi_watchdog,@function
+arch_touch_nmi_watchdog:
 	clr	d0
 	clr	d1
 	mov	watchdog_alert_counter, a0
@@ -63,4 +63,4 @@ touch_nmi_watchdog:
 	lne
 	ret	[],0
 
-	.size	touch_nmi_watchdog,.-touch_nmi_watchdog
+	.size	arch_touch_nmi_watchdog,.-arch_touch_nmi_watchdog
diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c
index a2d8e6938d67..0d5641beadf5 100644
--- a/arch/mn10300/kernel/mn10300-watchdog.c
+++ b/arch/mn10300/kernel/mn10300-watchdog.c
@@ -31,7 +31,7 @@ static unsigned int watchdog;
 static unsigned int watchdog_hz = 1;
 unsigned int watchdog_alert_counter[NR_CPUS];
 
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 /*
  * the best way to detect whether a CPU has a 'hard lockup' problem
diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h
index 26ad2b2607c6..284eac3ffaf2 100644
--- a/arch/sparc/include/asm/nmi.h
+++ b/arch/sparc/include/asm/nmi.h
@@ -7,6 +7,7 @@ void nmi_adjust_hz(unsigned int new_hz);
 
 extern atomic_t nmi_active;
 
+void arch_touch_nmi_watchdog(void);
 void start_nmi_watchdog(void *unused);
 void stop_nmi_watchdog(void *unused);
 
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 95e73c63c99d..048ad783ea3f 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -51,7 +51,7 @@ static DEFINE_PER_CPU(unsigned int, last_irq_sum);
 static DEFINE_PER_CPU(long, alert_counter);
 static DEFINE_PER_CPU(int, nmi_touch);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	if (atomic_read(&nmi_active)) {
 		int cpu;
@@ -61,10 +61,8 @@ void touch_nmi_watchdog(void)
 				per_cpu(nmi_touch, cpu) = 1;
 		}
 	}
-
-	touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
 {
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 5e2e57536d98..bd387ef8bccd 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -6,6 +6,9 @@
 
 #include <linux/sched.h>
 #include <asm/irq.h>
+#if defined(CONFIG_HAVE_NMI_WATCHDOG)
+#include <asm/nmi.h>
+#endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
@@ -58,6 +61,18 @@ static inline void reset_hung_task_detector(void)
 #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
 #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void hardlockup_detector_disable(void);
+#else
+static inline void hardlockup_detector_disable(void) {}
+#endif
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+extern void arch_touch_nmi_watchdog(void);
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  * 
@@ -65,21 +80,11 @@ static inline void reset_hung_task_detector(void)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-#include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-#else
 static inline void touch_nmi_watchdog(void)
 {
+	arch_touch_nmi_watchdog();
 	touch_softlockup_watchdog();
 }
-#endif
-
-#if defined(CONFIG_HARDLOCKUP_DETECTOR)
-extern void hardlockup_detector_disable(void);
-#else
-static inline void hardlockup_detector_disable(void) {}
-#endif
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 54a427d1f344..90d688df6ce1 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -56,7 +56,7 @@ static int __init hardlockup_panic_setup(char *str)
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	/*
 	 * Using __raw here because some code paths have
@@ -66,9 +66,8 @@ void touch_nmi_watchdog(void)
 	 * going off.
 	 */
 	raw_cpu_write(watchdog_nmi_touch, true);
-	touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static struct perf_event_attr wd_hw_attr = {
 	.type		= PERF_TYPE_HARDWARE,
-- 
cgit v1.2.3


From 05a4a95279311c3a4633b4277a5d21cfd616c6c7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:46 -0700
Subject: kernel/watchdog: split up config options

Split SOFTLOCKUP_DETECTOR from LOCKUP_DETECTOR, and split
HARDLOCKUP_DETECTOR_PERF from HARDLOCKUP_DETECTOR.

LOCKUP_DETECTOR implies the general boot, sysctl, and programming
interfaces for the lockup detectors.

An architecture that wants to use a hard lockup detector must define
HAVE_HARDLOCKUP_DETECTOR_PERF or HAVE_HARDLOCKUP_DETECTOR_ARCH.

Alternatively an arch can define HAVE_NMI_WATCHDOG, which provides the
minimum arch_touch_nmi_watchdog, and it otherwise does its own thing and
does not implement the LOCKUP_DETECTOR interfaces.

sparc is unusual in that it has started to implement some of the
interfaces, but not fully yet.  It should probably be converted to a full
HAVE_HARDLOCKUP_DETECTOR_ARCH.

[npiggin@gmail.com: fix]
  Link: http://lkml.kernel.org/r/20170617223522.66c0ad88@roar.ozlabs.ibm.com
Link: http://lkml.kernel.org/r/20170616065715.18390-4-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig                   |  25 ++++-
 arch/powerpc/Kconfig           |   1 +
 arch/powerpc/kernel/setup_64.c |   2 +-
 arch/x86/Kconfig               |   1 +
 arch/x86/kernel/apic/hw_nmi.c  |   2 +-
 include/linux/nmi.h            |  29 +++--
 kernel/Makefile                |   2 +-
 kernel/sysctl.c                |  31 +++---
 kernel/watchdog.c              | 243 +++++++++++++++++++++++++++--------------
 kernel/watchdog_hld.c          |  32 ------
 lib/Kconfig.debug              |  45 +++++---
 11 files changed, 251 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index cae0958a2298..fb9bd7d36b05 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -198,9 +198,6 @@ config HAVE_KPROBES_ON_FTRACE
 config HAVE_NMI
 	bool
 
-config HAVE_NMI_WATCHDOG
-	depends on HAVE_NMI
-	bool
 #
 # An arch should select this if it provides all these things:
 #
@@ -288,6 +285,28 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_HARDLOCKUP_DETECTOR_PERF
+	bool
+	depends on HAVE_PERF_EVENTS_NMI
+	help
+	  The arch chooses to use the generic perf-NMI-based hardlockup
+	  detector. Must define HAVE_PERF_EVENTS_NMI.
+
+config HAVE_NMI_WATCHDOG
+	depends on HAVE_NMI
+	bool
+	help
+	  The arch provides a low level NMI watchdog. It provides
+	  asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
+
+config HAVE_HARDLOCKUP_DETECTOR_ARCH
+	bool
+	select HAVE_NMI_WATCHDOG
+	help
+	  The arch chooses to provide its own hardlockup detector, which is
+	  a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
+	  interfaces and parameters provided by hardlockup detector subsystem.
+
 config HAVE_PERF_REGS
 	bool
 	help
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7177a3f4f418..63ed758e1d20 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -197,6 +197,7 @@ config PPC
 	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4640f6d64f8b..074a075a9cdb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -752,7 +752,7 @@ struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return ppc_proc_freq * watchdog_thresh;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 94a18681353d..3d2b8ce54e00 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -162,6 +162,7 @@ config X86
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c73c9fb281e1..d6f387780849 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -19,7 +19,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return (u64)(cpu_khz) * 1000 * watchdog_thresh;
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index bd387ef8bccd..8aa01fd859fb 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -11,13 +11,21 @@
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int  softlockup_panic;
-extern unsigned int  hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
@@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
-static inline void lockup_detector_init(void)
-{
-}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -63,15 +68,18 @@ static inline void reset_hung_task_detector(void)
 
 #if defined(CONFIG_HARDLOCKUP_DETECTOR)
 extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
 #else
 static inline void hardlockup_detector_disable(void) {}
 #endif
 
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 extern void arch_touch_nmi_watchdog(void);
 #else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
+#endif
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
@@ -141,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 }
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
diff --git a/kernel/Makefile b/kernel/Makefile
index 72aa080f91f0..4cb8e8b23c6e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index df9f2a367882..6648fbbb8157 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -900,6 +900,14 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &zero,
 #endif
 	},
+	{
+		.procname	= "watchdog_cpumask",
+		.data		= &watchdog_cpumask_bits,
+		.maxlen		= NR_CPUS,
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_cpumask,
+	},
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 	{
 		.procname       = "soft_watchdog",
 		.data           = &soft_watchdog_enabled,
@@ -909,13 +917,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname	= "watchdog_cpumask",
-		.data		= &watchdog_cpumask_bits,
-		.maxlen		= NR_CPUS,
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_cpumask,
-	},
 	{
 		.procname	= "softlockup_panic",
 		.data		= &softlockup_panic,
@@ -925,27 +926,29 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
 	{
-		.procname	= "hardlockup_panic",
-		.data		= &hardlockup_panic,
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif /* CONFIG_SMP */
 #endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 	{
-		.procname	= "softlockup_all_cpu_backtrace",
-		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.procname	= "hardlockup_panic",
+		.data		= &hardlockup_panic,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "hardlockup_all_cpu_backtrace",
 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
@@ -957,6 +960,8 @@ static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_SMP */
 #endif
+#endif
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 03e0b69bb5bf..1fba9c3d66dc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,15 +29,58 @@
 #include <linux/kvm_para.h>
 #include <linux/kthread.h>
 
+/* Watchdog configuration */
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+						NMI_WATCHDOG_ENABLED;
 #else
 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	else if (!strncmp(str, "nopanic", 7))
+		hardlockup_panic = 0;
+	else if (!strncmp(str, "0", 1))
+		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	else if (!strncmp(str, "1", 1))
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 int __read_mostly soft_watchdog_enabled;
+#endif
+
 int __read_mostly watchdog_user_enabled;
 int __read_mostly watchdog_thresh = 10;
 
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,27 @@ static int __read_mostly watchdog_running;
  * of 'watchdog_running' cannot change while the watchdog is deactivated
  * temporarily (see related code in 'proc' handlers).
  */
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
 
 static u64 __read_mostly sample_period;
 
@@ -120,6 +177,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
 	return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 {
 	sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +186,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 }
 __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +272,6 @@ void touch_softlockup_watchdog_sync(void)
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-		return true;
-
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
-	return false;
-}
-
 static int is_softlockup(unsigned long touch_ts)
 {
 	unsigned long now = get_timestamp();
@@ -237,21 +284,21 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
-static void watchdog_interrupt_count(void)
+/* watchdog detector functions */
+bool is_hardlockup(void)
 {
-	__this_cpu_inc(hrtimer_interrupts);
-}
+	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
-	return 0;
+	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+		return true;
+
+	__this_cpu_write(hrtimer_interrupts_saved, hrint);
+	return false;
 }
-void __weak watchdog_nmi_disable(unsigned int cpu)
+
+static void watchdog_interrupt_count(void)
 {
+	__this_cpu_inc(hrtimer_interrupts);
 }
 
 static int watchdog_enable_all_cpus(void);
@@ -502,57 +549,6 @@ static void watchdog_unpark_threads(void)
 		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
 }
 
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
-	int ret = 0;
-
-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
-	/*
-	 * Multiple suspend requests can be active in parallel (counted by
-	 * the 'watchdog_suspended' variable). If the watchdog threads are
-	 * running, the first caller takes care that they will be parked.
-	 * The state of 'watchdog_running' cannot change while a suspend
-	 * request is active (see related code in 'proc' handlers).
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		ret = watchdog_park_threads();
-
-	if (ret == 0)
-		watchdog_suspended++;
-	else {
-		watchdog_disable_all_cpus();
-		pr_err("Failed to suspend lockup detectors, disabled\n");
-		watchdog_enabled = 0;
-	}
-
-	mutex_unlock(&watchdog_proc_mutex);
-
-	return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
-	mutex_lock(&watchdog_proc_mutex);
-
-	watchdog_suspended--;
-	/*
-	 * The watchdog threads are unparked if they were previously running
-	 * and if there is no more active suspend request.
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		watchdog_unpark_threads();
-
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
-}
-
 static int update_watchdog_all_cpus(void)
 {
 	int ret;
@@ -604,6 +600,81 @@ static void watchdog_disable_all_cpus(void)
 	}
 }
 
+#else /* SOFTLOCKUP */
+static int watchdog_park_threads(void)
+{
+	return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+	return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+	int ret = 0;
+
+	get_online_cpus();
+	mutex_lock(&watchdog_proc_mutex);
+	/*
+	 * Multiple suspend requests can be active in parallel (counted by
+	 * the 'watchdog_suspended' variable). If the watchdog threads are
+	 * running, the first caller takes care that they will be parked.
+	 * The state of 'watchdog_running' cannot change while a suspend
+	 * request is active (see related code in 'proc' handlers).
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		ret = watchdog_park_threads();
+
+	if (ret == 0)
+		watchdog_suspended++;
+	else {
+		watchdog_disable_all_cpus();
+		pr_err("Failed to suspend lockup detectors, disabled\n");
+		watchdog_enabled = 0;
+	}
+
+	mutex_unlock(&watchdog_proc_mutex);
+
+	return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
+{
+	mutex_lock(&watchdog_proc_mutex);
+
+	watchdog_suspended--;
+	/*
+	 * The watchdog threads are unparked if they were previously running
+	 * and if there is no more active suspend request.
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		watchdog_unpark_threads();
+
+	mutex_unlock(&watchdog_proc_mutex);
+	put_online_cpus();
+}
+
 #ifdef CONFIG_SYSCTL
 
 /*
@@ -810,9 +881,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 			 * a temporary cpumask, so we are likely not in a
 			 * position to do much else to make things better.
 			 */
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 			if (smpboot_update_cpumask_percpu_thread(
 				    &watchdog_threads, &watchdog_cpumask) != 0)
 				pr_err("cpumask update failed\n");
+#endif
 		}
 	}
 out:
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 90d688df6ce1..295a0d84934c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -22,39 +22,7 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
 
 void arch_touch_nmi_watchdog(void)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f28f4252e54a..b0d01c6d4e03 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -778,34 +778,45 @@ config DEBUG_SHIRQ
 menu "Debug Lockups and Hangs"
 
 config LOCKUP_DETECTOR
-	bool "Detect Hard and Soft Lockups"
+	bool
+
+config SOFTLOCKUP_DETECTOR
+	bool "Detect Soft Lockups"
 	depends on DEBUG_KERNEL && !S390
+	select LOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to act as a watchdog to detect
-	  hard and soft lockups.
+	  soft lockups.
 
 	  Softlockups are bugs that cause the kernel to loop in kernel
 	  mode for more than 20 seconds, without giving other tasks a
 	  chance to run.  The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
+config HARDLOCKUP_DETECTOR_PERF
+	bool
+	select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+	bool "Detect Hard Lockups"
+	depends on DEBUG_KERNEL && !S390
+	depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+	select LOCKUP_DETECTOR
+	select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+	select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+	help
+	  Say Y here to enable the kernel to act as a watchdog to detect
+	  hard lockups.
+
 	  Hardlockups are bugs that cause the CPU to loop in kernel mode
 	  for more than 10 seconds, without letting other interrupts have a
 	  chance to run.  The current stack trace is displayed upon detection
 	  and the system will stay locked up.
 
-	  The overhead should be minimal.  A periodic hrtimer runs to
-	  generate interrupts and kick the watchdog task every 4 seconds.
-	  An NMI is generated every 10 seconds or so to check for hardlockups.
-
-	  The frequency of hrtimer and NMI events and the soft and hard lockup
-	  thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
-	def_bool y
-	depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
-	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
 config BOOTPARAM_HARDLOCKUP_PANIC
 	bool "Panic (Reboot) On Hard Lockups"
 	depends on HARDLOCKUP_DETECTOR
@@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to panic on "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
@@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	int
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	range 0 1
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
-	default LOCKUP_DETECTOR
+	default SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to detect "hung tasks",
 	  which are bugs that cause the task to be stuck in
-- 
cgit v1.2.3


From 6974f0c4555e285ab217cee58b6e874f776ff409 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 12 Jul 2017 14:36:10 -0700
Subject: include/linux/string.h: add the option of fortified string.h
 functions

This adds support for compiling with a rough equivalent to the glibc
_FORTIFY_SOURCE=1 feature, providing compile-time and runtime buffer
overflow checks for string.h functions when the compiler determines the
size of the source or destination buffer at compile-time.  Unlike glibc,
it covers buffer reads in addition to writes.

GNU C __builtin_*_chk intrinsics are avoided because they would force a
much more complex implementation.  They aren't designed to detect read
overflows and offer no real benefit when using an implementation based
on inline checks.  Inline checks don't add up to much code size and
allow full use of the regular string intrinsics while avoiding the need
for a bunch of _chk functions and per-arch assembly to avoid wrapper
overhead.

This detects various overflows at compile-time in various drivers and
some non-x86 core kernel code.  There will likely be issues caught in
regular use at runtime too.

Future improvements left out of initial implementation for simplicity,
as it's all quite optional and can be done incrementally:

* Some of the fortified string functions (strncpy, strcat), don't yet
  place a limit on reads from the source based on __builtin_object_size of
  the source buffer.

* Extending coverage to more string functions like strlcat.

* It should be possible to optionally use __builtin_object_size(x, 1) for
  some functions (C strings) to detect intra-object overflows (like
  glibc's _FORTIFY_SOURCE=2), but for now this takes the conservative
  approach to avoid likely compatibility issues.

* The compile-time checks should be made available via a separate config
  option which can be enabled by default (or always enabled) once enough
  time has passed to get the issues it catches fixed.

Kees said:
 "This is great to have. While it was out-of-tree code, it would have
  blocked at least CVE-2016-3858 from being exploitable (improper size
  argument to strlcpy()). I've sent a number of fixes for
  out-of-bounds-reads that this detected upstream already"

[arnd@arndb.de: x86: fix fortified memcpy]
  Link: http://lkml.kernel.org/r/20170627150047.660360-1-arnd@arndb.de
[keescook@chromium.org: avoid panic() in favor of BUG()]
  Link: http://lkml.kernel.org/r/20170626235122.GA25261@beast
[keescook@chromium.org: move from -mm, add ARCH_HAS_FORTIFY_SOURCE, tweak Kconfig help]
Link: http://lkml.kernel.org/r/20170526095404.20439-1-danielmicay@gmail.com
Link: http://lkml.kernel.org/r/1497903987-21002-8-git-send-email-keescook@chromium.org
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Daniel Axtens <dja@axtens.net>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig                     |   6 ++
 arch/arm64/Kconfig               |   1 +
 arch/arm64/include/asm/string.h  |   5 +
 arch/powerpc/Kconfig             |   1 +
 arch/x86/Kconfig                 |   1 +
 arch/x86/boot/compressed/misc.c  |   5 +
 arch/x86/include/asm/string_32.h |   9 ++
 arch/x86/include/asm/string_64.h |   7 ++
 arch/x86/lib/memcpy_32.c         |   2 +-
 include/linux/string.h           | 200 +++++++++++++++++++++++++++++++++++++++
 lib/string.c                     |   7 ++
 security/Kconfig                 |   7 ++
 12 files changed, 250 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index fb9bd7d36b05..21d0089117fe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -223,6 +223,12 @@ config GENERIC_SMP_IDLE_THREAD
 config GENERIC_IDLE_POLL_SETUP
        bool
 
+config ARCH_HAS_FORTIFY_SOURCE
+	bool
+	help
+	  An architecture should select this when it can successfully
+	  build and run with CONFIG_FORTIFY_SOURCE.
+
 # Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
 config ARCH_HAS_SET_MEMORY
 	bool
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8addb851ab5e..dfd908630631 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -12,6 +12,7 @@ config ARM64
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 2eb714c4639f..d0aa42907569 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -63,6 +63,11 @@ extern int memcmp(const void *, const void *, size_t);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #endif
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fce2f4f20891..36f858c37ca7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -125,6 +125,7 @@ config PPC
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3d2b8ce54e00..781521b7cf9e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,7 @@ config X86
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 00241c815524..a0838ab929f2 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -411,3 +411,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	debug_putstr("done.\nBooting the kernel.\n");
 	return output;
 }
+
+void fortify_panic(const char *name)
+{
+	error("detected buffer overflow");
+}
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 3d3e8353ee5c..e9ee84873de5 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
 }
 
 #define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, size_t);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
@@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
 #endif
 
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMMOVE
 void *memmove(void *dest, const void *src, size_t n);
 
+extern int memcmp(const void *, const void *, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #define memcmp __builtin_memcmp
+#endif
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *cs, int c, size_t count);
@@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 	 : __memset_generic((s), (c), (count)))
 
 #define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #if (__GNUC__ >= 4)
 #define memset(s, c, count) __builtin_memset(s, c, count)
 #else
@@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 				 (count))				\
 	 : __memset((s), (c), (count)))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 /*
  * find the first occurrence of byte 'c', or 1 past the area if none
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 1f22bc277c45..2a8c822de1fc 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
 extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
 #define memcpy(dst, src, len)					\
@@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len);
  */
 #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
@@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #define __HAVE_ARCH_MEMCPY_MCSAFE 1
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index cad12634d6bd..2eab7d0bfedd 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -6,7 +6,7 @@
 
 __visible void *memcpy(void *to, const void *from, size_t n)
 {
-#ifdef CONFIG_X86_USE_3DNOW
+#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
 	return __memcpy3d(to, from, n);
 #else
 	return __memcpy(to, from, n);
diff --git a/include/linux/string.h b/include/linux/string.h
index 7439d83eaa33..96f5a5fd0377 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -193,4 +193,204 @@ static inline const char *kbasename(const char *path)
 	return tail ? tail + 1 : path;
 }
 
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
+
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strcpy(p, q);
+	if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
+		fortify_panic(__func__);
+	return p;
+}
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (p_size == (size_t)-1)
+		return __builtin_strcat(p, q);
+	if (strlcat(p, q, p_size) >= p_size)
+		fortify_panic(__func__);
+	return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+	__kernel_size_t ret;
+	size_t p_size = __builtin_object_size(p, 0);
+	if (p_size == (size_t)-1)
+		return __builtin_strlen(p);
+	ret = strnlen(p, p_size);
+	if (p_size <= ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+	if (p_size <= ret && maxlen != ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+	size_t ret;
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __real_strlcpy(p, q, size);
+	ret = strlen(q);
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		if (__builtin_constant_p(len) && len >= p_size)
+			__write_overflow();
+		if (len >= p_size)
+			fortify_panic(__func__);
+		__builtin_memcpy(p, q, len);
+		p[len] = '\0';
+	}
+	return ret;
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+	size_t p_len, copy_len;
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strncat(p, q, count);
+	p_len = strlen(p);
+	copy_len = strnlen(q, count);
+	if (p_size < p_len + copy_len + 1)
+		fortify_panic(__func__);
+	__builtin_memcpy(p + p_len, q, copy_len);
+	p[p_len + copy_len] = '\0';
+	return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__write_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__write_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__read_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_kmemdup(p, size, gfp);
+}
+#endif
+
 #endif /* _LINUX_STRING_H_ */
diff --git a/lib/string.c b/lib/string.c
index 1c1fc9187b05..ebbb99c775bd 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -978,3 +978,10 @@ char *strreplace(char *s, char old, char new)
 	return s;
 }
 EXPORT_SYMBOL(strreplace);
+
+void fortify_panic(const char *name)
+{
+	pr_emerg("detected buffer overflow in %s\n", name);
+	BUG();
+}
+EXPORT_SYMBOL(fortify_panic);
diff --git a/security/Kconfig b/security/Kconfig
index d540bfe73190..e8e449444e65 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -163,6 +163,13 @@ config HARDENED_USERCOPY_PAGESPAN
 	  been removed. This config is intended to be used only while
 	  trying to find such users.
 
+config FORTIFY_SOURCE
+	bool "Harden common str/mem functions against buffer overflows"
+	depends on ARCH_HAS_FORTIFY_SOURCE
+	help
+	  Detect overflows of buffers in common string and memory functions
+	  where the compiler can determine and validate the buffer sizes.
+
 config STATIC_USERMODEHELPER
 	bool "Force all usermode helper calls through a single binary"
 	help
-- 
cgit v1.2.3


From 022c204040f3fd22d6445bc35517786195b7ae80 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:17 -0700
Subject: random,stackprotect: introduce get_random_canary function

Patch series "stackprotector: ascii armor the stack canary", v2.

Zero out the first byte of the stack canary value on 64 bit systems, in
order to mitigate unterminated C string overflows.

The null byte both prevents C string functions from reading the canary,
and from writing it if the canary value were guessed or obtained through
some other means.

Reducing the entropy by 8 bits is acceptable on 64-bit systems, which
will still have 56 bits of entropy left, but not on 32 bit systems, so
the "ascii armor" canary is only implemented on 64-bit systems.

Inspired by the "ascii armor" code in execshield and Daniel Micay's
linux-hardened tree.

Also see https://github.com/thestinger/linux-hardened/

This patch (of 5):

Introduce get_random_canary(), which provides a random unsigned long
canary value with the first byte zeroed out on 64 bit architectures, in
order to mitigate non-terminated C string overflows.

The null byte both prevents C string functions from reading the canary,
and from writing it if the canary value were guessed or obtained through
some other means.

Reducing the entropy by 8 bits is acceptable on 64-bit systems, which
will still have 56 bits of entropy left, but not on 32 bit systems, so
the "ascii armor" canary is only implemented on 64-bit systems.

Inspired by the "ascii armor" code in the old execshield patches, and
Daniel Micay's linux-hardened tree.

Link: http://lkml.kernel.org/r/20170524155751.424-2-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/random.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index ed5c3838780d..1fa0dc880bd7 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+#  define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+#  define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+	unsigned long val = get_random_long();
+
+	return val & CANARY_MASK;
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
-- 
cgit v1.2.3


From dcda9b04713c3f6ff0875652924844fae28286ea Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:45 -0700
Subject: mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more
 useful semantic

__GFP_REPEAT was designed to allow retry-but-eventually-fail semantic to
the page allocator.  This has been true but only for allocations
requests larger than PAGE_ALLOC_COSTLY_ORDER.  It has been always
ignored for smaller sizes.  This is a bit unfortunate because there is
no way to express the same semantic for those requests and they are
considered too important to fail so they might end up looping in the
page allocator for ever, similarly to GFP_NOFAIL requests.

Now that the whole tree has been cleaned up and accidental or misled
usage of __GFP_REPEAT flag has been removed for !costly requests we can
give the original flag a better name and more importantly a more useful
semantic.  Let's rename it to __GFP_RETRY_MAYFAIL which tells the user
that the allocator would try really hard but there is no promise of a
success.  This will work independent of the order and overrides the
default allocator behavior.  Page allocator users have several levels of
guarantee vs.  cost options (take GFP_KERNEL as an example)

 - GFP_KERNEL & ~__GFP_RECLAIM - optimistic allocation without _any_
   attempt to free memory at all. The most light weight mode which even
   doesn't kick the background reclaim. Should be used carefully because
   it might deplete the memory and the next user might hit the more
   aggressive reclaim

 - GFP_KERNEL & ~__GFP_DIRECT_RECLAIM (or GFP_NOWAIT)- optimistic
   allocation without any attempt to free memory from the current
   context but can wake kswapd to reclaim memory if the zone is below
   the low watermark. Can be used from either atomic contexts or when
   the request is a performance optimization and there is another
   fallback for a slow path.

 - (GFP_KERNEL|__GFP_HIGH) & ~__GFP_DIRECT_RECLAIM (aka GFP_ATOMIC) -
   non sleeping allocation with an expensive fallback so it can access
   some portion of memory reserves. Usually used from interrupt/bh
   context with an expensive slow path fallback.

 - GFP_KERNEL - both background and direct reclaim are allowed and the
   _default_ page allocator behavior is used. That means that !costly
   allocation requests are basically nofail but there is no guarantee of
   that behavior so failures have to be checked properly by callers
   (e.g. OOM killer victim is allowed to fail currently).

 - GFP_KERNEL | __GFP_NORETRY - overrides the default allocator behavior
   and all allocation requests fail early rather than cause disruptive
   reclaim (one round of reclaim in this implementation). The OOM killer
   is not invoked.

 - GFP_KERNEL | __GFP_RETRY_MAYFAIL - overrides the default allocator
   behavior and all allocation requests try really hard. The request
   will fail if the reclaim cannot make any progress. The OOM killer
   won't be triggered.

 - GFP_KERNEL | __GFP_NOFAIL - overrides the default allocator behavior
   and all allocation requests will loop endlessly until they succeed.
   This might be really dangerous especially for larger orders.

Existing users of __GFP_REPEAT are changed to __GFP_RETRY_MAYFAIL
because they already had their semantic.  No new users are added.
__alloc_pages_slowpath is changed to bail out for __GFP_RETRY_MAYFAIL if
there is no progress and we have already passed the OOM point.

This means that all the reclaim opportunities have been exhausted except
the most disruptive one (the OOM killer) and a user defined fallback
behavior is more sensible than keep retrying in the page allocator.

[akpm@linux-foundation.org: fix arch/sparc/kernel/mdesc.c]
[mhocko@suse.com: semantic fix]
  Link: http://lkml.kernel.org/r/20170626123847.GM11534@dhcp22.suse.cz
[mhocko@kernel.org: address other thing spotted by Vlastimil]
  Link: http://lkml.kernel.org/r/20170626124233.GN11534@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/20170623085345.11304-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DMA-ISA-LPC.txt                |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c          |  2 +-
 arch/sparc/kernel/mdesc.c                    |  2 +-
 drivers/mmc/host/wbsd.c                      |  2 +-
 drivers/s390/char/vmcp.c                     |  2 +-
 drivers/target/target_core_transport.c       |  2 +-
 drivers/vhost/net.c                          |  2 +-
 drivers/vhost/scsi.c                         |  2 +-
 drivers/vhost/vsock.c                        |  2 +-
 include/linux/gfp.h                          | 56 +++++++++++++++++++++-------
 include/linux/slab.h                         |  3 +-
 include/trace/events/mmflags.h               |  2 +-
 mm/hugetlb.c                                 |  4 +-
 mm/internal.h                                |  2 +-
 mm/page_alloc.c                              | 14 +++++--
 mm/sparse-vmemmap.c                          |  4 +-
 mm/util.c                                    |  6 +--
 mm/vmalloc.c                                 |  2 +-
 mm/vmscan.c                                  |  8 ++--
 net/core/dev.c                               |  6 +--
 net/core/skbuff.c                            |  2 +-
 net/sched/sch_fq.c                           |  2 +-
 tools/perf/builtin-kmem.c                    |  2 +-
 24 files changed, 86 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
index c41331398752..7a065ac4a9d1 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt
@@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc.
 
 Unfortunately the memory available for ISA DMA is scarce so unless you
 allocate the memory during boot-up it's a good idea to also pass
-__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
+__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
 
 (This scarcity also means that you should allocate the buffer as
 early as possible and not release it until the driver is unloaded.)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 20b1485ff1e8..e2329db9d6f4 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
 #else
 	struct page *page;
-	page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+	page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL),
 				4);
 	if (!page)
 		return NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 710e491206ed..8cb0190e2a73 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
 	}
 
 	if (!hpt)
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
 				       |__GFP_NOWARN, order - PAGE_SHIFT);
 
 	if (!hpt)
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index e4b4e790bf89..fa466ce45bc9 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -205,7 +205,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
 	handle_size = (sizeof(struct mdesc_handle) -
 		       sizeof(struct mdesc_hdr) +
 		       mdesc_size);
-	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT);
+	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!base)
 		return NULL;
 
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index e15a9733fcfd..9668616faf16 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1386,7 +1386,7 @@ static void wbsd_request_dma(struct wbsd_host *host, int dma)
 	 * order for ISA to be able to DMA to it.
 	 */
 	host->dma_buffer = kmalloc(WBSD_DMA_SIZE,
-		GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN);
+		GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (!host->dma_buffer)
 		goto free;
 
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 65f5a794f26d..98749fa817da 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -98,7 +98,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count,
 	}
 	if (!session->response)
 		session->response = (char *)__get_free_pages(GFP_KERNEL
-						| __GFP_REPEAT | GFP_DMA,
+						| __GFP_RETRY_MAYFAIL | GFP_DMA,
 						get_order(session->bufsize));
 	if (!session->response) {
 		mutex_unlock(&session->mutex);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index f1b3a46bdcaf..1bdc10651bcd 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -252,7 +252,7 @@ int transport_alloc_session_tags(struct se_session *se_sess,
 	int rc;
 
 	se_sess->sess_cmd_map = kzalloc(tag_num * tag_size,
-					GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+					GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
 	if (!se_sess->sess_cmd_map) {
 		se_sess->sess_cmd_map = vzalloc(tag_num * tag_size);
 		if (!se_sess->sess_cmd_map) {
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e3d7ea1288c6..06d044862e58 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -897,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	struct sk_buff **queue;
 	int i;
 
-	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
+	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!n)
 		return -ENOMEM;
 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index fd6c8b66f06f..ff02a942c4d5 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1404,7 +1404,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
 	struct vhost_virtqueue **vqs;
 	int r = -ENOMEM, i;
 
-	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
 	if (!vs) {
 		vs = vzalloc(sizeof(*vs));
 		if (!vs)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3f63e03de8e8..c9de9c41aa97 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -508,7 +508,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	/* This struct is large and allocation could fail, fall back to vmalloc
 	 * if there is no other way.
 	 */
-	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT);
+	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!vsock)
 		return -ENOMEM;
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4c6656f1fee7..bcfb9f7c46f5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -25,7 +25,7 @@ struct vm_area_struct;
 #define ___GFP_FS		0x80u
 #define ___GFP_COLD		0x100u
 #define ___GFP_NOWARN		0x200u
-#define ___GFP_REPEAT		0x400u
+#define ___GFP_RETRY_MAYFAIL	0x400u
 #define ___GFP_NOFAIL		0x800u
 #define ___GFP_NORETRY		0x1000u
 #define ___GFP_MEMALLOC		0x2000u
@@ -136,26 +136,56 @@ struct vm_area_struct;
  *
  * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- *   _might_ fail.  This depends upon the particular VM implementation.
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules
+ *
+ * __GFP_NORETRY: The VM implementation will try only very lightweight
+ *   memory direct reclaim to get some memory under memory pressure (thus
+ *   it can sleep). It will avoid disruptive actions like OOM killer. The
+ *   caller must handle the failure which is quite likely to happen under
+ *   heavy memory pressure. The flag is suitable when failure can easily be
+ *   handled at small cost, such as reduced throughput
+ *
+ * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ *   procedures that have previously failed if there is some indication
+ *   that progress has been made else where.  It can wait for other
+ *   tasks to attempt high level approaches to freeing memory such as
+ *   compaction (which removes fragmentation) and page-out.
+ *   There is still a definite limit to the number of retries, but it is
+ *   a larger limit than with __GFP_NORETRY.
+ *   Allocations with this flag may fail, but only when there is
+ *   genuinely little unused memory. While these allocations do not
+ *   directly trigger the OOM killer, their failure indicates that
+ *   the system is likely to need to use the OOM killer soon.  The
+ *   caller must handle failure, but can reasonably do so by failing
+ *   a higher-level request, or completing it only in a much less
+ *   efficient manner.
+ *   If the allocation does fail, and the caller is in a position to
+ *   free some non-essential memory, doing so could benefit the system
+ *   as a whole.
  *
  * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. New users should be evaluated carefully
- *   (and the flag should be used only when there is no reasonable failure
- *   policy) but it is definitely preferable to use the flag rather than
- *   opencode endless loop around allocator.
- *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- *   return NULL when direct reclaim and memory compaction have failed to allow
- *   the allocation to succeed.  The OOM killer is not called with the current
- *   implementation.
+ *   cannot handle allocation failures. The allocation could block
+ *   indefinitely but will never return with failure. Testing for
+ *   failure is pointless.
+ *   New users should be evaluated carefully (and the flag should be
+ *   used only when there is no reasonable failure policy) but it is
+ *   definitely preferable to use the flag rather than opencode endless
+ *   loop around allocator.
+ *   Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)
 #define __GFP_FS	((__force gfp_t)___GFP_FS)
 #define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
 #define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
 #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)
+#define __GFP_RETRY_MAYFAIL	((__force gfp_t)___GFP_RETRY_MAYFAIL)
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 04a7f7993e67..41473df6dfb0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  *
  * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
  *
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail
+ *   eventually.
  *
  * There are other flags available as well, but these are not intended
  * for general use, and so are not documented here. For a full list of
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 10e3663a75a6..8e50d01c645f 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -34,7 +34,7 @@
 	{(unsigned long)__GFP_FS,		"__GFP_FS"},		\
 	{(unsigned long)__GFP_COLD,		"__GFP_COLD"},		\
 	{(unsigned long)__GFP_NOWARN,		"__GFP_NOWARN"},	\
-	{(unsigned long)__GFP_REPEAT,		"__GFP_REPEAT"},	\
+	{(unsigned long)__GFP_RETRY_MAYFAIL,	"__GFP_RETRY_MAYFAIL"},	\
 	{(unsigned long)__GFP_NOFAIL,		"__GFP_NOFAIL"},	\
 	{(unsigned long)__GFP_NORETRY,		"__GFP_NORETRY"},	\
 	{(unsigned long)__GFP_COMP,		"__GFP_COMP"},		\
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1e516520433d..bc48ee783dd9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1384,7 +1384,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
 	page = __alloc_pages_node(nid,
 		htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-						__GFP_REPEAT|__GFP_NOWARN,
+						__GFP_RETRY_MAYFAIL|__GFP_NOWARN,
 		huge_page_order(h));
 	if (page) {
 		prep_new_huge_page(h, page, nid);
@@ -1525,7 +1525,7 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
 {
 	int order = huge_page_order(h);
 
-	gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 	return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
diff --git a/mm/internal.h b/mm/internal.h
index 0e4f558412fb..24d88f084705 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -23,7 +23,7 @@
  * hints such as HIGHMEM usage.
  */
 #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+			__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
 			__GFP_ATOMIC)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 64b7d82a9b1a..6d30e914afb6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3284,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	/* The OOM killer will not help higher order allocs */
 	if (order > PAGE_ALLOC_COSTLY_ORDER)
 		goto out;
+	/*
+	 * We have already exhausted all our reclaim opportunities without any
+	 * success so it is time to admit defeat. We will skip the OOM killer
+	 * because it is very likely that the caller has a more reasonable
+	 * fallback than shooting a random task.
+	 */
+	if (gfp_mask & __GFP_RETRY_MAYFAIL)
+		goto out;
 	/* The OOM killer does not needlessly kill tasks for lowmem */
 	if (ac->high_zoneidx < ZONE_NORMAL)
 		goto out;
@@ -3413,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 	}
 
 	/*
-	 * !costly requests are much more important than __GFP_REPEAT
+	 * !costly requests are much more important than __GFP_RETRY_MAYFAIL
 	 * costly ones because they are de facto nofail and invoke OOM
 	 * killer to move on while costly can fail and users are ready
 	 * to cope with that. 1/4 retries is rather arbitrary but we
@@ -3920,9 +3928,9 @@ retry:
 
 	/*
 	 * Do not retry costly high order allocations unless they are
-	 * __GFP_REPEAT
+	 * __GFP_RETRY_MAYFAIL
 	 */
-	if (costly_order && !(gfp_mask & __GFP_REPEAT))
+	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
 		goto nopage;
 
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a56c3989f773..c50b1a14d55e 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -56,11 +56,11 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 
 		if (node_state(node, N_HIGH_MEMORY))
 			page = alloc_pages_node(
-				node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+				node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
 				get_order(size));
 		else
 			page = alloc_pages(
-				GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+				GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
 				get_order(size));
 		if (page)
 			return page_address(page);
diff --git a/mm/util.c b/mm/util.c
index 26be6407abd7..6520f2d4a226 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -339,7 +339,7 @@ EXPORT_SYMBOL(vm_mmap);
  * Uses kmalloc to get the memory but if the allocation fails then falls back
  * to the vmalloc allocator. Use kvfree for freeing the memory.
  *
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_RETRY_MAYFAIL
  * is supported only for large (>32kB) allocations, and it should be used only if
  * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
  *
@@ -367,11 +367,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 		kmalloc_flags |= __GFP_NOWARN;
 
 		/*
-		 * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly
+		 * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY for !costly
 		 * requests because there is no other way to tell the allocator
 		 * that we want to fail rather than retry endlessly.
 		 */
-		if (!(kmalloc_flags & __GFP_REPEAT) ||
+		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL) ||
 				(size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
 			kmalloc_flags |= __GFP_NORETRY;
 	}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6016ab079e2b..8698c1c86c4d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1795,7 +1795,7 @@ fail:
  *	allocator with @gfp_mask flags.  Map them into contiguous
  *	kernel virtual space, using a pagetable protection of @prot.
  *
- *	Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ *	Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
  *	and __GFP_NOFAIL are not supported
  *
  *	Any use of gfp flags outside of GFP_KERNEL should be consulted
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e9210f825219..a1af041930a6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2506,18 +2506,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 		return false;
 
 	/* Consider stopping depending on scan and reclaim activity */
-	if (sc->gfp_mask & __GFP_REPEAT) {
+	if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
 		/*
-		 * For __GFP_REPEAT allocations, stop reclaiming if the
+		 * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
 		 * full LRU list has been scanned and we are still failing
 		 * to reclaim pages. This full LRU scan is potentially
-		 * expensive but a __GFP_REPEAT caller really wants to succeed
+		 * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
 		 */
 		if (!nr_reclaimed && !nr_scanned)
 			return false;
 	} else {
 		/*
-		 * For non-__GFP_REPEAT allocations which can presumably
+		 * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
 		 * fail without consequence, stop if we failed to reclaim
 		 * any pages from the last SWAP_CLUSTER_MAX number of
 		 * pages that were scanned. This will return to the
diff --git a/net/core/dev.c b/net/core/dev.c
index 02440518dd69..8515f8fe0460 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7384,7 +7384,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	BUG_ON(count < 1);
 
-	rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+	rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!rx)
 		return -ENOMEM;
 
@@ -7424,7 +7424,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	if (count < 1 || count > 0xffff)
 		return -EINVAL;
 
-	tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+	tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!tx)
 		return -ENOMEM;
 
@@ -7965,7 +7965,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	/* ensure 32-byte alignment of whole construct */
 	alloc_size += NETDEV_ALIGN - 1;
 
-	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
+	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!p)
 		return NULL;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b11341ed69a..f990eb8b30a9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4747,7 +4747,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 	gfp_head = gfp_mask;
 	if (gfp_head & __GFP_DIRECT_RECLAIM)
-		gfp_head |= __GFP_REPEAT;
+		gfp_head |= __GFP_RETRY_MAYFAIL;
 
 	*errcode = -ENOBUFS;
 	skb = alloc_skb(header_len, gfp_head);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 147fde73a0f5..263d16e3219e 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -648,7 +648,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 		return 0;
 
 	/* If XPS was setup, we can allocate memory on right NUMA node */
-	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
+	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
 			      netdev_queue_numa_node_read(sch->dev_queue));
 	if (!array)
 		return -ENOMEM;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0a8a1c45af87..a1497c516d85 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -643,7 +643,7 @@ static const struct {
 	{ "__GFP_FS",			"F" },
 	{ "__GFP_COLD",			"CO" },
 	{ "__GFP_NOWARN",		"NWR" },
-	{ "__GFP_REPEAT",		"R" },
+	{ "__GFP_RETRY_MAYFAIL",	"R" },
 	{ "__GFP_NOFAIL",		"NF" },
 	{ "__GFP_NORETRY",		"NR" },
 	{ "__GFP_COMP",			"C" },
-- 
cgit v1.2.3


From 0f55685627d6dd2beda55a82abc02297f0f8e5c2 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:58 -0700
Subject: mm, migration: do not trigger OOM killer when migrating memory

Page migration (for memory hotplug, soft_offline_page or mbind) needs to
allocate a new memory.  This can trigger an oom killer if the target
memory is depleated.  Although quite unlikely, still possible,
especially for the memory hotplug (offlining of memoery).

Up to now we didn't really have reasonable means to back off.
__GFP_NORETRY can fail just too easily and __GFP_THISNODE sticks to a
single node and that is not suitable for all callers.

But now that we have __GFP_RETRY_MAYFAIL we should use it.  It is
preferable to fail the migration than disrupt the system by killing some
processes.

Link: http://lkml.kernel.org/r/20170623085345.11304-7-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 2 +-
 mm/mempolicy.c          | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4634da521238..3e0d405dc842 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -34,7 +34,7 @@ extern char *migrate_reason_names[MR_TYPES];
 static inline struct page *new_page_nodemask(struct page *page,
 				int preferred_nid, nodemask_t *nodemask)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
 
 	if (PageHuge(page))
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7d8e56214ac0..d911fa5cb2a7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1078,7 +1078,8 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
 	/*
 	 * if !vma, alloc_page_vma() will use task or system default policy
 	 */
-	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL,
+			vma, address);
 }
 #else
 
-- 
cgit v1.2.3


From c945dccc80856107f109c36a7d0e29a371b5d1b5 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:48 -0700
Subject: ARM: samsung: usb-ohci: move inline before return type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/667a515b8d0f10f2465d519f8595edd91552fc5e.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/platform_data/usb-ohci-s3c2410.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/usb-ohci-s3c2410.h b/include/linux/platform_data/usb-ohci-s3c2410.h
index 7fa1fbefc3f2..cc7554ae6e8b 100644
--- a/include/linux/platform_data/usb-ohci-s3c2410.h
+++ b/include/linux/platform_data/usb-ohci-s3c2410.h
@@ -31,7 +31,7 @@ struct s3c2410_hcd_info {
 	void		(*report_oc)(struct s3c2410_hcd_info *, int ports);
 };
 
-static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
+static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
 {
 	if (info->report_oc != NULL) {
 		(info->report_oc)(info, ports);
-- 
cgit v1.2.3


From 3e8f399da490e6ac20a3cfd6aa404c9aa961a9a2 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 12 Jul 2017 14:37:51 -0700
Subject: writeback: rework wb_[dec|inc]_stat family of functions

Currently the writeback statistics code uses a percpu counters to hold
various statistics.  Furthermore we have 2 families of functions - those
which disable local irq and those which doesn't and whose names begin
with double underscore.  However, they both end up calling
__add_wb_stats which in turn calls percpu_counter_add_batch which is
already irq-safe.

Exploiting this fact allows to eliminated the __wb_* functions since
they don't add any further protection than we already have.
Furthermore, refactor the wb_* function to call __add_wb_stat directly
without the irq-disabling dance.  This will likely result in better
runtime of code which deals with modifying the stat counters.

While at it also document why percpu_counter_add_batch is in fact
preempt and irq-safe since at least 3 people got confused.

Link: http://lkml.kernel.org/r/1498029937-27293-1-git-send-email-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c           |  8 ++++----
 include/linux/backing-dev.h | 24 ++----------------------
 lib/percpu_counter.c        |  7 +++++++
 mm/page-writeback.c         | 10 +++++-----
 4 files changed, 18 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8b426f83909f..245c430a2e41 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -380,8 +380,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		struct page *page = radix_tree_deref_slot_protected(slot,
 							&mapping->tree_lock);
 		if (likely(page) && PageDirty(page)) {
-			__dec_wb_stat(old_wb, WB_RECLAIMABLE);
-			__inc_wb_stat(new_wb, WB_RECLAIMABLE);
+			dec_wb_stat(old_wb, WB_RECLAIMABLE);
+			inc_wb_stat(new_wb, WB_RECLAIMABLE);
 		}
 	}
 
@@ -391,8 +391,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 							&mapping->tree_lock);
 		if (likely(page)) {
 			WARN_ON_ONCE(!PageWriteback(page));
-			__dec_wb_stat(old_wb, WB_WRITEBACK);
-			__inc_wb_stat(new_wb, WB_WRITEBACK);
+			dec_wb_stat(old_wb, WB_WRITEBACK);
+			inc_wb_stat(new_wb, WB_WRITEBACK);
 		}
 	}
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 334165c911f0..854e1bdd0b2a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
 	percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
-static inline void __inc_wb_stat(struct bdi_writeback *wb,
-				 enum wb_stat_item item)
-{
-	__add_wb_stat(wb, item, 1);
-}
-
 static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__inc_wb_stat(wb, item);
-	local_irq_restore(flags);
-}
-
-static inline void __dec_wb_stat(struct bdi_writeback *wb,
-				 enum wb_stat_item item)
-{
-	__add_wb_stat(wb, item, -1);
+	__add_wb_stat(wb, item, 1);
 }
 
 static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__dec_wb_stat(wb, item);
-	local_irq_restore(flags);
+	__add_wb_stat(wb, item, -1);
 }
 
 static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 8ee7e5ec21be..3bf4a9984f4c 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,6 +72,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
+/**
+ * This function is both preempt and irq safe. The former is due to explicit
+ * preemption disable. The latter is guaranteed by the fact that the slow path
+ * is explicitly protected by an irq-safe spinlock whereas the fast patch uses
+ * this_cpu_add which is irq-safe by definition. Hence there is no need muck
+ * with irq state before calling this one
+ */
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b60cc7ddac2..96e93b214d31 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -601,7 +601,7 @@ static inline void __wb_writeout_inc(struct bdi_writeback *wb)
 {
 	struct wb_domain *cgdom;
 
-	__inc_wb_stat(wb, WB_WRITTEN);
+	inc_wb_stat(wb, WB_WRITTEN);
 	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
 			       wb->bdi->max_prop_frac);
 
@@ -2435,8 +2435,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
 		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		__inc_node_page_state(page, NR_DIRTIED);
-		__inc_wb_stat(wb, WB_RECLAIMABLE);
-		__inc_wb_stat(wb, WB_DIRTIED);
+		inc_wb_stat(wb, WB_RECLAIMABLE);
+		inc_wb_stat(wb, WB_DIRTIED);
 		task_io_account_write(PAGE_SIZE);
 		current->nr_dirtied++;
 		this_cpu_inc(bdp_ratelimits);
@@ -2741,7 +2741,7 @@ int test_clear_page_writeback(struct page *page)
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
-				__dec_wb_stat(wb, WB_WRITEBACK);
+				dec_wb_stat(wb, WB_WRITEBACK);
 				__wb_writeout_inc(wb);
 			}
 		}
@@ -2786,7 +2786,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
-				__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 
 			/*
 			 * We can come through here when swapping anonymous
-- 
cgit v1.2.3


From 3c48d86cc959309ee168fb87737a8cb3f97c5224 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 12 Jul 2017 15:04:16 -0700
Subject: clk: Provide bulk prepare_enable disable_unprepare variants

This extends the existing set of bulk helpers with prepare_enable and
disable_unprepare variants.

Cc: Russell King <linux@armlinux.org.uk>,
Cc: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/linux/clk.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index c673f0b91751..690e6a6921e1 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -657,6 +657,28 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
+static inline int clk_bulk_prepare_enable(int num_clks,
+					  struct clk_bulk_data *clks)
+{
+	int ret;
+
+	ret = clk_bulk_prepare(num_clks, clks);
+	if (ret)
+		return ret;
+	ret = clk_bulk_enable(num_clks, clks);
+	if (ret)
+		clk_bulk_unprepare(num_clks, clks);
+
+	return ret;
+}
+
+static inline void clk_bulk_disable_unprepare(int num_clks,
+					      struct clk_bulk_data *clks)
+{
+	clk_bulk_disable(num_clks, clks);
+	clk_bulk_unprepare(num_clks, clks);
+}
+
 #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK)
 struct clk *of_clk_get(struct device_node *np, int index);
 struct clk *of_clk_get_by_name(struct device_node *np, const char *name);
-- 
cgit v1.2.3


From 0aebdc52ca824c38837a652548028e45da72628f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:31:19 +0200
Subject: sunrpc: properly type argument to kxdreproc_t

Pass struct rpc_request as the first argument instead of an untyped blob,
and mark the data object as const.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/sunrpc/xdr.h | 5 ++++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 054c8cde18f3..290f189de200 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -17,6 +17,8 @@
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
+struct rpc_rqst;
+
 /*
  * Buffer adjustment
  */
@@ -222,7 +224,8 @@ struct xdr_stream {
 /*
  * These are the xdr_stream style generic XDR encode and decode functions.
  */
-typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj);
 typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b5cb921775a0..9fee20dc0c80 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2476,7 +2476,8 @@ out_overflow:
 	goto out_garbage;
 }
 
-static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj)
 {
 }
 
-- 
cgit v1.2.3


From 993328e2b31fedc35276a4828039ad7af6d519cb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:58:11 +0200
Subject: sunrpc: properly type argument to kxdrdproc_t

Pass struct rpc_request as the first argument instead of an untyped blob.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xdr.h | 3 ++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 290f189de200..ed0fbf0d8d0f 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -226,7 +226,8 @@ struct xdr_stream {
  */
 typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		const void *obj);
-typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9fee20dc0c80..964d5c4a1b60 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2481,7 +2481,8 @@ static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From c551858a884b6d81def3d1528a9002ba97f5d4ad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:27:10 +0200
Subject: sunrpc: move p_count out of struct rpc_procinfo

p_count is the only writeable memeber of struct rpc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct rpc_procinfo, and into a
separate writable array that is pointed to by struct rpc_version and
indexed by p_statidx.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/clnt4xdr.c                  |  2 ++
 fs/lockd/clntxdr.c                   |  4 ++++
 fs/lockd/mon.c                       |  4 +++-
 fs/nfs/mount_clnt.c                  |  5 ++++-
 fs/nfs/nfs2xdr.c                     |  4 +++-
 fs/nfs/nfs3xdr.c                     |  6 +++++-
 fs/nfs/nfs4xdr.c                     |  4 +++-
 fs/nfsd/nfs4callback.c               |  4 +++-
 include/linux/sunrpc/clnt.h          |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  3 ++-
 net/sunrpc/clnt.c                    |  6 ++++--
 net/sunrpc/rpcb_clnt.c               | 12 +++++++++---
 net/sunrpc/stats.c                   |  3 +--
 13 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f0ab7a99dd23..7c255d1d7c64 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -602,8 +602,10 @@ static struct rpc_procinfo	nlm4_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version4_counts[ARRAY_SIZE(nlm4_procedures)];
 const struct rpc_version nlm_version4 = {
 	.number		= 4,
 	.nrprocs	= ARRAY_SIZE(nlm4_procedures),
 	.procs		= nlm4_procedures,
+	.counts		= nlm_version4_counts,
 };
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index bd8a976785ae..39500c5743a5 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -600,16 +600,20 @@ static struct rpc_procinfo	nlm_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version1_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version1_counts,
 };
 
+static unsigned int nlm_version3_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version3_counts,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 62424e929a7f..fe4ec82764fe 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -552,10 +552,12 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 };
 
+static unsigned int nsm_version1_counts[ARRAY_SIZE(nsm_procedures)];
 static const struct rpc_version nsm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nsm_procedures),
-	.procs		= nsm_procedures
+	.procs		= nsm_procedures,
+	.counts		= nsm_version1_counts,
 };
 
 static const struct rpc_version *nsm_version[] = {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 806657d65074..d25914aa8bf9 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -504,17 +504,20 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 };
 
-
+static unsigned int mnt_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
+	.counts		= mnt_counts,
 };
 
+static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
+	.counts		= mnt3_counts,
 };
 
 static const struct rpc_version *mnt_version[] = {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a299648ea321..16b4526299c1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1170,8 +1170,10 @@ struct rpc_procinfo	nfs_procedures[] = {
 	PROC(STATFS,	fhandle,	statfsres,	0),
 };
 
+static unsigned int nfs_version2_counts[ARRAY_SIZE(nfs_procedures)];
 const struct rpc_version nfs_version2 = {
 	.number			= 2,
 	.nrprocs		= ARRAY_SIZE(nfs_procedures),
-	.procs			= nfs_procedures
+	.procs			= nfs_procedures,
+	.counts			= nfs_version2_counts,
 };
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index cc272eb8be3e..a017ec5c7a9d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2578,10 +2578,12 @@ struct rpc_procinfo	nfs3_procedures[] = {
 	PROC(COMMIT,		commit,		commit,		5),
 };
 
+static unsigned int nfs_version3_counts[ARRAY_SIZE(nfs3_procedures)];
 const struct rpc_version nfs_version3 = {
 	.number			= 3,
 	.nrprocs		= ARRAY_SIZE(nfs3_procedures),
-	.procs			= nfs3_procedures
+	.procs			= nfs3_procedures,
+	.counts			= nfs_version3_counts,
 };
 
 #ifdef CONFIG_NFS_V3_ACL
@@ -2606,10 +2608,12 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 };
 
+static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
 	.nrprocs		= sizeof(nfs3_acl_procedures)/
 				  sizeof(nfs3_acl_procedures[0]),
 	.procs			= nfs3_acl_procedures,
+	.counts			= nfs3_acl_counts,
 };
 #endif  /* CONFIG_NFS_V3_ACL */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 797f3ce75286..40cf5529e65f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7661,10 +7661,12 @@ struct rpc_procinfo	nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_2 */
 };
 
+static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
 const struct rpc_version nfs_version4 = {
 	.number			= 4,
 	.nrprocs		= ARRAY_SIZE(nfs4_procedures),
-	.procs			= nfs4_procedures
+	.procs			= nfs4_procedures,
+	.counts			= nfs_version4_counts,
 };
 
 /*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a2bedbd05b2b..afa961fe073c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -705,6 +705,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 };
 
+static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
 static struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
@@ -715,7 +716,8 @@ static struct rpc_version nfs_cb_version4 = {
  */
 	.number			= 1,
 	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
-	.procs			= nfs4_cb_procedures
+	.procs			= nfs4_cb_procedures,
+	.counts			= nfs4_cb_counts,
 };
 
 static const struct rpc_version *nfs_cb_version[] = {
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecba0dde..c75ba37151fe 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -88,6 +88,7 @@ struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
 	struct rpc_procinfo *	procs;		/* procedure array */
+	unsigned int		*counts;	/* call counts */
 };
 
 /*
@@ -99,7 +100,6 @@ struct rpc_procinfo {
 	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
-	unsigned int		p_count;	/* call count */
 	unsigned int		p_timer;	/* Which RTT timer to use */
 	u32			p_statidx;	/* Which procedure to account */
 	const char *		p_name;		/* name of procedure */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index a80b8e607478..f8729b647605 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -364,11 +364,12 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
 /*
  * Initialization stuff
  */
-
+static unsigned int gssp_version1_counts[ARRAY_SIZE(gssp_procedures)];
 static const struct rpc_version gssp_version1 = {
 	.number		= GSSPROXY_VERS_1,
 	.nrprocs	= ARRAY_SIZE(gssp_procedures),
 	.procs		= gssp_procedures,
+	.counts		= gssp_version1_counts,
 };
 
 static const struct rpc_version *gssp_version[] = {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 964d5c4a1b60..f2d1f971247b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1517,14 +1517,16 @@ static void
 call_start(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	int idx = task->tk_msg.rpc_proc->p_statidx;
 
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
-	/* Increment call count */
-	task->tk_msg.rpc_proc->p_count++;
+	/* Increment call count (version might not be valid for ping) */
+	if (clnt->cl_program->version[clnt->cl_vers])
+		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index f67b9e2897b4..9d47b9d3bbee 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -1117,22 +1117,28 @@ static const struct rpcb_info rpcb_next_version6[] = {
 	},
 };
 
+static unsigned int rpcb_version2_counts[ARRAY_SIZE(rpcb_procedures2)];
 static const struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
-	.procs		= rpcb_procedures2
+	.procs		= rpcb_procedures2,
+	.counts		= rpcb_version2_counts,
 };
 
+static unsigned int rpcb_version3_counts[ARRAY_SIZE(rpcb_procedures3)];
 static const struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
-	.procs		= rpcb_procedures3
+	.procs		= rpcb_procedures3,
+	.counts		= rpcb_version3_counts,
 };
 
+static unsigned int rpcb_version4_counts[ARRAY_SIZE(rpcb_procedures4)];
 static const struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
-	.procs		= rpcb_procedures4
+	.procs		= rpcb_procedures4,
+	.counts		= rpcb_version4_counts,
 };
 
 static const struct rpc_version *rpcb_version[] = {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index caeb01ad2b5a..91c84d18bf9a 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -55,8 +55,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
 		seq_printf(seq, "proc%u %u",
 					vers->number, vers->nrprocs);
 		for (j = 0; j < vers->nrprocs; j++)
-			seq_printf(seq, " %u",
-					vers->procs[j].p_count);
+			seq_printf(seq, " %u", vers->counts[j]);
 		seq_putc(seq, '\n');
 	}
 	return 0;
-- 
cgit v1.2.3


From 511e936bf2b3e8be2a3160ace3d86be07962a7a8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:36:49 +0200
Subject: sunrpc: mark all struct rpc_procinfo instances as const

struct rpc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c                  |  2 +-
 fs/lockd/clntxdr.c                   |  2 +-
 fs/lockd/mon.c                       |  2 +-
 fs/nfs/internal.h                    |  6 +++---
 fs/nfs/mount_clnt.c                  |  4 ++--
 fs/nfs/nfs2xdr.c                     |  2 +-
 fs/nfs/nfs3xdr.c                     |  4 ++--
 fs/nfs/nfs4_fs.h                     |  2 +-
 fs/nfs/nfs4xdr.c                     |  2 +-
 fs/nfsd/nfs4callback.c               |  2 +-
 include/linux/sunrpc/clnt.h          |  4 ++--
 include/linux/sunrpc/sched.h         |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  2 +-
 net/sunrpc/clnt.c                    |  4 ++--
 net/sunrpc/rpcb_clnt.c               | 19 ++++++++++---------
 net/sunrpc/stats.c                   |  2 +-
 16 files changed, 31 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 7c255d1d7c64..c349fc0f9b80 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -584,7 +584,7 @@ out:
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm4_procedures[] = {
+static const struct rpc_procinfo nlm4_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 39500c5743a5..3b4724a6c4ee 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -582,7 +582,7 @@ out:
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm_procedures[] = {
+static const struct rpc_procinfo nlm_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index fe4ec82764fe..9d8166c39c54 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -531,7 +531,7 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
 
-static struct rpc_procinfo	nsm_procedures[] = {
+static const struct rpc_procinfo nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= nsm_xdr_enc_mon,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3e24392f2caa..dc2a29a7d48b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -270,12 +270,12 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 }
 
 /* nfs2xdr.c */
-extern struct rpc_procinfo nfs_procedures[];
+extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
 /* nfs3xdr.c */
-extern struct rpc_procinfo nfs3_procedures[];
+extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
@@ -292,7 +292,7 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
 
 /* nfs4proc.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 #endif
 
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d25914aa8bf9..3efe946672be 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -466,7 +466,7 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 	return decode_auth_flavors(xdr, res);
 }
 
-static struct rpc_procinfo mnt_procedures[] = {
+static const struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
@@ -485,7 +485,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 };
 
-static struct rpc_procinfo mnt3_procedures[] = {
+static const struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 16b4526299c1..c8a7e98c1371 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1152,7 +1152,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 	.p_statidx  =  NFSPROC_##proc,					\
 	.p_name     =  #proc,						\
 	}
-struct rpc_procinfo	nfs_procedures[] = {
+const struct rpc_procinfo nfs_procedures[] = {
 	PROC(GETATTR,	fhandle,	attrstat,	1),
 	PROC(SETATTR,	sattrargs,	attrstat,	0),
 	PROC(LOOKUP,	diropargs,	diropres,	2),
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 85ff1187e637..670eddb3ae36 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2554,7 +2554,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 	.p_name      = #proc,						\
 	}
 
-struct rpc_procinfo	nfs3_procedures[] = {
+const struct rpc_procinfo nfs3_procedures[] = {
 	PROC(GETATTR,		getattr,	getattr,	1),
 	PROC(SETATTR,		setattr,	setattr,	0),
 	PROC(LOOKUP,		lookup,		lookup,		2),
@@ -2587,7 +2587,7 @@ const struct rpc_version nfs_version3 = {
 };
 
 #ifdef CONFIG_NFS_V3_ACL
-static struct rpc_procinfo	nfs3_acl_procedures[] = {
+static const struct rpc_procinfo nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = nfs3_xdr_enc_getacl3args,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index af285cc27ccf..9b0cf3872722 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -493,7 +493,7 @@ static inline void nfs4_unregister_sysctl(void)
 #endif
 
 /* nfs4xdr.c */
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 40cf5529e65f..0f1f290c97cd 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7594,7 +7594,7 @@ nfs4_stat_to_errno(int stat)
 	.p_name = #proc,	\
 }
 
-struct rpc_procinfo	nfs4_procedures[] = {
+const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(READ,		enc_read,		dec_read),
 	PROC(WRITE,		enc_write,		dec_write),
 	PROC(COMMIT,		enc_commit,		dec_commit),
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index afa961fe073c..ac10f78c0fb3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -696,7 +696,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 	.p_name    = #proc,						\
 }
 
-static struct rpc_procinfo nfs4_cb_procedures[] = {
+static const struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
 	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
 #ifdef CONFIG_NFSD_PNFS
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c75ba37151fe..55ef67bea06b 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -39,7 +39,7 @@ struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
-	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
+	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
 				cl_maxproc;	/* max procedure number */
@@ -87,7 +87,7 @@ struct rpc_program {
 struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
-	struct rpc_procinfo *	procs;		/* procedure array */
+	const struct rpc_procinfo *procs;	/* procedure array */
 	unsigned int		*counts;	/* call counts */
 };
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec..ed60253abd0a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -22,7 +22,7 @@
  */
 struct rpc_procinfo;
 struct rpc_message {
-	struct rpc_procinfo *	rpc_proc;	/* Procedure information */
+	const struct rpc_procinfo *rpc_proc;	/* Procedure information */
 	void *			rpc_argp;	/* Arguments */
 	void *			rpc_resp;	/* Result */
 	struct rpc_cred *	rpc_cred;	/* Credentials */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f8729b647605..46b295e4f2b8 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -63,7 +63,7 @@ enum {
 	.p_name   = #proc,				\
 }
 
-static struct rpc_procinfo gssp_procedures[] = {
+static const struct rpc_procinfo gssp_procedures[] = {
 	PROC(INDICATE_MECHS, indicate_mechs),
         PROC(GET_CALL_CONTEXT, get_call_context),
         PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f2d1f971247b..2e49d1f892b7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1674,7 +1674,7 @@ call_allocate(struct rpc_task *task)
 	unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
-	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
 	int status;
 
 	dprint_status(task);
@@ -2489,7 +2489,7 @@ static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	return 0;
 }
 
-static struct rpc_procinfo rpcproc_null = {
+static const struct rpc_procinfo rpcproc_null = {
 	.p_encode = rpcproc_encode_null,
 	.p_decode = rpcproc_decode_null,
 };
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 9d47b9d3bbee..ea0676f199c8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -128,13 +128,13 @@ struct rpcbind_args {
 	int			r_status;
 };
 
-static struct rpc_procinfo rpcb_procedures2[];
-static struct rpc_procinfo rpcb_procedures3[];
-static struct rpc_procinfo rpcb_procedures4[];
+static const struct rpc_procinfo rpcb_procedures2[];
+static const struct rpc_procinfo rpcb_procedures3[];
+static const struct rpc_procinfo rpcb_procedures4[];
 
 struct rpcb_info {
 	u32			rpc_vers;
-	struct rpc_procinfo *	rpc_proc;
+	const struct rpc_procinfo *rpc_proc;
 };
 
 static const struct rpcb_info rpcb_next_version[];
@@ -620,7 +620,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
 	return -EAFNOSUPPORT;
 }
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt,
+		struct rpcbind_args *map, const struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
 		.rpc_proc = proc,
@@ -671,7 +672,7 @@ static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt;
-	struct rpc_procinfo *proc;
+	const struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -994,7 +995,7 @@ out_fail:
  * since the Linux kernel RPC code requires only these.
  */
 
-static struct rpc_procinfo rpcb_procedures2[] = {
+static const struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_mapping,
@@ -1027,7 +1028,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures3[] = {
+static const struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
@@ -1060,7 +1061,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures4[] = {
+static const struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 91c84d18bf9a..8b6c35ae1d57 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -191,7 +191,7 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
 EXPORT_SYMBOL_GPL(rpc_count_iostats);
 
 static void _print_name(struct seq_file *seq, unsigned int op,
-			struct rpc_procinfo *procs)
+			const struct rpc_procinfo *procs)
 {
 	if (procs[op].p_name)
 		seq_printf(seq, "\t%12s: ", procs[op].p_name);
-- 
cgit v1.2.3


From 1c8a5409f3c4748ff42c1721d9578dd03091f378 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 17:35:49 +0200
Subject: sunrpc: properly type pc_func callbacks

Drop the argp and resp arguments as they can trivially be derived from
the rqstp argument.  With that all functions now have the same prototype,
and we can remove the unsafe casting to svc_procfunc as well as the
svc_procfunc typedef itself.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        | 118 ++++++++++++++++++++++-------------
 fs/lockd/svcproc.c         | 118 ++++++++++++++++++++++-------------
 fs/nfs/callback_xdr.c      |   7 ++-
 fs/nfsd/nfs2acl.c          |  25 ++++----
 fs/nfsd/nfs3acl.c          |  15 ++---
 fs/nfsd/nfs3proc.c         | 151 ++++++++++++++++++++++++++-------------------
 fs/nfsd/nfs4proc.c         |   9 +--
 fs/nfsd/nfsproc.c          | 104 +++++++++++++++++--------------
 fs/nfsd/nfssvc.c           |   2 +-
 include/linux/sunrpc/svc.h |   4 +-
 net/sunrpc/svc.c           |   2 +-
 11 files changed, 328 insertions(+), 227 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 09c576f26c7b..3e4cba029d3d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -62,7 +62,7 @@ no_locks:
  * NULL: Test for presence of service
  */
 static __be32
-nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlm4svc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -72,9 +72,9 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -99,9 +99,15 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlm4svc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlm4svc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -141,9 +147,15 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlm4svc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -170,13 +182,19 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -203,14 +221,21 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -219,6 +244,12 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -243,9 +274,10 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *,  struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -261,7 +293,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -273,48 +305,44 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, __nlm4svc_proc_test);
 }
 
-static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, __nlm4svc_proc_lock);
 }
 
-static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, __nlm4svc_proc_cancel);
 }
 
-static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlm4svc_proc_unlock);
 }
 
-static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, __nlm4svc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlm4svc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -345,9 +373,10 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -378,22 +407,23 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlm4svc_proc_lock(rqstp, argp, resp);
+	return nlm4svc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlm4svc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -409,9 +439,10 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -429,9 +460,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
         if (!nlmsvc_ops)
                 return rpc_success;
 
@@ -463,7 +495,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlm4svc_proc_##name,	\
+ { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index fb26b9f522e7..3add50661fab 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -92,7 +92,7 @@ no_locks:
  * NULL: Test for presence of service
  */
 static __be32
-nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlmsvc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -102,9 +102,9 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -130,9 +130,15 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlmsvc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlmsvc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -172,9 +178,15 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlmsvc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -202,13 +214,19 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -236,14 +254,21 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -252,6 +277,12 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -284,9 +315,10 @@ static const struct rpc_call_ops nlmsvc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *, struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -302,7 +334,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -314,50 +346,46 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, __nlmsvc_proc_test);
 }
 
-static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, __nlmsvc_proc_lock);
 }
 
-static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, __nlmsvc_proc_cancel);
 }
 
 static __be32
-nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlmsvc_proc_unlock);
 }
 
 static __be32
-nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, __nlmsvc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlmsvc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -388,9 +416,10 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -421,22 +450,23 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlmsvc_proc_lock(rqstp, argp, resp);
+	return nlmsvc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlmsvc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -452,9 +482,10 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlmsvc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -472,9 +503,10 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
 	if (!nlmsvc_ops)
 		return rpc_success;
 
@@ -505,7 +537,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlmsvc_proc_##name,	\
+ { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 287c02202b25..5a14bdaa5986 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -53,7 +53,7 @@ struct callback_op {
 
 static struct callback_op callback_ops[];
 
-static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 {
 	return htonl(NFS4_OK);
 }
@@ -880,7 +880,7 @@ encode_hdr:
 /*
  * Decode, process and encode a COMPOUND
  */
-static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 {
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
@@ -916,7 +916,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(nops, rqstp, &xdr_in,
-				    argp, &xdr_out, resp, &cps);
+				    rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
+				    &cps);
 		nops++;
 	}
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12933d07204c..4b7f84fa1fa5 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -19,7 +19,7 @@
  * NULL call.
  */
 static __be32
-nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsacld_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -27,9 +27,10 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -87,10 +88,10 @@ fail:
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -141,9 +142,10 @@ out_errno:
 /*
  * Check file attributes
  */
-static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
-		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -158,9 +160,10 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 /*
  * Check file access
  */
-static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-		struct nfsd3_accessres *resp)
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
@@ -347,7 +350,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsacld_proc_##name,		\
+	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index db988a229b3a..5e42004035e0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -18,7 +18,7 @@
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -26,9 +26,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -80,10 +81,10 @@ fail:
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd3_attrstat *resp)
+static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -239,7 +240,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsd3_proc_##name,		\
+	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 24e035190770..4a2bae07cfbf 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -31,7 +31,7 @@ static int	nfs3_ftypes[] = {
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -40,9 +40,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * Get a file's attributes
  */
 static __be32
-nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					   struct nfsd3_attrstat *resp)
+nfsd3_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
@@ -63,9 +64,10 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * Set a file's attributes
  */
 static __be32
-nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
-					   struct nfsd3_attrstat  *resp)
+nfsd3_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd3_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
@@ -81,9 +83,10 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
  * Look up a path name component
  */
 static __be32
-nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_diropres  *resp)
+nfsd3_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
@@ -105,9 +108,10 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Check file access
  */
 static __be32
-nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-					  struct nfsd3_accessres *resp)
+nfsd3_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
@@ -124,9 +128,10 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
-					   struct nfsd3_readlinkres *resp)
+nfsd3_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
@@ -142,9 +147,10 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
  * Read a portion of a file.
  */
 static __be32
-nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
-				        struct nfsd3_readres  *resp)
+nfsd3_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readargs *argp = rqstp->rq_argp;
+	struct nfsd3_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 	unsigned long cnt = min(argp->count, max_blocksize);
@@ -179,9 +185,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
  * Write data to a file
  */
 static __be32
-nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
-					 struct nfsd3_writeres  *resp)
+nfsd3_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd3_writeargs *argp = rqstp->rq_argp;
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -206,9 +213,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
  * first reports about SunOS compatibility problems start to pour in...
  */
 static __be32
-nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					  struct nfsd3_diropres   *resp)
+nfsd3_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp, *newfhp = NULL;
 	struct iattr	*attr;
 	__be32		nfserr;
@@ -243,9 +251,10 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
  * Make directory. This operation is not idempotent.
  */
 static __be32
-nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					 struct nfsd3_diropres   *resp)
+nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
@@ -263,9 +272,10 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 }
 
 static __be32
-nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
-					   struct nfsd3_diropres    *resp)
+nfsd3_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
@@ -284,9 +294,10 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
  * Make socket/fifo/device.
  */
 static __be32
-nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
-					 struct nfsd3_diropres  *resp)
+nfsd3_proc_mknod(struct svc_rqst *rqstp)
 {
+	struct nfsd3_mknodargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int type;
 	dev_t	rdev = 0;
@@ -321,9 +332,10 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
  * Remove file/fifo/socket etc.
  */
 static __be32
-nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_attrstat  *resp)
+nfsd3_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
@@ -342,9 +354,10 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					 struct nfsd3_attrstat  *resp)
+nfsd3_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
@@ -359,9 +372,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 }
 
 static __be32
-nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
-					  struct nfsd3_renameres  *resp)
+nfsd3_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd3_renameargs *argp = rqstp->rq_argp;
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
@@ -381,9 +395,10 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 }
 
 static __be32
-nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
-					struct nfsd3_linkres  *resp)
+nfsd3_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd3_linkargs *argp = rqstp->rq_argp;
+	struct nfsd3_linkres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK(3)     %s ->\n",
@@ -404,9 +419,10 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					   struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32		nfserr;
 	int		count;
 
@@ -440,9 +456,10 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * For now, we choose to ignore the dircount parameter.
  */
 static __be32
-nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					       struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
@@ -507,9 +524,10 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * Get file system stats
  */
 static __be32
-nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsstatres *resp)
+nfsd3_proc_fsstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: FSSTAT(3)   %s\n",
@@ -524,9 +542,10 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get file system info
  */
 static __be32
-nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsinfores *resp)
+nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
@@ -567,9 +586,10 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get pathconf info for the specified file
  */
 static __be32
-nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
-					     struct nfsd3_pathconfres *resp)
+nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: PATHCONF(3) %s\n",
@@ -610,9 +630,10 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
  * Commit a file (range) to stable storage.
  */
 static __be32
-nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
-					   struct nfsd3_commitres  *resp)
+nfsd3_proc_commit(struct svc_rqst *rqstp)
 {
+	struct nfsd3_commitargs *argp = rqstp->rq_argp;
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
@@ -655,7 +676,7 @@ struct nfsd3_voidargs { int dummy; };
 
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_null,
+		.pc_func = nfsd3_proc_null,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
@@ -663,7 +684,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST,
 	},
 	[NFS3PROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_getattr,
+		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -673,7 +694,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFS3PROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_setattr,
+		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -683,7 +704,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_lookup,
+		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -693,7 +714,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+FH+pAT+pAT,
 	},
 	[NFS3PROC_ACCESS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_access,
+		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -703,7 +724,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1,
 	},
 	[NFS3PROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readlink,
+		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -713,7 +734,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
 	},
 	[NFS3PROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_read,
+		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -723,7 +744,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
 	},
 	[NFS3PROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_write,
+		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -733,7 +754,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+4,
 	},
 	[NFS3PROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_create,
+		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -743,7 +764,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mkdir,
+		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -753,7 +774,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_symlink,
+		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -763,7 +784,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKNOD] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mknod,
+		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -773,7 +794,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_remove,
+		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -783,7 +804,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rmdir,
+		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -793,7 +814,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rename,
+		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -803,7 +824,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+WC,
 	},
 	[NFS3PROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_link,
+		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -813,7 +834,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+WC,
 	},
 	[NFS3PROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdir,
+		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -822,7 +843,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_READDIRPLUS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
+		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -831,7 +852,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_FSSTAT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsstat,
+		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -840,7 +861,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+2*6+1,
 	},
 	[NFS3PROC_FSINFO] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
+		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -849,7 +870,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+12,
 	},
 	[NFS3PROC_PATHCONF] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_pathconf,
+		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -858,7 +879,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+6,
 	},
 	[NFS3PROC_COMMIT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_commit,
+		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0833686ccdd3..43b2e5ab4430 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1510,7 +1510,7 @@ out:
  * NULL call.
  */
 static __be32
-nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd4_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -1524,6 +1524,7 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+
 typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
 typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
 
@@ -1673,10 +1674,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp, void *arg, void *res)
+nfsd4_proc_compound(struct svc_rqst *rqstp)
 {
-	struct nfsd4_compoundargs *args = arg;
-	struct nfsd4_compoundres *resp = res;
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
 	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 03a7e9da4da0..448505b939db 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -17,7 +17,7 @@ typedef struct svc_buf	svc_buf;
 
 
 static __be32
-nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -39,9 +39,10 @@ nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					  struct nfsd_attrstat *resp)
+nfsd_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -56,9 +57,10 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
-					  struct nfsd_attrstat  *resp)
+nfsd_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct iattr *iap = &argp->attrs;
 	struct svc_fh *fhp;
 	__be32 nfserr;
@@ -122,9 +124,10 @@ done:
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 struct nfsd_diropres  *resp)
+nfsd_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP   %s %.*s\n",
@@ -142,9 +145,10 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
-					   struct nfsd_readlinkres *resp)
+nfsd_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
@@ -162,9 +166,10 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
-				       struct nfsd_readres  *resp)
+nfsd_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd_readargs *argp = rqstp->rq_argp;
+	struct nfsd_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
@@ -200,9 +205,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
-					struct nfsd_attrstat  *resp)
+nfsd_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd_writeargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -222,9 +228,10 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
  * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
  */
 static __be32
-nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					 struct nfsd_diropres   *resp)
+nfsd_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp = &argp->fh;
 	svc_fh		*newfhp = &resp->fh;
 	struct iattr	*attr = &argp->attrs;
@@ -377,9 +384,9 @@ done:
 }
 
 static __be32
-nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 void		       *resp)
+nfsd_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
@@ -392,9 +399,9 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 }
 
 static __be32
-nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
-				  	 void		        *resp)
+nfsd_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd_renameargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME   %s %.*s -> \n",
@@ -410,9 +417,9 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 }
 
 static __be32
-nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
-				void			    *resp)
+nfsd_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd_linkargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK     %s ->\n",
@@ -430,9 +437,9 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 }
 
 static __be32
-nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
-				          void			  *resp)
+nfsd_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_symlinkargs *argp = rqstp->rq_argp;
 	struct svc_fh	newfh;
 	__be32		nfserr;
 
@@ -460,9 +467,10 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					struct nfsd_diropres   *resp)
+nfsd_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -484,9 +492,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-				 	void		      *resp)
+nfsd_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -500,9 +508,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
-					  struct nfsd_readdirres  *resp)
+nfsd_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	int		count;
 	__be32		nfserr;
 	loff_t		offset;
@@ -540,9 +549,10 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
  * Get file system info
  */
 static __be32
-nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
-					  struct nfsd_statfsres *resp)
+nfsd_proc_statfs(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
@@ -565,7 +575,7 @@ struct nfsd_void { int dummy; };
 
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd_proc_null,
+		.pc_func = nfsd_proc_null,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
@@ -574,7 +584,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_getattr,
+		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -584,7 +594,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_setattr,
+		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -602,7 +612,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd_proc_lookup,
+		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -612,7 +622,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readlink,
+		.pc_func = nfsd_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
@@ -621,7 +631,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
 	},
 	[NFSPROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd_proc_read,
+		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -639,7 +649,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_write,
+		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -649,7 +659,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_create,
+		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -659,7 +669,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_remove,
+		.pc_func = nfsd_proc_remove,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -668,7 +678,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rename,
+		.pc_func = nfsd_proc_rename,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
@@ -677,7 +687,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_link,
+		.pc_func = nfsd_proc_link,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
@@ -686,7 +696,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_symlink,
+		.pc_func = nfsd_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
@@ -695,7 +705,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_mkdir,
+		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -705,7 +715,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rmdir,
+		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -714,7 +724,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readdir,
+		.pc_func = nfsd_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
@@ -722,7 +732,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFSPROC_STATFS] = {
-		.pc_func = (svc_procfunc) nfsd_proc_statfs,
+		.pc_func = nfsd_proc_statfs,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59979f0bbd4b..d64895fd8d25 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -827,7 +827,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
 	/* Now call the procedure handler, and encode NFS status. */
-	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	nfserr = proc->pc_func(rqstp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
 	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 		dprintk("nfsd: Dropping request; may be revisited later\n");
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 11cef5a7bc87..3b58c55614c7 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -419,9 +419,9 @@ struct svc_version {
 /*
  * RPC procedure info
  */
-typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
 struct svc_procedure {
-	svc_procfunc		pc_func;	/* process the request */
+	/* process the request: */
+	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	kxdrproc_t		pc_release;	/* XDR free result */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc0f5a0ecbdc..95335455ad38 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1281,7 +1281,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
 			goto err_garbage;
 
-		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+		*statp = procp->pc_func(rqstp);
 
 		/* Encode reply */
 		if (*statp == rpc_drop_reply ||
-- 
cgit v1.2.3


From 1150ded804c2be6d02efef3e39e39e570c9cea21 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 18:48:24 +0200
Subject: sunrpc: properly type pc_release callbacks

Drop the p and resp arguments as they are always NULL or can trivially
be derived from the rqstp argument.  With that all functions now have the
same prototype, and we can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs2acl.c          | 22 +++++++++++-----------
 fs/nfsd/nfs3acl.c          |  8 ++++----
 fs/nfsd/nfs3proc.c         | 36 ++++++++++++++++++------------------
 fs/nfsd/nfs3xdr.c          | 16 ++++++++--------
 fs/nfsd/nfs4xdr.c          |  4 +---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfsxdr.c           |  8 ++++----
 fs/nfsd/xdr.h              |  2 +-
 fs/nfsd/xdr3.h             |  6 ++----
 fs/nfsd/xdr4.h             |  2 +-
 include/linux/sunrpc/svc.h |  3 ++-
 net/sunrpc/svc.c           |  8 ++++----
 12 files changed, 63 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4b7f84fa1fa5..302441027f50 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -318,27 +318,27 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
-static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
-               struct nfsd3_accessres *resp)
+static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 {
-       fh_put(&resp->fh);
-       return 1;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+	fh_put(&resp->fh);
 }
 
 #define nfsaclsvc_decode_voidargs	NULL
@@ -353,7 +353,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
+	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e42004035e0..56cdff4e954c 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -223,13 +223,13 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
 #define nfs3svc_decode_voidargs		NULL
@@ -243,7 +243,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
+	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 4a2bae07cfbf..f0cccc0768ce 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -687,7 +687,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
@@ -727,7 +727,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -737,7 +737,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -747,7 +747,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -757,7 +757,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -767,7 +767,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -777,7 +777,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -787,7 +787,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -797,7 +797,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -807,7 +807,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -817,7 +817,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -827,7 +827,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -837,7 +837,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -846,7 +846,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -882,7 +882,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 452334694a5d..c3d3ef28347c 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1103,19 +1103,19 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-int
-nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+void
+nfs3svc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-int
-nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fhandle_pair *resp)
+void
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp)
 {
+	struct nfsd3_fhandle_pair *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh1);
 	fh_put(&resp->fh2);
-	return 1;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 26780d53a6f9..5aa847bdfc63 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4543,9 +4543,8 @@ nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
         return xdr_ressize_check(rqstp, p);
 }
 
-int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
+void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 {
-	struct svc_rqst *rqstp = rq;
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	if (args->ops != args->iops) {
@@ -4559,7 +4558,6 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
 		args->to_free = tb->next;
 		kfree(tb);
 	}
-	return 1;
 }
 
 int
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 448505b939db..dc32e0f8480d 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -587,7 +587,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
@@ -597,7 +597,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -615,7 +615,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -634,7 +634,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -652,7 +652,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -662,7 +662,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -708,7 +708,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index de07ff625777..2facfc6ac8f3 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -543,10 +543,10 @@ nfssvc_encode_entry(void *ccdv, const char *name,
 /*
  * XDR release functions
  */
-int
-nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_fhandle *resp)
+void
+nfssvc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 4f0481d63804..2c21fa843fbf 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -164,7 +164,7 @@ int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
 
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+void nfssvc_release_fhandle(struct svc_rqst *);
 
 /* Helper functions for NFSv2 ACL code */
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 335e04aaf7db..23fe456a223b 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -330,10 +330,8 @@ int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
 int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
 				struct nfsd3_commitres *);
 
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
+void nfs3svc_release_fhandle(struct svc_rqst *);
+void nfs3svc_release_fhandle2(struct svc_rqst *);
 int nfs3svc_encode_entry(void *, const char *name,
 				int namlen, loff_t offset, u64 ino,
 				unsigned int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 8fda4abdf3b1..a158579d55a2 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -743,7 +743,7 @@ extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *,
 		struct nfsd4_release_lockowner *rlockowner);
-extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp);
+extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
 extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b58c55614c7..c73194e9c2bd 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -424,7 +424,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
-	kxdrproc_t		pc_release;	/* XDR free result */
+	/* XDR free result: */
+	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
 	unsigned int		pc_count;	/* call count */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 95335455ad38..4611cb7adc04 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1287,12 +1287,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (*statp == rpc_drop_reply ||
 		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 		if (*statp == rpc_autherr_badcred) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
 		if (*statp == rpc_success &&
@@ -1307,7 +1307,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (!versp->vs_dispatch(rqstp, statp)) {
 			/* Release reply info */
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 	}
@@ -1318,7 +1318,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Release reply info */
 	if (procp->pc_release)
-		procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+		procp->pc_release(rqstp);
 
 	if (procp->pc_encode == NULL)
 		goto dropit;
-- 
cgit v1.2.3


From cc6acc20a606f9ca65a6338af8204b7ae9e67b1a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:01:48 +0200
Subject: sunrpc: properly type pc_decode callbacks

Drop the argp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 29 ++++++++++++------
 fs/lockd/xdr4.c            | 29 ++++++++++++------
 fs/nfs/callback_xdr.c      |  4 +--
 fs/nfsd/nfs2acl.c          | 21 +++++++------
 fs/nfsd/nfs3acl.c          | 11 +++----
 fs/nfsd/nfs3proc.c         | 42 +++++++++++++-------------
 fs/nfsd/nfs3xdr.c          | 74 +++++++++++++++++++++++++++-------------------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfs4xdr.c          |  4 ++-
 fs/nfsd/nfsproc.c          | 36 +++++++++++-----------
 fs/nfsd/nfssvc.c           |  5 ++--
 fs/nfsd/nfsxdr.c           | 53 ++++++++++++++++++++-------------
 fs/nfsd/xdr.h              | 34 ++++++++-------------
 fs/nfsd/xdr3.h             | 47 ++++++++++-------------------
 fs/nfsd/xdr4.h             |  3 +-
 include/linux/lockd/xdr.h  | 18 +++++------
 include/linux/lockd/xdr4.h | 18 +++++------
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  9 ++++--
 21 files changed, 237 insertions(+), 209 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 3e4cba029d3d..804744f7528c 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -496,7 +496,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
-   .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
+   .pc_decode	= nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3add50661fab..204a698f7d41 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -538,7 +538,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
-   .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
+   .pc_decode	= nlmsvc_decode_##xargt,		\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5b651daad518..b57af63fba56 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -182,8 +182,9 @@ nlm_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -207,8 +208,9 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -227,8 +229,9 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -243,8 +246,10 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm_decode_lock(p, &argp->lock)))
 		return 0;
@@ -253,8 +258,9 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -293,8 +299,9 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -305,8 +312,10 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -316,8 +325,10 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -325,7 +336,7 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index dfa4789cd460..46e18598a15c 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -179,8 +179,9 @@ nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -204,8 +205,9 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -224,8 +226,9 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -240,8 +243,10 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm4_decode_lock(p, &argp->lock)))
 		return 0;
@@ -250,8 +255,9 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -290,8 +296,9 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -302,8 +309,10 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -313,8 +322,10 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -322,7 +333,7 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 5a14bdaa5986..23ecbf7a40c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -58,7 +58,7 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
@@ -998,7 +998,7 @@ static struct callback_op callback_ops[] = {
 static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
-		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_decode = nfs4_decode_void,
 		.pc_encode = (kxdrproc_t)nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 302441027f50..bcfdaa83ee6c 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -182,9 +182,10 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 /*
  * XDR decode functions
  */
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *argp)
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -194,9 +195,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *argp)
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -220,18 +221,20 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_fhandle *argp)
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessargs *argp)
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -351,7 +354,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 56cdff4e954c..4e68d6b5f409 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -124,9 +124,10 @@ out:
 /*
  * XDR decode functions
  */
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *args)
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
 	p = nfs3svc_decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -136,9 +137,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *args)
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -241,7 +242,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+	.pc_decode	= nfs3svc_decode_##argt##args,			\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f0cccc0768ce..ed83e8a9e7b4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -685,7 +685,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -695,7 +695,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
+		.pc_decode = nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
@@ -705,7 +705,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -715,7 +715,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
+		.pc_decode = nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
+		.pc_decode = nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
@@ -735,7 +735,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
+		.pc_decode = nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
@@ -745,7 +745,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
+		.pc_decode = nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
@@ -755,7 +755,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
+		.pc_decode = nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
@@ -765,7 +765,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
+		.pc_decode = nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
@@ -775,7 +775,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
+		.pc_decode = nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
@@ -785,7 +785,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
+		.pc_decode = nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
@@ -795,7 +795,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -805,7 +805,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -815,7 +815,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
+		.pc_decode = nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
@@ -825,7 +825,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
+		.pc_decode = nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
@@ -835,7 +835,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
+		.pc_decode = nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
@@ -844,7 +844,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
+		.pc_decode = nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
@@ -853,7 +853,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
@@ -862,7 +862,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
@@ -871,7 +871,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
@@ -880,7 +880,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
+		.pc_decode = nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c3d3ef28347c..c56089ac2819 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -273,8 +273,10 @@ void fill_post_wcc(struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -282,9 +284,10 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *a
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_sattrargs *args)
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -300,9 +303,10 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropargs *args)
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -311,9 +315,10 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessargs *args)
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -323,9 +328,9 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readargs *args)
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
@@ -353,9 +358,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeargs *args)
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	unsigned int len, v, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 	struct kvec *head = rqstp->rq_arg.head;
@@ -413,9 +418,10 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -435,10 +441,12 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 
 	return xdr_argsize_check(rqstp, p);
 }
+
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh)) ||
 	    !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -448,9 +456,9 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_symlinkargs *args)
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
 	unsigned int len, avail;
 	char *old, *new;
 	struct kvec *vec;
@@ -500,9 +508,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_mknodargs *args)
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -522,9 +531,10 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameargs *args)
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -535,9 +545,10 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkargs *args)
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -547,9 +558,10 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkargs *args)
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -559,9 +571,9 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -576,9 +588,9 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
@@ -602,9 +614,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitargs *args)
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 43b2e5ab4430..19c7d4b989c4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2527,7 +2527,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
-		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
+		.pc_decode = nfs4svc_decode_compoundargs,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5aa847bdfc63..3a7e117bd11e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4561,8 +4561,10 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
 	if (rqstp->rq_arg.head[0].iov_len % 4) {
 		/* client is nuts */
 		dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index dc32e0f8480d..d351d0ef6d34 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -576,7 +576,7 @@ struct nfsd_void { int dummy; };
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -585,7 +585,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
@@ -595,7 +595,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
+		.pc_decode = nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
@@ -604,7 +604,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_ROOT] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -613,7 +613,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -623,7 +623,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
+		.pc_decode = nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
@@ -632,7 +632,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
+		.pc_decode = nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
@@ -641,7 +641,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
 	},
 	[NFSPROC_WRITECACHE] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -650,7 +650,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
+		.pc_decode = nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
@@ -660,7 +660,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -670,7 +670,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -679,7 +679,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
+		.pc_decode = nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -688,7 +688,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
+		.pc_decode = nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
+		.pc_decode = nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
+		.pc_decode = nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
@@ -733,7 +733,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d64895fd8d25..3e00499d7ad7 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -801,9 +801,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
 	/* Decode arguments */
-	xdr = proc->pc_decode;
-	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
-			rqstp->rq_argp)) {
+	if (proc->pc_decode &&
+	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
 		dprintk("nfsd: failed to decode arguments!\n");
 		*statp = rpc_garbage_args;
 		return 1;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 2facfc6ac8f3..19cf04ebf388 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -206,14 +206,16 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
  * XDR decode functions
  */
 int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -221,9 +223,10 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *ar
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_sattrargs *args)
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -233,9 +236,10 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropargs *args)
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -244,9 +248,9 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readargs *args)
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	p = decode_fh(p, &args->fh);
@@ -276,9 +280,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_writeargs *args)
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_writeargs *args = rqstp->rq_argp;
 	unsigned int len, hdr, dlen;
 	struct kvec *head = rqstp->rq_arg.head;
 	int v;
@@ -332,9 +336,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_createargs *args)
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_createargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->fh))
 	    || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -344,9 +349,10 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_renameargs *args)
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -357,8 +363,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -368,9 +376,10 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_linkargs *args)
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -380,9 +389,10 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_symlinkargs *args)
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_symlinkargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->ffh))
 	    || !(p = decode_filename(p, &args->fname, &args->flen))
 	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
@@ -393,9 +403,10 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirargs *args)
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 2c21fa843fbf..8eeb752cf6f8 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -131,28 +131,18 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
+int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
 int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
 int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 23fe456a223b..f79be4c42e4a 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -269,37 +269,22 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
 int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
 				struct nfsd3_attrstat *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a158579d55a2..2a53c1233884 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -683,8 +683,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
 int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
 		struct nfsd4_compoundres *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d39ed1cc5fbf..0416600844ce 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -95,19 +95,19 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index e58c88b52ce1..951bbe31fdb8 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -23,19 +23,19 @@
 
 
-int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index c73194e9c2bd..d8703a5ab81e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -422,7 +422,8 @@ struct svc_version {
 struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
-	kxdrproc_t		pc_decode;	/* XDR decode args */
+	/* XDR decode args: */
+	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4611cb7adc04..18024c1b9b7b 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1276,9 +1276,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Call the function that processes the request. */
 	if (!versp->vs_dispatch) {
-		/* Decode arguments */
-		xdr = procp->pc_decode;
-		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
+		/*
+		 * Decode arguments
+		 * XXX: why do we ignore the return value?
+		 */
+		if (procp->pc_decode &&
+		    !procp->pc_decode(rqstp, argv->iov_base))
 			goto err_garbage;
 
 		*statp = procp->pc_func(rqstp);
-- 
cgit v1.2.3


From d16d1867215663907f3212590d1a9d32398a0f47 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:42:02 +0200
Subject: sunrpc: properly type pc_encode callbacks

Drop the resp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 14 ++++++---
 fs/lockd/xdr4.c            | 14 ++++++---
 fs/nfs/callback_xdr.c      |  6 ++--
 fs/nfsd/nfs2acl.c          | 18 ++++++-----
 fs/nfsd/nfs3acl.c          | 11 +++----
 fs/nfsd/nfs3proc.c         | 44 +++++++++++++--------------
 fs/nfsd/nfs3xdr.c          | 74 +++++++++++++++++++++++++++-------------------
 fs/nfsd/nfs4proc.c         |  4 +--
 fs/nfsd/nfs4xdr.c          |  5 ++--
 fs/nfsd/nfsproc.c          | 36 +++++++++++-----------
 fs/nfsd/nfssvc.c           |  5 +---
 fs/nfsd/nfsxdr.c           | 31 +++++++++++--------
 fs/nfsd/xdr.h              | 14 ++++-----
 fs/nfsd/xdr3.h             | 45 ++++++++++------------------
 fs/nfsd/xdr4.h             |  5 ++--
 include/linux/lockd/xdr.h  |  8 ++---
 include/linux/lockd/xdr4.h |  8 ++---
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  6 ++--
 21 files changed, 185 insertions(+), 170 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 804744f7528c..fed016155791 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -497,7 +497,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= nlm4svc_decode_##xargt,	\
-   .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
+   .pc_encode	= nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 204a698f7d41..14648b051eba 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -539,7 +539,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= nlmsvc_decode_##xargt,		\
-   .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
+   .pc_encode	= nlmsvc_encode_##xrest,		\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b57af63fba56..442bbd0b0b29 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -200,8 +200,10 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -280,8 +282,10 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -290,8 +294,10 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -342,7 +348,7 @@ nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 46e18598a15c..2a0cd5679c49 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -197,8 +197,10 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -277,8 +279,10 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -287,8 +291,10 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -339,7 +345,7 @@ nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23ecbf7a40c1..acf75dc63e14 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -63,7 +63,7 @@ static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -999,12 +999,12 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
 	[CB_COMPOUND] = {
 		.pc_func = nfs4_callback_compound,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_argsize = 256,
 		.pc_ressize = 256,
 		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index bcfdaa83ee6c..fc6b179c8fff 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -251,15 +251,15 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
  * There must be an encoding function for void results so svc_process
  * will work properly.
  */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
 	struct kvec *head = rqstp->rq_res.head;
@@ -302,17 +302,19 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessres *resp)
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->access);
 	return xdr_ressize_check(rqstp, p);
@@ -355,7 +357,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
-	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+	.pc_encode	= nfsaclsvc_encode_##rest##res,			\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 4e68d6b5f409..9437b758cbfd 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -168,9 +168,9 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
@@ -213,9 +213,10 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_attrstat *resp)
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
 
 	return xdr_ressize_check(rqstp, p);
@@ -243,7 +244,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= nfs3svc_decode_##argt##args,			\
-	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+	.pc_encode	= nfs3svc_encode_##rest##res,			\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index ed83e8a9e7b4..17c90c41a3a6 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -677,7 +677,7 @@ struct nfsd3_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+		.pc_encode = nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -686,7 +686,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
+		.pc_encode = nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
@@ -696,7 +696,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = nfs3svc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
+		.pc_encode = nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = nfs3svc_decode_accessargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
+		.pc_encode = nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = nfs3svc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
+		.pc_encode = nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
@@ -736,7 +736,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = nfs3svc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
+		.pc_encode = nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
@@ -746,7 +746,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = nfs3svc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
+		.pc_encode = nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
@@ -756,7 +756,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = nfs3svc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -766,7 +766,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = nfs3svc_decode_mkdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -776,7 +776,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = nfs3svc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -786,7 +786,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = nfs3svc_decode_mknodargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -796,7 +796,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -806,7 +806,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -816,7 +816,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = nfs3svc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
+		.pc_encode = nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
@@ -826,7 +826,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = nfs3svc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
+		.pc_encode = nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
@@ -836,7 +836,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = nfs3svc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -845,7 +845,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = nfs3svc_decode_readdirplusargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -854,7 +854,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+		.pc_encode = nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -863,7 +863,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+		.pc_encode = nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
 		.pc_cachetype = RC_NOCACHE,
@@ -872,7 +872,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+		.pc_encode = nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
 		.pc_cachetype = RC_NOCACHE,
@@ -881,7 +881,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = nfs3svc_decode_commitargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
+		.pc_encode = nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c56089ac2819..b8838d3023ff 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -634,16 +634,17 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
  * will work properly.
  */
 int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETATTR */
 int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		lease_get_mtime(d_inode(resp->fh.fh_dentry),
 				&resp->stat.mtime);
@@ -654,18 +655,20 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* LOOKUP */
 int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		p = encode_fh(p, &resp->fh);
 		p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -676,9 +679,10 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessres *resp)
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0)
 		*p++ = htonl(resp->access);
@@ -687,9 +691,10 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkres *resp)
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->len);
@@ -708,9 +713,10 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readres *resp)
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
@@ -732,9 +738,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeres *resp)
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -749,9 +755,10 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		*p++ = xdr_one;
 		p = encode_fh(p, &resp->fh);
@@ -763,9 +770,10 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameres *resp)
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->ffh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -773,9 +781,10 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkres *resp)
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -783,9 +792,10 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirres *resp)
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 
 	if (resp->status == 0) {
@@ -1033,9 +1043,9 @@ nfs3svc_encode_entry_plus(void *cd, const char *name,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsstatres *resp)
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	struct kstatfs	*s = &resp->stats;
 	u64		bs = s->f_bsize;
 
@@ -1055,9 +1065,10 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsinfores *resp)
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1079,9 +1090,10 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_pathconfres *resp)
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1098,9 +1110,9 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitres *resp)
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 19c7d4b989c4..726d28376f7b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2519,7 +2519,7 @@ struct nfsd4_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+		.pc_encode = nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -2528,7 +2528,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
 		.pc_decode = nfs4svc_decode_compoundargs,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+		.pc_encode = nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
 		.pc_release = nfsd4_release_compoundargs,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3a7e117bd11e..54e212e3541e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4538,7 +4538,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 }
 
 int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
         return xdr_ressize_check(rqstp, p);
 }
@@ -4584,11 +4584,12 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 {
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_buf *buf = resp->xdr.buf;
 
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index d351d0ef6d34..0ef88d0e67d9 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -577,7 +577,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -586,7 +586,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -596,7 +596,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = nfssvc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -605,7 +605,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_ROOT] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -614,7 +614,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -624,7 +624,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
 		.pc_decode = nfssvc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
+		.pc_encode = nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -633,7 +633,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = nfssvc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
+		.pc_encode = nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
@@ -642,7 +642,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITECACHE] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -651,7 +651,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = nfssvc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -661,7 +661,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -671,7 +671,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -680,7 +680,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
 		.pc_decode = nfssvc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -689,7 +689,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
 		.pc_decode = nfssvc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -698,7 +698,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
 		.pc_decode = nfssvc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
 		.pc_decode = nfssvc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
+		.pc_encode = nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -734,7 +734,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
+		.pc_encode = nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3e00499d7ad7..555233664124 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -782,7 +782,6 @@ int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	struct svc_procedure	*proc;
-	kxdrproc_t		xdr;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
@@ -841,9 +840,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * For NFSv2, additional info is never returned in case of an error.
 	 */
 	if (!(nfserr && rqstp->rq_vers == 2)) {
-		xdr = proc->pc_encode;
-		if (xdr && !xdr(rqstp, nfserrp,
-				rqstp->rq_resp)) {
+		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
 			/* Failed to encode result. Release cache entry */
 			dprintk("nfsd: failed to encode result!\n");
 			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 19cf04ebf388..e4da2717982d 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -422,32 +422,35 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
  * XDR encode functions
  */
 int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_attrstat *resp)
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropres *resp)
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropres *resp = rqstp->rq_resp;
+
 	p = encode_fh(p, &resp->fh);
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readlinkres *resp)
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
+
 	*p++ = htonl(resp->len);
 	xdr_ressize_check(rqstp, p);
 	rqstp->rq_res.page_len = resp->len;
@@ -461,9 +464,10 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readres *resp)
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readres *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
@@ -480,9 +484,10 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirres *resp)
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
+
 	xdr_ressize_check(rqstp, p);
 	p = resp->buffer;
 	*p++ = 0;			/* no more entries */
@@ -493,9 +498,9 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_statfsres *resp)
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
 
 	*p++ = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 8eeb752cf6f8..457ce45e5084 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -143,13 +143,13 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
 
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index f79be4c42e4a..80d7da620e91 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -285,35 +285,22 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2a53c1233884..eb7f9239304f 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -682,10 +682,9 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
 int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 0416600844ce..7acbecc21a40 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -96,16 +96,16 @@ struct nlm_reboot {
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
 int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 951bbe31fdb8..bf1645609225 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -24,16 +24,16 @@
 
 
 int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d8703a5ab81e..bd9e313c444a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -424,7 +424,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
 	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
-	kxdrproc_t		pc_encode;	/* XDR encode result */
+	/* XDR encode result: */
+	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 18024c1b9b7b..aa643a29fdc6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1154,7 +1154,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	struct svc_version	*versp = NULL;	/* compiler food */
 	struct svc_procedure	*procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
-	kxdrproc_t		xdr;
 	__be32			*statp;
 	u32			prog, vers, proc;
 	__be32			auth_stat, rpc_stat;
@@ -1298,9 +1297,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
-		if (*statp == rpc_success &&
-		    (xdr = procp->pc_encode) &&
-		    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+		if (*statp == rpc_success && procp->pc_encode &&
+		    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
 			dprintk("svc: failed to encode reply\n");
 			/* serv->sv_stats->rpcsystemerr++; */
 			*statp = rpc_system_err;
-- 
cgit v1.2.3


From 408b3d46ae06e1d219f31cbe629789a5e5c862aa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:56:10 +0200
Subject: sunrpc: remove kxdrproc_t

Remove the now unused typedef.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/sunrpc/xdr.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index ed0fbf0d8d0f..261b48a2701d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -34,13 +34,6 @@ struct xdr_netobj {
 	u8 *			data;
 };
 
-/*
- * This is the legacy generic XDR function. rqstp is either a rpc_rqst
- * (client side) or svc_rqst pointer (server side).
- * Encode functions always assume there's enough room in the buffer.
- */
-typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
-
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
-- 
cgit v1.2.3


From 0becc1181cdba562730be4d4b8a5fcb4368ef527 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:40:27 +0200
Subject: sunrpc: move pc_count out of struct svc_procinfo

pc_count is the only writeable memeber of struct svc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct svc_procinfo, and into a
separate writable array that is pointed to by struct svc_version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc.c             |  6 ++++++
 fs/nfs/callback_xdr.c      |  4 ++++
 fs/nfsd/nfs2acl.c          |  2 ++
 fs/nfsd/nfs3acl.c          |  2 ++
 fs/nfsd/nfs3proc.c         |  2 ++
 fs/nfsd/nfs4proc.c         |  2 ++
 fs/nfsd/nfsproc.c          |  2 ++
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/stats.c         | 11 ++++++-----
 net/sunrpc/svc.c           |  2 +-
 10 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5d481e8a1b5d..cc6abe6280bc 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -739,23 +739,29 @@ module_exit(exit_nlm);
 /*
  * Define NLM program and procedures
  */
+static unsigned int nlmsvc_version1_count[17];
 static struct svc_version	nlmsvc_version1 = {
 		.vs_vers	= 1,
 		.vs_nproc	= 17,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version1_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
+static unsigned int nlmsvc_version3_count[24];
 static struct svc_version	nlmsvc_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version3_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
+static unsigned int nlmsvc_version4_count[24];
 static struct svc_version	nlmsvc_version4 = {
 		.vs_vers	= 4,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures4,
+		.vs_count	= nlmsvc_version4_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index acf75dc63e14..ecd46b8c0985 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1011,20 +1011,24 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	}
 };
 
+static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
 
+static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fc6b179c8fff..026edfe73fd5 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -378,10 +378,12 @@ static struct svc_procedure		nfsd_acl_procedures2[] = {
   PROC(access,	access,		access,		access,   RC_NOCACHE, ST+AT+1),
 };
 
+static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
 struct svc_version	nfsd_acl_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 5,
 		.vs_proc	= nfsd_acl_procedures2,
+		.vs_count	= nfsd_acl_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9437b758cbfd..73c0970ccefb 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -263,10 +263,12 @@ static struct svc_procedure		nfsd_acl_procedures3[] = {
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
 };
 
+static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
 struct svc_version	nfsd_acl_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 3,
 		.vs_proc	= nfsd_acl_procedures3,
+		.vs_count	= nfsd_acl_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 17c90c41a3a6..b5823802e278 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -890,10 +890,12 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
 struct svc_version	nfsd_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 22,
 		.vs_proc	= nfsd_procedures3,
+		.vs_count	= nfsd_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 403c1d14d99a..4de6a9950722 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2553,10 +2553,12 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
 struct svc_version	nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
+	.vs_count		= nfsd_count3,
 	.vs_dispatch		= nfsd_dispatch,
 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
 	.vs_rpcb_optnl		= true,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0ef88d0e67d9..44b157553733 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -743,10 +743,12 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 };
 
 
+static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
 struct svc_version	nfsd_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 18,
 		.vs_proc	= nfsd_procedures2,
+		.vs_count	= nfsd_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index bd9e313c444a..bcd114f038ef 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -398,6 +398,7 @@ struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
 	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
 	/* Don't register with rpcbind */
@@ -430,7 +431,6 @@ struct svc_procedure {
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
-	unsigned int		pc_count;	/* call count */
 	unsigned int		pc_cachetype;	/* cache info (NFS) */
 	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
 };
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 8b6c35ae1d57..1e671333c3d5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -77,9 +77,9 @@ static const struct file_operations rpc_proc_fops = {
 /*
  * Get RPC server stats
  */
-void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
+void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
+{
 	const struct svc_program *prog = statp->program;
-	const struct svc_procedure *proc;
 	const struct svc_version *vers;
 	unsigned int i, j;
 
@@ -98,11 +98,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
 			statp->rpcbadclnt);
 
 	for (i = 0; i < prog->pg_nvers; i++) {
-		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+		vers = prog->pg_vers[i];
+		if (!vers)
 			continue;
 		seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
-		for (j = 0; j < vers->vs_nproc; j++, proc++)
-			seq_printf(seq, " %u", proc->pc_count);
+		for (j = 0; j < vers->vs_nproc; j++)
+			seq_printf(seq, " %u", vers->vs_count[j]);
 		seq_putc(seq, '\n');
 	}
 }
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa643a29fdc6..6452592194ac 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1261,7 +1261,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	svc_putnl(resv, RPC_SUCCESS);
 
 	/* Bump per-procedure stats counter */
-	procp->pc_count++;
+	versp->vs_count[proc]++;
 
 	/* Initialize storage for argp and resp */
 	memset(rqstp->rq_argp, 0, procp->pc_argsize);
-- 
cgit v1.2.3


From b9c744c19c441f306239ac3e60a2a95b40d698f8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:11:49 +0200
Subject: sunrpc: mark all struct svc_procinfo instances as const

struct svc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c         | 2 +-
 fs/lockd/svcproc.c          | 2 +-
 fs/nfs/callback_xdr.c       | 2 +-
 fs/nfsd/nfs2acl.c           | 2 +-
 fs/nfsd/nfs3acl.c           | 2 +-
 fs/nfsd/nfs3proc.c          | 2 +-
 fs/nfsd/nfs4proc.c          | 2 +-
 fs/nfsd/nfsproc.c           | 2 +-
 fs/nfsd/nfssvc.c            | 4 ++--
 include/linux/lockd/lockd.h | 4 ++--
 include/linux/sunrpc/svc.h  | 4 ++--
 net/sunrpc/svc.c            | 2 +-
 12 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index fed016155791..82925f17ec45 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -507,7 +507,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)				/* netobj */
 #define	St	1					/* status */
 #define	Rg	4					/* range (offset + length) */
-struct svc_procedure		nlmsvc_procedures4[] = {
+const struct svc_procedure nlmsvc_procedures4[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 14648b051eba..07915162581d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -551,7 +551,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)			/* Net Obj */
 #define	Rg	2				/* range - offset + size */
 
-struct svc_procedure		nlmsvc_procedures[] = {
+const struct svc_procedure nlmsvc_procedures[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ecd46b8c0985..ae249f27297f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -995,7 +995,7 @@ static struct callback_op callback_ops[] = {
 /*
  * Define NFS4 callback procedures
  */
-static struct svc_procedure nfs4_callback_procedures1[] = {
+static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 026edfe73fd5..c3f6b8a6b659 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -370,7 +370,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures2[] = {
+static const struct svc_procedure nfsd_acl_procedures2[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		attrstat,	attrstat, RC_NOCACHE, ST+AT),
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 73c0970ccefb..1a482ac9d4e9 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -257,7 +257,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures3[] = {
+static const struct svc_procedure nfsd_acl_procedures3[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b5823802e278..96e0e6a2af51 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -674,7 +674,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define WC (7+pAT)	/* WCC attributes */
 
-static struct svc_procedure		nfsd_procedures3[22] = {
+static const struct svc_procedure nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
 		.pc_encode = nfs3svc_encode_voidres,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4de6a9950722..68aa175b1cb3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2532,7 +2532,7 @@ static const char *nfsd4_op_name(unsigned opnum)
 #define nfsd4_voidres			nfsd4_voidargs
 struct nfsd4_voidargs { int dummy; };
 
-static struct svc_procedure		nfsd_procedures4[2] = {
+static const struct svc_procedure nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
 		.pc_encode = nfs4svc_encode_voidres,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 44b157553733..a68b686fda12 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -573,7 +573,7 @@ struct nfsd_void { int dummy; };
 #define FH 8		/* filehandle */
 #define	AT 18		/* attributes */
 
-static struct svc_procedure		nfsd_procedures2[18] = {
+static const struct svc_procedure nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 555233664124..379b310c445d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -756,7 +756,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
  * problem, we enforce these assumptions here:
  */
 static bool nfs_request_too_big(struct svc_rqst *rqstp,
-				struct svc_procedure *proc)
+				const struct svc_procedure *proc)
 {
 	/*
 	 * The ACL code has more careful bounds-checking and is not
@@ -781,7 +781,7 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
 int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
-	struct svc_procedure	*proc;
+	const struct svc_procedure *proc;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 41f7b6a04d69..3eca67728366 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -192,9 +192,9 @@ struct nlm_block {
  * Global variables
  */
 extern const struct rpc_program	nlm_program;
-extern struct svc_procedure	nlmsvc_procedures[];
+extern const struct svc_procedure nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
-extern struct svc_procedure	nlmsvc_procedures4[];
+extern const struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index bcd114f038ef..992ea3419795 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -237,7 +237,7 @@ struct svc_rqst {
 
 	struct svc_serv *	rq_server;	/* RPC service definition */
 	struct svc_pool *	rq_pool;	/* thread pool */
-	struct svc_procedure *	rq_procinfo;	/* procedure info */
+	const struct svc_procedure *rq_procinfo;/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
@@ -397,7 +397,7 @@ struct svc_program {
 struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
-	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	const struct svc_procedure *vs_proc;	/* per-procedure info */
 	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6452592194ac..049963d676a7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1152,7 +1152,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
 	struct svc_version	*versp = NULL;	/* compiler food */
-	struct svc_procedure	*procp = NULL;
+	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
 	u32			prog, vers, proc;
-- 
cgit v1.2.3


From aa8217d5dcb1db594d816794ef6ab434ebf3e127 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:21:37 +0200
Subject: sunrpc: mark all struct svc_version instances as const

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc.c             | 38 +++++++++++++++++++-------------------
 fs/nfs/callback.c          |  2 +-
 fs/nfs/callback_xdr.c      |  4 ++--
 fs/nfs/internal.h          |  4 ++--
 fs/nfs/nfs4_fs.h           |  4 ++--
 fs/nfsd/nfs2acl.c          | 14 +++++++-------
 fs/nfsd/nfs3acl.c          | 14 +++++++-------
 fs/nfsd/nfs3proc.c         | 14 +++++++-------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfsd.h             |  6 +++---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfssvc.c           |  8 ++++----
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc.c           |  4 ++--
 14 files changed, 65 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index cc6abe6280bc..726b6cecf430 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -740,32 +740,32 @@ module_exit(exit_nlm);
  * Define NLM program and procedures
  */
 static unsigned int nlmsvc_version1_count[17];
-static struct svc_version	nlmsvc_version1 = {
-		.vs_vers	= 1,
-		.vs_nproc	= 17,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version1_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version1 = {
+	.vs_vers	= 1,
+	.vs_nproc	= 17,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version1_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 static unsigned int nlmsvc_version3_count[24];
-static struct svc_version	nlmsvc_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version3_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version3_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
 static unsigned int nlmsvc_version4_count[24];
-static struct svc_version	nlmsvc_version4 = {
-		.vs_vers	= 4,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures4,
-		.vs_count	= nlmsvc_version4_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version4 = {
+	.vs_vers	= 4,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures4,
+	.vs_count	= nlmsvc_version4_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
-static struct svc_version *	nlmsvc_version[] = {
+static const struct svc_version *nlmsvc_version[] = {
 	[1] = &nlmsvc_version1,
 	[3] = &nlmsvc_version3,
 #ifdef CONFIG_LOCKD_V4
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 73a1f928226c..34323877ec13 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -439,7 +439,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 /*
  * Define NFS4 callback program
  */
-static struct svc_version *nfs4_callback_version[] = {
+static const struct svc_version *nfs4_callback_version[] = {
 	[1] = &nfs4_callback_version1,
 	[4] = &nfs4_callback_version4,
 };
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ae249f27297f..01a430e51daa 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1012,7 +1012,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
 };
 
 static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version1 = {
+const struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
@@ -1024,7 +1024,7 @@ struct svc_version nfs4_callback_version1 = {
 };
 
 static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version4 = {
+const struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dc2a29a7d48b..1c0ce9c15e94 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -225,8 +225,8 @@ static inline void nfs_fs_proc_exit(void)
 #endif
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 struct nfs_pageio_descriptor;
 /* pagelist.c */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b0cf3872722..40bd05f05e74 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -498,8 +498,8 @@ extern const struct rpc_procinfo nfs4_procedures[];
 struct nfs4_mount_data;
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
 {
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index c3f6b8a6b659..6276ec8608b0 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -379,11 +379,11 @@ static const struct svc_procedure nfsd_acl_procedures2[] = {
 };
 
 static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
-struct svc_version	nfsd_acl_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 5,
-		.vs_proc	= nfsd_acl_procedures2,
-		.vs_count	= nfsd_acl_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 5,
+	.vs_proc	= nfsd_acl_procedures2,
+	.vs_count	= nfsd_acl_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 1a482ac9d4e9..01976529f042 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -264,12 +264,12 @@ static const struct svc_procedure nfsd_acl_procedures3[] = {
 };
 
 static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
-struct svc_version	nfsd_acl_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 3,
-		.vs_proc	= nfsd_acl_procedures3,
-		.vs_count	= nfsd_acl_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 3,
+	.vs_proc	= nfsd_acl_procedures3,
+	.vs_count	= nfsd_acl_count3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 96e0e6a2af51..2cb56a0d6625 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -891,11 +891,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
-struct svc_version	nfsd_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 22,
-		.vs_proc	= nfsd_procedures3,
-		.vs_count	= nfsd_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 22,
+	.vs_proc	= nfsd_procedures3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_count	= nfsd_count3,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 68aa175b1cb3..3cbd065c639f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2554,7 +2554,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
-struct svc_version	nfsd_version4 = {
+const struct svc_version nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d96606801d47..b9c538ab7a59 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -60,7 +60,7 @@ struct readdir_cd {
 
 
 extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
+extern const struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
 extern struct mutex		nfsd_mutex;
 extern spinlock_t		nfsd_drc_lock;
@@ -86,12 +86,12 @@ void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
+extern const struct svc_version nfsd_acl_version2;
 #else
 #define nfsd_acl_version2 NULL
 #endif
 #ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
+extern const struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a68b686fda12..5076ae2b8258 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -744,13 +744,13 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 
 
 static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
-struct svc_version	nfsd_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 18,
-		.vs_proc	= nfsd_procedures2,
-		.vs_count	= nfsd_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
+const struct svc_version nfsd_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 18,
+	.vs_proc	= nfsd_procedures2,
+	.vs_count	= nfsd_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 379b310c445d..063ae7de2c12 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -68,14 +68,14 @@ unsigned long	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
-static struct svc_version *	nfsd_acl_version[] = {
+static const struct svc_version *nfsd_acl_version[] = {
 	[2] = &nfsd_acl_version2,
 	[3] = &nfsd_acl_version3,
 };
 
 #define NFSD_ACL_MINVERS            2
 #define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
-static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
+static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
 
 static struct svc_program	nfsd_acl_program = {
 	.pg_prog		= NFS_ACL_PROGRAM,
@@ -92,7 +92,7 @@ static struct svc_stat	nfsd_acl_svcstats = {
 };
 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
 
-static struct svc_version *	nfsd_version[] = {
+static const struct svc_version *nfsd_version[] = {
 	[2] = &nfsd_version2,
 #if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
@@ -104,7 +104,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_MINVERS    	2
 #define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
-static struct svc_version *nfsd_versions[NFSD_NRVERS];
+static const struct svc_version *nfsd_versions[NFSD_NRVERS];
 
 struct svc_program		nfsd_program = {
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 992ea3419795..eec04982a7ea 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -384,7 +384,7 @@ struct svc_program {
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* highest version */
 	unsigned int		pg_nvers;	/* number of versions */
-	struct svc_version **	pg_vers;	/* version array */
+	const struct svc_version **pg_vers;	/* version array */
 	char *			pg_name;	/* service name */
 	char *			pg_class;	/* class name: services sharing authentication */
 	struct svc_stat *	pg_stats;	/* rpc statistics */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 049963d676a7..45b4f2d2e3bd 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1008,7 +1008,7 @@ int svc_register(const struct svc_serv *serv, struct net *net,
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	struct svc_version	*vers;
+	const struct svc_version *vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -1151,7 +1151,7 @@ static int
 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
-	struct svc_version	*versp = NULL;	/* compiler food */
+	const struct svc_version *versp = NULL;	/* compiler food */
 	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
-- 
cgit v1.2.3


From a7a3b1e971cd806b81ecea3a234d8dae9de0add0 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 20 Jun 2017 08:33:44 -0400
Subject: NFS: convert flags to bool

NFS uses some int, and unsigned int :1, and bool as flags in structs and
args.  Assert the preference for uniformly replacing these with the bool
type.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c            | 16 ++++++++--------
 fs/nfs/internal.h       |  6 +++---
 fs/nfs/nfs2xdr.c        |  2 +-
 fs/nfs/nfs3proc.c       |  2 +-
 fs/nfs/nfs3xdr.c        |  2 +-
 fs/nfs/nfs4proc.c       | 42 +++++++++++++++++++++---------------------
 fs/nfs/nfs4xdr.c        |  2 +-
 fs/nfs/proc.c           |  2 +-
 include/linux/nfs_xdr.h | 10 +++++-----
 9 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd7754f8a..9688e9bb13dc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -151,7 +151,7 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 };
 
-typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
@@ -165,8 +165,8 @@ typedef struct {
 	unsigned long	timestamp;
 	unsigned long	gencount;
 	unsigned int	cache_entry_index;
-	unsigned int	plus:1;
-	unsigned int	eof:1;
+	bool plus;
+	bool eof;
 } nfs_readdir_descriptor_t;
 
 /*
@@ -355,7 +355,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-			desc->plus = 0;
+			desc->plus = false;
 			goto again;
 		}
 		goto error;
@@ -557,7 +557,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 		count++;
 
-		if (desc->plus != 0)
+		if (desc->plus)
 			nfs_prime_dcache(file_dentry(desc->file), entry);
 
 		status = nfs_readdir_add_to_array(entry, page);
@@ -860,7 +860,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->ctx = ctx;
 	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
-	desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
+	desc->plus = nfs_use_readdirplus(inode, ctx);
 
 	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
@@ -885,8 +885,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			nfs_zap_caches(inode);
 			desc->page_index = 0;
-			desc->plus = 0;
-			desc->eof = 0;
+			desc->plus = false;
+			desc->eof = false;
 			continue;
 		}
 		if (res < 0)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1c0ce9c15e94..c5054edb0157 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -272,17 +272,17 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 /* nfs2xdr.c */
 extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 
 /* nfs3xdr.c */
 extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 
 /* nfs4xdr.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern int nfs4_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c8a7e98c1371..fe68dabfbde6 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -939,7 +939,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
  *	};
  */
 int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	__be32 *p;
 	int error;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0c07b567118d..df4a7d3ab915 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -621,7 +621,7 @@ out:
  */
 static int
 nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		  u64 cookie, struct page **pages, unsigned int count, int plus)
+		  u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	__be32			*verf = NFS_I(dir)->cookieverf;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 670eddb3ae36..e82c9e553224 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1991,7 +1991,7 @@ out_status:
  *	};
  */
 int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	struct nfs_entry old = *entry;
 	__be32 *p;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 735b3068a2d1..e678fa8f6979 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1037,11 +1037,11 @@ struct nfs4_opendata {
 	struct nfs4_state *state;
 	struct iattr attrs;
 	unsigned long timestamp;
-	unsigned int rpc_done : 1;
-	unsigned int file_created : 1;
-	unsigned int is_recover : 1;
+	bool rpc_done;
+	bool file_created;
+	bool is_recover;
+	bool cancelled;
 	int rpc_status;
-	int cancelled;
 };
 
 static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
@@ -1965,7 +1965,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
 		renew_lease(data->o_res.server, data->timestamp);
-		data->rpc_done = 1;
+		data->rpc_done = true;
 	}
 }
 
@@ -1975,7 +1975,7 @@ static void nfs4_open_confirm_release(void *calldata)
 	struct nfs4_state *state = NULL;
 
 	/* If this request hasn't been cancelled, do nothing */
-	if (data->cancelled == 0)
+	if (!data->cancelled)
 		goto out_free;
 	/* In case of error, no cleanup! */
 	if (!data->rpc_done)
@@ -2018,7 +2018,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 
 	nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
 	kref_get(&data->kref);
-	data->rpc_done = 0;
+	data->rpc_done = false;
 	data->rpc_status = 0;
 	data->timestamp = jiffies;
 	if (data->is_recover)
@@ -2028,7 +2028,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		return PTR_ERR(task);
 	status = rpc_wait_for_completion_task(task);
 	if (status != 0) {
-		data->cancelled = 1;
+		data->cancelled = true;
 		smp_wmb();
 	} else
 		status = data->rpc_status;
@@ -2127,7 +2127,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
 			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
-	data->rpc_done = 1;
+	data->rpc_done = true;
 }
 
 static void nfs4_open_release(void *calldata)
@@ -2136,7 +2136,7 @@ static void nfs4_open_release(void *calldata)
 	struct nfs4_state *state = NULL;
 
 	/* If this request hasn't been cancelled, do nothing */
-	if (data->cancelled == 0)
+	if (!data->cancelled)
 		goto out_free;
 	/* In case of error, no cleanup! */
 	if (data->rpc_status != 0 || !data->rpc_done)
@@ -2182,20 +2182,20 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 
 	nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
 	kref_get(&data->kref);
-	data->rpc_done = 0;
+	data->rpc_done = false;
 	data->rpc_status = 0;
-	data->cancelled = 0;
-	data->is_recover = 0;
+	data->cancelled = false;
+	data->is_recover = false;
 	if (isrecover) {
 		nfs4_set_sequence_privileged(&o_arg->seq_args);
-		data->is_recover = 1;
+		data->is_recover = true;
 	}
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = rpc_wait_for_completion_task(task);
 	if (status != 0) {
-		data->cancelled = 1;
+		data->cancelled = true;
 		smp_wmb();
 	} else
 		status = data->rpc_status;
@@ -2290,9 +2290,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 
 	if (o_arg->open_flags & O_CREAT) {
 		if (o_arg->open_flags & O_EXCL)
-			data->file_created = 1;
+			data->file_created = true;
 		else if (o_res->cinfo.before != o_res->cinfo.after)
-			data->file_created = 1;
+			data->file_created = true;
 		if (data->file_created || dir->i_version != o_res->cinfo.after)
 			update_changeattr(dir, &o_res->cinfo,
 					o_res->f_attr->time_start);
@@ -4275,7 +4275,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 }
 
 static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, int plus)
+		u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	struct nfs4_readdir_arg args = {
@@ -4313,7 +4313,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 }
 
 static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, int plus)
+		u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct nfs4_exception exception = { };
 	int err;
@@ -6137,7 +6137,7 @@ static void nfs4_lock_release(void *calldata)
 
 	dprintk("%s: begin!\n", __func__);
 	nfs_free_seqid(data->arg.open_seqid);
-	if (data->cancelled != 0) {
+	if (data->cancelled) {
 		struct rpc_task *task;
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
 				data->arg.lock_seqid);
@@ -6220,7 +6220,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 			nfs4_handle_setlk_error(data->server, data->lsp,
 					data->arg.new_lock_owner, ret);
 	} else
-		data->cancelled = 1;
+		data->cancelled = true;
 	rpc_put_task(task);
 	dprintk("%s: done, ret = %d!\n", __func__, ret);
 	trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0f1f290c97cd..495d493d3a35 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7447,7 +7447,7 @@ out:
  * on a directory already in our cache.
  */
 int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	unsigned int savep;
 	uint32_t bitmap[3] = {0};
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 9872cf676a50..7962e49097c3 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -485,7 +485,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
  */
 static int
 nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		 u64 cookie, struct page **pages, unsigned int count, int plus)
+		 u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	struct nfs_readdirargs	arg = {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b28c83475ee8..9b42bffbe07b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -878,7 +878,7 @@ struct nfs3_readdirargs {
 	struct nfs_fh *		fh;
 	__u64			cookie;
 	__be32			verf[2];
-	int			plus;
+	bool			plus;
 	unsigned int            count;
 	struct page **		pages;
 };
@@ -909,7 +909,7 @@ struct nfs3_linkres {
 struct nfs3_readdirres {
 	struct nfs_fattr *	dir_attr;
 	__be32 *		verf;
-	int			plus;
+	bool			plus;
 };
 
 struct nfs3_getaclres {
@@ -1053,7 +1053,7 @@ struct nfs4_readdir_arg {
 	struct page **			pages;	/* zero-copy data */
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
-	int				plus;
+	bool				plus;
 };
 
 struct nfs4_readdir_res {
@@ -1585,7 +1585,7 @@ struct nfs_rpc_ops {
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, const struct qstr *);
 	int	(*readdir) (struct dentry *, struct rpc_cred *,
-			    u64, struct page **, unsigned int, int);
+			    u64, struct page **, unsigned int, bool);
 	int	(*mknod)   (struct inode *, struct dentry *, struct iattr *,
 			    dev_t);
 	int	(*statfs)  (struct nfs_server *, struct nfs_fh *,
@@ -1595,7 +1595,7 @@ struct nfs_rpc_ops {
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool);
 	int	(*pgio_rpc_prepare)(struct rpc_task *,
 				    struct nfs_pgio_header *);
 	void	(*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
-- 
cgit v1.2.3


From 818a8dbe83fddff534b814a7d4e0c75b511dff2e Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 16 Jun 2017 11:13:00 -0400
Subject: NFS: nfs_rename() - revalidate directories on -ERESTARTSYS

An interrupted rename will leave the old dentry behind if the rename
succeeds.  Fix this by forcing a lookup the next time through
->d_revalidate.

A previous attempt at solving this problem took the approach to complete
the work of the rename asynchronously, however that approach was wrong
since it would allow the d_move() to occur after the directory's i_mutex
had been dropped by the original process.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c            |  6 +++++-
 fs/nfs/unlink.c         | 13 +++++++++++++
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9688e9bb13dc..ab69ebb48350 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2056,7 +2056,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	error = rpc_wait_for_completion_task(task);
-	if (error == 0)
+	if (error != 0) {
+		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
+		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
+		smp_wmb();
+	} else
 		error = task->tk_status;
 	rpc_put_task(task);
 out:
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 191aa577dd1f..e3949d93085c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -288,6 +288,19 @@ static void nfs_async_rename_release(void *calldata)
 	if (d_really_is_positive(data->old_dentry))
 		nfs_mark_for_revalidate(d_inode(data->old_dentry));
 
+	/* The result of the rename is unknown. Play it safe by
+	 * forcing a new lookup */
+	if (data->cancelled) {
+		spin_lock(&data->old_dir->i_lock);
+		nfs_force_lookup_revalidate(data->old_dir);
+		spin_unlock(&data->old_dir->i_lock);
+		if (data->new_dir != data->old_dir) {
+			spin_lock(&data->new_dir->i_lock);
+			nfs_force_lookup_revalidate(data->new_dir);
+			spin_unlock(&data->new_dir->i_lock);
+		}
+	}
+
 	dput(data->old_dentry);
 	dput(data->new_dentry);
 	iput(data->old_dir);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9b42bffbe07b..9463eeff9e3c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1533,6 +1533,7 @@ struct nfs_renamedata {
 	struct nfs_fattr	new_fattr;
 	void (*complete)(struct rpc_task *, struct nfs_renamedata *);
 	long timeout;
+	bool cancelled;
 };
 
 struct nfs_access_entry;
-- 
cgit v1.2.3


From b5973a8c1ccf375c9ab9e2428e1185e3f799af06 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 20 Jun 2017 19:35:36 -0400
Subject: NFS: Remove unused fields in the page I/O structures

Remove the 'layout_private' fields that were only used by the pNFS OSD
layout driver.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        | 2 --
 include/linux/nfs_page.h | 1 -
 include/linux/nfs_xdr.h  | 1 -
 3 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ad92b401326c..de107d866297 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -51,7 +51,6 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = mirror->pg_count;
 	hdr->dreq = desc->pg_dreq;
-	hdr->layout_private = desc->pg_layout_private;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
 	if (hdr->completion_ops->init_hdr)
@@ -711,7 +710,6 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
 	desc->pg_dreq = NULL;
-	desc->pg_layout_private = NULL;
 	desc->pg_bsize = bsize;
 
 	desc->pg_mirror_count = 1;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 247cc3d3498f..6138cf91346b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -94,7 +94,6 @@ struct nfs_pageio_descriptor {
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 	struct nfs_direct_req	*pg_dreq;
-	void			*pg_layout_private;
 	unsigned int		pg_bsize;	/* default bsize for mirrors */
 
 	u32			pg_mirror_count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9463eeff9e3c..7f1e04941763 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1436,7 +1436,6 @@ struct nfs_pgio_header {
 	const struct nfs_pgio_completion_ops *completion_ops;
 	const struct nfs_rw_ops	*rw_ops;
 	struct nfs_direct_req	*dreq;
-	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
-- 
cgit v1.2.3


From 919e3bd9a87593520a2c5dfda27bd3e6599852ed Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 20 Jun 2017 19:35:37 -0400
Subject: NFS: Ensure we commit after writeback is complete

If the page cache is being flushed, then we want to ensure that we
do start a commit once the pages are done being flushed.
If we just wait until all I/O is done to that file, we can end up
livelocking until the balance_dirty_pages() mechanism puts its
foot down and forces I/O to stop.
So instead we do more or less the same thing that O_DIRECT does,
and set up a counter to tell us when the flush is done,

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        |  3 +++
 fs/nfs/write.c           | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_page.h |  1 +
 include/linux/nfs_xdr.h  |  2 ++
 4 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index de107d866297..83d2918f1b13 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -50,6 +50,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = mirror->pg_count;
+	hdr->io_completion = desc->pg_io_completion;
 	hdr->dreq = desc->pg_dreq;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
@@ -709,6 +710,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_io_completion = NULL;
 	desc->pg_dreq = NULL;
 	desc->pg_bsize = bsize;
 
@@ -1231,6 +1233,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
 {
 	LIST_HEAD(failed);
 
+	desc->pg_io_completion = hdr->io_completion;
 	desc->pg_dreq = hdr->dreq;
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index db7ba542559e..051197cb9195 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,6 +40,12 @@
 #define MIN_POOL_WRITE		(32)
 #define MIN_POOL_COMMIT		(4)
 
+struct nfs_io_completion {
+	void (*complete)(void *data);
+	void *data;
+	struct kref refcount;
+};
+
 /*
  * Local function declarations
  */
@@ -108,6 +114,39 @@ static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 	mempool_free(hdr, nfs_wdata_mempool);
 }
 
+static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
+{
+	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
+}
+
+static void nfs_io_completion_init(struct nfs_io_completion *ioc,
+		void (*complete)(void *), void *data)
+{
+	ioc->complete = complete;
+	ioc->data = data;
+	kref_init(&ioc->refcount);
+}
+
+static void nfs_io_completion_release(struct kref *kref)
+{
+	struct nfs_io_completion *ioc = container_of(kref,
+			struct nfs_io_completion, refcount);
+	ioc->complete(ioc->data);
+	kfree(ioc);
+}
+
+static void nfs_io_completion_get(struct nfs_io_completion *ioc)
+{
+	if (ioc != NULL)
+		kref_get(&ioc->refcount);
+}
+
+static void nfs_io_completion_put(struct nfs_io_completion *ioc)
+{
+	if (ioc != NULL)
+		kref_put(&ioc->refcount, nfs_io_completion_release);
+}
+
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 {
 	ctx->error = error;
@@ -681,18 +720,29 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 	return ret;
 }
 
+static void nfs_io_completion_commit(void *inode)
+{
+	nfs_commit_inode(inode, 0);
+}
+
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
 	struct nfs_pageio_descriptor pgio;
+	struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
+	if (ioc)
+		nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+
 	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
 				&nfs_async_write_completion_ops);
+	pgio.pg_io_completion = ioc;
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
+	nfs_io_completion_put(ioc);
 
 	if (err < 0)
 		goto out_err;
@@ -940,6 +990,11 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 	return hdr->verf.committed != NFS_FILE_SYNC;
 }
 
+static void nfs_async_write_init(struct nfs_pgio_header *hdr)
+{
+	nfs_io_completion_get(hdr->io_completion);
+}
+
 static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
@@ -973,6 +1028,7 @@ next:
 		nfs_release_request(req);
 	}
 out:
+	nfs_io_completion_put(hdr->io_completion);
 	hdr->release(hdr);
 }
 
@@ -1378,6 +1434,7 @@ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
 }
 
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+	.init_hdr = nfs_async_write_init,
 	.error_cleanup = nfs_async_write_error,
 	.completion = nfs_write_completion,
 	.reschedule_io = nfs_async_write_reschedule_io,
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6138cf91346b..abbee2d15dce 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -93,6 +93,7 @@ struct nfs_pageio_descriptor {
 	const struct rpc_call_ops *pg_rpc_callops;
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
+	struct nfs_io_completion *pg_io_completion;
 	struct nfs_direct_req	*pg_dreq;
 	unsigned int		pg_bsize;	/* default bsize for mirrors */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7f1e04941763..89093341f076 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1422,6 +1422,7 @@ enum {
 	NFS_IOHDR_STAT,
 };
 
+struct nfs_io_completion;
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
@@ -1435,6 +1436,7 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	const struct nfs_rw_ops	*rw_ops;
+	struct nfs_io_completion *io_completion;
 	struct nfs_direct_req	*dreq;
 	spinlock_t		lock;
 	/* fields protected by lock */
-- 
cgit v1.2.3


From 8dcbec6d20eb881ba368d0aebc3a8a678aebb1da Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:44 -0400
Subject: NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during NFSv4.1 migration

Transparent State Migration copies a client's lease state from the
server where a filesystem used to reside to the server where it now
resides. When an NFSv4.1 client first contacts that destination
server, it uses EXCHANGE_ID to detect trunking relationships.

The lease that was copied there is returned to that client, but the
destination server sets EXCHGID4_FLAG_CONFIRMED_R when replying to
the client. This is because the lease was confirmed on the source
server (before it was copied).

Normally, when CONFIRMED_R is set, a client purges the lease and
creates a new one. However, that throws away the entire benefit of
Transparent State Migration.

Therefore, the client must not purge that lease when it is possible
that Transparent State Migration has occurred.

Reported-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4client.c       |  5 +++++
 fs/nfs/nfs4state.c        | 16 +++++++++++-----
 include/linux/nfs_fs_sb.h |  2 ++
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 66776f022111..50566acb5469 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -414,6 +414,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (clp != old)
 		clp->cl_preserve_clid = true;
 	nfs_put_client(clp);
+	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	return old;
 
 error:
@@ -852,6 +853,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 	if (server->options & NFS_OPTION_MIGRATION)
 		set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+	if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
+		set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
@@ -1212,9 +1215,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 		return -EAFNOSUPPORT;
 
 	nfs_server_remove_lists(server);
+	set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion, net);
+	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	nfs_put_client(clp);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de036501b..83f8e60bb1e3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -352,11 +352,17 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
 	if (clp != *result)
 		return 0;
 
-	/* Purge state if the client id was established in a prior instance */
-	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
-		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
-	else
-		set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	/*
+	 * Purge state if the client id was established in a prior
+	 * instance and the client id could not have arrived on the
+	 * server via Transparent State Migration.
+	 */
+	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
+		if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
+			set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+		else
+			set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	}
 	nfs4_schedule_state_manager(clp);
 	status = nfs_wait_client_init_complete(clp);
 	if (status < 0)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e418a1096662..74c44665e6d3 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -42,6 +42,7 @@ struct nfs_client {
 #define NFS_CS_MIGRATION	2		/* - transparent state migr */
 #define NFS_CS_INFINITE_SLOTS	3		/* - don't limit TCP slots */
 #define NFS_CS_NO_RETRANS_TIMEOUT	4	/* - Disable retransmit timeouts */
+#define NFS_CS_TSM_POSSIBLE	5		/* - Maybe state migration */
 	struct sockaddr_storage	cl_addr;	/* server identifier */
 	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
@@ -210,6 +211,7 @@ struct nfs_server {
 	unsigned long		mig_status;
 #define NFS_MIG_IN_TRANSITION		(1)
 #define NFS_MIG_FAILED			(2)
+#define NFS_MIG_TSM_POSSIBLE		(3)
 
 	void (*destroy)(struct nfs_server *);
 
-- 
cgit v1.2.3


From f174ff7a0ab6a097455a94abfc99517940041c07 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:51 -0700
Subject: nfs: add a nfs_ilookup helper

This helper will allow to find an existing NFS inode by the file handle
and fattr.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
[hch: split from a larger patch]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/inode.c         | 22 ++++++++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7e7a894601b9..109279d6d91b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -386,6 +386,28 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
 #endif
 EXPORT_SYMBOL_GPL(nfs_setsecurity);
 
+/* Search for inode identified by fh, fileid and i_mode in inode cache. */
+struct inode *
+nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+{
+	struct nfs_find_desc desc = {
+		.fh	= fh,
+		.fattr	= fattr,
+	};
+	struct inode *inode;
+	unsigned long hash;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_FILEID) ||
+	    !(fattr->valid & NFS_ATTR_FATTR_TYPE))
+		return NULL;
+
+	hash = nfs_fattr_to_ino_t(fattr);
+	inode = ilookup5(sb, hash, nfs_find_actor, &desc);
+
+	dprintk("%s: returning %p\n", __func__, inode);
+	return inode;
+}
+
 /*
  * This is our front-end to iget that looks up inodes by file handle
  * instead of inode number.
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bb0eb2c9acca..e52cc55ac300 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *);
 extern void nfs_invalidate_atime(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *, struct nfs4_label *);
+struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
-- 
cgit v1.2.3


From 5b5faaf6df73412af0278997db36dbcb51011d9d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:52 -0700
Subject: nfs4: add NFSv4 LOOKUPP handlers

This will be needed in order to implement the get_parent export op
for nfsd.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c       | 49 ++++++++++++++++++++++++++++++++
 fs/nfs/nfs4trace.h      | 29 +++++++++++++++++++
 fs/nfs/nfs4xdr.c        | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h    |  1 +
 include/linux/nfs_xdr.h | 17 ++++++++++-
 5 files changed, 170 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 01dcd4c8dc4f..e1a26c653e78 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3805,6 +3805,54 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
 	return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
 }
 
+static int _nfs4_proc_lookupp(struct inode *inode,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+		struct nfs4_label *label)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_server *server = NFS_SERVER(inode);
+	int		       status;
+	struct nfs4_lookupp_arg args = {
+		.bitmask = server->attr_bitmask,
+		.fh = NFS_FH(inode),
+	};
+	struct nfs4_lookupp_res res = {
+		.server = server,
+		.fattr = fattr,
+		.label = label,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUPP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	args.bitmask = nfs4_bitmask(server, label);
+
+	nfs_fattr_init(fattr);
+
+	dprintk("NFS call  lookupp ino=0x%lx\n", inode->i_ino);
+	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
+				&res.seq_res, 0);
+	dprintk("NFS reply lookupp: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
+			     struct nfs_fattr *fattr, struct nfs4_label *label)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
+		trace_nfs4_lookupp(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -9314,6 +9362,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
 	.lookup		= nfs4_proc_lookup,
+	.lookupp	= nfs4_proc_lookupp,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
 	.create		= nfs4_proc_create,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 845d0eadefc9..be1da19c65d6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -891,6 +891,35 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
 DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
 DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
 
+TRACE_EVENT(nfs4_lookupp,
+		TP_PROTO(
+			const struct inode *inode,
+			int error
+		),
+
+		TP_ARGS(inode, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, ino)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->ino = NFS_FILEID(inode);
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) inode=%02x:%02x:%llu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->ino
+		)
+);
+
 TRACE_EVENT(nfs4_rename,
 		TP_PROTO(
 			const struct inode *olddir,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 495d493d3a35..fa3eb361d4f8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -159,6 +159,8 @@ static int nfs4_stat_to_errno(int);
 				(op_decode_hdr_maxsz)
 #define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
 #define decode_lookup_maxsz	(op_decode_hdr_maxsz)
+#define encode_lookupp_maxsz	(op_encode_hdr_maxsz)
+#define decode_lookupp_maxsz	(op_decode_hdr_maxsz)
 #define encode_share_access_maxsz \
 				(2)
 #define encode_createmode_maxsz	(1 + encode_attrs_maxsz + encode_verifier_maxsz)
@@ -618,6 +620,18 @@ static int nfs4_stat_to_errno(int);
 				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
+#define NFS4_enc_lookupp_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_lookupp_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_lookupp_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_lookupp_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putrootfh_maxsz + \
@@ -1368,6 +1382,11 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
 	encode_string(xdr, name->len, name->name);
 }
 
+static void encode_lookupp(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_LOOKUPP, decode_lookupp_maxsz, hdr);
+}
+
 static void encode_share_access(struct xdr_stream *xdr, u32 share_access)
 {
 	__be32 *p;
@@ -2122,6 +2141,26 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode LOOKUPP request
+ */
+static void nfs4_xdr_enc_lookupp(struct rpc_rqst *req, struct xdr_stream *xdr,
+		const void *data)
+{
+	const struct nfs4_lookupp_arg *args = data;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lookupp(xdr, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * Encode LOOKUP_ROOT request
  */
@@ -5058,6 +5097,11 @@ static int decode_lookup(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_LOOKUP);
 }
 
+static int decode_lookupp(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_LOOKUPP);
+}
+
 /* This is too sick! */
 static int decode_space_limit(struct xdr_stream *xdr,
 		unsigned long *pagemod_limit)
@@ -6237,6 +6281,36 @@ out:
 	return status;
 }
 
+/*
+ * Decode LOOKUPP response
+ */
+static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *data)
+{
+	struct nfs4_lookupp_res *res = data;
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_lookupp(xdr);
+	if (status)
+		goto out;
+	status = decode_getfh(xdr, res->fh);
+	if (status)
+		goto out;
+	status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+out:
+	return status;
+}
+
 /*
  * Decode LOOKUP_ROOT response
  */
@@ -7614,6 +7688,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(ACCESS,		enc_access,		dec_access),
 	PROC(GETATTR,		enc_getattr,		dec_getattr),
 	PROC(LOOKUP,		enc_lookup,		dec_lookup),
+	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
 	PROC(LOOKUP_ROOT,	enc_lookup_root,	dec_lookup_root),
 	PROC(REMOVE,		enc_remove,		dec_remove),
 	PROC(RENAME,		enc_rename,		dec_rename),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 1b1ca04820a3..47239c336688 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -479,6 +479,7 @@ enum {
 	NFSPROC4_CLNT_ACCESS,
 	NFSPROC4_CLNT_GETATTR,
 	NFSPROC4_CLNT_LOOKUP,
+	NFSPROC4_CLNT_LOOKUPP,
 	NFSPROC4_CLNT_LOOKUP_ROOT,
 	NFSPROC4_CLNT_REMOVE,
 	NFSPROC4_CLNT_RENAME,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 89093341f076..ca3bcc4ed4e5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1012,7 +1012,6 @@ struct nfs4_link_res {
 	struct nfs_fattr *		dir_attr;
 };
 
-
 struct nfs4_lookup_arg {
 	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
@@ -1028,6 +1027,20 @@ struct nfs4_lookup_res {
 	struct nfs4_label		*label;
 };
 
+struct nfs4_lookupp_arg {
+	struct nfs4_sequence_args	seq_args;
+	const struct nfs_fh		*fh;
+	const u32			*bitmask;
+};
+
+struct nfs4_lookupp_res {
+	struct nfs4_sequence_res	seq_res;
+	const struct nfs_server		*server;
+	struct nfs_fattr		*fattr;
+	struct nfs_fh			*fh;
+	struct nfs4_label		*label;
+};
+
 struct nfs4_lookup_root_arg {
 	struct nfs4_sequence_args	seq_args;
 	const u32 *			bitmask;
@@ -1569,6 +1582,8 @@ struct nfs_rpc_ops {
 	int	(*lookup)  (struct inode *, const struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *,
 			    struct nfs4_label *);
+	int	(*lookupp) (struct inode *, struct nfs_fh *,
+			    struct nfs_fattr *, struct nfs4_label *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
 			    unsigned int);
-- 
cgit v1.2.3


From 301bfa483016d48b7fb9cbad87c0a04a15c25b90 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:53:48 -0400
Subject: NFS: Don't run wake_up_bit() when nobody is waiting...

"perf lock" shows fairly heavy contention for the bit waitqueue locks
when doing an I/O heavy workload.
Use a bit to tell whether or not there has been contention for a lock
so that we can optimise away the bit waitqueue options in those cases.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        | 17 ++++++++++++++++-
 include/linux/nfs_page.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8a23e2b40b04..de9066a92c0d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
 		return 0;
 
-	if (!nonblock)
+	if (!nonblock) {
+		set_bit(PG_CONTENDED1, &head->wb_flags);
+		smp_mb__after_atomic();
 		return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 				TASK_UNINTERRUPTIBLE);
+	}
 
 	return -EAGAIN;
 }
@@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
 
 	WARN_ON_ONCE(head != head->wb_head);
 
+	if (!test_bit(PG_HEADLOCK, &head->wb_flags))
+		return;
+	set_bit(PG_CONTENDED1, &head->wb_flags);
+	smp_mb__after_atomic();
 	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
 		TASK_UNINTERRUPTIBLE);
 }
@@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_HEADLOCK, &head->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+		return;
 	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
 }
 
@@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED2, &req->wb_flags))
+		return;
 	wake_up_bit(&req->wb_flags, PG_BUSY);
 }
 
@@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		return 0;
+	set_bit(PG_CONTENDED2, &req->wb_flags);
+	smp_mb__after_atomic();
 	return wait_on_bit_io(&req->wb_flags, PG_BUSY,
 			      TASK_UNINTERRUPTIBLE);
 }
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index abbee2d15dce..d67b67ae6c8b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,8 @@ enum {
 	PG_UPTODATE,		/* page group sync bit in read path */
 	PG_WB_END,		/* page group sync bit in write path */
 	PG_REMOVE,		/* page group sync bit in write path */
+	PG_CONTENDED1,		/* Is someone waiting for a lock? */
+	PG_CONTENDED2,		/* Is someone waiting for a lock? */
 };
 
 struct nfs_inode;
-- 
cgit v1.2.3


From b4f937cffa66b3d56eb8f586e620d0b223a281a3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:53:48 -0400
Subject: NFS: Don't run wake_up_bit() when nobody is waiting...

"perf lock" shows fairly heavy contention for the bit waitqueue locks
when doing an I/O heavy workload.
Use a bit to tell whether or not there has been contention for a lock
so that we can optimise away the bit waitqueue options in those cases.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        | 17 ++++++++++++++++-
 include/linux/nfs_page.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8a23e2b40b04..de9066a92c0d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
 		return 0;
 
-	if (!nonblock)
+	if (!nonblock) {
+		set_bit(PG_CONTENDED1, &head->wb_flags);
+		smp_mb__after_atomic();
 		return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 				TASK_UNINTERRUPTIBLE);
+	}
 
 	return -EAGAIN;
 }
@@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
 
 	WARN_ON_ONCE(head != head->wb_head);
 
+	if (!test_bit(PG_HEADLOCK, &head->wb_flags))
+		return;
+	set_bit(PG_CONTENDED1, &head->wb_flags);
+	smp_mb__after_atomic();
 	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
 		TASK_UNINTERRUPTIBLE);
 }
@@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_HEADLOCK, &head->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+		return;
 	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
 }
 
@@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED2, &req->wb_flags))
+		return;
 	wake_up_bit(&req->wb_flags, PG_BUSY);
 }
 
@@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		return 0;
+	set_bit(PG_CONTENDED2, &req->wb_flags);
+	smp_mb__after_atomic();
 	return wait_on_bit_io(&req->wb_flags, PG_BUSY,
 			      TASK_UNINTERRUPTIBLE);
 }
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index abbee2d15dce..d67b67ae6c8b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,8 @@ enum {
 	PG_UPTODATE,		/* page group sync bit in read path */
 	PG_WB_END,		/* page group sync bit in write path */
 	PG_REMOVE,		/* page group sync bit in write path */
+	PG_CONTENDED1,		/* Is someone waiting for a lock? */
+	PG_CONTENDED2,		/* Is someone waiting for a lock? */
 };
 
 struct nfs_inode;
-- 
cgit v1.2.3


From 2b02c20ce0c28974b44e69a2e2f5ddc6a470ad6f Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Tue, 11 Jul 2017 17:21:52 +0200
Subject: cdc_ncm: Set NTB format again after altsetting switch for Huawei
 devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some firmwares in Huawei E3372H devices have been observed to switch back
to NTB 32-bit format after altsetting switch.
This patch implements a driver flag to check for the device settings and
set NTB format to 16-bit again if needed.
The flag has been activated for devices controlled by the huawei_cdc_ncm.c
driver.

V1->V2:
- fixed broken error checks
- some corrections to the commit message
V2->V3:
- variable name changes, to clarify what's happening
- check (and possibly set) the NTB format later in the common bind code path

Signed-off-by: Enrico Mioso <mrkiko.rs@gmail.com>
Reported-and-tested-by: Christian Panton <christian@panton.org>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
CC: Bjørn Mork <bjorn@mork.no>
CC: Christian Panton <christian@panton.org>
CC: linux-usb@vger.kernel.org
CC: netdev@vger.kernel.org
CC: Oliver Neukum <oliver@neukum.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c        | 28 ++++++++++++++++++++++++++++
 drivers/net/usb/huawei_cdc_ncm.c |  6 ++++++
 include/linux/usb/cdc_ncm.h      |  1 +
 3 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index d103a1d4fb36..8f572b9f3625 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -768,8 +768,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	u8 *buf;
 	int len;
 	int temp;
+	int err;
 	u8 iface_no;
 	struct usb_cdc_parsed_header hdr;
+	u16 curr_ntb_format;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -874,6 +876,32 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 		goto error2;
 	}
 
+	/*
+	 * Some Huawei devices have been observed to come out of reset in NDP32 mode.
+	 * Let's check if this is the case, and set the device to NDP16 mode again if
+	 * needed.
+	*/
+	if (ctx->drvflags & CDC_NCM_FLAG_RESET_NTB16) {
+		err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_FORMAT,
+				      USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
+				      0, iface_no, &curr_ntb_format, 2);
+		if (err < 0) {
+			goto error2;
+		}
+
+		if (curr_ntb_format == USB_CDC_NCM_NTB32_FORMAT) {
+			dev_info(&intf->dev, "resetting NTB format to 16-bit");
+			err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT,
+					       USB_TYPE_CLASS | USB_DIR_OUT
+					       | USB_RECIP_INTERFACE,
+					       USB_CDC_NCM_NTB16_FORMAT,
+					       iface_no, NULL, 0);
+
+			if (err < 0)
+				goto error2;
+		}
+	}
+
 	cdc_ncm_find_endpoints(dev, ctx->data);
 	cdc_ncm_find_endpoints(dev, ctx->control);
 	if (!dev->in || !dev->out || !dev->status) {
diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index 2680a65cd5e4..63f28908afda 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c
@@ -80,6 +80,12 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev,
 	 * be at the end of the frame.
 	 */
 	drvflags |= CDC_NCM_FLAG_NDP_TO_END;
+
+	/* Additionally, it has been reported that some Huawei E3372H devices, with
+	 * firmware version 21.318.01.00.541, come out of reset in NTB32 format mode, hence
+	 * needing to be set to the NTB16 one again.
+	 */
+	drvflags |= CDC_NCM_FLAG_RESET_NTB16;
 	ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags);
 	if (ret)
 		goto err;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 021f7a88f52c..1a59699cf82a 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -83,6 +83,7 @@
 /* Driver flags */
 #define CDC_NCM_FLAG_NDP_TO_END			0x02	/* NDP is placed at end of frame */
 #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE	0x04	/* Avoid altsetting toggle during init */
+#define CDC_NCM_FLAG_RESET_NTB16 0x08	/* set NDP16 one more time after altsetting switch */
 
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
-- 
cgit v1.2.3


From 76250f2b743b72cb685cc51ac0cdabb32957180b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 14 Feb 2017 12:40:01 +0000
Subject: dma-buf/fence: Avoid use of uninitialised timestamp

[  236.821534] WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff8802538683d0)
[  236.828642] 420000001e7f0000000000000000000000080000000000000000000000000000
[  236.839543]  i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
[  236.850420]                                  ^
[  236.854123] RIP: 0010:[<ffffffff81396f07>]  [<ffffffff81396f07>] fence_signal+0x17/0xd0
[  236.861313] RSP: 0018:ffff88024acd7ba0  EFLAGS: 00010282
[  236.865027] RAX: ffffffff812f6a90 RBX: ffff8802527ca800 RCX: ffff880252cb30e0
[  236.868801] RDX: ffff88024ac5d918 RSI: ffff880252f780e0 RDI: ffff880253868380
[  236.872579] RBP: ffff88024acd7bc0 R08: ffff88024acd7be0 R09: 0000000000000000
[  236.876407] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880253868380
[  236.880185] R13: ffff8802538684d0 R14: ffff880253868380 R15: ffff88024cd48e00
[  236.883983] FS:  00007f1646d1a740(0000) GS:ffff88025d000000(0000) knlGS:0000000000000000
[  236.890959] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  236.894702] CR2: ffff880251360318 CR3: 000000024ad21000 CR4: 00000000001406f0
[  236.898481]  [<ffffffff8130d1ad>] i915_gem_request_retire+0x1cd/0x230
[  236.902439]  [<ffffffff8130e2b3>] i915_gem_request_alloc+0xa3/0x2f0
[  236.906435]  [<ffffffff812fb1bd>] i915_gem_do_execbuffer.isra.41+0xb6d/0x18b0
[  236.910434]  [<ffffffff812fc265>] i915_gem_execbuffer2+0x95/0x1e0
[  236.914390]  [<ffffffff812ad625>] drm_ioctl+0x1e5/0x460
[  236.918275]  [<ffffffff8110d4cf>] do_vfs_ioctl+0x8f/0x5c0
[  236.922168]  [<ffffffff8110da3c>] SyS_ioctl+0x3c/0x70
[  236.926090]  [<ffffffff814b7a5f>] entry_SYSCALL_64_fastpath+0x17/0x93
[  236.930045]  [<ffffffffffffffff>] 0xffffffffffffffff

We only set the timestamp before we mark the fence as signaled. It is
done before to avoid observers having a window in which they may see the
fence as complete but no timestamp. Having it does incur a potential for
the timestamp to be written twice, and even for it to be corrupted if
the u64 write is not atomic. Instead use a new bit to record the
presence of the timestamp, and teach the readers to wait until it is set
if the fence is complete. There still remains a race where the timestamp
for the signaled fence may be shown before the fence is reported as
signaled, but that's a pre-existing error.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Reported-by: Rafael Antognolli <rafael.antognolli@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170214124001.1930-1-chris@chris-wilson.co.uk
---
 drivers/dma-buf/dma-fence.c  | 17 ++++++-----------
 drivers/dma-buf/sync_debug.c |  2 +-
 drivers/dma-buf/sync_file.c  |  8 +++++++-
 include/linux/dma-fence.h    |  2 ++
 4 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0918d3f003d6..13556fdda2a5 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -75,11 +75,6 @@ int dma_fence_signal_locked(struct dma_fence *fence)
 	if (WARN_ON(!fence))
 		return -EINVAL;
 
-	if (!ktime_to_ns(fence->timestamp)) {
-		fence->timestamp = ktime_get();
-		smp_mb__before_atomic();
-	}
-
 	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
 		ret = -EINVAL;
 
@@ -87,8 +82,11 @@ int dma_fence_signal_locked(struct dma_fence *fence)
 		 * we might have raced with the unlocked dma_fence_signal,
 		 * still run through all callbacks
 		 */
-	} else
+	} else {
+		fence->timestamp = ktime_get();
+		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
 		trace_dma_fence_signaled(fence);
+	}
 
 	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
 		list_del_init(&cur->node);
@@ -115,14 +113,11 @@ int dma_fence_signal(struct dma_fence *fence)
 	if (!fence)
 		return -EINVAL;
 
-	if (!ktime_to_ns(fence->timestamp)) {
-		fence->timestamp = ktime_get();
-		smp_mb__before_atomic();
-	}
-
 	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return -EINVAL;
 
+	fence->timestamp = ktime_get();
+	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
 	trace_dma_fence_signaled(fence);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index c769dc653b34..bfead12390f2 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -84,7 +84,7 @@ static void sync_print_fence(struct seq_file *s,
 		   show ? "_" : "",
 		   sync_status_str(status));
 
-	if (status) {
+	if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) {
 		struct timespec64 ts64 =
 			ktime_to_timespec64(fence->timestamp);
 
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035f6204..95f259b719fc 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -375,7 +375,13 @@ static void sync_fill_fence_info(struct dma_fence *fence,
 		sizeof(info->driver_name));
 
 	info->status = dma_fence_get_status(fence);
-	info->timestamp_ns = ktime_to_ns(fence->timestamp);
+	while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
+	       !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+		cpu_relax();
+	info->timestamp_ns =
+		test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
+		ktime_to_ns(fence->timestamp) :
+		ktime_set(0, 0);
 }
 
 static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index a5195a7d6f77..0a186c4f3981 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -55,6 +55,7 @@ struct dma_fence_cb;
  * of the time.
  *
  * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
+ * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
  * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
  * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
  * implementer of the fence for its own purposes. Can be used in different
@@ -84,6 +85,7 @@ struct dma_fence {
 
 enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_SIGNALED_BIT,
+	DMA_FENCE_FLAG_TIMESTAMP_BIT,
 	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
 };
-- 
cgit v1.2.3


From 9049f2f6e7bdfb5de0c63c2635bf3cdb70c4efb5 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 14 Jul 2017 14:49:52 -0700
Subject: fault-inject: parse as natural 1-based value for fail-nth write
 interface

The value written to fail-nth file is parsed as 0-based.  Parsing as
one-based is more natural to understand and it enables to cancel the
previous setup by simply writing '0'.

This change also converts task->fail_nth from signed to unsigned int.

Link: http://lkml.kernel.org/r/1491490561-10485-3-git-send-email-akinobu.mita@gmail.com
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fault-injection/fault-injection.txt | 7 +++----
 fs/proc/base.c                                    | 9 ++++-----
 include/linux/sched.h                             | 2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 192d8cbcc5f9..a32190508751 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -138,8 +138,8 @@ o proc entries
 
 - /proc/self/task/<current-tid>/fail-nth:
 
-	Write to this file of integer N makes N-th call in the current task fail
-	(N is 0-based). Read from this file returns a single char 'Y' or 'N'
+	Write to this file of integer N makes N-th call in the task fail.
+	Read from this file returns a single char 'Y' or 'N'
 	that says if the fault setup with a previous write to this file was
 	injected or not, and disables the fault if it wasn't yet injected.
 	Note that this file enables all types of faults (slab, futex, etc).
@@ -320,7 +320,7 @@ int main()
 	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
 	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
 	fail_nth = open(buf, O_RDWR);
-	for (i = 0;; i++) {
+	for (i = 1;; i++) {
 		sprintf(buf, "%d", i);
 		write(fail_nth, buf, strlen(buf));
 		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
@@ -339,7 +339,6 @@ int main()
 
 An example output:
 
-0-th fault Y: res=-1/23
 1-th fault Y: res=-1/23
 2-th fault Y: res=-1/23
 3-th fault Y: res=-1/12
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5d93512beea1..c1fdaecb8d23 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1360,7 +1360,8 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 				   size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	int err, n;
+	int err;
+	unsigned int n;
 
 	task = get_proc_task(file_inode(file));
 	if (!task)
@@ -1368,12 +1369,10 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 	put_task_struct(task);
 	if (task != current)
 		return -EPERM;
-	err = kstrtoint_from_user(buf, count, 0, &n);
+	err = kstrtouint_from_user(buf, count, 0, &n);
 	if (err)
 		return err;
-	if (n < 0 || n == INT_MAX)
-		return -EINVAL;
-	current->fail_nth = n + 1;
+	current->fail_nth = n;
 	return count;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3822d749fc9e..2ba9ec93423f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,7 +974,7 @@ struct task_struct {
 
 #ifdef CONFIG_FAULT_INJECTION
 	int				make_it_fail;
-	int fail_nth;
+	unsigned int			fail_nth;
 #endif
 	/*
 	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
-- 
cgit v1.2.3


From 077d2ba519b2e8bf1abd80cbade699b1de42cafe Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 14 Jul 2017 17:28:12 -0400
Subject: replace incorrect strscpy use in FORTIFY_SOURCE

Using strscpy was wrong because FORTIFY_SOURCE is passing the maximum
possible size of the outermost object, but strscpy defines the count
parameter as the exact buffer size, so this could copy past the end of
the source.  This would still be wrong with the planned usage of
__builtin_object_size(p, 1) for intra-object overflow checks since it's
the maximum possible size of the specified object with no guarantee of
it being that large.

Reuse of the fortified functions like this currently makes the runtime
error reporting less precise but that can be improved later on.

Noticed by Dave Jones and KASAN.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 96f5a5fd0377..049866760e8b 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -202,17 +202,6 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob
 void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
 
 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
-__FORTIFY_INLINE char *strcpy(char *p, const char *q)
-{
-	size_t p_size = __builtin_object_size(p, 0);
-	size_t q_size = __builtin_object_size(q, 0);
-	if (p_size == (size_t)-1 && q_size == (size_t)-1)
-		return __builtin_strcpy(p, q);
-	if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
-		fortify_panic(__func__);
-	return p;
-}
-
 __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
 {
 	size_t p_size = __builtin_object_size(p, 0);
@@ -391,6 +380,18 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
 		fortify_panic(__func__);
 	return __real_kmemdup(p, size, gfp);
 }
+
+/* defined after fortified strlen and memcpy to reuse them */
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strcpy(p, q);
+	memcpy(p, q, strlen(q) + 1);
+	return p;
+}
+
 #endif
 
 #endif /* _LINUX_STRING_H_ */
-- 
cgit v1.2.3


From e67ae2b7b23b283e657865b498b151e6a17b919d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 10 Jul 2017 13:17:26 +0200
Subject: libceph: fix old style declaration warnings

The new macros don't follow the usual style for declarations,
which we get a warning for with 'make W=1':

In file included from fs/ceph/mds_client.c:16:0:
include/linux/ceph/ceph_features.h:74:1: error: 'static' is not at beginning of declaration [-Werror=old-style-declaration]

This moves the 'static' keyword to the front of the
declaration.

Fixes: f179d3ba8cb9 ("libceph: new features macros")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index f0f6c537b64c..040dd105c3e7 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -10,14 +10,14 @@
 #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
 
 #define DEFINE_CEPH_FEATURE(bit, incarnation, name)			\
-	const static uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
-	const static uint64_t CEPH_FEATUREMASK_##name =			\
+	static const uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
+	static const uint64_t CEPH_FEATUREMASK_##name =			\
 		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /* this bit is ignored but still advertised by release *when* */
 #define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
-	const static uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
-	const static uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
+	static const uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
+	static const uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
 		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /*
-- 
cgit v1.2.3


From cf56c2f892a8a1870a8358114ad896772da7543a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 6 Jul 2017 23:17:44 +0200
Subject: netfilter: remove old pre-netns era hook api

no more users in the tree, remove this.

The old api is racy wrt. module removal, all users have been converted
to the netns-aware api.

The old api pretended we still have global hooks but that has not been
true for a long time.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h |   9 ---
 net/netfilter/core.c      | 143 ----------------------------------------------
 2 files changed, 152 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index a4b97be30b28..22f081065d49 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -61,8 +61,6 @@ typedef unsigned int nf_hookfn(void *priv,
 			       struct sk_buff *skb,
 			       const struct nf_hook_state *state);
 struct nf_hook_ops {
-	struct list_head	list;
-
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
 	struct net_device	*dev;
@@ -160,13 +158,6 @@ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			     unsigned int n);
 
-int nf_register_hook(struct nf_hook_ops *reg);
-void nf_unregister_hook(struct nf_hook_ops *reg);
-int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
-void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
-int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
-void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
-
 /* Functions to register get/setsockopt ranges (non-inclusive).  You
    need to check permissions yourself! */
 int nf_register_sockopt(struct nf_sockopt_ops *reg);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 552d606e57ca..368610dbc3c0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -227,114 +227,6 @@ void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
 
-static LIST_HEAD(nf_hook_list);
-
-static int _nf_register_hook(struct nf_hook_ops *reg)
-{
-	struct net *net, *last;
-	int ret;
-
-	for_each_net(net) {
-		ret = nf_register_net_hook(net, reg);
-		if (ret && ret != -ENOENT)
-			goto rollback;
-	}
-	list_add_tail(&reg->list, &nf_hook_list);
-
-	return 0;
-rollback:
-	last = net;
-	for_each_net(net) {
-		if (net == last)
-			break;
-		nf_unregister_net_hook(net, reg);
-	}
-	return ret;
-}
-
-int nf_register_hook(struct nf_hook_ops *reg)
-{
-	int ret;
-
-	rtnl_lock();
-	ret = _nf_register_hook(reg);
-	rtnl_unlock();
-
-	return ret;
-}
-EXPORT_SYMBOL(nf_register_hook);
-
-static void _nf_unregister_hook(struct nf_hook_ops *reg)
-{
-	struct net *net;
-
-	list_del(&reg->list);
-	for_each_net(net)
-		nf_unregister_net_hook(net, reg);
-}
-
-void nf_unregister_hook(struct nf_hook_ops *reg)
-{
-	rtnl_lock();
-	_nf_unregister_hook(reg);
-	rtnl_unlock();
-}
-EXPORT_SYMBOL(nf_unregister_hook);
-
-int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	unsigned int i;
-	int err = 0;
-
-	for (i = 0; i < n; i++) {
-		err = nf_register_hook(&reg[i]);
-		if (err)
-			goto err;
-	}
-	return err;
-
-err:
-	if (i > 0)
-		nf_unregister_hooks(reg, i);
-	return err;
-}
-EXPORT_SYMBOL(nf_register_hooks);
-
-/* Caller MUST take rtnl_lock() */
-int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	unsigned int i;
-	int err = 0;
-
-	for (i = 0; i < n; i++) {
-		err = _nf_register_hook(&reg[i]);
-		if (err)
-			goto err;
-	}
-	return err;
-
-err:
-	if (i > 0)
-		_nf_unregister_hooks(reg, i);
-	return err;
-}
-EXPORT_SYMBOL(_nf_register_hooks);
-
-void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	while (n-- > 0)
-		nf_unregister_hook(&reg[n]);
-}
-EXPORT_SYMBOL(nf_unregister_hooks);
-
-/* Caller MUST take rtnl_lock */
-void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	while (n-- > 0)
-		_nf_unregister_hook(&reg[n]);
-}
-EXPORT_SYMBOL(_nf_unregister_hooks);
-
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
@@ -450,37 +342,6 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(nf_nat_decode_session_hook);
 #endif
 
-static int nf_register_hook_list(struct net *net)
-{
-	struct nf_hook_ops *elem;
-	int ret;
-
-	rtnl_lock();
-	list_for_each_entry(elem, &nf_hook_list, list) {
-		ret = nf_register_net_hook(net, elem);
-		if (ret && ret != -ENOENT)
-			goto out_undo;
-	}
-	rtnl_unlock();
-	return 0;
-
-out_undo:
-	list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
-		nf_unregister_net_hook(net, elem);
-	rtnl_unlock();
-	return ret;
-}
-
-static void nf_unregister_hook_list(struct net *net)
-{
-	struct nf_hook_ops *elem;
-
-	rtnl_lock();
-	list_for_each_entry(elem, &nf_hook_list, list)
-		nf_unregister_net_hook(net, elem);
-	rtnl_unlock();
-}
-
 static int __net_init netfilter_net_init(struct net *net)
 {
 	int i, h, ret;
@@ -500,16 +361,12 @@ static int __net_init netfilter_net_init(struct net *net)
 		return -ENOMEM;
 	}
 #endif
-	ret = nf_register_hook_list(net);
-	if (ret)
-		remove_proc_entry("netfilter", net->proc_net);
 
 	return ret;
 }
 
 static void __net_exit netfilter_net_exit(struct net *net)
 {
-	nf_unregister_hook_list(net);
 	remove_proc_entry("netfilter", net->proc_net);
 }
 
-- 
cgit v1.2.3


From 13c401f33e19c20431d9888a91d9ea82e5133bd9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 14 Jul 2017 23:03:49 -0700
Subject: jhash: fix -Wimplicit-fallthrough warnings

GCC 7 added a new -Wimplicit-fallthrough warning.  It's only enabled
with W=1, but since linux/jhash.h is included in over hundred places
(including other global headers) it seems worthwhile fixing this
warning.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jhash.h | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 348c6f47e4cc..8037850f3104 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -85,19 +85,18 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
 		k += 12;
 	}
 	/* Last block: affect all 32 bits of (c) */
-	/* All the case statements fall through */
 	switch (length) {
-	case 12: c += (u32)k[11]<<24;
-	case 11: c += (u32)k[10]<<16;
-	case 10: c += (u32)k[9]<<8;
-	case 9:  c += k[8];
-	case 8:  b += (u32)k[7]<<24;
-	case 7:  b += (u32)k[6]<<16;
-	case 6:  b += (u32)k[5]<<8;
-	case 5:  b += k[4];
-	case 4:  a += (u32)k[3]<<24;
-	case 3:  a += (u32)k[2]<<16;
-	case 2:  a += (u32)k[1]<<8;
+	case 12: c += (u32)k[11]<<24;	/* fall through */
+	case 11: c += (u32)k[10]<<16;	/* fall through */
+	case 10: c += (u32)k[9]<<8;	/* fall through */
+	case 9:  c += k[8];		/* fall through */
+	case 8:  b += (u32)k[7]<<24;	/* fall through */
+	case 7:  b += (u32)k[6]<<16;	/* fall through */
+	case 6:  b += (u32)k[5]<<8;	/* fall through */
+	case 5:  b += k[4];		/* fall through */
+	case 4:  a += (u32)k[3]<<24;	/* fall through */
+	case 3:  a += (u32)k[2]<<16;	/* fall through */
+	case 2:  a += (u32)k[1]<<8;	/* fall through */
 	case 1:  a += k[0];
 		 __jhash_final(a, b, c);
 	case 0: /* Nothing left to add */
@@ -131,10 +130,10 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
 		k += 3;
 	}
 
-	/* Handle the last 3 u32's: all the case statements fall through */
+	/* Handle the last 3 u32's */
 	switch (length) {
-	case 3: c += k[2];
-	case 2: b += k[1];
+	case 3: c += k[2];	/* fall through */
+	case 2: b += k[1];	/* fall through */
 	case 1: a += k[0];
 		__jhash_final(a, b, c);
 	case 0:	/* Nothing left to add */
-- 
cgit v1.2.3


From df39a9f106d53532443a804352894480ca6ca5fd Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 17 Jul 2017 11:42:55 -0700
Subject: bpf: check NULL for sk_to_full_sk() return value

When req->rsk_listener is NULL, sk_to_full_sk() returns
NULL too, so we have to check its return value against
NULL here.

Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops")
Reported-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 360c082e885c..d41d40ac3efd 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -85,7 +85,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled && (sock_ops)->sk) {	       \
 		typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);	       \
-		if (sk_fullsock(__sk))					       \
+		if (__sk && sk_fullsock(__sk))				       \
 			__ret = __cgroup_bpf_run_filter_sock_ops(__sk,	       \
 								 sock_ops,     \
 							 BPF_CGROUP_SOCK_OPS); \
-- 
cgit v1.2.3


From 8900b894e769dd88b53e519e3502e0e3c349fe95 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 May 2017 14:38:15 +0300
Subject: {net, IB}/mlx4: Remove gfp flags argument

The caller to the driver marks GFP_NOIO allocations with help
of memalloc_noio-* calls now. This makes redundant to pass down
to the driver gfp flags, which can be GFP_KERNEL only.

The patch removes the gfp flags argument and updates all driver paths.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx4/cq.c                    |  6 ++--
 drivers/infiniband/hw/mlx4/mlx4_ib.h               |  1 -
 drivers/infiniband/hw/mlx4/qp.c                    | 40 +++++++++-------------
 drivers/infiniband/hw/mlx4/srq.c                   |  8 ++---
 drivers/net/ethernet/mellanox/mlx4/alloc.c         | 29 ++++++++--------
 drivers/net/ethernet/mellanox/mlx4/cq.c            |  4 +--
 drivers/net/ethernet/mellanox/mlx4/en_rx.c         |  7 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/icm.c           |  7 ++--
 drivers/net/ethernet/mellanox/mlx4/icm.h           |  3 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  4 +--
 drivers/net/ethernet/mellanox/mlx4/mr.c            | 17 +++++----
 drivers/net/ethernet/mellanox/mlx4/qp.c            | 20 +++++------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  4 +--
 drivers/net/ethernet/mellanox/mlx4/srq.c           |  4 +--
 include/linux/mlx4/device.h                        | 10 +++---
 16 files changed, 76 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 4f5a143fc0a7..ff931c580557 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	int err;
 
 	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
-			     PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
+			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
 	if (err)
 		goto err_mtt;
 
@@ -219,7 +219,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 
 		uar = &to_mucontext(context)->uar;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
+		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
 		if (err)
 			goto err_cq;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c2b9cbf4da05..9db82e67e959 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -185,7 +185,6 @@ enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
-	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
 
 	/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
 	MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 996e9058e515..75c0e6c5dd56 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -634,8 +634,8 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
 
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
-			    gfp_t gfp)
+			    struct ib_udata *udata, int sqpn,
+			    struct mlx4_ib_qp **caller_qp)
 {
 	int qpn;
 	int err;
@@ -691,14 +691,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
 		    (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
 				MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
-			sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
+			sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
 			if (!sqp)
 				return -ENOMEM;
 			qp = &sqp->qp;
 			qp->pri.vid = 0xFFFF;
 			qp->alt.vid = 0xFFFF;
 		} else {
-			qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
+			qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
 			if (!qp)
 				return -ENOMEM;
 			qp->pri.vid = 0xFFFF;
@@ -780,7 +780,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err;
 
 		if (qp_has_rq(init_attr)) {
-			err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
+			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
 			if (err)
 				goto err;
 
@@ -788,7 +788,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		}
 
 		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
-				   &qp->buf, gfp)) {
+				   &qp->buf)) {
 			memcpy(&init_attr->cap, &backup_cap,
 			       sizeof(backup_cap));
 			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
@@ -797,7 +797,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 				goto err_db;
 
 			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
-					   PAGE_SIZE * 2, &qp->buf, gfp)) {
+					   PAGE_SIZE * 2, &qp->buf)) {
 				err = -ENOMEM;
 				goto err_db;
 			}
@@ -808,20 +808,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
+		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
 		if (err)
 			goto err_mtt;
 
 		qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
-					gfp | __GFP_NOWARN);
+					GFP_KERNEL | __GFP_NOWARN);
 		if (!qp->sq.wrid)
 			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
-						gfp, PAGE_KERNEL);
+						GFP_KERNEL, PAGE_KERNEL);
 		qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
-					gfp | __GFP_NOWARN);
+					GFP_KERNEL | __GFP_NOWARN);
 		if (!qp->rq.wrid)
 			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
-						gfp, PAGE_KERNEL);
+						GFP_KERNEL, PAGE_KERNEL);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -859,7 +859,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
 		qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
 
-	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
+	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
 	if (err)
 		goto err_qpn;
 
@@ -1127,10 +1127,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	int err;
 	int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
 	u16 xrcdn = 0;
-	gfp_t gfp;
 
-	gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
-		GFP_NOIO : GFP_KERNEL;
 	/*
 	 * We only support LSO, vendor flag1, and multicast loopback blocking,
 	 * and only for kernel UD QPs.
@@ -1140,8 +1137,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 					MLX4_IB_SRIOV_TUNNEL_QP |
 					MLX4_IB_SRIOV_SQP |
 					MLX4_IB_QP_NETIF |
-					MLX4_IB_QP_CREATE_ROCE_V2_GSI |
-					MLX4_IB_QP_CREATE_USE_GFP_NOIO))
+					MLX4_IB_QP_CREATE_ROCE_V2_GSI))
 		return ERR_PTR(-EINVAL);
 
 	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1154,7 +1150,6 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 			return ERR_PTR(-EINVAL);
 
 		if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
-						 MLX4_IB_QP_CREATE_USE_GFP_NOIO |
 						 MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
 						 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
 		     init_attr->qp_type != IB_QPT_UD) ||
@@ -1179,7 +1174,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_RAW_PACKET:
-		qp = kzalloc(sizeof *qp, gfp);
+		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 		qp->pri.vid = 0xFFFF;
@@ -1188,7 +1183,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_UD:
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
-				       udata, 0, &qp, gfp);
+				       udata, 0, &qp);
 		if (err) {
 			kfree(qp);
 			return ERR_PTR(err);
@@ -1217,8 +1212,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 		}
 
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
-				       sqpn,
-				       &qp, gfp);
+				       sqpn, &qp);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index e32dd58937a8..0facaf5f6d23 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -135,14 +135,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
+		err = mlx4_db_alloc(dev->dev, &srq->db, 0);
 		if (err)
 			goto err_srq;
 
 		*srq->db.db = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
-				   GFP_KERNEL)) {
+		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2,
+				   &srq->buf)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -167,7 +167,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
+		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
 		if (err)
 			goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 249a4584401a..d94b3744a5b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -578,7 +578,7 @@ out:
 }
 
 static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
-				 struct mlx4_buf *buf, gfp_t gfp)
+				 struct mlx4_buf *buf)
 {
 	dma_addr_t t;
 
@@ -587,7 +587,7 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
 	buf->page_shift   = get_order(size) + PAGE_SHIFT;
 	buf->direct.buf   =
 		dma_zalloc_coherent(&dev->persist->pdev->dev,
-				    size, &t, gfp);
+				    size, &t, GFP_KERNEL);
 	if (!buf->direct.buf)
 		return -ENOMEM;
 
@@ -607,10 +607,10 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
  *  multiple pages, so we don't require too much contiguous memory.
  */
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf, gfp_t gfp)
+		   struct mlx4_buf *buf)
 {
 	if (size <= max_direct) {
-		return mlx4_buf_direct_alloc(dev, size, buf, gfp);
+		return mlx4_buf_direct_alloc(dev, size, buf);
 	} else {
 		dma_addr_t t;
 		int i;
@@ -620,14 +620,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->npages	= buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-					   gfp);
+					   GFP_KERNEL);
 		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
 				dma_zalloc_coherent(&dev->persist->pdev->dev,
-						    PAGE_SIZE, &t, gfp);
+						    PAGE_SIZE, &t, GFP_KERNEL);
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
@@ -663,12 +663,11 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
 
-static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
-						 gfp_t gfp)
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
 {
 	struct mlx4_db_pgdir *pgdir;
 
-	pgdir = kzalloc(sizeof *pgdir, gfp);
+	pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
 	if (!pgdir)
 		return NULL;
 
@@ -676,7 +675,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
 	pgdir->bits[0] = pgdir->order0;
 	pgdir->bits[1] = pgdir->order1;
 	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-					    &pgdir->db_dma, gfp);
+					    &pgdir->db_dma, GFP_KERNEL);
 	if (!pgdir->db_page) {
 		kfree(pgdir);
 		return NULL;
@@ -716,7 +715,7 @@ found:
 	return 0;
 }
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_db_pgdir *pgdir;
@@ -728,7 +727,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp
 		if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
 			goto out;
 
-	pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp);
+	pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev);
 	if (!pgdir) {
 		ret = -ENOMEM;
 		goto out;
@@ -780,13 +779,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 {
 	int err;
 
-	err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
+	err = mlx4_db_alloc(dev, &wqres->db, 1);
 	if (err)
 		return err;
 
 	*wqres->db.db = 0;
 
-	err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL);
+	err = mlx4_buf_direct_alloc(dev, size, &wqres->buf);
 	if (err)
 		goto err_db;
 
@@ -795,7 +794,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
+	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
 	if (err)
 		goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index fa6d2354a0e9..c56a511b918e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -224,11 +224,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
 	if (*cqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &cq_table->table, *cqn);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index e5fb89505a13..436f7689a032 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1042,7 +1042,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
 	if (!context)
 		return -ENOMEM;
 
-	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
 	if (err) {
 		en_err(priv, "Failed to allocate qp #%x\n", qpn);
 		goto out;
@@ -1086,7 +1086,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
 		en_err(priv, "Failed reserving drop qpn\n");
 		return err;
 	}
-	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
 	if (err) {
 		en_err(priv, "Failed allocating drop qp\n");
 		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
@@ -1158,8 +1158,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	}
 
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp,
-			    GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
 		goto rss_err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4f3a9b27ce4a..73faa3d77921 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -111,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 		goto err_hwq_res;
 	}
 
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_reserve;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index e1f9e7cebf8f..5a7816e7c7b4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -251,8 +251,7 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
-		   gfp_t gfp)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
 {
 	u32 i = (obj & (table->num_obj - 1)) /
 			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -266,7 +265,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
 	}
 
 	table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
-				       (table->lowmem ? gfp : GFP_HIGHUSER) |
+				       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
 				       __GFP_NOWARN, table->coherent);
 	if (!table->icm[i]) {
 		ret = -ENOMEM;
@@ -363,7 +362,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 	u32 i;
 
 	for (i = start; i <= end; i += inc) {
-		err = mlx4_table_get(dev, table, i, GFP_KERNEL);
+		err = mlx4_table_get(dev, table, i);
 		if (err)
 			goto fail;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index 0c7364550150..dee67fa39107 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -71,8 +71,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 				gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
-		   gfp_t gfp);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			 u32 start, u32 end);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 30616cd0140d..706d7f21ac5c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -969,7 +969,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
 int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
 void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
@@ -977,7 +977,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
 void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
 int __mlx4_mpt_reserve(struct mlx4_dev *dev);
 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index ce852ca22a96..24282cd017d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -479,14 +479,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 	__mlx4_mpt_release(dev, index);
 }
 
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
+	return mlx4_table_get(dev, &mr_table->dmpt_table, index);
 }
 
-static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 {
 	u64 param = 0;
 
@@ -497,7 +497,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 							MLX4_CMD_TIME_CLASS_A,
 							MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_mpt_alloc_icm(dev, index, gfp);
+	return __mlx4_mpt_alloc_icm(dev, index);
 }
 
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
@@ -629,7 +629,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
 	if (err)
 		return err;
 
@@ -787,14 +787,13 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf, gfp_t gfp)
+		       struct mlx4_buf *buf)
 {
 	u64 *page_list;
 	int err;
 	int i;
 
-	page_list = kmalloc(buf->npages * sizeof *page_list,
-			    gfp);
+	page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL);
 	if (!page_list)
 		return -ENOMEM;
 
@@ -841,7 +840,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 5a310d313e94..26747212526b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -301,29 +301,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
 
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
 	int err;
 
-	err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
 	if (err)
 		goto err_put_qp;
 
-	err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
 	if (err)
 		goto err_put_auxc;
 
-	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
 	if (err)
 		goto err_put_altc;
 
-	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
 	if (err)
 		goto err_put_rdmarc;
 
@@ -345,7 +345,7 @@ err_out:
 	return err;
 }
 
-static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 {
 	u64 param = 0;
 
@@ -355,7 +355,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 				    MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_qp_alloc_icm(dev, qpn, gfp);
+	return __mlx4_qp_alloc_icm(dev, qpn);
 }
 
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
@@ -397,7 +397,7 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 	return qp;
 }
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
@@ -408,7 +408,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 
 	qp->qpn = qpn;
 
-	err = mlx4_qp_alloc_icm(dev, qpn, gfp);
+	err = mlx4_qp_alloc_icm(dev, qpn);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 812783865205..215e21c3dc8a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1822,7 +1822,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 			return err;
 
 		if (!fw_reserved(dev, qpn)) {
-			err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL);
+			err = __mlx4_qp_alloc_icm(dev, qpn);
 			if (err) {
 				res_abort_move(dev, slave, RES_QP, qpn);
 				return err;
@@ -1909,7 +1909,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 		if (err)
 			return err;
 
-		err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL);
+		err = __mlx4_mpt_alloc_icm(dev, mpt->key);
 		if (err) {
 			res_abort_move(dev, slave, RES_MPT, id);
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index f44d089e2ca6..bedf52126824 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -100,11 +100,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
 	if (*srqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &srq_table->table, *srqn);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d5bed0875d30..aad5d81dfb44 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1068,7 +1068,7 @@ static inline int mlx4_is_eth(struct mlx4_dev *dev, int port)
 }
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf, gfp_t gfp);
+		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
@@ -1105,10 +1105,9 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf, gfp_t gfp);
+		       struct mlx4_buf *buf);
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
-		  gfp_t gfp);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -1124,8 +1123,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
 			  int *base, u8 flags);
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp,
-		  gfp_t gfp);
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
-- 
cgit v1.2.3


From 8bd226f9a7dc18740a916dcba3112f2bfc3ad9e8 Mon Sep 17 00:00:00 2001
From: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Date: Sun, 25 Jun 2017 16:23:45 +0300
Subject: include: usb: audio: specify exact endiannes of descriptors

USB spec says that multiple byte fields are stored in
little-endian order (see chapter 8.1 of USB2.0 spec and
chapter 7.1 of USB3.0 spec), thus mark such fields as LE
for UAC1 and UAC2 headers

Signed-off-by: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/linux/usb/audio-v2.h   | 14 +++++++-------
 include/uapi/linux/usb/audio.h |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index c5f2158ab00e..fd73bc0e9027 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -115,13 +115,13 @@ struct uac2_input_terminal_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bTerminalID;
-	__u16 wTerminalType;
+	__le16 wTerminalType;
 	__u8 bAssocTerminal;
 	__u8 bCSourceID;
 	__u8 bNrChannels;
-	__u32 bmChannelConfig;
+	__le32 bmChannelConfig;
 	__u8 iChannelNames;
-	__u16 bmControls;
+	__le16 bmControls;
 	__u8 iTerminal;
 } __attribute__((packed));
 
@@ -132,11 +132,11 @@ struct uac2_output_terminal_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bTerminalID;
-	__u16 wTerminalType;
+	__le16 wTerminalType;
 	__u8 bAssocTerminal;
 	__u8 bSourceID;
 	__u8 bCSourceID;
-	__u16 bmControls;
+	__le16 bmControls;
 	__u8 iTerminal;
 } __attribute__((packed));
 
@@ -164,9 +164,9 @@ struct uac2_as_header_descriptor {
 	__u8 bTerminalLink;
 	__u8 bmControls;
 	__u8 bFormatType;
-	__u32 bmFormats;
+	__le32 bmFormats;
 	__u8 bNrChannels;
-	__u32 bmChannelConfig;
+	__le32 bmChannelConfig;
 	__u8 iChannelNames;
 } __attribute__((packed));
 
diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index d2314be4f0c0..a4680a5bf5dd 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -333,7 +333,7 @@ struct uac_processing_unit_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bUnitID;
-	__u16 wProcessType;
+	__le16 wProcessType;
 	__u8 bNrInPins;
 	__u8 baSourceID[];
 } __attribute__ ((packed));
@@ -491,8 +491,8 @@ struct uac_format_type_ii_ext_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bFormatType;
-	__u16 wMaxBitRate;
-	__u16 wSamplesPerFrame;
+	__le16 wMaxBitRate;
+	__le16 wSamplesPerFrame;
 	__u8 bHeaderLength;
 	__u8 bSideBandProtocol;
 } __attribute__((packed));
-- 
cgit v1.2.3


From beaec533fc2701a28a4d667f67c9f59c6e4e0d13 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Wed, 19 Jul 2017 20:27:30 +0200
Subject: llist: clang: introduce member_address_is_nonnull()

Currently llist_for_each_entry() and llist_for_each_entry_safe() iterate
until &pos->member != NULL.  But when building the kernel with Clang,
the compiler assumes &pos->member cannot be NULL if the member's offset
is greater than 0 (which would be equivalent to the object being
non-contiguous in memory).  Therefore the loop condition is always true,
and the loops become infinite.

To work around this, introduce the member_address_is_nonnull() macro,
which casts object pointer to uintptr_t, thus letting the member pointer
to be NULL.

Signed-off-by: Alexander Potapenko <glider@google.com>
Tested-by: Sodagudi Prasad <psodagud@codeaurora.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/llist.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/llist.h b/include/linux/llist.h
index d11738110a7a..1957635e6d5f 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -92,6 +92,23 @@ static inline void init_llist_head(struct llist_head *list)
 #define llist_entry(ptr, type, member)		\
 	container_of(ptr, type, member)
 
+/**
+ * member_address_is_nonnull - check whether the member address is not NULL
+ * @ptr:	the object pointer (struct type * that contains the llist_node)
+ * @member:	the name of the llist_node within the struct.
+ *
+ * This macro is conceptually the same as
+ *	&ptr->member != NULL
+ * but it works around the fact that compilers can decide that taking a member
+ * address is never a NULL pointer.
+ *
+ * Real objects that start at a high address and have a member at NULL are
+ * unlikely to exist, but such pointers may be returned e.g. by the
+ * container_of() macro.
+ */
+#define member_address_is_nonnull(ptr, member)	\
+	((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0)
+
 /**
  * llist_for_each - iterate over some deleted entries of a lock-less list
  * @pos:	the &struct llist_node to use as a loop cursor
@@ -145,7 +162,7 @@ static inline void init_llist_head(struct llist_head *list)
  */
 #define llist_for_each_entry(pos, node, member)				\
 	for ((pos) = llist_entry((node), typeof(*(pos)), member);	\
-	     &(pos)->member != NULL;					\
+	     member_address_is_nonnull(pos, member);			\
 	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
 
 /**
@@ -167,7 +184,7 @@ static inline void init_llist_head(struct llist_head *list)
  */
 #define llist_for_each_entry_safe(pos, n, node, member)			       \
 	for (pos = llist_entry((node), typeof(*pos), member);		       \
-	     &pos->member != NULL &&					       \
+	     member_address_is_nonnull(pos, member) &&			       \
 	        (n = llist_entry(pos->member.next, typeof(*n), member), true); \
 	     pos = n)
 
-- 
cgit v1.2.3


From f86f418059b94aa01f9342611a272ca60c583e89 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <zhang.chunyan@linaro.org>
Date: Wed, 7 Jun 2017 16:12:51 +0800
Subject: trace: fix the errors caused by incompatible type of RCU variables

The variables which are processed by RCU functions should be annotated
as RCU, otherwise sparse will report the errors like below:

"error: incompatible types in comparison expression (different
address spaces)"

Link: http://lkml.kernel.org/r/1496823171-7758-1-git-send-email-zhang.chunyan@linaro.org

Signed-off-by: Chunyan Zhang <zhang.chunyan@linaro.org>
[ Updated to not be 100% 80 column strict ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h       |  6 +++---
 include/linux/trace_events.h |  2 +-
 kernel/trace/ftrace.c        | 41 +++++++++++++++++++++++++++--------------
 kernel/trace/trace.h         |  6 +++---
 4 files changed, 34 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5857390ac35a..6383115e9d2c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -145,8 +145,8 @@ enum {
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
-	struct ftrace_hash		*notrace_hash;
-	struct ftrace_hash		*filter_hash;
+	struct ftrace_hash __rcu	*notrace_hash;
+	struct ftrace_hash __rcu	*filter_hash;
 	struct mutex			regex_lock;
 };
 
@@ -168,7 +168,7 @@ static inline void ftrace_free_init_mem(void) { }
  */
 struct ftrace_ops {
 	ftrace_func_t			func;
-	struct ftrace_ops		*next;
+	struct ftrace_ops __rcu		*next;
 	unsigned long			flags;
 	void				*private;
 	ftrace_func_t			saved_func;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index f73cedfa2e0b..536c80ff7ad9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -338,7 +338,7 @@ enum {
 struct trace_event_file {
 	struct list_head		list;
 	struct trace_event_call		*event_call;
-	struct event_filter		*filter;
+	struct event_filter __rcu	*filter;
 	struct dentry			*dir;
 	struct trace_array		*tr;
 	struct trace_subsystem_dir	*system;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 53f6b6401cf0..02004ae91860 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -113,7 +113,7 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
 
@@ -169,8 +169,11 @@ int ftrace_nr_registered_ops(void)
 
 	mutex_lock(&ftrace_lock);
 
-	for (ops = ftrace_ops_list;
-	     ops != &ftrace_list_end; ops = ops->next)
+	for (ops = rcu_dereference_protected(ftrace_ops_list,
+					     lockdep_is_held(&ftrace_lock));
+	     ops != &ftrace_list_end;
+	     ops = rcu_dereference_protected(ops->next,
+					     lockdep_is_held(&ftrace_lock)))
 		cnt++;
 
 	mutex_unlock(&ftrace_lock);
@@ -275,10 +278,11 @@ static void update_ftrace_function(void)
 	 * If there's only one ftrace_ops registered, the ftrace_ops_list
 	 * will point to the ops we want.
 	 */
-	set_function_trace_op = ftrace_ops_list;
+	set_function_trace_op = rcu_dereference_protected(ftrace_ops_list,
+						lockdep_is_held(&ftrace_lock));
 
 	/* If there's no ftrace_ops registered, just call the stub function */
-	if (ftrace_ops_list == &ftrace_list_end) {
+	if (set_function_trace_op == &ftrace_list_end) {
 		func = ftrace_stub;
 
 	/*
@@ -286,7 +290,8 @@ static void update_ftrace_function(void)
 	 * recursion safe and not dynamic and the arch supports passing ops,
 	 * then have the mcount trampoline call the function directly.
 	 */
-	} else if (ftrace_ops_list->next == &ftrace_list_end) {
+	} else if (rcu_dereference_protected(ftrace_ops_list->next,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		func = ftrace_ops_get_list_func(ftrace_ops_list);
 
 	} else {
@@ -348,9 +353,11 @@ int using_ftrace_ops_list_func(void)
 	return ftrace_trace_function == ftrace_ops_list_func;
 }
 
-static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static void add_ftrace_ops(struct ftrace_ops __rcu **list,
+			   struct ftrace_ops *ops)
 {
-	ops->next = *list;
+	rcu_assign_pointer(ops->next, *list);
+
 	/*
 	 * We are entering ops into the list but another
 	 * CPU might be walking that list. We need to make sure
@@ -360,7 +367,8 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	rcu_assign_pointer(*list, ops);
 }
 
-static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
+			     struct ftrace_ops *ops)
 {
 	struct ftrace_ops **p;
 
@@ -368,7 +376,10 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	 * If we are removing the last function, then simply point
 	 * to the ftrace_stub.
 	 */
-	if (*list == ops && ops->next == &ftrace_list_end) {
+	if (rcu_dereference_protected(*list,
+			lockdep_is_held(&ftrace_lock)) == ops &&
+	    rcu_dereference_protected(ops->next,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		*list = &ftrace_list_end;
 		return 0;
 	}
@@ -1569,8 +1580,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 		return 0;
 #endif
 
-	hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
-	hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
+	rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash);
+	rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash);
 
 	if (hash_contains_ip(ip, &hash))
 		ret = 1;
@@ -2840,7 +2851,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	 * If there's no more ops registered with ftrace, run a
 	 * sanity check to make sure all rec flags are cleared.
 	 */
-	if (ftrace_ops_list == &ftrace_list_end) {
+	if (rcu_dereference_protected(ftrace_ops_list,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		struct ftrace_page *pg;
 		struct dyn_ftrace *rec;
 
@@ -6453,7 +6465,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 	if (ftrace_enabled) {
 
 		/* we are starting ftrace again */
-		if (ftrace_ops_list != &ftrace_list_end)
+		if (rcu_dereference_protected(ftrace_ops_list,
+			lockdep_is_held(&ftrace_lock)) != &ftrace_list_end)
 			update_ftrace_function();
 
 		ftrace_startup_sysctl();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6ade1c55cc3a..490ba229931d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1210,9 +1210,9 @@ struct ftrace_event_field {
 struct event_filter {
 	int			n_preds;	/* Number assigned */
 	int			a_preds;	/* allocated */
-	struct filter_pred	*preds;
-	struct filter_pred	*root;
-	char			*filter_string;
+	struct filter_pred __rcu	*preds;
+	struct filter_pred __rcu	*root;
+	char				*filter_string;
 };
 
 struct event_subsystem {
-- 
cgit v1.2.3


From dc1a0afbacaeaced8f5679a99047c0467f1099e9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 Jul 2017 11:12:09 +0200
Subject: nvme: fix byte swapping in the streams code

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c |  2 +-
 include/linux/nvme.h     | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cb96f4a7ae3a..3b77cfe5aa1e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
 
 	c.directive.opcode = nvme_admin_directive_recv;
 	c.directive.nsid = cpu_to_le32(nsid);
-	c.directive.numd = sizeof(*s);
+	c.directive.numd = cpu_to_le32(sizeof(*s));
 	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
 	c.directive.dtype = NVME_DIR_STREAMS;
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6b8ee9e628e1..bc74da018bdc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -963,14 +963,14 @@ struct nvme_dbbuf {
 };
 
 struct streams_directive_params {
-	__u16	msl;
-	__u16	nssa;
-	__u16	nsso;
+	__le16	msl;
+	__le16	nssa;
+	__le16	nsso;
 	__u8	rsvd[10];
-	__u32	sws;
-	__u16	sgs;
-	__u16	nsa;
-	__u16	nso;
+	__le32	sws;
+	__le16	sgs;
+	__le16	nsa;
+	__le16	nso;
 	__u8	rsvd2[6];
 };
 
-- 
cgit v1.2.3


From 4cabc5b186b5427b9ee5a7495172542af105f02b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 Jul 2017 00:00:21 +0200
Subject: bpf: fix mixed signed/unsigned derived min/max value bounds

Edward reported that there's an issue in min/max value bounds
tracking when signed and unsigned compares both provide hints
on limits when having unknown variables. E.g. a program such
as the following should have been rejected:

   0: (7a) *(u64 *)(r10 -8) = 0
   1: (bf) r2 = r10
   2: (07) r2 += -8
   3: (18) r1 = 0xffff8a94cda93400
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+7
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
   7: (7a) *(u64 *)(r10 -16) = -8
   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = -1
  10: (2d) if r1 > r2 goto pc+3
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0
  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
  11: (65) if r1 s> 0x1 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1
  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
  12: (0f) r0 += r1
  13: (72) *(u8 *)(r0 +0) = 0
  R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1
  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
  14: (b7) r0 = 0
  15: (95) exit

What happens is that in the first part ...

   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = -1
  10: (2d) if r1 > r2 goto pc+3

... r1 carries an unsigned value, and is compared as unsigned
against a register carrying an immediate. Verifier deduces in
reg_set_min_max() that since the compare is unsigned and operation
is greater than (>), that in the fall-through/false case, r1's
minimum bound must be 0 and maximum bound must be r2. Latter is
larger than the bound and thus max value is reset back to being
'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now
'R1=inv,min_value=0'. The subsequent test ...

  11: (65) if r1 s> 0x1 goto pc+2

... is a signed compare of r1 with immediate value 1. Here,
verifier deduces in reg_set_min_max() that since the compare
is signed this time and operation is greater than (>), that
in the fall-through/false case, we can deduce that r1's maximum
bound must be 1, meaning with prior test, we result in r1 having
the following state: R1=inv,min_value=0,max_value=1. Given that
the actual value this holds is -8, the bounds are wrongly deduced.
When this is being added to r0 which holds the map_value(_adj)
type, then subsequent store access in above case will go through
check_mem_access() which invokes check_map_access_adj(), that
will then probe whether the map memory is in bounds based
on the min_value and max_value as well as access size since
the actual unknown value is min_value <= x <= max_value; commit
fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{,
_adj} register types") provides some more explanation on the
semantics.

It's worth to note in this context that in the current code,
min_value and max_value tracking are used for two things, i)
dynamic map value access via check_map_access_adj() and since
commit 06c1c049721a ("bpf: allow helpers access to variable memory")
ii) also enforced at check_helper_mem_access() when passing a
memory address (pointer to packet, map value, stack) and length
pair to a helper and the length in this case is an unknown value
defining an access range through min_value/max_value in that
case. The min_value/max_value tracking is /not/ used in the
direct packet access case to track ranges. However, the issue
also affects case ii), for example, the following crafted program
based on the same principle must be rejected as well:

   0: (b7) r2 = 0
   1: (bf) r3 = r10
   2: (07) r3 += -512
   3: (7a) *(u64 *)(r10 -16) = -8
   4: (79) r4 = *(u64 *)(r10 -16)
   5: (b7) r6 = -1
   6: (2d) if r4 > r6 goto pc+5
  R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
  R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
   7: (65) if r4 s> 0x1 goto pc+4
  R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
  R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1
  R10=fp
   8: (07) r4 += 1
   9: (b7) r5 = 0
  10: (6a) *(u16 *)(r10 -512) = 0
  11: (85) call bpf_skb_load_bytes#26
  12: (b7) r0 = 0
  13: (95) exit

Meaning, while we initialize the max_value stack slot that the
verifier thinks we access in the [1,2] range, in reality we
pass -7 as length which is interpreted as u32 in the helper.
Thus, this issue is relevant also for the case of helper ranges.
Resetting both bounds in check_reg_overflow() in case only one
of them exceeds limits is also not enough as similar test can be
created that uses values which are within range, thus also here
learned min value in r1 is incorrect when mixed with later signed
test to create a range:

   0: (7a) *(u64 *)(r10 -8) = 0
   1: (bf) r2 = r10
   2: (07) r2 += -8
   3: (18) r1 = 0xffff880ad081fa00
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+7
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
   7: (7a) *(u64 *)(r10 -16) = -8
   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = 2
  10: (3d) if r2 >= r1 goto pc+3
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  11: (65) if r1 s> 0x4 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
  R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  12: (0f) r0 += r1
  13: (72) *(u8 *)(r0 +0) = 0
  R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4
  R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  14: (b7) r0 = 0
  15: (95) exit

This leaves us with two options for fixing this: i) to invalidate
all prior learned information once we switch signed context, ii)
to track min/max signed and unsigned boundaries separately as
done in [0]. (Given latter introduces major changes throughout
the whole verifier, it's rather net-next material, thus this
patch follows option i), meaning we can derive bounds either
from only signed tests or only unsigned tests.) There is still the
case of adjust_reg_min_max_vals(), where we adjust bounds on ALU
operations, meaning programs like the following where boundaries
on the reg get mixed in context later on when bounds are merged
on the dst reg must get rejected, too:

   0: (7a) *(u64 *)(r10 -8) = 0
   1: (bf) r2 = r10
   2: (07) r2 += -8
   3: (18) r1 = 0xffff89b2bf87ce00
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+6
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
   7: (7a) *(u64 *)(r10 -16) = -8
   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = 2
  10: (3d) if r2 >= r1 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  11: (b7) r7 = 1
  12: (65) if r7 s> 0x0 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp
  13: (b7) r0 = 0
  14: (95) exit

  from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
  R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp
  15: (0f) r7 += r1
  16: (65) if r7 s> 0x4 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
  17: (0f) r0 += r7
  18: (72) *(u8 *)(r0 +0) = 0
  R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
  19: (b7) r0 = 0
  20: (95) exit

Meaning, in adjust_reg_min_max_vals() we must also reset range
values on the dst when src/dst registers have mixed signed/
unsigned derived min/max value bounds with one unbounded value
as otherwise they can be added together deducing false boundaries.
Once both boundaries are established from either ALU ops or
compare operations w/o mixing signed/unsigned insns, then they
can safely be added to other regs also having both boundaries
established. Adding regs with one unbounded side to a map value
where the bounded side has been learned w/o mixing ops is
possible, but the resulting map value won't recover from that,
meaning such op is considered invalid on the time of actual
access. Invalid bounds are set on the dst reg in case i) src reg,
or ii) in case dst reg already had them. The only way to recover
would be to perform i) ALU ops but only 'add' is allowed on map
value types or ii) comparisons, but these are disallowed on
pointers in case they span a range. This is fine as only BPF_JEQ
and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers
which potentially turn them into PTR_TO_MAP_VALUE type depending
on the branch, so only here min/max value cannot be invalidated
for them.

In terms of state pruning, value_from_signed is considered
as well in states_equal() when dealing with adjusted map values.
With regards to breaking existing programs, there is a small
risk, but use-cases are rather quite narrow where this could
occur and mixing compares probably unlikely.

Joint work with Josef and Edward.

  [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html

Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Reported-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_verifier.h |   1 +
 kernel/bpf/verifier.c        | 108 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 95 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 621076f56251..8e5d31f6faef 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -43,6 +43,7 @@ struct bpf_reg_state {
 	u32 min_align;
 	u32 aux_off;
 	u32 aux_off_align;
+	bool value_from_signed;
 };
 
 enum bpf_stack_slot_type {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6a86723c5b64..af9e84a4944e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 {
 	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+	regs[regno].value_from_signed = false;
 	regs[regno].min_align = 0;
 }
 
@@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	return -EACCES;
 }
 
-static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+static bool __is_pointer_value(bool allow_ptr_leaks,
+			       const struct bpf_reg_state *reg)
 {
-	if (env->allow_ptr_leaks)
+	if (allow_ptr_leaks)
 		return false;
 
-	switch (env->cur_state.regs[regno].type) {
+	switch (reg->type) {
 	case UNKNOWN_VALUE:
 	case CONST_IMM:
 		return false;
@@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	}
 }
 
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+{
+	return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+}
+
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
 {
@@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	dst_align = dst_reg->min_align;
 
 	/* We don't know anything about what was done to this register, mark it
-	 * as unknown.
+	 * as unknown. Also, if both derived bounds came from signed/unsigned
+	 * mixed compares and one side is unbounded, we cannot really do anything
+	 * with them as boundaries cannot be trusted. Thus, arithmetic of two
+	 * regs of such kind will get invalidated bounds on the dst side.
 	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE &&
-	    max_val == BPF_REGISTER_MAX_RANGE) {
+	if ((min_val == BPF_REGISTER_MIN_RANGE &&
+	     max_val == BPF_REGISTER_MAX_RANGE) ||
+	    (BPF_SRC(insn->code) == BPF_X &&
+	     ((min_val != BPF_REGISTER_MIN_RANGE &&
+	       max_val == BPF_REGISTER_MAX_RANGE) ||
+	      (min_val == BPF_REGISTER_MIN_RANGE &&
+	       max_val != BPF_REGISTER_MAX_RANGE) ||
+	      (dst_reg->min_value != BPF_REGISTER_MIN_RANGE &&
+	       dst_reg->max_value == BPF_REGISTER_MAX_RANGE) ||
+	      (dst_reg->min_value == BPF_REGISTER_MIN_RANGE &&
+	       dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) &&
+	     regs[insn->dst_reg].value_from_signed !=
+	     regs[insn->src_reg].value_from_signed)) {
 		reset_reg_range_values(regs, insn->dst_reg);
 		return;
 	}
@@ -2023,6 +2044,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
 			regs[insn->dst_reg].min_align = calc_align(insn->imm);
+			regs[insn->dst_reg].value_from_signed = false;
 		}
 
 	} else if (opcode > BPF_END) {
@@ -2198,40 +2220,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
 			    u8 opcode)
 {
+	bool value_from_signed = true;
+	bool is_range = true;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		/* If this is false then we know nothing Jon Snow, but if it is
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGT:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGT) {
+			/* Unsigned comparison, the minimum value is 0. */
+			false_reg->min_value = 0;
+		}
 		/* If this is false then we know the maximum val is val,
 		 * otherwise we know the min val is val+1.
 		 */
 		false_reg->max_value = val;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->min_value = val + 1;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGE:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGE) {
+			/* Unsigned comparison, the minimum value is 0. */
+			false_reg->min_value = 0;
+		}
 		/* If this is false then we know the maximum value is val - 1,
 		 * otherwise we know the mimimum value is val.
 		 */
 		false_reg->max_value = val - 1;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->min_value = val;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	default:
 		break;
@@ -2239,6 +2284,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 
 	check_reg_overflow(false_reg);
 	check_reg_overflow(true_reg);
+	if (is_range) {
+		if (__is_pointer_value(false, false_reg))
+			reset_reg_range_values(false_reg, 0);
+		if (__is_pointer_value(false, true_reg))
+			reset_reg_range_values(true_reg, 0);
+	}
 }
 
 /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
@@ -2248,41 +2299,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
 				u8 opcode)
 {
+	bool value_from_signed = true;
+	bool is_range = true;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		/* If this is false then we know nothing Jon Snow, but if it is
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGT:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGT) {
+			/* Unsigned comparison, the minimum value is 0. */
+			true_reg->min_value = 0;
+		}
 		/*
 		 * If this is false, then the val is <= the register, if it is
 		 * true the register <= to the val.
 		 */
 		false_reg->min_value = val;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->max_value = val - 1;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGE:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGE) {
+			/* Unsigned comparison, the minimum value is 0. */
+			true_reg->min_value = 0;
+		}
 		/* If this is false then constant < register, if it is true then
 		 * the register < constant.
 		 */
 		false_reg->min_value = val + 1;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->max_value = val;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	default:
 		break;
@@ -2290,6 +2364,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 
 	check_reg_overflow(false_reg);
 	check_reg_overflow(true_reg);
+	if (is_range) {
+		if (__is_pointer_value(false, false_reg))
+			reset_reg_range_values(false_reg, 0);
+		if (__is_pointer_value(false, true_reg))
+			reset_reg_range_values(true_reg, 0);
+	}
 }
 
 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
-- 
cgit v1.2.3


From bd8b2441742b49c76bec707757bd9c028ea9838e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:54:34 -0400
Subject: NFS: Store the raw NFS access mask in the inode's access cache

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c           | 9 ++++++---
 include/linux/nfs_fs.h | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 24b3a6748062..8fae8b00b8f5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2399,7 +2399,7 @@ nfs_access_calc_mask(u32 access_result)
 
 void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
 {
-	entry->mask = nfs_access_calc_mask(access_result);
+	entry->mask = access_result;
 }
 EXPORT_SYMBOL_GPL(nfs_access_set_mask);
 
@@ -2407,6 +2407,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 {
 	struct nfs_access_entry cache;
 	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
+	int cache_mask;
 	int status;
 
 	trace_nfs_access_enter(inode);
@@ -2422,7 +2423,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 		goto out;
 
 	/* Be clever: ask server to check for all possible rights */
-	cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
+	cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE
+		     | NFS_MAY_WRITE | NFS_MAY_READ;
 	cache.cred = cred;
 	cache.jiffies = jiffies;
 	status = NFS_PROTO(inode)->access(inode, &cache);
@@ -2436,7 +2438,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 	}
 	nfs_access_add_cache(inode, &cache);
 out_cached:
-	if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
+	cache_mask = nfs_access_calc_mask(cache.mask);
+	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
 		status = -EACCES;
 out:
 	trace_nfs_access_exit(inode, status);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index e52cc55ac300..5cc91d6381a3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -51,7 +51,7 @@ struct nfs_access_entry {
 	struct list_head	lru;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
-	int			mask;
+	__u32			mask;
 	struct rcu_head		rcu_head;
 };
 
-- 
cgit v1.2.3