summaryrefslogtreecommitdiff
path: root/tools/hv
diff options
context:
space:
mode:
authorDexuan Cui <decui@microsoft.com>2020-01-25 21:49:41 -0800
committerSasha Levin <sashal@kernel.org>2020-01-26 22:10:10 -0500
commit9fc3c01a1fae669a2ef9f13ee1e1a26e057d79f8 (patch)
treeabfa8d6dacc777830d4c10eafbfd5497460e4402 /tools/hv
parent3a6fb6c4255c3893ab61e2bd4e9ae01ca6bbcd94 (diff)
downloadlwn-9fc3c01a1fae669a2ef9f13ee1e1a26e057d79f8.tar.gz
lwn-9fc3c01a1fae669a2ef9f13ee1e1a26e057d79f8.zip
Tools: hv: Reopen the devices if read() or write() returns errors
The state machine in the hv_utils driver can run out of order in some corner cases, e.g. if the kvp daemon doesn't call write() fast enough due to some reason, kvp_timeout_func() can run first and move the state to HVUTIL_READY; next, when kvp_on_msg() is called it returns -EINVAL since kvp_transaction.state is smaller than HVUTIL_USERSPACE_REQ; later, the daemon's write() gets an error -EINVAL, and the daemon will exit(). We can reproduce the issue by sending a SIGSTOP signal to the daemon, wait for 1 minute, and send a SIGCONT signal to the daemon: the daemon will exit() quickly. We can fix the issue by forcing a reset of the device (which means the daemon can close() and open() the device again) and doing extra necessary clean-up. Signed-off-by: Dexuan Cui <decui@microsoft.com> Reviewed-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'tools/hv')
-rw-r--r--tools/hv/hv_fcopy_daemon.c37
-rw-r--r--tools/hv/hv_kvp_daemon.c36
-rw-r--r--tools/hv/hv_vss_daemon.c49
3 files changed, 91 insertions, 31 deletions
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index aea2d91ab364..16d629b22c25 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg)
error = 0;
done:
+ if (error)
+ target_fname[0] = '\0';
return error;
}
@@ -108,15 +110,29 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg)
return ret;
}
+/*
+ * Reset target_fname to "" in the two below functions for hibernation: if
+ * the fcopy operation is aborted by hibernation, the daemon should remove the
+ * partially-copied file; to achieve this, the hv_utils driver always fakes a
+ * CANCEL_FCOPY message upon suspend, and later when the VM resumes back,
+ * the daemon calls hv_copy_cancel() to remove the file; if a file is copied
+ * successfully before suspend, hv_copy_finished() must reset target_fname to
+ * avoid that the file can be incorrectly removed upon resume, since the faked
+ * CANCEL_FCOPY message is spurious in this case.
+ */
static int hv_copy_finished(void)
{
close(target_fd);
+ target_fname[0] = '\0';
return 0;
}
static int hv_copy_cancel(void)
{
close(target_fd);
- unlink(target_fname);
+ if (strlen(target_fname) > 0) {
+ unlink(target_fname);
+ target_fname[0] = '\0';
+ }
return 0;
}
@@ -131,7 +147,7 @@ void print_usage(char *argv[])
int main(int argc, char *argv[])
{
- int fcopy_fd;
+ int fcopy_fd = -1;
int error;
int daemonize = 1, long_index = 0, opt;
int version = FCOPY_CURRENT_VERSION;
@@ -141,7 +157,7 @@ int main(int argc, char *argv[])
struct hv_do_fcopy copy;
__u32 kernel_modver;
} buffer = { };
- int in_handshake = 1;
+ int in_handshake;
static struct option long_options[] = {
{"help", no_argument, 0, 'h' },
@@ -170,6 +186,12 @@ int main(int argc, char *argv[])
openlog("HV_FCOPY", 0, LOG_USER);
syslog(LOG_INFO, "starting; pid is:%d", getpid());
+reopen_fcopy_fd:
+ if (fcopy_fd != -1)
+ close(fcopy_fd);
+ /* Remove any possible partially-copied file on error */
+ hv_copy_cancel();
+ in_handshake = 1;
fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
if (fcopy_fd < 0) {
@@ -196,7 +218,7 @@ int main(int argc, char *argv[])
len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
if (len < 0) {
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
- exit(EXIT_FAILURE);
+ goto reopen_fcopy_fd;
}
if (in_handshake) {
@@ -231,9 +253,14 @@ int main(int argc, char *argv[])
}
+ /*
+ * pwrite() may return an error due to the faked CANCEL_FCOPY
+ * message upon hibernation. Ignore the error by resetting the
+ * dev file, i.e. closing and re-opening it.
+ */
if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
- exit(EXIT_FAILURE);
+ goto reopen_fcopy_fd;
}
}
}
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index e9ef4ca6a655..ee9c1bb2293e 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,7 +76,7 @@ enum {
DNS
};
-static int in_hand_shake = 1;
+static int in_hand_shake;
static char *os_name = "";
static char *os_major = "";
@@ -1360,7 +1360,7 @@ void print_usage(char *argv[])
int main(int argc, char *argv[])
{
- int kvp_fd, len;
+ int kvp_fd = -1, len;
int error;
struct pollfd pfd;
char *p;
@@ -1400,14 +1400,6 @@ int main(int argc, char *argv[])
openlog("KVP", 0, LOG_USER);
syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
- kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
-
- if (kvp_fd < 0) {
- syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
- errno, strerror(errno));
- exit(EXIT_FAILURE);
- }
-
/*
* Retrieve OS release information.
*/
@@ -1423,6 +1415,18 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
+reopen_kvp_fd:
+ if (kvp_fd != -1)
+ close(kvp_fd);
+ in_hand_shake = 1;
+ kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
+
+ if (kvp_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
/*
* Register ourselves with the kernel.
*/
@@ -1456,9 +1460,7 @@ int main(int argc, char *argv[])
if (len != sizeof(struct hv_kvp_msg)) {
syslog(LOG_ERR, "read failed; error:%d %s",
errno, strerror(errno));
-
- close(kvp_fd);
- return EXIT_FAILURE;
+ goto reopen_kvp_fd;
}
/*
@@ -1617,13 +1619,17 @@ int main(int argc, char *argv[])
break;
}
- /* Send the value back to the kernel. */
+ /*
+ * Send the value back to the kernel. Note: the write() may
+ * return an error due to hibernation; we can ignore the error
+ * by resetting the dev file, i.e. closing and re-opening it.
+ */
kvp_done:
len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
if (len != sizeof(struct hv_kvp_msg)) {
syslog(LOG_ERR, "write failed; error: %d %s", errno,
strerror(errno));
- exit(EXIT_FAILURE);
+ goto reopen_kvp_fd;
}
}
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 92902a88f671..dd111870beee 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -28,6 +28,8 @@
#include <stdbool.h>
#include <dirent.h>
+static bool fs_frozen;
+
/* Don't use syslog() in the function since that can cause write to disk */
static int vss_do_freeze(char *dir, unsigned int cmd)
{
@@ -155,18 +157,27 @@ static int vss_operate(int operation)
continue;
}
error |= vss_do_freeze(ent->mnt_dir, cmd);
- if (error && operation == VSS_OP_FREEZE)
- goto err;
+ if (operation == VSS_OP_FREEZE) {
+ if (error)
+ goto err;
+ fs_frozen = true;
+ }
}
endmntent(mounts);
if (root_seen) {
error |= vss_do_freeze("/", cmd);
- if (error && operation == VSS_OP_FREEZE)
- goto err;
+ if (operation == VSS_OP_FREEZE) {
+ if (error)
+ goto err;
+ fs_frozen = true;
+ }
}
+ if (operation == VSS_OP_THAW && !error)
+ fs_frozen = false;
+
goto out;
err:
save_errno = errno;
@@ -175,6 +186,7 @@ err:
endmntent(mounts);
}
vss_operate(VSS_OP_THAW);
+ fs_frozen = false;
/* Call syslog after we thaw all filesystems */
if (ent)
syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
@@ -196,13 +208,13 @@ void print_usage(char *argv[])
int main(int argc, char *argv[])
{
- int vss_fd, len;
+ int vss_fd = -1, len;
int error;
struct pollfd pfd;
int op;
struct hv_vss_msg vss_msg[1];
int daemonize = 1, long_index = 0, opt;
- int in_handshake = 1;
+ int in_handshake;
__u32 kernel_modver;
static struct option long_options[] = {
@@ -232,6 +244,18 @@ int main(int argc, char *argv[])
openlog("Hyper-V VSS", 0, LOG_USER);
syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
+reopen_vss_fd:
+ if (vss_fd != -1)
+ close(vss_fd);
+ if (fs_frozen) {
+ if (vss_operate(VSS_OP_THAW) || fs_frozen) {
+ syslog(LOG_ERR, "failed to thaw file system: err=%d",
+ errno);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ in_handshake = 1;
vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
if (vss_fd < 0) {
syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
@@ -284,8 +308,7 @@ int main(int argc, char *argv[])
if (len != sizeof(struct hv_vss_msg)) {
syslog(LOG_ERR, "read failed; error:%d %s",
errno, strerror(errno));
- close(vss_fd);
- return EXIT_FAILURE;
+ goto reopen_vss_fd;
}
op = vss_msg->vss_hdr.operation;
@@ -312,14 +335,18 @@ int main(int argc, char *argv[])
default:
syslog(LOG_ERR, "Illegal op:%d\n", op);
}
+
+ /*
+ * The write() may return an error due to the faked VSS_OP_THAW
+ * message upon hibernation. Ignore the error by resetting the
+ * dev file, i.e. closing and re-opening it.
+ */
vss_msg->error = error;
len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
if (len != sizeof(struct hv_vss_msg)) {
syslog(LOG_ERR, "write failed; error: %d %s", errno,
strerror(errno));
-
- if (op == VSS_OP_FREEZE)
- vss_operate(VSS_OP_THAW);
+ goto reopen_vss_fd;
}
}