diff --git a/resources/patches/vmclock/5.10/0001-ptp-vmclock-add-vm-generation-counter.patch b/resources/patches/vmclock/5.10/0001-ptp-vmclock-add-vm-generation-counter.patch new file mode 100644 index 00000000000..28588e1c924 --- /dev/null +++ b/resources/patches/vmclock/5.10/0001-ptp-vmclock-add-vm-generation-counter.patch @@ -0,0 +1,60 @@ +From a46562c571c6d50e7afc3994b33d0ffb61ff7409 Mon Sep 17 00:00:00 2001 +From: Babis Chalios +Date: Tue, 2 Dec 2025 20:11:32 +0000 +Subject: [PATCH 1/4] ptp: vmclock: add vm generation counter + +Similar to live migration, loading a VM from some saved state (aka +snapshot) is also an event that calls for clock adjustments in the +guest. However, guests might want to take more actions as a response to +such events, e.g. discarding UUIDs, resetting network connections, +reseeding entropy pools, etc. These are actions that guests don't +typically take during live migration, so add a new field in the +vmclock_abi called vm_generation_counter which informs the guest about +such events. + +Hypervisor advertises support for vm_generation_counter through the +VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT flag. Users need to check the +presence of this bit in vmclock_abi flags field before using this field. + +Signed-off-by: Babis Chalios +Reviewed-by: David Woodhouse +--- + include/uapi/linux/vmclock-abi.h | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h +index d7ca44313bf8..75deb6ae2b27 100644 +--- a/include/uapi/linux/vmclock-abi.h ++++ b/include/uapi/linux/vmclock-abi.h +@@ -119,6 +119,12 @@ struct vmclock_abi { + * bit again after the update, using the about-to-be-valid fields. + */ + #define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) ++ /* ++ * If the VM_GEN_COUNTER_PRESENT flag is set, the hypervisor will ++ * bump the vm_generation_counter field every time the guest is ++ * loaded from some save state (restored from a snapshot). 
++ */ ++#define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) + + uint8_t pad[2]; + uint8_t clock_status; +@@ -183,6 +189,15 @@ struct vmclock_abi { + uint64_t time_frac_sec; /* (seconds >> 64) */ + uint64_t time_esterror_picosec; /* (± picoseconds) */ + uint64_t time_maxerror_picosec; /* (± picoseconds) */ ++ ++ /* ++ * This field changes to another non-repeating value when the guest ++ * has been loaded from a snapshot. In addition to handling a ++ * disruption in time (which will also be signalled through the ++ * disruption_marker field), a guest may wish to discard UUIDs, ++ * reset network connections, reseed entropy, etc. ++ */ ++ uint64_t vm_generation_counter; + }; + + #endif /* __VMCLOCK_ABI_H__ */ +-- +2.34.1 + diff --git a/resources/patches/vmclock/5.10/0002-ptp-vmclock-support-device-notifications.patch b/resources/patches/vmclock/5.10/0002-ptp-vmclock-support-device-notifications.patch new file mode 100644 index 00000000000..f9cde8c7242 --- /dev/null +++ b/resources/patches/vmclock/5.10/0002-ptp-vmclock-support-device-notifications.patch @@ -0,0 +1,257 @@ +From d0a6bf47dd6cd2a9ed17dbdc32dd34a6ba0f5b5f Mon Sep 17 00:00:00 2001 +From: Babis Chalios +Date: Tue, 2 Dec 2025 20:11:44 +0000 +Subject: [PATCH 2/4] ptp: vmclock: support device notifications + +Add optional support for device notifications in VMClock. When +supported, the hypervisor will send a device notification every time it +updates the seq_count to a new even value. + +Moreover, add support for poll() in VMClock as a means to propagate this +notification to user space. poll() will return a POLLIN event to +listeners every time seq_count changes to a value different than the one +last seen (since open() or last read()/pread()). This means that when +poll() returns a POLLIN event, listeners need to use read() to observe +what has changed and update the reader's view of seq_count. 
In other +words, after poll() has returned, all subsequent calls to poll() will +immediately return with a POLLIN event until the listener calls read(). + +The device advertises support for the notification mechanism by setting +flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If +the flag is not present the driver won't set up the ACPI notification +handler and poll() will always immediately return POLLHUP. + +Signed-off-by: Babis Chalios +--- + drivers/ptp/ptp_vmclock.c | 130 ++++++++++++++++++++++++++++--- + include/uapi/linux/vmclock-abi.h | 5 ++ + 2 files changed, 126 insertions(+), 9 deletions(-) + +diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c +index 1ce69eada4b2..4673915c43e7 100644 +--- a/drivers/ptp/ptp_vmclock.c ++++ b/drivers/ptp/ptp_vmclock.c +@@ -5,6 +5,9 @@ + * Copyright © 2024 Amazon.com, Inc. or its affiliates. + */ + ++#include "linux/poll.h" ++#include "linux/types.h" ++#include "linux/wait.h" + #include + #include + #include +@@ -37,6 +40,7 @@ struct vmclock_state { + struct resource res; + struct vmclock_abi *clk; + struct miscdevice miscdev; ++ wait_queue_head_t disrupt_wait; + struct ptp_clock_info ptp_clock_info; + struct ptp_clock *ptp_clock; + enum clocksource_ids cs_id, sys_cs_id; +@@ -311,10 +315,15 @@ static const struct ptp_clock_info ptp_vmclock_info = { + .getcrosststamp = ptp_vmclock_getcrosststamp, + }; + ++struct vmclock_file_state { ++ struct vmclock_state *st; ++ atomic_t seq; ++}; ++ + static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) + { +- struct vmclock_state *st = container_of(fp->private_data, +- struct vmclock_state, miscdev); ++ struct vmclock_file_state *fst = fp->private_data; ++ struct vmclock_state *st = fst->st; + + if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ) + return -EROFS; +@@ -333,11 +342,12 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) + static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, 
+ size_t count, loff_t *ppos) + { +- struct vmclock_state *st = container_of(fp->private_data, +- struct vmclock_state, miscdev); ++ struct vmclock_file_state *fst = fp->private_data; ++ struct vmclock_state *st = fst->st; ++ + ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); + size_t max_count; +- int32_t seq; ++ int32_t seq, old_seq; + + if (*ppos >= PAGE_SIZE) + return 0; +@@ -346,6 +356,7 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + if (count > max_count) + count = max_count; + ++ old_seq = atomic_read(&fst->seq); + while (1) { + seq = st->clk->seq_count & ~1ULL; + virt_rmb(); +@@ -354,8 +365,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + return -EFAULT; + + virt_rmb(); +- if (seq == st->clk->seq_count) +- break; ++ if (seq == st->clk->seq_count) { ++ /* ++ * Either we updated fst->seq to seq (the latest version we observed) ++ * or someone else did (old_seq == seq), so we can break. ++ */ ++ if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) || ++ old_seq == seq) { ++ break; ++ } ++ } + + if (ktime_after(ktime_get(), deadline)) + return -ETIMEDOUT; +@@ -365,9 +384,57 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + return count; + } + ++static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait) ++{ ++ struct vmclock_file_state *fst = fp->private_data; ++ struct vmclock_state *st = fst->st; ++ uint32_t seq; ++ ++ /* ++ * Hypervisor will not send us any notifications, so fail immediately ++ * to avoid having caller sleeping for ever. 
++ */ ++ if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) ++ return POLLHUP; ++ ++ poll_wait(fp, &st->disrupt_wait, wait); ++ ++ seq = st->clk->seq_count; ++ if (atomic_read(&fst->seq) != seq) ++ return POLLIN | POLLRDNORM; ++ ++ return 0; ++} ++ ++static int vmclock_miscdev_open(struct inode *inode, struct file *fp) ++{ ++ struct vmclock_state *st = container_of(fp->private_data, ++ struct vmclock_state, miscdev); ++ struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL); ++ ++ if (!fst) ++ return -ENOMEM; ++ ++ fst->st = st; ++ atomic_set(&fst->seq, 0); ++ ++ fp->private_data = fst; ++ ++ return 0; ++} ++ ++static int vmclock_miscdev_release(struct inode *inode, struct file *fp) ++{ ++ kfree(fp->private_data); ++ return 0; ++} ++ + static const struct file_operations vmclock_miscdev_fops = { +- .mmap = vmclock_miscdev_mmap, +- .read = vmclock_miscdev_read, ++ .open = vmclock_miscdev_open, ++ .release = vmclock_miscdev_release, ++ .mmap = vmclock_miscdev_mmap, ++ .read = vmclock_miscdev_read, ++ .poll = vmclock_miscdev_poll, + }; + + /* module operations */ +@@ -413,6 +480,44 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data + return AE_ERROR; + } + ++static void ++vmclock_acpi_notification_handler(acpi_handle __always_unused handle, ++ u32 __always_unused event, void *dev) ++{ ++ struct device *device = dev; ++ struct vmclock_state *st = device->driver_data; ++ ++ wake_up_interruptible(&st->disrupt_wait); ++} ++ ++static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) ++{ ++ struct acpi_device *adev = ACPI_COMPANION(dev); ++ acpi_status status; ++ ++ /* ++ * This should never happen as this function is only called when ++ * has_acpi_companion(dev) is true, but the logic is sufficiently ++ * complex that Coverity can't see the tautology. ++ */ ++ if (!adev) ++ return -ENODEV; ++ ++ /* The device does not support notifications. 
Nothing else to do */ ++ if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) ++ return 0; ++ ++ status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, ++ vmclock_acpi_notification_handler, ++ dev); ++ if (ACPI_FAILURE(status)) { ++ dev_err(dev, "failed to install notification handler"); ++ return -ENODEV; ++ } ++ ++ return 0; ++} ++ + static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) + { + struct acpi_device *adev = ACPI_COMPANION(dev); +@@ -495,6 +600,11 @@ static int vmclock_probe(struct platform_device *pdev) + goto out; + } + ++ init_waitqueue_head(&st->disrupt_wait); ++ ret = vmclock_setup_notification(dev, st); ++ if (ret) ++ return ret; ++ + /* If the structure is big enough, it can be mapped to userspace */ + if (st->clk->size >= PAGE_SIZE) { + st->miscdev.minor = MISC_DYNAMIC_MINOR; +@@ -544,6 +654,8 @@ static int vmclock_probe(struct platform_device *pdev) + goto out; + } + ++ dev->driver_data = st; ++ + dev_info(dev, "%s: registered %s%s%s\n", st->name, + st->miscdev.minor ? "miscdev" : "", + (st->miscdev.minor && st->ptp_clock) ? ", " : "", +diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h +index 75deb6ae2b27..4b7cd2b8532c 100644 +--- a/include/uapi/linux/vmclock-abi.h ++++ b/include/uapi/linux/vmclock-abi.h +@@ -125,6 +125,11 @@ struct vmclock_abi { + * loaded from some save state (restored from a snapshot). + */ + #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) ++ /* ++ * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send ++ * a notification every time it updates seq_count to a new even number. 
++ */ ++#define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) + + uint8_t pad[2]; + uint8_t clock_status; +-- +2.34.1 + diff --git a/resources/patches/vmclock/5.10/0003-dt-bindings-ptp-Add-amazon-vmclock.patch b/resources/patches/vmclock/5.10/0003-dt-bindings-ptp-Add-amazon-vmclock.patch new file mode 100644 index 00000000000..67fea022740 --- /dev/null +++ b/resources/patches/vmclock/5.10/0003-dt-bindings-ptp-Add-amazon-vmclock.patch @@ -0,0 +1,76 @@ +From d594b01069fb6fabb068379b59bd26e59dbd6661 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 2 Dec 2025 20:11:55 +0000 +Subject: [PATCH 3/4] dt-bindings: ptp: Add amazon,vmclock + +The vmclock device provides a PTP clock source and precise timekeeping +across live migration and snapshot/restore operations. + +The binding has a required memory region containing the vmclock_abi +structure and an optional interrupt for clock disruption notifications. + +The full specification is at https://david.woodhou.se/VMClock.pdf + +Signed-off-by: David Woodhouse +Signed-off-by: Babis Chalios +Reviewed-by: Krzysztof Kozlowski +--- + .../bindings/ptp/amazon,vmclock.yaml | 46 +++++++++++++++++++ + 1 file changed, 46 insertions(+) + create mode 100644 Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml + +diff --git a/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml +new file mode 100644 +index 000000000000..b98fee20ce5f +--- /dev/null ++++ b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml +@@ -0,0 +1,46 @@ ++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/ptp/amazon,vmclock.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: Virtual Machine Clock ++ ++maintainers: ++ - David Woodhouse ++ ++description: ++ The vmclock device provides a precise clock source and allows for ++ accurate timekeeping across live migration and snapshot/restore ++ operations. 
The full specification of the shared data structure ++ is available at https://david.woodhou.se/VMClock.pdf ++ ++properties: ++ compatible: ++ const: amazon,vmclock ++ ++ reg: ++ description: ++ Specifies the shared memory region containing the vmclock_abi structure. ++ maxItems: 1 ++ ++ interrupts: ++ description: ++ Interrupt used to notify when the contents of the vmclock_abi structure ++ have been updated. ++ maxItems: 1 ++ ++required: ++ - compatible ++ - reg ++ ++additionalProperties: false ++ ++examples: ++ - | ++ #include ++ ptp@80000000 { ++ compatible = "amazon,vmclock"; ++ reg = <0x80000000 0x1000>; ++ interrupts = ; ++ }; +-- +2.34.1 + diff --git a/resources/patches/vmclock/5.10/0004-ptp-ptp_vmclock-Add-device-tree-support.patch b/resources/patches/vmclock/5.10/0004-ptp-ptp_vmclock-Add-device-tree-support.patch new file mode 100644 index 00000000000..e7b4fbf568d --- /dev/null +++ b/resources/patches/vmclock/5.10/0004-ptp-ptp_vmclock-Add-device-tree-support.patch @@ -0,0 +1,180 @@ +From a70db7595dac8a3b84d14a8dc62b4067cc152055 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 2 Dec 2025 20:12:07 +0000 +Subject: [PATCH 4/4] ptp: ptp_vmclock: Add device tree support + +Add device tree support to the ptp_vmclock driver, allowing it to probe +via device tree in addition to ACPI. + +Handle optional interrupt for clock disruption notifications, mirroring +the ACPI notification behavior. 
+ +Signed-off-by: David Woodhouse +Signed-off-by: Babis Chalios +--- + drivers/ptp/Kconfig | 2 +- + drivers/ptp/ptp_vmclock.c | 83 ++++++++++++++++++++++++++++++++++++--- + 2 files changed, 78 insertions(+), 7 deletions(-) + +diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig +index 44bc88a0a772..8c1aad77d708 100644 +--- a/drivers/ptp/Kconfig ++++ b/drivers/ptp/Kconfig +@@ -121,7 +121,7 @@ config PTP_1588_CLOCK_KVM + config PTP_1588_CLOCK_VMCLOCK + tristate "Virtual machine PTP clock" + depends on X86_TSC || ARM_ARCH_TIMER +- depends on PTP_1588_CLOCK && ACPI && ARCH_SUPPORTS_INT128 ++ depends on PTP_1588_CLOCK && ARCH_SUPPORTS_INT128 + default y + help + This driver adds support for using a virtual precision clock +diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c +index 4673915c43e7..4b8c7fa4ea91 100644 +--- a/drivers/ptp/ptp_vmclock.c ++++ b/drivers/ptp/ptp_vmclock.c +@@ -14,10 +14,13 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include + #include ++#include + #include + #include + #include +@@ -453,6 +456,7 @@ static int vmclock_remove(struct platform_device *pdev) + return 0; + } + ++#ifdef CONFIG_ACPI + static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) + { + struct vmclock_state *st = data; +@@ -490,7 +494,7 @@ vmclock_acpi_notification_handler(acpi_handle __always_unused handle, + wake_up_interruptible(&st->disrupt_wait); + } + +-static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) ++static int vmclock_setup_acpi_notification(struct device *dev) + { + struct acpi_device *adev = ACPI_COMPANION(dev); + acpi_status status; +@@ -503,10 +507,6 @@ static int vmclock_setup_notification(struct device *dev, struct vmclock_state * + if (!adev) + return -ENODEV; + +- /* The device does not support notifications. 
Nothing else to do */ +- if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) +- return 0; +- + status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, + vmclock_acpi_notification_handler, + dev); +@@ -540,6 +540,70 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) + + return 0; + } ++#else ++static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) ++{ ++ return -EINVAL; ++} ++ ++static int vmclock_setup_acpi_notification(struct device *dev) ++{ ++ return -EINVAL; ++} ++ ++#endif ++ ++static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *dev) ++{ ++ struct device *device = dev; ++ struct vmclock_state *st = device->driver_data; ++ ++ wake_up_interruptible(&st->disrupt_wait); ++ return IRQ_HANDLED; ++} ++ ++static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st) ++{ ++ struct platform_device *pdev = to_platform_device(dev); ++ struct resource *res; ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENODEV; ++ ++ st->res = *res; ++ ++ return 0; ++} ++ ++static int vmclock_setup_of_notification(struct device *dev) ++{ ++ struct platform_device *pdev = to_platform_device(dev); ++ int irq; ++ ++ irq = platform_get_irq(pdev, 0); ++ if (irq < 0) ++ return irq; ++ ++ return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED, ++ "vmclock", dev); ++} ++ ++static int vmclock_setup_notification(struct device *dev, ++ struct vmclock_state *st) ++{ ++ /* The device does not support notifications. 
Nothing else to do */ ++ if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) ++ return 0; ++ ++ if (has_acpi_companion(dev)) { ++ return vmclock_setup_acpi_notification(dev); ++ } else { ++ return vmclock_setup_of_notification(dev); ++ } ++ ++} ++ + + static void vmclock_put_idx(void *data) + { +@@ -561,7 +625,7 @@ static int vmclock_probe(struct platform_device *pdev) + if (has_acpi_companion(dev)) + ret = vmclock_probe_acpi(dev, st); + else +- ret = -EINVAL; /* Only ACPI for now */ ++ ret = vmclock_probe_dt(dev, st); + + if (ret) { + dev_info(dev, "Failed to obtain physical address: %d\n", ret); +@@ -673,12 +737,19 @@ static const struct acpi_device_id vmclock_acpi_ids[] = { + }; + MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); + ++static const struct of_device_id vmclock_of_ids[] = { ++ { .compatible = "amazon,vmclock", }, ++ { }, ++}; ++MODULE_DEVICE_TABLE(of, vmclock_of_ids); ++ + static struct platform_driver vmclock_platform_driver = { + .probe = vmclock_probe, + .remove = vmclock_remove, + .driver = { + .name = "vmclock", + .acpi_match_table = vmclock_acpi_ids, ++ .of_match_table = vmclock_of_ids, + }, + }; + +-- +2.34.1 + diff --git a/resources/patches/vmclock/6.1/0001-ptp-vmclock-add-vm-generation-counter.patch b/resources/patches/vmclock/6.1/0001-ptp-vmclock-add-vm-generation-counter.patch new file mode 100644 index 00000000000..28588e1c924 --- /dev/null +++ b/resources/patches/vmclock/6.1/0001-ptp-vmclock-add-vm-generation-counter.patch @@ -0,0 +1,60 @@ +From a46562c571c6d50e7afc3994b33d0ffb61ff7409 Mon Sep 17 00:00:00 2001 +From: Babis Chalios +Date: Tue, 2 Dec 2025 20:11:32 +0000 +Subject: [PATCH 1/4] ptp: vmclock: add vm generation counter + +Similar to live migration, loading a VM from some saved state (aka +snapshot) is also an event that calls for clock adjustments in the +guest. However, guests might want to take more actions as a response to +such events, e.g. 
discarding UUIDs, resetting network connections, +reseeding entropy pools, etc. These are actions that guests don't +typically take during live migration, so add a new field in the +vmclock_abi called vm_generation_counter which informs the guest about +such events. + +Hypervisor advertises support for vm_generation_counter through the +VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT flag. Users need to check the +presence of this bit in vmclock_abi flags field before using this field. + +Signed-off-by: Babis Chalios +Reviewed-by: David Woodhouse +--- + include/uapi/linux/vmclock-abi.h | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h +index d7ca44313bf8..75deb6ae2b27 100644 +--- a/include/uapi/linux/vmclock-abi.h ++++ b/include/uapi/linux/vmclock-abi.h +@@ -119,6 +119,12 @@ struct vmclock_abi { + * bit again after the update, using the about-to-be-valid fields. + */ + #define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) ++ /* ++ * If the VM_GEN_COUNTER_PRESENT flag is set, the hypervisor will ++ * bump the vm_generation_counter field every time the guest is ++ * loaded from some save state (restored from a snapshot). ++ */ ++#define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) + + uint8_t pad[2]; + uint8_t clock_status; +@@ -183,6 +189,15 @@ struct vmclock_abi { + uint64_t time_frac_sec; /* (seconds >> 64) */ + uint64_t time_esterror_picosec; /* (± picoseconds) */ + uint64_t time_maxerror_picosec; /* (± picoseconds) */ ++ ++ /* ++ * This field changes to another non-repeating value when the guest ++ * has been loaded from a snapshot. In addition to handling a ++ * disruption in time (which will also be signalled through the ++ * disruption_marker field), a guest may wish to discard UUIDs, ++ * reset network connections, reseed entropy, etc. 
++ */ ++ uint64_t vm_generation_counter; + }; + + #endif /* __VMCLOCK_ABI_H__ */ +-- +2.34.1 + diff --git a/resources/patches/vmclock/6.1/0002-ptp-vmclock-support-device-notifications.patch b/resources/patches/vmclock/6.1/0002-ptp-vmclock-support-device-notifications.patch new file mode 100644 index 00000000000..f9cde8c7242 --- /dev/null +++ b/resources/patches/vmclock/6.1/0002-ptp-vmclock-support-device-notifications.patch @@ -0,0 +1,257 @@ +From d0a6bf47dd6cd2a9ed17dbdc32dd34a6ba0f5b5f Mon Sep 17 00:00:00 2001 +From: Babis Chalios +Date: Tue, 2 Dec 2025 20:11:44 +0000 +Subject: [PATCH 2/4] ptp: vmclock: support device notifications + +Add optional support for device notifications in VMClock. When +supported, the hypervisor will send a device notification every time it +updates the seq_count to a new even value. + +Moreover, add support for poll() in VMClock as a means to propagate this +notification to user space. poll() will return a POLLIN event to +listeners every time seq_count changes to a value different than the one +last seen (since open() or last read()/pread()). This means that when +poll() returns a POLLIN event, listeners need to use read() to observe +what has changed and update the reader's view of seq_count. In other +words, after poll() has returned, all subsequent calls to poll() will +immediately return with a POLLIN event until the listener calls read(). + +The device advertises support for the notification mechanism by setting +flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If +the flag is not present the driver won't set up the ACPI notification +handler and poll() will always immediately return POLLHUP. 
+ +Signed-off-by: Babis Chalios +--- + drivers/ptp/ptp_vmclock.c | 130 ++++++++++++++++++++++++++++--- + include/uapi/linux/vmclock-abi.h | 5 ++ + 2 files changed, 126 insertions(+), 9 deletions(-) + +diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c +index 1ce69eada4b2..4673915c43e7 100644 +--- a/drivers/ptp/ptp_vmclock.c ++++ b/drivers/ptp/ptp_vmclock.c +@@ -5,6 +5,9 @@ + * Copyright © 2024 Amazon.com, Inc. or its affiliates. + */ + ++#include "linux/poll.h" ++#include "linux/types.h" ++#include "linux/wait.h" + #include + #include + #include +@@ -37,6 +40,7 @@ struct vmclock_state { + struct resource res; + struct vmclock_abi *clk; + struct miscdevice miscdev; ++ wait_queue_head_t disrupt_wait; + struct ptp_clock_info ptp_clock_info; + struct ptp_clock *ptp_clock; + enum clocksource_ids cs_id, sys_cs_id; +@@ -311,10 +315,15 @@ static const struct ptp_clock_info ptp_vmclock_info = { + .getcrosststamp = ptp_vmclock_getcrosststamp, + }; + ++struct vmclock_file_state { ++ struct vmclock_state *st; ++ atomic_t seq; ++}; ++ + static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) + { +- struct vmclock_state *st = container_of(fp->private_data, +- struct vmclock_state, miscdev); ++ struct vmclock_file_state *fst = fp->private_data; ++ struct vmclock_state *st = fst->st; + + if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ) + return -EROFS; +@@ -333,11 +342,12 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) + static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + size_t count, loff_t *ppos) + { +- struct vmclock_state *st = container_of(fp->private_data, +- struct vmclock_state, miscdev); ++ struct vmclock_file_state *fst = fp->private_data; ++ struct vmclock_state *st = fst->st; ++ + ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); + size_t max_count; +- int32_t seq; ++ int32_t seq, old_seq; + + if (*ppos >= PAGE_SIZE) + return 0; +@@ -346,6 +356,7 @@ static ssize_t 
vmclock_miscdev_read(struct file *fp, char __user *buf, + if (count > max_count) + count = max_count; + ++ old_seq = atomic_read(&fst->seq); + while (1) { + seq = st->clk->seq_count & ~1ULL; + virt_rmb(); +@@ -354,8 +365,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + return -EFAULT; + + virt_rmb(); +- if (seq == st->clk->seq_count) +- break; ++ if (seq == st->clk->seq_count) { ++ /* ++ * Either we updated fst->seq to seq (the latest version we observed) ++ * or someone else did (old_seq == seq), so we can break. ++ */ ++ if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) || ++ old_seq == seq) { ++ break; ++ } ++ } + + if (ktime_after(ktime_get(), deadline)) + return -ETIMEDOUT; +@@ -365,9 +384,57 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + return count; + } + ++static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait) ++{ ++ struct vmclock_file_state *fst = fp->private_data; ++ struct vmclock_state *st = fst->st; ++ uint32_t seq; ++ ++ /* ++ * Hypervisor will not send us any notifications, so fail immediately ++ * to avoid having caller sleeping for ever. 
++ */ ++ if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) ++ return POLLHUP; ++ ++ poll_wait(fp, &st->disrupt_wait, wait); ++ ++ seq = st->clk->seq_count; ++ if (atomic_read(&fst->seq) != seq) ++ return POLLIN | POLLRDNORM; ++ ++ return 0; ++} ++ ++static int vmclock_miscdev_open(struct inode *inode, struct file *fp) ++{ ++ struct vmclock_state *st = container_of(fp->private_data, ++ struct vmclock_state, miscdev); ++ struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL); ++ ++ if (!fst) ++ return -ENOMEM; ++ ++ fst->st = st; ++ atomic_set(&fst->seq, 0); ++ ++ fp->private_data = fst; ++ ++ return 0; ++} ++ ++static int vmclock_miscdev_release(struct inode *inode, struct file *fp) ++{ ++ kfree(fp->private_data); ++ return 0; ++} ++ + static const struct file_operations vmclock_miscdev_fops = { +- .mmap = vmclock_miscdev_mmap, +- .read = vmclock_miscdev_read, ++ .open = vmclock_miscdev_open, ++ .release = vmclock_miscdev_release, ++ .mmap = vmclock_miscdev_mmap, ++ .read = vmclock_miscdev_read, ++ .poll = vmclock_miscdev_poll, + }; + + /* module operations */ +@@ -413,6 +480,44 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data + return AE_ERROR; + } + ++static void ++vmclock_acpi_notification_handler(acpi_handle __always_unused handle, ++ u32 __always_unused event, void *dev) ++{ ++ struct device *device = dev; ++ struct vmclock_state *st = device->driver_data; ++ ++ wake_up_interruptible(&st->disrupt_wait); ++} ++ ++static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) ++{ ++ struct acpi_device *adev = ACPI_COMPANION(dev); ++ acpi_status status; ++ ++ /* ++ * This should never happen as this function is only called when ++ * has_acpi_companion(dev) is true, but the logic is sufficiently ++ * complex that Coverity can't see the tautology. ++ */ ++ if (!adev) ++ return -ENODEV; ++ ++ /* The device does not support notifications. 
Nothing else to do */ ++ if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) ++ return 0; ++ ++ status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, ++ vmclock_acpi_notification_handler, ++ dev); ++ if (ACPI_FAILURE(status)) { ++ dev_err(dev, "failed to install notification handler"); ++ return -ENODEV; ++ } ++ ++ return 0; ++} ++ + static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) + { + struct acpi_device *adev = ACPI_COMPANION(dev); +@@ -495,6 +600,11 @@ static int vmclock_probe(struct platform_device *pdev) + goto out; + } + ++ init_waitqueue_head(&st->disrupt_wait); ++ ret = vmclock_setup_notification(dev, st); ++ if (ret) ++ return ret; ++ + /* If the structure is big enough, it can be mapped to userspace */ + if (st->clk->size >= PAGE_SIZE) { + st->miscdev.minor = MISC_DYNAMIC_MINOR; +@@ -544,6 +654,8 @@ static int vmclock_probe(struct platform_device *pdev) + goto out; + } + ++ dev->driver_data = st; ++ + dev_info(dev, "%s: registered %s%s%s\n", st->name, + st->miscdev.minor ? "miscdev" : "", + (st->miscdev.minor && st->ptp_clock) ? ", " : "", +diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h +index 75deb6ae2b27..4b7cd2b8532c 100644 +--- a/include/uapi/linux/vmclock-abi.h ++++ b/include/uapi/linux/vmclock-abi.h +@@ -125,6 +125,11 @@ struct vmclock_abi { + * loaded from some save state (restored from a snapshot). + */ + #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) ++ /* ++ * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send ++ * a notification every time it updates seq_count to a new even number. 
++ */ ++#define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) + + uint8_t pad[2]; + uint8_t clock_status; +-- +2.34.1 + diff --git a/resources/patches/vmclock/6.1/0003-dt-bindings-ptp-Add-amazon-vmclock.patch b/resources/patches/vmclock/6.1/0003-dt-bindings-ptp-Add-amazon-vmclock.patch new file mode 100644 index 00000000000..67fea022740 --- /dev/null +++ b/resources/patches/vmclock/6.1/0003-dt-bindings-ptp-Add-amazon-vmclock.patch @@ -0,0 +1,76 @@ +From d594b01069fb6fabb068379b59bd26e59dbd6661 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 2 Dec 2025 20:11:55 +0000 +Subject: [PATCH 3/4] dt-bindings: ptp: Add amazon,vmclock + +The vmclock device provides a PTP clock source and precise timekeeping +across live migration and snapshot/restore operations. + +The binding has a required memory region containing the vmclock_abi +structure and an optional interrupt for clock disruption notifications. + +The full specification is at https://david.woodhou.se/VMClock.pdf + +Signed-off-by: David Woodhouse +Signed-off-by: Babis Chalios +Reviewed-by: Krzysztof Kozlowski +--- + .../bindings/ptp/amazon,vmclock.yaml | 46 +++++++++++++++++++ + 1 file changed, 46 insertions(+) + create mode 100644 Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml + +diff --git a/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml +new file mode 100644 +index 000000000000..b98fee20ce5f +--- /dev/null ++++ b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml +@@ -0,0 +1,46 @@ ++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/ptp/amazon,vmclock.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: Virtual Machine Clock ++ ++maintainers: ++ - David Woodhouse ++ ++description: ++ The vmclock device provides a precise clock source and allows for ++ accurate timekeeping across live migration and snapshot/restore ++ operations. 
The full specification of the shared data structure ++ is available at https://david.woodhou.se/VMClock.pdf ++ ++properties: ++ compatible: ++ const: amazon,vmclock ++ ++ reg: ++ description: ++ Specifies the shared memory region containing the vmclock_abi structure. ++ maxItems: 1 ++ ++ interrupts: ++ description: ++ Interrupt used to notify when the contents of the vmclock_abi structure ++ have been updated. ++ maxItems: 1 ++ ++required: ++ - compatible ++ - reg ++ ++additionalProperties: false ++ ++examples: ++ - | ++ #include ++ ptp@80000000 { ++ compatible = "amazon,vmclock"; ++ reg = <0x80000000 0x1000>; ++ interrupts = ; ++ }; +-- +2.34.1 + diff --git a/resources/patches/vmclock/6.1/0004-ptp-ptp_vmclock-Add-device-tree-support.patch b/resources/patches/vmclock/6.1/0004-ptp-ptp_vmclock-Add-device-tree-support.patch new file mode 100644 index 00000000000..e7b4fbf568d --- /dev/null +++ b/resources/patches/vmclock/6.1/0004-ptp-ptp_vmclock-Add-device-tree-support.patch @@ -0,0 +1,180 @@ +From a70db7595dac8a3b84d14a8dc62b4067cc152055 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 2 Dec 2025 20:12:07 +0000 +Subject: [PATCH 4/4] ptp: ptp_vmclock: Add device tree support + +Add device tree support to the ptp_vmclock driver, allowing it to probe +via device tree in addition to ACPI. + +Handle optional interrupt for clock disruption notifications, mirroring +the ACPI notification behavior. 
+ +Signed-off-by: David Woodhouse +Signed-off-by: Babis Chalios +--- + drivers/ptp/Kconfig | 2 +- + drivers/ptp/ptp_vmclock.c | 83 ++++++++++++++++++++++++++++++++++++--- + 2 files changed, 78 insertions(+), 7 deletions(-) + +diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig +index 44bc88a0a772..8c1aad77d708 100644 +--- a/drivers/ptp/Kconfig ++++ b/drivers/ptp/Kconfig +@@ -121,7 +121,7 @@ config PTP_1588_CLOCK_KVM + config PTP_1588_CLOCK_VMCLOCK + tristate "Virtual machine PTP clock" + depends on X86_TSC || ARM_ARCH_TIMER +- depends on PTP_1588_CLOCK && ACPI && ARCH_SUPPORTS_INT128 ++ depends on PTP_1588_CLOCK && ARCH_SUPPORTS_INT128 + default y + help + This driver adds support for using a virtual precision clock +diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c +index 4673915c43e7..4b8c7fa4ea91 100644 +--- a/drivers/ptp/ptp_vmclock.c ++++ b/drivers/ptp/ptp_vmclock.c +@@ -14,10 +14,13 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include + #include ++#include + #include + #include + #include +@@ -453,6 +456,7 @@ static int vmclock_remove(struct platform_device *pdev) + return 0; + } + ++#ifdef CONFIG_ACPI + static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) + { + struct vmclock_state *st = data; +@@ -490,7 +494,7 @@ vmclock_acpi_notification_handler(acpi_handle __always_unused handle, + wake_up_interruptible(&st->disrupt_wait); + } + +-static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) ++static int vmclock_setup_acpi_notification(struct device *dev) + { + struct acpi_device *adev = ACPI_COMPANION(dev); + acpi_status status; +@@ -503,10 +507,6 @@ static int vmclock_setup_notification(struct device *dev, struct vmclock_state * + if (!adev) + return -ENODEV; + +- /* The device does not support notifications. 
Nothing else to do */ +- if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) +- return 0; +- + status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, + vmclock_acpi_notification_handler, + dev); +@@ -540,6 +540,70 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) + + return 0; + } ++#else ++static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) ++{ ++ return -EINVAL; ++} ++ ++static int vmclock_setup_acpi_notification(struct device *dev) ++{ ++ return -EINVAL; ++} ++ ++#endif ++ ++static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *dev) ++{ ++ struct device *device = dev; ++ struct vmclock_state *st = device->driver_data; ++ ++ wake_up_interruptible(&st->disrupt_wait); ++ return IRQ_HANDLED; ++} ++ ++static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st) ++{ ++ struct platform_device *pdev = to_platform_device(dev); ++ struct resource *res; ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENODEV; ++ ++ st->res = *res; ++ ++ return 0; ++} ++ ++static int vmclock_setup_of_notification(struct device *dev) ++{ ++ struct platform_device *pdev = to_platform_device(dev); ++ int irq; ++ ++ irq = platform_get_irq(pdev, 0); ++ if (irq < 0) ++ return irq; ++ ++ return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED, ++ "vmclock", dev); ++} ++ ++static int vmclock_setup_notification(struct device *dev, ++ struct vmclock_state *st) ++{ ++ /* The device does not support notifications. 
Nothing else to do */ ++ if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) ++ return 0; ++ ++ if (has_acpi_companion(dev)) { ++ return vmclock_setup_acpi_notification(dev); ++ } else { ++ return vmclock_setup_of_notification(dev); ++ } ++ ++} ++ + + static void vmclock_put_idx(void *data) + { +@@ -561,7 +625,7 @@ static int vmclock_probe(struct platform_device *pdev) + if (has_acpi_companion(dev)) + ret = vmclock_probe_acpi(dev, st); + else +- ret = -EINVAL; /* Only ACPI for now */ ++ ret = vmclock_probe_dt(dev, st); + + if (ret) { + dev_info(dev, "Failed to obtain physical address: %d\n", ret); +@@ -673,12 +737,19 @@ static const struct acpi_device_id vmclock_acpi_ids[] = { + }; + MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); + ++static const struct of_device_id vmclock_of_ids[] = { ++ { .compatible = "amazon,vmclock", }, ++ { }, ++}; ++MODULE_DEVICE_TABLE(of, vmclock_of_ids); ++ + static struct platform_driver vmclock_platform_driver = { + .probe = vmclock_probe, + .remove = vmclock_remove, + .driver = { + .name = "vmclock", + .acpi_match_table = vmclock_acpi_ids, ++ .of_match_table = vmclock_of_ids, + }, + }; + +-- +2.34.1 + diff --git a/resources/rebuild.sh b/resources/rebuild.sh index 505afd555d1..50e9716c231 100755 --- a/resources/rebuild.sh +++ b/resources/rebuild.sh @@ -165,7 +165,18 @@ function build_al_kernel { # fails immediately after clone because nothing is checked out make distclean || true - git checkout $(get_tag $KERNEL_VERSION) + TAG=$(get_tag $KERNEL_VERSION) + + git checkout $TAG + # Create a temporary branch where we can apply patches and then + # easily discard them + git checkout -B tmp-$TAG + + # Apply any patchset we have for our kernels + for patchset in ../patches/*; do + echo "Applying patchset ${patchset}/${KERNEL_VERSION}" + git apply ${patchset}/${KERNEL_VERSION}/*.patch + done arch=$(uname -m) if [ "$arch" = "x86_64" ]; then @@ -194,6 +205,12 @@ function build_al_kernel { cp -v $binary_path $OUTPUT_FILE cp -v 
.config $OUTPUT_FILE.config + # Undo any patches previously applied, so that we can build the same kernel with different + # configs, e.g. no-acpi + git reset --hard HEAD + git clean -f -d + git checkout - + popd &>/dev/null } @@ -253,10 +270,10 @@ function build_al_kernels { VMCLOCK_CONFIG="$PWD/guest_configs/vmclock.config" if [[ "$KERNEL_VERSION" == @(all|5.10) ]]; then - build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" + build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$VMCLOCK_CONFIG" fi if [[ $ARCH == "x86_64" && "$KERNEL_VERSION" == @(all|5.10-no-acpi) ]]; then - build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10-no-acpi.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" + build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10-no-acpi.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$VMCLOCK_CONFIG" fi if [[ "$KERNEL_VERSION" == @(all|6.1) ]]; then build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-6.1.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$VMCLOCK_CONFIG" @@ -268,7 +285,7 @@ function build_al_kernels { OUTPUT_DIR=$OUTPUT_DIR/debug mkdir -pv $OUTPUT_DIR if [[ "$KERNEL_VERSION" == @(all|5.10) ]]; then - build_al_kernel "$PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10.config" "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$FTRACE_CONFIG" "$DEBUG_CONFIG" + build_al_kernel "$PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10.config" "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$FTRACE_CONFIG" "$DEBUG_CONFIG" "$VMCLOCK_CONFIG" vmlinux_split_debuginfo $OUTPUT_DIR/vmlinux-5.10.* fi if [[ "$KERNEL_VERSION" == @(all|6.1) ]]; then